# Copyright 2018 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import logging
import json

from nodepool.driver import Provider
from nodepool.driver.azure import handler
from nodepool import zk
from . import azul


class AzureProvider(Provider):
    """Nodepool provider that manages nodes through the ``azul`` client.

    Resources created by this provider are tagged with
    ``nodepool_provider_name`` and ``nodepool_id``; the leak-cleanup
    methods rely on those tags to decide what they are allowed to touch.
    """

    log = logging.getLogger("nodepool.driver.azure.AzureProvider")

    def __init__(self, provider, *args):
        """Remember the provider configuration.

        :param provider: Provider configuration object; must expose
            ``zuul_public_key``, ``resource_group`` and
            ``resource_group_location``.
        :param args: Accepted and ignored.
        """
        self.provider = provider
        self.zuul_public_key = provider.zuul_public_key
        self.resource_group = provider.resource_group
        self.resource_group_location = provider.resource_group_location
        self._zk = None

    def start(self, zk_conn):
        """Store the ZooKeeper connection and build the Azure client.

        The credentials are read as JSON from ``provider.auth_path``.

        :param zk_conn: ZooKeeper connection used for node bookkeeping.
        """
        self.log.debug("Starting")
        self._zk = zk_conn
        self.log.debug(
            "Using %s as auth_path for Azure auth", self.provider.auth_path)
        with open(self.provider.auth_path) as f:
            self.azul = azul.AzureCloud(json.load(f))

    def stop(self):
        """Log that the provider is stopping; nothing needs releasing."""
        self.log.debug("Stopping")

    def listNodes(self):
        """Return the virtual machines in the provider's resource group."""
        return self.azul.virtual_machines.list(self.resource_group)

    def listNICs(self):
        """Return the network interfaces in the resource group."""
        return self.azul.network_interfaces.list(self.resource_group)

    def listPIPs(self):
        """Return the public IP addresses in the resource group."""
        return self.azul.public_ip_addresses.list(self.resource_group)

    def listDisks(self):
        """Return the disks in the provider's resource group."""
        return self.azul.disks.list(self.resource_group)

    def labelReady(self, name):
        """Always report the label as ready."""
        return True

    def join(self):
        """Always return True; there is nothing to wait for."""
        return True

    def getRequestHandler(self, poolworker, request):
        """Return an ``AzureNodeRequestHandler`` for ``request``."""
        return handler.AzureNodeRequestHandler(poolworker, request)

    def cleanupLeakedResources(self):
        """Clean up leaked VMs, NICs, public IPs and disks, in that order."""
        self._cleanupLeakedNodes()
        self._cleanupLeakedNICs()
        self._cleanupLeakedPIPs()
        self._cleanupLeakedDisks()

    def _ownedNodeId(self, resource):
        """Return the ``nodepool_id`` tag of a resource this provider owns.

        :param resource: Mapping with a ``tags`` entry, as returned by
            the ``list*`` methods.
        :returns: The ``nodepool_id`` tag value, or ``None`` when the
            resource is untagged, owned by another provider, or carries
            no ``nodepool_id`` tag.
        """
        tags = resource['tags']
        if tags is None:
            # Nothing to check ownership against.
            return None
        if 'nodepool_provider_name' not in tags:
            return None
        if tags['nodepool_provider_name'] != self.provider.name:
            # Another launcher, sharing this provider but configured
            # with a different name, owns this.
            return None
        # An owned resource without a node id cannot be matched against
        # ZooKeeper; report it as not ours instead of raising KeyError
        # and aborting the whole cleanup pass.
        return tags.get('nodepool_id')

    def _cleanupLeaked(self, resources, client, kind, failure_kind=None):
        """Delete owned resources whose node is unknown to ZooKeeper.

        :param resources: Iterable of resource mappings to inspect.
        :param client: Azure client exposing
            ``delete(resource_group, name)``.
        :param kind: Resource label used in the "leaked" log message.
        :param failure_kind: Label used in the failure log message;
            defaults to ``kind``.
        """
        if failure_kind is None:
            failure_kind = kind
        for resource in resources:
            node_id = self._ownedNodeId(resource)
            if node_id is None or self._zk.getNode(node_id):
                continue
            self.log.warning(
                "Marking for delete leaked %s %s (%s) in %s "
                "(unknown node id %s)",
                kind, resource['name'], resource['id'],
                self.provider.name, node_id
            )
            try:
                self.azul.wait_for_async_operation(
                    client.delete(self.resource_group, resource['name']))
            except azul.AzureError as e:
                # Best effort: a failed delete is retried on a later pass.
                self.log.warning(
                    "Failed to cleanup %s %s (%s). Error: %r",
                    failure_kind, resource['name'], resource['id'], e
                )

    def _cleanupLeakedDisks(self):
        """Delete disks owned by this provider whose node is unknown."""
        self._cleanupLeaked(self.listDisks(), self.azul.disks, "Disk")

    def _cleanupLeakedNICs(self):
        """Delete NICs owned by this provider whose node is unknown."""
        self._cleanupLeaked(
            self.listNICs(), self.azul.network_interfaces, "NIC")

    def _cleanupLeakedPIPs(self):
        """Delete public IPs owned by this provider whose node is unknown."""
        self._cleanupLeaked(
            self.listPIPs(), self.azul.public_ip_addresses, "PIP",
            failure_kind="IP")

    def _cleanupLeakedNodes(self):
        """Store a DELETING node for every leaked VM of this provider.

        A VM counts as leaked when it is tagged as ours but its
        ``nodepool_id`` is unknown to ZooKeeper and no DELETING node
        already refers to it.
        """
        deleting_ids = set()
        for node in self._zk.nodeIterator():
            if (node.state == zk.DELETING
                    and node.provider == self.provider.name):
                deleting_ids.add(node.external_id)

        for n in self.listNodes():
            node_id = self._ownedNodeId(n)
            if node_id is None:
                continue
            # external_id is stored as the VM *name* below, so that is
            # what must be compared. The id comparison is kept so any
            # record that stored the resource id still matches.
            if n['name'] in deleting_ids or n['id'] in deleting_ids:
                # Already deleting this node
                continue
            if self._zk.getNode(node_id):
                continue
            self.log.warning(
                "Marking for delete leaked instance %s (%s) in %s "
                "(unknown node id %s)",
                n['name'], n['id'], self.provider.name, node_id
            )
            node = zk.Node()
            node.external_id = n['name']
            node.provider = self.provider.name
            node.state = zk.DELETING
            self._zk.storeNode(node)

    def cleanupNode(self, server_id):
        """Delete a VM together with its NIC, public IPs and disks.

        Returns immediately when the VM lookup reports a 404. On any
        other lookup failure the deletions are still attempted, but the
        disks are left alone because they are matched via the VM's
        ``nodepool_id`` tag, which is then unknown.

        :param server_id: Name of the virtual machine.
        """
        self.log.debug('Server ID: %s', server_id)
        vm = None
        try:
            vm = self.azul.virtual_machines.get(
                self.resource_group, server_id)
        except azul.AzureError as e:
            if e.status_code == 404:
                return
            self.log.warning(
                "Failed to cleanup node %s. Error: %r",
                server_id, e
            )

        self.azul.wait_for_async_operation(
            self.azul.virtual_machines.delete(
                self.resource_group, server_id))

        self.azul.wait_for_async_operation(
            self.azul.network_interfaces.delete(
                self.resource_group, "%s-nic" % server_id))

        self.azul.wait_for_async_operation(
            self.azul.public_ip_addresses.delete(
                self.resource_group, "%s-nic-pip" % server_id))

        if self.provider.ipv6:
            self.azul.wait_for_async_operation(
                self.azul.public_ip_addresses.delete(
                    self.resource_group, "%s-nic-v6-pip" % server_id))

        vm_tags = vm['tags'] if vm is not None else None
        node_id = vm_tags.get('nodepool_id') if vm_tags else None
        if node_id is None:
            # Never match disks against None: that would select every
            # tagged disk that lacks a nodepool_id.
            self.log.warning(
                "Unable to determine nodepool id of %s; skipping disk "
                "cleanup", server_id)
            return

        # Start all disk deletions first, then wait, so they overlap.
        disk_handle_list = []
        for disk in self.listDisks():
            disk_tags = disk['tags']
            if disk_tags is None:
                continue
            if disk_tags.get('nodepool_id') != node_id:
                continue
            disk_handle_list.append(
                self.azul.disks.delete(self.resource_group, disk['name']))

        for async_disk_delete in disk_handle_list:
            self.azul.wait_for_async_operation(async_disk_delete)

    def waitForNodeCleanup(self, server_id):
        """Return True; cleanupNode already waits for its operations."""
        # All async tasks are handled in cleanupNode
        return True

    def getInstance(self, server_id):
        """Return the virtual machine named ``server_id``."""
        return self.azul.virtual_machines.get(
            self.resource_group, server_id)

    def createInstance(
            self, hostname, label, nodepool_id, nodepool_node_label=None):
        """Create the resource group, public IP, NIC and VM for a node.

        :param hostname: Name of the VM; also the prefix of the NIC and
            public IP names.
        :param label: Label configuration; ``tags``, ``cloud_image`` and
            ``hardware_profile`` are read from it.
        :param nodepool_id: Node id stored in the ``nodepool_id`` tag.
        :param nodepool_node_label: Optional value for the
            ``nodepool_node_label`` tag.
        :returns: The result of the virtual machine create call.
        """
        self.log.debug("Create resouce group")
        # Copy so per-node tags never leak into the shared label
        # configuration (and from there into later launches).
        tags = dict(label.tags or {})
        tags['nodepool_provider_name'] = self.provider.name
        if nodepool_node_label:
            tags['nodepool_node_label'] = nodepool_node_label
        self.azul.resource_groups.create(
            self.resource_group, {
                'location': self.provider.resource_group_location,
                # Separate copy: the group must not pick up nodepool_id.
                'tags': dict(tags)
            })
        tags['nodepool_id'] = nodepool_id
        v4_params_create = {
            'location': self.provider.location,
            'tags': tags,
            'properties': {
                'publicIpAllocationMethod': 'dynamic',
            },
        }
        v4_public_ip = self.azul.public_ip_addresses.create(
            self.resource_group,
            "%s-nic-pip" % hostname,
            v4_params_create,
        )

        nic_data = {
            'location': self.provider.location,
            'tags': tags,
            'properties': {
                'ipConfigurations': [{
                    'name': "nodepool-v4-ip-config",
                    'properties': {
                        'privateIpAddressVersion': 'IPv4',
                        'subnet': {
                            'id': self.provider.subnet_id
                        },
                        'publicIpAddress': {
                            'id': v4_public_ip['id']
                        }
                    }
                }]
            }
        }

        if self.provider.ipv6:
            nic_data['properties']['ipConfigurations'].append({
                'name': "zuul-v6-ip-config",
                'properties': {
                    'privateIpAddressVersion': 'IPv6',
                    'subnet': {
                        'id': self.provider.subnet_id
                    }
                }
            })

        nic = self.azul.network_interfaces.create(
            self.resource_group,
            "%s-nic" % hostname,
            nic_data
        )

        vm = self.azul.virtual_machines.create(
            self.resource_group, hostname, {
                'location': self.provider.location,
                'tags': tags,
                'properties': {
                    'osProfile': {
                        'computerName': hostname,
                        'adminUsername': label.cloud_image.username,
                        'linuxConfiguration': {
                            'ssh': {
                                'publicKeys': [{
                                    'path': "/home/%s/.ssh/authorized_keys" % (
                                        label.cloud_image.username),
                                    'keyData': label.cloud_image.key,
                                }]
                            },
                            "disablePasswordAuthentication": True,
                        }
                    },
                    'hardwareProfile': {
                        'vmSize': label.hardware_profile["vm-size"]
                    },
                    'storageProfile': {
                        'imageReference': label.cloud_image.image_reference
                    },
                    'networkProfile': {
                        'networkInterfaces': [{
                            'id': nic['id'],
                            'properties': {
                                'primary': True,
                            }
                        }]
                    },
                },
            })
        return vm

    def _getPrimaryNIC(self, instance):
        """Fetch the first network interface referenced by ``instance``.

        The resource group and NIC name are taken from fixed positions
        (4 and 8) of the '/'-separated resource id.
        """
        # Copied from https://github.com/Azure/azure-sdk-for-python/issues/897
        ni_reference = (instance['properties']['networkProfile']
                        ['networkInterfaces'][0])
        ni_reference = ni_reference['id'].split('/')
        ni_group = ni_reference[4]
        ni_name = ni_reference[8]
        return self.azul.network_interfaces.get(ni_group, ni_name)

    def getIpaddress(self, instance):
        """Return the public IP address of the instance's first NIC.

        :param instance: Virtual machine mapping as returned by
            ``getInstance``.
        """
        net_interface = self._getPrimaryNIC(instance)
        ip_reference = (net_interface['properties']['ipConfigurations'][0]
                        ['properties']['publicIPAddress'])
        ip_reference = ip_reference['id'].split('/')
        ip_group = ip_reference[4]
        ip_name = ip_reference[8]
        public_ip = self.azul.public_ip_addresses.get(ip_group, ip_name)
        return public_ip['properties']['ipAddress']

    def getv6Ipaddress(self, instance):
        """Return the private address of the NIC's second IP configuration.

        createInstance appends the IPv6 configuration as the second
        entry when ``provider.ipv6`` is set.

        :param instance: Virtual machine mapping as returned by
            ``getInstance``.
        """
        net_interface = self._getPrimaryNIC(instance)
        return (net_interface['properties']['ipConfigurations'][1]
                ['properties']['privateIPAddress'])