From 46e2d7a2b9d6573f926293300ee31b83427a9781 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Fri, 12 Mar 2021 18:45:33 -0800 Subject: [PATCH] Azure: Handle IPv6 Azure supports the following: Private IPv4 (with or without public IPv4) Private IPv6 (with or without public IPv6) Update the Azure state machine driver to handle all of the possible variants, and pick the best SKU/allocation method for the circumstances. Change-Id: Ia81edd5ccb8ac7b8f9e87cb6ce0a890748a80210 --- nodepool/driver/azurestate/adapter.py | 168 ++++++++++++++++++-------- nodepool/driver/azurestate/config.py | 40 +++++- nodepool/driver/statemachine.py | 9 +- 3 files changed, 156 insertions(+), 61 deletions(-) diff --git a/nodepool/driver/azurestate/adapter.py b/nodepool/driver/azurestate/adapter.py index 3e01f359b..71a1a53c7 100644 --- a/nodepool/driver/azurestate/adapter.py +++ b/nodepool/driver/azurestate/adapter.py @@ -24,10 +24,11 @@ from . import azul class AzureInstance(statemachine.Instance): - def __init__(self, vm, nic=None, pip=None): + def __init__(self, vm, nic=None, pip4=None, pip6=None): self.external_id = vm['name'] self.metadata = vm['tags'] or {} self.private_ipv4 = None + self.private_ipv6 = None self.public_ipv4 = None self.public_ipv6 = None @@ -36,12 +37,17 @@ class AzureInstance(statemachine.Instance): ip_config_prop = ip_config_data['properties'] if ip_config_prop['privateIPAddressVersion'] == 'IPv4': self.private_ipv4 = ip_config_prop['privateIPAddress'] + if ip_config_prop['privateIPAddressVersion'] == 'IPv6': + self.private_ipv6 = ip_config_prop['privateIPAddress'] # public_ipv6 - if pip: - self.public_ipv4 = pip['properties'].get('ipAddress') + if pip4: + self.public_ipv4 = pip4['properties'].get('ipAddress') + if pip6: + self.public_ipv6 = pip6['properties'].get('ipAddress') - self.interface_ip = self.public_ipv4 or self.private_ipv4 + self.interface_ip = (self.public_ipv4 or self.public_ipv6 or + self.private_ipv4 or self.private_ipv6) self.region = vm['location'] self.az = '' @@ -58,6 +64,9 @@ class AzureDeleteStateMachine(statemachine.StateMachine): self.adapter = adapter self.external_id = external_id self.disk_names = [] + self.disks = [] + self.pip4 = None + self.pip6 = None def advance(self): if self.state == self.START: @@ -78,13 +87,16 @@ class AzureDeleteStateMachine(statemachine.StateMachine): if self.state == self.NIC_DELETING: self.nic = self.adapter._refresh_delete(self.nic) if self.nic is None: - self.pip = self.adapter._deletePublicIPAddress( - self.external_id + '-nic-pip') + self.pip4 = self.adapter._deletePublicIPAddress( + self.external_id + '-pip-IPv4') + self.pip6 = self.adapter._deletePublicIPAddress( + self.external_id + '-pip-IPv6') self.state = self.PIP_DELETING if self.state == self.PIP_DELETING: - self.pip = self.adapter._refresh_delete(self.pip) - if self.pip is None: + self.pip4 = self.adapter._refresh_delete(self.pip4) + self.pip6 = self.adapter._refresh_delete(self.pip6) + if self.pip4 is None and self.pip6 is None: self.disks = [] for name in self.disk_names: disk = self.adapter._deleteDisk(name) @@ -119,25 +131,56 @@ class AzureCreateStateMachine(statemachine.StateMachine): self.tags.update(metadata) self.hostname = hostname self.label = label - self.pip = None + self.pip4 = None + self.pip6 = None self.nic = None self.vm = None + # There are two parameters for IP addresses: SKU and + # allocation method. SKU is "basic" or "standard". + # Allocation method is "static" or "dynamic". Between IPv4 + # and v6, SKUs cannot be mixed (the same sku must be used for + # both protocols). The standard SKU only supports static + # allocation. Static is cheaper than dynamic, but basic is + # cheaper than standard. Also, dynamic is faster than static. + # Therefore, if IPv6 is used at all, standard+static for + # everything; otherwise basic+dynamic in an IPv4-only + # situation. + if label.pool.ipv6: + self.ip_sku = 'Standard' + self.ip_method = 'static' + else: + self.ip_sku = 'Basic' + self.ip_method = 'dynamic' def advance(self): if self.state == self.START: - self.pip = self.adapter._createPublicIPAddress( - self.tags, self.hostname) - self.state = self.PIP_CREATING self.external_id = self.hostname + if self.label.pool.public_ipv4: + self.pip4 = self.adapter._createPublicIPAddress( + self.tags, self.hostname, self.ip_sku, 'IPv4', + self.ip_method) + if self.label.pool.public_ipv6: + self.pip6 = self.adapter._createPublicIPAddress( + self.tags, self.hostname, self.ip_sku, 'IPv6', + self.ip_method) + self.state = self.PIP_CREATING if self.state == self.PIP_CREATING: - self.pip = self.adapter._refresh(self.pip) - if self.adapter._succeeded(self.pip): - self.nic = self.adapter._createNetworkInterface( - self.tags, self.hostname, self.pip) - self.state = self.NIC_CREATING - else: - return + if self.pip4: + self.pip4 = self.adapter._refresh(self.pip4) + if not self.adapter._succeeded(self.pip4): + return + if self.pip6: + self.pip6 = self.adapter._refresh(self.pip6) + if not self.adapter._succeeded(self.pip6): + return + # At this point, every pip we have has succeeded (we may + # have 0, 1, or 2). + self.nic = self.adapter._createNetworkInterface( + self.tags, self.hostname, + self.label.pool.ipv4, self.label.pool.ipv6, + self.pip4, self.pip6) + self.state = self.NIC_CREATING if self.state == self.NIC_CREATING: self.nic = self.adapter._refresh(self.nic) @@ -163,20 +206,30 @@ class AzureCreateStateMachine(statemachine.StateMachine): if self.state == self.NIC_QUERY: self.nic = self.adapter._refresh(self.nic, force=True) + all_found = True for ip_config_data in self.nic['properties']['ipConfigurations']: ip_config_prop = ip_config_data['properties'] - if ip_config_prop['privateIPAddressVersion'] == 'IPv4': - if 'privateIPAddress' in ip_config_prop: - self.state = self.PIP_QUERY + if 'privateIPAddress' not in ip_config_prop: + all_found = False + if all_found: + self.state = self.PIP_QUERY if self.state == self.PIP_QUERY: - self.pip = self.adapter._refresh(self.pip, force=True) - if 'ipAddress' in self.pip['properties']: + all_found = True + if self.pip4: + self.pip4 = self.adapter._refresh(self.pip4, force=True) + if 'ipAddress' not in self.pip4['properties']: + all_found = False + if self.pip6: + self.pip6 = self.adapter._refresh(self.pip6, force=True) + if 'ipAddress' not in self.pip6['properties']: + all_found = False + if all_found: self.state = self.COMPLETE if self.state == self.COMPLETE: self.complete = True - return AzureInstance(self.vm, self.nic, self.pip) + return AzureInstance(self.vm, self.nic, self.pip4, self.pip6) class AzureAdapter(statemachine.Adapter): @@ -283,17 +336,22 @@ class AzureAdapter(statemachine.Adapter): def _listPublicIPAddresses(self): return self.azul.public_ip_addresses.list(self.resource_group) - def _createPublicIPAddress(self, tags, hostname): + def _createPublicIPAddress(self, tags, hostname, sku, version, + allocation_method): v4_params_create = { 'location': self.provider.location, 'tags': tags, + 'sku': { + 'name': sku, + }, 'properties': { - 'publicIpAllocationMethod': 'dynamic', + 'publicIpAddressVersion': version, + 'publicIpAllocationMethod': allocation_method, }, } return self.azul.public_ip_addresses.create( self.resource_group, - "%s-nic-pip" % hostname, + "%s-pip-%s" % (hostname, version), v4_params_create, ) @@ -310,37 +368,43 @@ class AzureAdapter(statemachine.Adapter): def _listNetworkInterfaces(self): return self.azul.network_interfaces.list(self.resource_group) - def _createNetworkInterface(self, tags, hostname, pip): + def _createNetworkInterface(self, tags, hostname, ipv4, ipv6, pip4, pip6): + + def make_ip_config(name, version, subnet_id, pip): + ip_config = { + 'name': name, + 'properties': { + 'privateIpAddressVersion': version, + 'subnet': { + 'id': subnet_id + }, + } + } + if pip: + ip_config['properties']['publicIpAddress'] = { + 'id': pip['id'] + } + return ip_config + + ip_configs = [] + + if ipv4: + ip_configs.append(make_ip_config('nodepool-v4-ip-config', + 'IPv4', self.provider.subnet_id, + pip4)) + if ipv6: + ip_configs.append(make_ip_config('nodepool-v6-ip-config', + 'IPv6', self.provider.subnet_id, + pip6)) + nic_data = { 'location': self.provider.location, 'tags': tags, 'properties': { - 'ipConfigurations': [{ - 'name': "nodepool-v4-ip-config", - 'properties': { - 'privateIpAddressVersion': 'IPv4', - 'subnet': { - 'id': self.provider.subnet_id - }, - 'publicIpAddress': { - 'id': pip['id'] - } - } - }] + 'ipConfigurations': ip_configs } } - if self.provider.ipv6: - nic_data['properties']['ipConfigurations'].append({ - 'name': "nodepool-v6-ip-config", - 'properties': { - 'privateIpAddressVersion': 'IPv6', - 'subnet': { - 'id': self.provider.subnet_id - } - } - }) - return self.azul.network_interfaces.create( self.resource_group, "%s-nic" % hostname, diff --git a/nodepool/driver/azurestate/config.py b/nodepool/driver/azurestate/config.py index bd6058975..c7f3a48cc 100644 --- a/nodepool/driver/azurestate/config.py +++ b/nodepool/driver/azurestate/config.py @@ -114,9 +114,20 @@ class AzurePool(ConfigPool): def load(self, pool_config): self.name = pool_config['name'] self.max_servers = pool_config['max-servers'] - self.use_internal_ip = bool(pool_config.get('use-internal-ip', False)) - self.host_key_checking = bool(pool_config.get( - 'host-key-checking', True)) + self.public_ipv4 = pool_config.get('public-ipv4', + self.provider.public_ipv4) + self.public_ipv6 = pool_config.get('public-ipv6', + self.provider.public_ipv6) + self.ipv4 = pool_config.get('ipv4', self.provider.ipv4) + self.ipv6 = pool_config.get('ipv6', self.provider.ipv6) + self.ipv4 = self.ipv4 or self.public_ipv4 + self.ipv6 = self.ipv6 or self.public_ipv6 + if not self.ipv4 or self.ipv6: + self.ipv4 = True + self.use_internal_ip = pool_config.get( + 'use-internal-ip', self.provider.use_internal_ip) + self.host_key_checking = pool_config.get( + 'host-key-checking', self.provider.use_internal_ip) @staticmethod def getSchema(): @@ -126,6 +137,12 @@ class AzurePool(ConfigPool): pool.update({ v.Required('name'): str, v.Required('labels'): [azure_label], + 'ipv4': bool, + 'ipv6': bool, + 'public-ipv4': bool, + 'public-ipv6': bool, + 'use-internal-ip': bool, + 'host-key-checking': bool, }) return pool @@ -157,8 +174,15 @@ class AzureProviderConfig(ProviderConfig): # TODO(corvus): remove self.zuul_public_key = self.provider['zuul-public-key'] self.location = self.provider['location'] - self.subnet_id = self.provider['subnet-id'] - self.ipv6 = self.provider.get('ipv6', False) + self.subnet_id = self.provider.get('subnet-id') + # Don't use these directly; these are default values for + # labels. + self.public_ipv4 = self.provider.get('public-ipv4', False) + self.public_ipv6 = self.provider.get('public-ipv6', False) + self.ipv4 = self.provider.get('ipv4', None) + self.ipv6 = self.provider.get('ipv6', None) + self.use_internal_ip = self.provider.get('use-internal-ip', False) + self.host_key_checking = self.provider.get('host-key-checking', True) self.resource_group = self.provider['resource-group'] self.resource_group_location = self.provider['resource-group-location'] self.auth_path = self.provider.get( @@ -192,6 +216,12 @@ class AzureProviderConfig(ProviderConfig): v.Required('subnet-id'): str, v.Required('cloud-images'): [provider_cloud_images], v.Required('auth-path'): str, + 'ipv4': bool, + 'ipv6': bool, + 'public-ipv4': bool, + 'public-ipv6': bool, + 'use-internal-ip': bool, + 'host-key-checking': bool, }) return v.Schema(provider) diff --git a/nodepool/driver/statemachine.py b/nodepool/driver/statemachine.py index 51eda413b..d48bc0f48 100644 --- a/nodepool/driver/statemachine.py +++ b/nodepool/driver/statemachine.py @@ -103,8 +103,9 @@ class StateMachineNodeLauncher(stats.StatsReporter): pool = self.handler.pool label = pool.labels[self.node.type[0]] - if pool.use_internal_ip and instance.private_ipv4: - server_ip = instance.private_ipv4 + if (pool.use_internal_ip and + (instance.private_ipv4 or instance.private_ipv6)): + server_ip = instance.private_ipv4 or instance.private_ipv6 else: server_ip = instance.interface_ip @@ -160,8 +161,9 @@ class StateMachineNodeLauncher(stats.StatsReporter): node.external_id = state_machine.external_id self.zk.storeNode(node) if state_machine.complete and not self.keyscan_future: - self.log.debug("Submitting keyscan request") self.updateNodeFromInstance(instance) + self.log.debug("Submitting keyscan request for %s", + node.interface_ip) future = self.manager.keyscan_worker.submit( keyscan, node.id, node.interface_ip, @@ -427,7 +429,6 @@ class StateMachineProvider(Provider, QuotaSupport): def stop(self): self.log.debug("Stopping") - self.running = False if self.state_machine_thread: while self.launchers or self.deleters: time.sleep(1)