From de94afde5f48ebf67cb7db63ad2dcd9bd134502b Mon Sep 17 00:00:00 2001
From: Theodoros Tsioutsias
Date: Mon, 24 Jun 2019 13:49:42 +0200
Subject: [PATCH] [WIP] ng-9: Driver for nodegroup operations

This adds support for creating and deleting worker nodegroups using a
different stack per nodegroup. In order to be backwards compatible,
default nodegroups will remain in one stack.

Having this in mind, the cluster status is now calculated by
aggregating the statuses of the underlying stacks.

Change-Id: I97839ab8495ed5d860785dff1f6e3cc59b6a9ff7
---
 magnum/drivers/heat/driver.py                 | 145 ++++++++++++------
 magnum/drivers/k8s_fedora_atomic_v1/driver.py |  75 +++++++++
 .../templates/kubecluster.yaml                | 106 ++++++++++++-
 .../templates/kubemaster.yaml                 |   2 +-
 .../templates/kubeminion.yaml                 |   2 +-
 magnum/service/periodic.py                    |   4 +-
 6 files changed, 281 insertions(+), 53 deletions(-)

diff --git a/magnum/drivers/heat/driver.py b/magnum/drivers/heat/driver.py
index b3fb1e39e6..4c85d5b59a 100755
--- a/magnum/drivers/heat/driver.py
+++ b/magnum/drivers/heat/driver.py
@@ -11,6 +11,7 @@
 # under the License.
 
 import abc
+import collections
 import os
 
 import six
@@ -38,6 +39,10 @@ from magnum.objects import fields
 
 LOG = logging.getLogger(__name__)
 
+NodeGroupStatus = collections.namedtuple('NodeGroupStatus',
+                                         'status reason is_default')
+
+
 @six.add_metaclass(abc.ABCMeta)
 class HeatDriver(driver.Driver):
     """Base Driver class for using Heat
@@ -58,12 +63,14 @@ class HeatDriver(driver.Driver):
                            scale_manager=scale_manager)
 
     def _extract_template_definition(self, context, cluster,
-                                     scale_manager=None):
+                                     scale_manager=None,
+                                     nodegroups=None):
         cluster_template = conductor_utils.retrieve_cluster_template(context,
                                                                      cluster)
         definition = self.get_template_definition()
         return definition.extract_definition(context, cluster_template,
                                              cluster,
+                                             nodegroups=nodegroups,
                                              scale_manager=scale_manager)
 
     def _get_env_files(self, template_path, env_rel_paths):
@@ -135,7 +142,12 @@ class HeatDriver(driver.Driver):
         self.pre_delete_cluster(context, cluster)
 
         LOG.info("Starting to delete cluster %s", cluster.uuid)
-        self._delete_stack(context, clients.OpenStackClients(context), cluster)
+        osc = clients.OpenStackClients(context)
+        for ng in cluster.nodegroups:
+            if ng.is_default:
+                continue
+            self._delete_stack(context, osc, ng.stack_id)
+        self._delete_stack(context, osc, cluster.default_ng_master.stack_id)
 
     def resize_cluster(self, context, cluster, resize_manager,
                        node_count, nodes_to_remove, nodegroup=None,
@@ -174,6 +186,8 @@ class HeatDriver(driver.Driver):
             # no cluster_create_timeout value was passed in to the request
             # so falling back on configuration file value
             heat_timeout = cfg.CONF.cluster_heat.create_timeout
+
+        heat_params['is_cluster_stack'] = True
         fields = {
             'stack_name': stack_name,
             'parameters': heat_params,
@@ -222,10 +236,10 @@ class HeatDriver(driver.Driver):
 
         # Find what changed checking the stack params
         # against the ones in the template_def.
- stack = osc.heat().stacks.get(cluster.stack_id, + stack = osc.heat().stacks.get(nodegroup.stack_id, resolve_outputs=True) stack_params = stack.parameters - definition.add_nodegroup_params(cluster) + definition.add_nodegroup_params(cluster, nodegroups=[nodegroup]) heat_params = definition.get_stack_diff(context, stack_params, cluster) LOG.debug('Updating stack with these params: %s', heat_params) @@ -241,10 +255,10 @@ class HeatDriver(driver.Driver): } osc = clients.OpenStackClients(context) - osc.heat().stacks.update(cluster.stack_id, **fields) + osc.heat().stacks.update(nodegroup.stack_id, **fields) - def _delete_stack(self, context, osc, cluster): - osc.heat().stacks.delete(cluster.stack_id) + def _delete_stack(self, context, osc, stack_id): + osc.heat().stacks.delete(stack_id) class HeatPoller(object): @@ -260,39 +274,79 @@ class HeatPoller(object): def poll_and_check(self): # TODO(yuanying): temporary implementation to update api_address, # node_addresses and cluster status + ng_statuses = list() + for nodegroup in self.cluster.nodegroups: + self.nodegroup = nodegroup + status = self.extract_nodegroup_status() + ng_statuses.append(status) + self.aggregate_nodegroup_statuses(ng_statuses) + + def extract_nodegroup_status(self): try: # Do not resolve outputs by default. Resolving all # node IPs is expensive on heat. stack = self.openstack_client.heat().stacks.get( - self.cluster.stack_id, resolve_outputs=False) + self.nodegroup.stack_id, resolve_outputs=False) + + # poll_and_check is detached and polling long time to check + # status, so another user/client can call delete cluster/stack. + if stack.stack_status == fields.ClusterStatus.DELETE_COMPLETE: + if self.nodegroup.is_default: + if self.nodegroup.role == 'master': + # Delete all cluster related info + self._delete_complete() + else: + self.nodegroup.destroy() + + if stack.stack_status in (fields.ClusterStatus.CREATE_COMPLETE, + fields.ClusterStatus.UPDATE_COMPLETE): + # Resolve all outputs if the stack is COMPLETE + stack = self.openstack_client.heat().stacks.get( + self.nodegroup.stack_id, resolve_outputs=True) + + self._sync_cluster_and_template_status(stack) + elif stack.stack_status != self.nodegroup.status: + self.template_def.nodegroup_output_mappings = list() + self.template_def.update_outputs( + stack, self.cluster_template, self.cluster, + nodegroups=[self.nodegroup]) + self._sync_cluster_status(stack) + + if stack.stack_status in (fields.ClusterStatus.CREATE_FAILED, + fields.ClusterStatus.DELETE_FAILED, + fields.ClusterStatus.UPDATE_FAILED, + fields.ClusterStatus.ROLLBACK_COMPLETE, + fields.ClusterStatus.ROLLBACK_FAILED): + self._sync_cluster_and_template_status(stack) + self._cluster_failed(stack) except heatexc.NotFound: self._sync_missing_heat_stack() - return + return NodeGroupStatus(status=self.nodegroup.status, + is_default=self.nodegroup.is_default, + reason=self.nodegroup.status_reason) - # poll_and_check is detached and polling long time to check status, - # so another user/client can call delete cluster/stack. - if stack.stack_status == fields.ClusterStatus.DELETE_COMPLETE: - self._delete_complete() + def aggregate_nodegroup_statuses(self, ng_statuses): + # NOTE(ttsiouts): Aggregate the nodegroup statuses and set the + # cluster overall status. 
+ FAIL = '_FAILED' + IN_PROGRESS = '_IN_PROGRESS' + COMPLETE = '_COMPLETE' - if stack.stack_status in (fields.ClusterStatus.CREATE_COMPLETE, - fields.ClusterStatus.UPDATE_COMPLETE): - # Resolve all outputs if the stack is COMPLETE - stack = self.openstack_client.heat().stacks.get( - self.cluster.stack_id, resolve_outputs=True) - - self._sync_cluster_and_template_status(stack) - elif stack.stack_status != self.cluster.status: - self.template_def.update_outputs(stack, self.cluster_template, - self.cluster) - self._sync_cluster_status(stack) - - if stack.stack_status in (fields.ClusterStatus.CREATE_FAILED, - fields.ClusterStatus.DELETE_FAILED, - fields.ClusterStatus.UPDATE_FAILED, - fields.ClusterStatus.ROLLBACK_COMPLETE, - fields.ClusterStatus.ROLLBACK_FAILED): - self._sync_cluster_and_template_status(stack) - self._cluster_failed(stack) + CREATE = 'CREATE' + DELETE = 'DELETE' + UPDATE = 'UPDATE' + # Keep priority to the states below + for state in (IN_PROGRESS, FAIL, COMPLETE): + if any(ns.status.endswith(state) for ns in ng_statuses): + status = getattr(fields.ClusterStatus, UPDATE+state) + self.cluster.status = status + for action in (CREATE, DELETE): + if all(ns.status == action+state + for ns in ng_statuses if ns.is_default): + status = getattr(fields.ClusterStatus, action+state) + self.cluster.status = status + break + self.cluster.save() def _delete_complete(self): LOG.info('Cluster has been deleted, stack_id: %s', @@ -311,9 +365,9 @@ class HeatPoller(object): self.cluster.uuid) def _sync_cluster_status(self, stack): - self.cluster.status = stack.stack_status - self.cluster.status_reason = stack.stack_status_reason - self.cluster.save() + self.nodegroup.status = stack.stack_status + self.nodegroup.status_reason = stack.stack_status_reason + self.nodegroup.save() def get_version_info(self, stack): stack_param = self.template_def.get_heat_param( @@ -330,8 +384,10 @@ class HeatPoller(object): self.cluster.container_version = container_version def _sync_cluster_and_template_status(self, stack): + self.template_def.nodegroup_output_mappings = list() self.template_def.update_outputs(stack, self.cluster_template, - self.cluster) + self.cluster, + nodegroups=[self.nodegroup]) self.get_version_info(stack) self._sync_cluster_status(stack) @@ -345,19 +401,20 @@ class HeatPoller(object): def _sync_missing_heat_stack(self): if self.cluster.status == fields.ClusterStatus.DELETE_IN_PROGRESS: - self._delete_complete() + if self.nodegroup.is_default and self.nodegroup.role == 'master': + self._delete_complete() elif self.cluster.status == fields.ClusterStatus.CREATE_IN_PROGRESS: self._sync_missing_stack(fields.ClusterStatus.CREATE_FAILED) elif self.cluster.status == fields.ClusterStatus.UPDATE_IN_PROGRESS: self._sync_missing_stack(fields.ClusterStatus.UPDATE_FAILED) def _sync_missing_stack(self, new_status): - self.cluster.status = new_status - self.cluster.status_reason = _("Stack with id %s not found in " - "Heat.") % self.cluster.stack_id - self.cluster.save() - LOG.info("Cluster with id %(id)s has been set to " + self.nodegroup.status = new_status + self.nodegroup.status_reason = _("Stack with id %s not found in " + "Heat.") % self.cluster.stack_id + self.nodegroup.save() + LOG.info("Nodegroup with id %(id)s has been set to " "%(status)s due to stack with id %(sid)s " "not found in Heat.", - {'id': self.cluster.id, 'status': self.cluster.status, - 'sid': self.cluster.stack_id}) + {'id': self.nodegroup.uuid, 'status': self.nodegroup.status, + 'sid': self.nodegroup.stack_id}) diff --git 
a/magnum/drivers/k8s_fedora_atomic_v1/driver.py b/magnum/drivers/k8s_fedora_atomic_v1/driver.py index d5311a17ca..df63b17647 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/driver.py +++ b/magnum/drivers/k8s_fedora_atomic_v1/driver.py @@ -12,13 +12,20 @@ # License for the specific language governing permissions and limitations # under the License. +from oslo_config import cfg from oslo_log import log as logging from pbr.version import SemanticVersion as SV +from string import ascii_letters +from string import digits + +from heatclient.common import template_utils + from magnum.common import clients from magnum.common import exception from magnum.common import keystone from magnum.common import octavia +from magnum.common import short_id from magnum.drivers.common import k8s_monitor from magnum.drivers.heat import driver from magnum.drivers.k8s_fedora_atomic_v1 import template_def @@ -109,3 +116,71 @@ class Driver(driver.HeatDriver): 'disable_rollback': not rollback } osc.heat().stacks.update(cluster.stack_id, **fields) + + def create_nodegroup(self, context, cluster, nodegroup): + osc = clients.OpenStackClients(context) + template_path, stack_params, env_files = ( + self._extract_template_definition(context, cluster, + nodegroups=[nodegroup])) + + tpl_files, template = template_utils.get_template_contents( + template_path) + + environment_files, env_map = self._get_env_files(template_path, + env_files) + tpl_files.update(env_map) + + # Make sure we end up with a valid hostname + valid_chars = set(ascii_letters + digits + '-') + + # valid hostnames are 63 chars long, leaving enough room + # to add the random id (for uniqueness) + stack_name = "%s-%s" % (cluster.name[:20], nodegroup.name[:9]) + stack_name = stack_name.replace('_', '-') + stack_name = stack_name.replace('.', '-') + stack_name = ''.join(filter(valid_chars.__contains__, stack_name)) + + # Make sure no duplicate stack name + stack_name = '%s-%s' % (stack_name, short_id.generate_id()) + stack_name = stack_name.lower() + if cluster.create_timeout: + heat_timeout = cluster.create_timeout + else: + heat_timeout = cfg.CONF.cluster_heat.create_timeout + + stack_params['is_cluster_stack'] = False + + first_master = cluster.master_addresses[0] + network = osc.heat().resources.get(cluster.stack_id, 'network') + secgroup = osc.heat().resources.get(cluster.stack_id, + 'secgroup_kube_minion') + stack_params['existing_master_private_ip'] = first_master + stack_params['existing_security_group'] = secgroup.attributes['id'] + stack_params['fixed_network'] = network.attributes['fixed_network'] + stack_params['fixed_subnet'] = network.attributes['fixed_subnet'] + stack_params['trust_id'] = cluster.trust_id + stack_params['trustee_password'] = cluster.trustee_password + + for label in nodegroup.labels: + stack_params[label] = nodegroup.labels[label] + + fields = { + 'stack_name': stack_name, + 'parameters': stack_params, + 'environment_files': environment_files, + 'template': template, + 'files': tpl_files, + 'timeout_mins': heat_timeout + } + created_stack = osc.heat().stacks.create(**fields) + nodegroup.stack_id = created_stack['stack']['id'] + return created_stack + + def delete_nodegroup(self, context, cluster, nodegroup): + # Default nodegroups share stack_id so it will be deleted + # as soon as the cluster gets destroyed + if not nodegroup.stack_id: + nodegroup.destroy() + else: + osc = clients.OpenStackClients(context) + self._delete_stack(context, osc, nodegroup.stack_id) diff --git 
a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml index 34afa89c4b..3285e32d9d 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml @@ -1,12 +1,53 @@ -heat_template_version: 2014-10-16 +heat_template_version: queens description: > This template will boot a Kubernetes cluster with one or more minions (as specified by the number_of_minions parameter, which defaults to 1). +conditions: + master_only: + or: + - equals: + - get_param: role + - "master" + - equals: + - get_param: is_cluster_stack + - true + + worker_only: + or: + - equals: + - get_param: role + - "worker" + - equals: + - get_param: is_cluster_stack + - true + + create_cluster_resources: + equals: + - get_param: is_cluster_stack + - true + parameters: + # needs to become a list if we want to join master nodes? + existing_master_private_ip: + type: string + default: "" + + is_cluster_stack: + type: boolean + default: false + + role: + type: string + default: "" + + existing_security_group: + type: string + default: "" + ssh_key_name: type: string description: name of ssh key to be provisioned on our server @@ -33,10 +74,16 @@ parameters: master_image: type: string description: glance image used to boot the server + # When creating a new minion nodegroup this will not + # be provided by magnum. So make it default to "" + default: "" minion_image: type: string description: glance image used to boot the server + # When creating a new master nodegroup this will not + # be provided by magnum. So make it default to "" + default: "" master_flavor: type: string @@ -660,6 +707,7 @@ resources: # network: + condition: create_cluster_resources type: ../../common/templates/network.yaml properties: existing_network: {get_param: fixed_network} @@ -670,6 +718,7 @@ resources: private_network_name: private api_lb: + condition: create_cluster_resources type: ../../common/templates/lb_api.yaml properties: fixed_subnet: {get_attr: [network, fixed_subnet]} @@ -678,6 +727,7 @@ resources: port: {get_param: kubernetes_port} etcd_lb: + condition: create_cluster_resources type: ../../common/templates/lb_etcd.yaml properties: fixed_subnet: {get_attr: [network, fixed_subnet]} @@ -691,6 +741,7 @@ resources: # secgroup_kube_master: + condition: create_cluster_resources type: OS::Neutron::SecurityGroup properties: rules: @@ -727,6 +778,7 @@ resources: port_range_max: 8472 secgroup_kube_minion: + condition: create_cluster_resources type: OS::Neutron::SecurityGroup properties: rules: @@ -756,6 +808,7 @@ resources: # allow any traffic between worker nodes secgroup_rule_tcp_kube_minion: + condition: create_cluster_resources type: OS::Neutron::SecurityGroupRule properties: protocol: tcp @@ -764,6 +817,7 @@ resources: security_group: {get_resource: secgroup_kube_minion} remote_group: {get_resource: secgroup_kube_minion} secgroup_rule_udp_kube_minion: + condition: create_cluster_resources type: OS::Neutron::SecurityGroupRule properties: protocol: udp @@ -779,6 +833,7 @@ resources: # api_address_lb_switch: + condition: create_cluster_resources type: Magnum::ApiGatewaySwitcher properties: pool_public_ip: {get_attr: [api_lb, floating_address]} @@ -787,6 +842,7 @@ resources: master_private_ip: {get_attr: [kube_masters, resource.0.kube_master_ip]} etcd_address_lb_switch: + condition: create_cluster_resources type: Magnum::ApiGatewaySwitcher properties: pool_private_ip: {get_attr: [etcd_lb, address]} @@ 
-799,6 +855,7 @@ resources: # api_address_floating_switch: + condition: create_cluster_resources type: Magnum::FloatingIPAddressSwitcher properties: public_ip: {get_attr: [api_address_lb_switch, public_ip]} @@ -811,11 +868,13 @@ resources: # master_nodes_server_group: + condition: master_only type: OS::Nova::ServerGroup properties: policies: [{get_param: nodes_affinity_policy}] worker_nodes_server_group: + condition: worker_only type: OS::Nova::ServerGroup properties: policies: [{get_param: nodes_affinity_policy}] @@ -827,6 +886,7 @@ resources: # kube_masters: + condition: master_only type: OS::Heat::ResourceGroup depends_on: - network @@ -943,6 +1003,7 @@ resources: max_node_count: {get_param: max_node_count} kube_cluster_config: + condition: create_cluster_resources type: OS::Heat::SoftwareConfig properties: group: script @@ -985,6 +1046,7 @@ resources: - get_file: ../../common/templates/kubernetes/fragments/install-helm-modules.sh kube_cluster_deploy: + condition: create_cluster_resources type: OS::Heat::SoftwareDeployment properties: actions: ['CREATE'] @@ -1002,6 +1064,7 @@ resources: # kube_minions: + condition: worker_only type: OS::Heat::ResourceGroup depends_on: - network @@ -1021,12 +1084,32 @@ resources: ssh_key_name: {get_param: ssh_key_name} server_image: {get_param: minion_image} minion_flavor: {get_param: minion_flavor} - fixed_network: {get_attr: [network, fixed_network]} - fixed_subnet: {get_attr: [network, fixed_subnet]} +# fixed_network: {get_param: fixed_network} + fixed_network: + if: + - create_cluster_resources + - get_attr: [network, fixed_network] + - get_param: fixed_network +# fixed_subnet: {get_param: fixed_subnet} + fixed_subnet: + if: + - create_cluster_resources + - get_attr: [network, fixed_subnet] + - get_param: fixed_subnet network_driver: {get_param: network_driver} flannel_network_cidr: {get_param: flannel_network_cidr} - kube_master_ip: {get_attr: [api_address_lb_switch, private_ip]} - etcd_server_ip: {get_attr: [etcd_address_lb_switch, private_ip]} + #kube_master_ip: {get_param: existing_master_private_ip} + kube_master_ip: + if: + - create_cluster_resources + - get_attr: [api_address_lb_switch, private_ip] + - get_param: existing_master_private_ip + #etcd_server_ip: {get_param: existing_master_private_ip} + etcd_server_ip: + if: + - create_cluster_resources + - get_attr: [etcd_address_lb_switch, private_ip] + - get_param: existing_master_private_ip external_network: {get_param: external_network} kube_allow_priv: {get_param: kube_allow_priv} docker_volume_size: {get_param: docker_volume_size} @@ -1050,7 +1133,12 @@ resources: kubernetes_port: {get_param: kubernetes_port} tls_disabled: {get_param: tls_disabled} verify_ca: {get_param: verify_ca} - secgroup_kube_minion_id: {get_resource: secgroup_kube_minion} +# secgroup_kube_minion_id: {get_param: existing_security_group} + secgroup_kube_minion_id: + if: + - create_cluster_resources + - get_resource: secgroup_kube_minion + - get_param: existing_security_group http_proxy: {get_param: http_proxy} https_proxy: {get_param: https_proxy} no_proxy: {get_param: no_proxy} @@ -1079,6 +1167,7 @@ resources: outputs: api_address: + condition: create_cluster_resources value: str_replace: template: api_ip_address @@ -1089,6 +1178,7 @@ outputs: the Kubernetes API. registry_address: + condition: create_cluster_resources value: str_replace: template: localhost:port @@ -1099,22 +1189,26 @@ outputs: images. 
kube_masters_private: + condition: master_only value: {get_attr: [kube_masters, kube_master_ip]} description: > This is a list of the "private" IP addresses of all the Kubernetes masters. kube_masters: + condition: master_only value: {get_attr: [kube_masters, kube_master_external_ip]} description: > This is a list of the "public" IP addresses of all the Kubernetes masters. Use these IP addresses to log in to the Kubernetes masters via ssh. kube_minions_private: + condition: worker_only value: {get_attr: [kube_minions, kube_minion_ip]} description: > This is a list of the "private" IP addresses of all the Kubernetes minions. kube_minions: + condition: worker_only value: {get_attr: [kube_minions, kube_minion_external_ip]} description: > This is a list of the "public" IP addresses of all the Kubernetes minions. diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml index fb48d6b06f..01acef07a0 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml @@ -1,4 +1,4 @@ -heat_template_version: 2014-10-16 +heat_template_version: queens description: > This is a nested stack that defines a single Kubernetes master, This stack is diff --git a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml index 8c86996841..6ac2861fd0 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml +++ b/magnum/drivers/k8s_fedora_atomic_v1/templates/kubeminion.yaml @@ -1,4 +1,4 @@ -heat_template_version: 2014-10-16 +heat_template_version: queens description: > This is a nested stack that defines a single Kubernetes minion, This stack is diff --git a/magnum/service/periodic.py b/magnum/service/periodic.py index 5ea21f2b0a..d8a095acca 100755 --- a/magnum/service/periodic.py +++ b/magnum/service/periodic.py @@ -164,7 +164,9 @@ class MagnumPeriodicTasks(periodic_task.PeriodicTasks): objects.fields.ClusterStatus.DELETE_IN_PROGRESS, objects.fields.ClusterStatus.ROLLBACK_IN_PROGRESS] filters = {'status': status} - clusters = objects.Cluster.list(ctx, filters=filters) + nodegroups = objects.NodeGroup.list(ctx, filters=filters) + cluster_ids = set(ng.cluster_id for ng in nodegroups) + clusters = [objects.Cluster.get(ctx, cid) for cid in cluster_ids] if not clusters: return
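
The cluster status aggregation introduced in HeatPoller.aggregate_nodegroup_statuses
boils down to a small priority rule. A minimal standalone sketch of that rule, using
plain status strings instead of the magnum.objects.fields.ClusterStatus constants and
made-up sample data:

    import collections

    NodeGroupStatus = collections.namedtuple('NodeGroupStatus',
                                             'status reason is_default')


    def aggregate_statuses(ng_statuses):
        # Any *_IN_PROGRESS nodegroup wins over *_FAILED, which wins over
        # *_COMPLETE. The cluster then reports UPDATE_<suffix>, unless every
        # *default* nodegroup agrees on a CREATE or DELETE action, in which
        # case that action is reported instead.
        for suffix in ('_IN_PROGRESS', '_FAILED', '_COMPLETE'):
            if not any(ns.status.endswith(suffix) for ns in ng_statuses):
                continue
            status = 'UPDATE' + suffix
            for action in ('CREATE', 'DELETE'):
                if all(ns.status == action + suffix
                       for ns in ng_statuses if ns.is_default):
                    status = action + suffix
                    break
            return status


    # A new worker nodegroup is still building while the default nodegroups
    # are already CREATE_COMPLETE: the cluster shows up as UPDATE_IN_PROGRESS.
    statuses = [
        NodeGroupStatus('CREATE_COMPLETE', '', True),
        NodeGroupStatus('CREATE_COMPLETE', '', True),
        NodeGroupStatus('CREATE_IN_PROGRESS', '', False),
    ]
    print(aggregate_statuses(statuses))  # -> UPDATE_IN_PROGRESS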
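
Driver.create_nodegroup builds the nodegroup stack name so that it stays usable as a
hostname prefix. A standalone sketch of that sanitisation, with uuid4 standing in for
magnum.common.short_id.generate_id() and made-up cluster/nodegroup names:

    import uuid
    from string import ascii_letters, digits


    def nodegroup_stack_name(cluster_name, nodegroup_name):
        # Valid hostnames are at most 63 characters, so both parts are
        # truncated, leaving room for a random suffix that keeps the
        # stack name unique.
        valid_chars = set(ascii_letters + digits + '-')
        name = '%s-%s' % (cluster_name[:20], nodegroup_name[:9])
        name = name.replace('_', '-').replace('.', '-')
        name = ''.join(c for c in name if c in valid_chars)
        suffix = uuid.uuid4().hex[:12]  # short_id.generate_id() in Magnum
        return ('%s-%s' % (name, suffix)).lower()


    print(nodegroup_stack_name('my_prod.cluster', 'gpu_workers'))
    # e.g. my-prod-cluster-gpu-worke-3f9c2a1b7d40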
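
The three conditions added to kubecluster.yaml decide which resources each stack
creates, so the same template serves both the original cluster stack and a worker-only
nodegroup stack. Spelled out in Python purely for readability (this logic lives in the
Heat template, not in Magnum code):

    def stack_conditions(is_cluster_stack, role):
        # create_cluster_resources gates the shared pieces (network, load
        # balancers, security groups, cluster-wide software config); the
        # other two gate the master and minion resource groups.
        create_cluster_resources = is_cluster_stack
        master_only = is_cluster_stack or role == 'master'
        worker_only = is_cluster_stack or role == 'worker'
        return create_cluster_resources, master_only, worker_only


    # The original cluster stack creates everything ...
    print(stack_conditions(True, ''))         # (True, True, True)
    # ... while a stack for an extra worker nodegroup creates only the
    # minions, reusing network/security groups passed in as parameters.
    print(stack_conditions(False, 'worker'))  # (False, False, True)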