Browse Source

ng-9: Driver for nodegroup operations

This adds support for creating and deleting worker nodegroups
using a different stack per nodegroup. In order to be backwards
compatible, default nodegroups will remain in one stack.

With this in mind, the cluster status is now calculated by
aggregating the statuses of the underlying stacks.

Change-Id: I97839ab8495ed5d860785dff1f6e3cc59b6a9ff7
changes/90/667090/18
Theodoros Tsioutsias 3 years ago
parent
commit
e52f77b299
  1. 257
      magnum/drivers/heat/driver.py
  2. 5
      magnum/drivers/heat/k8s_coreos_template_def.py
  3. 4
      magnum/drivers/heat/k8s_fedora_template_def.py
  4. 6
      magnum/drivers/heat/k8s_template_def.py
  5. 4
      magnum/drivers/heat/swarm_fedora_template_def.py
  6. 4
      magnum/drivers/heat/swarm_mode_template_def.py
  7. 11
      magnum/drivers/heat/template_def.py
  8. 16
      magnum/drivers/k8s_fedora_atomic_v1/driver.py
  9. 101
      magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml
  10. 4
      magnum/drivers/mesos_ubuntu_v1/template_def.py
  11. 10
      magnum/tests/unit/conductor/handlers/test_cluster_conductor.py
  12. 6
      magnum/tests/unit/conductor/handlers/test_k8s_cluster_conductor.py
  13. 654
      magnum/tests/unit/drivers/test_heat_driver.py

257
magnum/drivers/heat/driver.py

@ -11,6 +11,7 @@
# under the License.
import abc
import collections
import os
import six
@ -41,6 +42,10 @@ from magnum.objects import fields
LOG = logging.getLogger(__name__)
NodeGroupStatus = collections.namedtuple('NodeGroupStatus',
'name status reason is_default')
@six.add_metaclass(abc.ABCMeta)
class HeatDriver(driver.Driver):
"""Base Driver class for using Heat
@ -61,12 +66,14 @@ class HeatDriver(driver.Driver):
scale_manager=scale_manager)
def _extract_template_definition(self, context, cluster,
scale_manager=None):
scale_manager=None,
nodegroups=None):
cluster_template = conductor_utils.retrieve_cluster_template(context,
cluster)
definition = self.get_template_definition()
return definition.extract_definition(context, cluster_template,
cluster,
nodegroups=nodegroups,
scale_manager=scale_manager)
def _get_env_files(self, template_path, env_rel_paths):
@ -96,14 +103,20 @@ class HeatDriver(driver.Driver):
def delete_federation(self, context, federation):
return NotImplementedError("Must implement 'delete_federation'")
def create_nodegroup(self, context, cluster, nodegroup):
raise NotImplementedError("Must implement 'create_nodegroup'.")
def update_nodegroup(self, context, cluster, nodegroup):
raise NotImplementedError("Must implement 'update_nodegroup'.")
# we just need to save the nodegroup here. This is because,
# at the moment, this method is used to update min and max node
# counts.
nodegroup.save()
def delete_nodegroup(self, context, cluster, nodegroup):
raise NotImplementedError("Must implement 'delete_nodegroup'.")
# Default nodegroups share stack_id so it will be deleted
# as soon as the cluster gets destroyed
if not nodegroup.stack_id:
nodegroup.destroy()
else:
osc = clients.OpenStackClients(context)
self._delete_stack(context, osc, nodegroup.stack_id)
def update_cluster_status(self, context, cluster):
if cluster.stack_id is None:
@ -128,6 +141,16 @@ class HeatDriver(driver.Driver):
rollback=False):
self._update_stack(context, cluster, scale_manager, rollback)
def create_nodegroup(self, context, cluster, nodegroup):
    """Create a dedicated heat stack for the given (worker) nodegroup.

    The new stack id is stored on the nodegroup so subsequent
    operations (update/delete/status polling) can address it.
    """
    osc = clients.OpenStackClients(context)
    new_stack = self._create_stack(context, osc, cluster,
                                   cluster.create_timeout,
                                   nodegroup=nodegroup)
    nodegroup.stack_id = new_stack['stack']['id']
def get_nodegroup_extra_params(self, cluster, osc):
    """Return extra heat params needed to create a nodegroup stack.

    Concrete drivers must override this: only they know which
    resources/outputs of the cluster stack a nodegroup stack reuses.
    """
    raise NotImplementedError(
        "Must implement 'get_nodegroup_extra_params'")
@abc.abstractmethod
def upgrade_cluster(self, context, cluster, cluster_template,
max_batch_size, nodegroup, scale_manager=None,
@ -138,7 +161,14 @@ class HeatDriver(driver.Driver):
self.pre_delete_cluster(context, cluster)
LOG.info("Starting to delete cluster %s", cluster.uuid)
self._delete_stack(context, clients.OpenStackClients(context), cluster)
osc = clients.OpenStackClients(context)
for ng in cluster.nodegroups:
ng.status = fields.ClusterStatus.DELETE_IN_PROGRESS
ng.save()
if ng.is_default:
continue
self._delete_stack(context, osc, ng.stack_id)
self._delete_stack(context, osc, cluster.default_ng_master.stack_id)
def resize_cluster(self, context, cluster, resize_manager,
node_count, nodes_to_remove, nodegroup=None,
@ -147,9 +177,13 @@ class HeatDriver(driver.Driver):
node_count, nodes_to_remove, nodegroup=nodegroup,
rollback=rollback)
def _create_stack(self, context, osc, cluster, cluster_create_timeout):
def _create_stack(self, context, osc, cluster, cluster_create_timeout,
nodegroup=None):
nodegroups = [nodegroup] if nodegroup else None
template_path, heat_params, env_files = (
self._extract_template_definition(context, cluster))
self._extract_template_definition(context, cluster,
nodegroups=nodegroups))
tpl_files, template = template_utils.get_template_contents(
template_path)
@ -163,7 +197,10 @@ class HeatDriver(driver.Driver):
# valid hostnames are 63 chars long, leaving enough room
# to add the random id (for uniqueness)
stack_name = cluster.name[:30]
if nodegroup is None:
stack_name = cluster.name[:30]
else:
stack_name = "%s-%s" % (cluster.name[:20], nodegroup.name[:9])
stack_name = stack_name.replace('_', '-')
stack_name = stack_name.replace('.', '-')
stack_name = ''.join(filter(valid_chars.__contains__, stack_name))
@ -177,6 +214,14 @@ class HeatDriver(driver.Driver):
# no cluster_create_timeout value was passed in to the request
# so falling back on configuration file value
heat_timeout = cfg.CONF.cluster_heat.create_timeout
heat_params['is_cluster_stack'] = nodegroup is None
if nodegroup:
# In case we are creating a new stack for a new nodegroup then
# we need to extract more params.
heat_params.update(self.get_nodegroup_extra_params(cluster, osc))
fields = {
'stack_name': stack_name,
'parameters': heat_params,
@ -225,10 +270,10 @@ class HeatDriver(driver.Driver):
# Find what changed checking the stack params
# against the ones in the template_def.
stack = osc.heat().stacks.get(cluster.stack_id,
stack = osc.heat().stacks.get(nodegroup.stack_id,
resolve_outputs=True)
stack_params = stack.parameters
definition.add_nodegroup_params(cluster)
definition.add_nodegroup_params(cluster, nodegroups=[nodegroup])
heat_params = definition.get_stack_diff(context, stack_params, cluster)
LOG.debug('Updating stack with these params: %s', heat_params)
@ -244,10 +289,10 @@ class HeatDriver(driver.Driver):
}
osc = clients.OpenStackClients(context)
osc.heat().stacks.update(cluster.stack_id, **fields)
osc.heat().stacks.update(nodegroup.stack_id, **fields)
def _delete_stack(self, context, osc, cluster):
osc.heat().stacks.delete(cluster.stack_id)
def _delete_stack(self, context, osc, stack_id):
osc.heat().stacks.delete(stack_id)
class KubernetesDriver(HeatDriver):
@ -288,39 +333,123 @@ class HeatPoller(object):
def poll_and_check(self):
# TODO(yuanying): temporary implementation to update api_address,
# node_addresses and cluster status
ng_statuses = list()
self.default_ngs = list()
for nodegroup in self.cluster.nodegroups:
self.nodegroup = nodegroup
if self.nodegroup.is_default:
self.default_ngs.append(self.nodegroup)
status = self.extract_nodegroup_status()
# In case a non-default nodegroup is deleted, None
# is returned. We shouldn't add None in the list
if status is not None:
ng_statuses.append(status)
self.aggregate_nodegroup_statuses(ng_statuses)
def extract_nodegroup_status(self):
if self.nodegroup.stack_id is None:
# There is a slight window for a race condition here. If
# a nodegroup is created and just before the stack_id is
# assigned to it, this periodic task is executed, the
# periodic task would try to find the status of the
# stack with id = None. At that time the nodegroup status
# is already set to CREATE_IN_PROGRESS by the conductor.
# Keep this status for this loop until the stack_id is assigned.
return NodeGroupStatus(name=self.nodegroup.name,
status=self.nodegroup.status,
is_default=self.nodegroup.is_default,
reason=self.nodegroup.status_reason)
try:
# Do not resolve outputs by default. Resolving all
# node IPs is expensive on heat.
stack = self.openstack_client.heat().stacks.get(
self.cluster.stack_id, resolve_outputs=False)
self.nodegroup.stack_id, resolve_outputs=False)
# poll_and_check is detached and polling long time to check
# status, so another user/client can call delete cluster/stack.
if stack.stack_status == fields.ClusterStatus.DELETE_COMPLETE:
if self.nodegroup.is_default:
self._check_delete_complete()
else:
self.nodegroup.destroy()
return
if stack.stack_status in (fields.ClusterStatus.CREATE_COMPLETE,
fields.ClusterStatus.UPDATE_COMPLETE):
# Resolve all outputs if the stack is COMPLETE
stack = self.openstack_client.heat().stacks.get(
self.nodegroup.stack_id, resolve_outputs=True)
self._sync_cluster_and_template_status(stack)
elif stack.stack_status != self.nodegroup.status:
self.template_def.nodegroup_output_mappings = list()
self.template_def.update_outputs(
stack, self.cluster_template, self.cluster,
nodegroups=[self.nodegroup])
self._sync_cluster_status(stack)
if stack.stack_status in (fields.ClusterStatus.CREATE_FAILED,
fields.ClusterStatus.DELETE_FAILED,
fields.ClusterStatus.UPDATE_FAILED,
fields.ClusterStatus.ROLLBACK_COMPLETE,
fields.ClusterStatus.ROLLBACK_FAILED):
self._sync_cluster_and_template_status(stack)
self._nodegroup_failed(stack)
except heatexc.NotFound:
self._sync_missing_heat_stack()
return NodeGroupStatus(name=self.nodegroup.name,
status=self.nodegroup.status,
is_default=self.nodegroup.is_default,
reason=self.nodegroup.status_reason)
def aggregate_nodegroup_statuses(self, ng_statuses):
# NOTE(ttsiouts): Aggregate the nodegroup statuses and set the
# cluster overall status.
FAILED = '_FAILED'
IN_PROGRESS = '_IN_PROGRESS'
COMPLETE = '_COMPLETE'
UPDATE = 'UPDATE'
previous_state = self.cluster.status
self.cluster.status_reason = None
# Both default nodegroups will have the same status so it's
# enough to check one of them.
self.cluster.status = self.cluster.default_ng_master.status
default_ng = self.cluster.default_ng_master
if (default_ng.status.endswith(IN_PROGRESS) or
default_ng.status == fields.ClusterStatus.DELETE_COMPLETE):
self.cluster.save()
return
# poll_and_check is detached and polling long time to check status,
# so another user/client can call delete cluster/stack.
if stack.stack_status == fields.ClusterStatus.DELETE_COMPLETE:
self._delete_complete()
# Keep priority to the states below
for state in (IN_PROGRESS, FAILED, COMPLETE):
if any(ns.status.endswith(state) for ns in ng_statuses
if not ns.is_default):
status = getattr(fields.ClusterStatus, UPDATE+state)
self.cluster.status = status
if state == FAILED:
reasons = ["%s failed" % (ns.name)
for ns in ng_statuses
if ns.status.endswith(FAILED)]
self.cluster.status_reason = ' ,'.join(reasons)
break
if self.cluster.status == fields.ClusterStatus.CREATE_COMPLETE:
# Consider the scenario where the user:
# - creates the cluster (cluster: create_complete)
# - adds a nodegroup (cluster: update_complete)
# - deletes the nodegroup
# The cluster should go to CREATE_COMPLETE only if the previous
# state was CREATE_COMPLETE or CREATE_IN_PROGRESS. In all other
# cases, just go to UPDATE_COMPLETE.
if previous_state not in (fields.ClusterStatus.CREATE_COMPLETE,
fields.ClusterStatus.CREATE_IN_PROGRESS):
self.cluster.status = fields.ClusterStatus.UPDATE_COMPLETE
if stack.stack_status in (fields.ClusterStatus.CREATE_COMPLETE,
fields.ClusterStatus.UPDATE_COMPLETE):
# Resolve all outputs if the stack is COMPLETE
stack = self.openstack_client.heat().stacks.get(
self.cluster.stack_id, resolve_outputs=True)
self._sync_cluster_and_template_status(stack)
elif stack.stack_status != self.cluster.status:
self.template_def.update_outputs(stack, self.cluster_template,
self.cluster)
self._sync_cluster_status(stack)
if stack.stack_status in (fields.ClusterStatus.CREATE_FAILED,
fields.ClusterStatus.DELETE_FAILED,
fields.ClusterStatus.UPDATE_FAILED,
fields.ClusterStatus.ROLLBACK_COMPLETE,
fields.ClusterStatus.ROLLBACK_FAILED):
self._sync_cluster_and_template_status(stack)
self._cluster_failed(stack)
self.cluster.save()
def _delete_complete(self):
LOG.info('Cluster has been deleted, stack_id: %s',
@ -339,9 +468,9 @@ class HeatPoller(object):
self.cluster.uuid)
def _sync_cluster_status(self, stack):
self.cluster.status = stack.stack_status
self.cluster.status_reason = stack.stack_status_reason
self.cluster.save()
self.nodegroup.status = stack.stack_status
self.nodegroup.status_reason = stack.stack_status_reason
self.nodegroup.save()
def get_version_info(self, stack):
stack_param = self.template_def.get_heat_param(
@ -358,34 +487,44 @@ class HeatPoller(object):
self.cluster.container_version = container_version
def _sync_cluster_and_template_status(self, stack):
self.template_def.nodegroup_output_mappings = list()
self.template_def.update_outputs(stack, self.cluster_template,
self.cluster)
self.cluster,
nodegroups=[self.nodegroup])
self.get_version_info(stack)
self._sync_cluster_status(stack)
def _cluster_failed(self, stack):
LOG.error('Cluster error, stack status: %(cluster_status)s, '
def _nodegroup_failed(self, stack):
LOG.error('Nodegroup error, stack status: %(ng_status)s, '
'stack_id: %(stack_id)s, '
'reason: %(reason)s',
{'cluster_status': stack.stack_status,
'stack_id': self.cluster.stack_id,
'reason': self.cluster.status_reason})
{'ng_status': stack.stack_status,
'stack_id': self.nodegroup.stack_id,
'reason': self.nodegroup.status_reason})
def _sync_missing_heat_stack(self):
if self.cluster.status == fields.ClusterStatus.DELETE_IN_PROGRESS:
self._delete_complete()
elif self.cluster.status == fields.ClusterStatus.CREATE_IN_PROGRESS:
if self.nodegroup.status == fields.ClusterStatus.DELETE_IN_PROGRESS:
self._sync_missing_stack(fields.ClusterStatus.DELETE_COMPLETE)
if self.nodegroup.is_default:
self._check_delete_complete()
elif self.nodegroup.status == fields.ClusterStatus.CREATE_IN_PROGRESS:
self._sync_missing_stack(fields.ClusterStatus.CREATE_FAILED)
elif self.cluster.status == fields.ClusterStatus.UPDATE_IN_PROGRESS:
elif self.nodegroup.status == fields.ClusterStatus.UPDATE_IN_PROGRESS:
self._sync_missing_stack(fields.ClusterStatus.UPDATE_FAILED)
def _check_delete_complete(self):
    """Run cluster delete bookkeeping once all default NGs are gone.

    The default nodegroups share the cluster stack, so the
    cluster-level _delete_complete must only fire after every one of
    them has reached DELETE_COMPLETE.
    """
    done = fields.ClusterStatus.DELETE_COMPLETE
    if all(ng.status == done for ng in self.default_ngs):
        self._delete_complete()
def _sync_missing_stack(self, new_status):
self.cluster.status = new_status
self.cluster.status_reason = _("Stack with id %s not found in "
"Heat.") % self.cluster.stack_id
self.cluster.save()
LOG.info("Cluster with id %(id)s has been set to "
self.nodegroup.status = new_status
self.nodegroup.status_reason = _("Stack with id %s not found in "
"Heat.") % self.cluster.stack_id
self.nodegroup.save()
LOG.info("Nodegroup with id %(id)s has been set to "
"%(status)s due to stack with id %(sid)s "
"not found in Heat.",
{'id': self.cluster.id, 'status': self.cluster.status,
'sid': self.cluster.stack_id})
{'id': self.nodegroup.uuid, 'status': self.nodegroup.status,
'sid': self.nodegroup.stack_id})

5
magnum/drivers/heat/k8s_coreos_template_def.py

@ -65,8 +65,11 @@ class CoreOSK8sTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
'calico_tag',
'calico_kube_controllers_tag', 'calico_ipv4pool',
'etcd_tag', 'flannel_tag']
labels = self._get_relevant_labels(cluster, kwargs)
for label in label_list:
label_value = cluster.labels.get(label)
label_value = labels.get(label)
if label_value:
extra_params[label] = label_value

4
magnum/drivers/heat/k8s_fedora_template_def.py

@ -100,8 +100,10 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
'draino_tag', 'autoscaler_tag',
'min_node_count', 'max_node_count', 'npd_enabled']
labels = self._get_relevant_labels(cluster, kwargs)
for label in label_list:
label_value = cluster.labels.get(label)
label_value = labels.get(label)
if label_value:
extra_params[label] = label_value

6
magnum/drivers/heat/k8s_template_def.py

@ -212,8 +212,10 @@ class K8sTemplateDefinition(template_def.BaseTemplateDefinition):
'kubescheduler_options',
'influx_grafana_dashboard_enabled']
labels = self._get_relevant_labels(cluster, kwargs)
for label in label_list:
extra_params[label] = cluster.labels.get(label)
extra_params[label] = labels.get(label)
ingress_controller = cluster.labels.get('ingress_controller',
'').lower()
@ -233,7 +235,7 @@ class K8sTemplateDefinition(template_def.BaseTemplateDefinition):
extra_params['registry_container'] = (
CONF.docker_registry.swift_registry_container)
kube_tag = (cluster.labels.get("kube_tag") or
kube_tag = (labels.get("kube_tag") or
cluster_template.labels.get("kube_tag"))
if kube_tag:
extra_params['kube_version'] = kube_tag

4
magnum/drivers/heat/swarm_fedora_template_def.py

@ -141,8 +141,10 @@ class SwarmFedoraTemplateDefinition(template_def.BaseTemplateDefinition):
'docker_volume_type', CONF.cinder.default_docker_volume_type)
extra_params['docker_volume_type'] = docker_volume_type
labels = self._get_relevant_labels(cluster, kwargs)
for label in label_list:
extra_params[label] = cluster.labels.get(label)
extra_params[label] = labels.get(label)
if cluster_template.registry_enabled:
extra_params['swift_region'] = CONF.docker_registry.swift_region

4
magnum/drivers/heat/swarm_mode_template_def.py

@ -127,8 +127,10 @@ class SwarmModeTemplateDefinition(template_def.BaseTemplateDefinition):
extra_params['nodes_affinity_policy'] = \
CONF.cluster.nodes_affinity_policy
labels = self._get_relevant_labels(cluster, kwargs)
for label in label_list:
extra_params[label] = cluster.labels.get(label)
extra_params[label] = labels.get(label)
# set docker_volume_type
# use the configuration default if None provided

11
magnum/drivers/heat/template_def.py

@ -158,6 +158,10 @@ class NodeGroupOutputMapping(OutputMapping):
# nodegroups are fetched from the database every
# time, so the bad thing here is that we need to
# save each change.
previous_value = getattr(ng, self.nodegroup_attr, None)
if previous_value == output_value:
# Avoid saving if it's not needed.
return
setattr(ng, self.nodegroup_attr, output_value)
ng.save()
@ -426,6 +430,13 @@ class BaseTemplateDefinition(TemplateDefinition):
nodegroup_uuid=nodegroup.uuid,
param_class=NodeGroupParameterMapping)
def _get_relevant_labels(self, cluster, kwargs):
nodegroups = kwargs.get('nodegroups', None)
labels = cluster.labels
if nodegroups is not None:
labels = nodegroups[0].labels
return labels
def update_outputs(self, stack, cluster_template, cluster,
nodegroups=None):
master_ng = cluster.default_ng_master

16
magnum/drivers/k8s_fedora_atomic_v1/driver.py

@ -95,3 +95,19 @@ class Driver(driver.KubernetesDriver):
'disable_rollback': not rollback
}
osc.heat().stacks.update(cluster.stack_id, **fields)
def get_nodegroup_extra_params(self, cluster, osc):
    """Collect params a new nodegroup stack reuses from the cluster stack.

    A worker nodegroup stack plugs into the network, the minion
    security group and the master API address created by the cluster
    (default nodegroup) stack, so fetch those from heat.

    :param cluster: the cluster that owns the new nodegroup.
    :param osc: OpenStackClients instance used to reach heat.
    :returns: dict of extra heat parameters for the nodegroup stack.
    """
    heat_client = osc.heat()
    network = heat_client.resources.get(cluster.stack_id, 'network')
    secgroup = heat_client.resources.get(cluster.stack_id,
                                         'secgroup_kube_minion')
    # next() with a default avoids an UnboundLocalError if the cluster
    # stack unexpectedly exposes no 'api_address' output.
    api_address = next(
        (output['output_value']
         for output in heat_client.stacks.get(cluster.stack_id).outputs
         if output['output_key'] == 'api_address'),
        None)
    return {
        'existing_master_private_ip': api_address,
        'existing_security_group': secgroup.attributes['id'],
        'fixed_network': network.attributes['fixed_network'],
        'fixed_subnet': network.attributes['fixed_subnet'],
    }

101
magnum/drivers/k8s_fedora_atomic_v1/templates/kubecluster.yaml

@ -1,12 +1,53 @@
heat_template_version: 2015-04-30
heat_template_version: queens
description: >
This template will boot a Kubernetes cluster with one or more
minions (as specified by the number_of_minions parameter, which
defaults to 1).
conditions:
master_only:
or:
- equals:
- get_param: role
- "master"
- equals:
- get_param: is_cluster_stack
- true
worker_only:
or:
- equals:
- get_param: role
- "worker"
- equals:
- get_param: is_cluster_stack
- true
create_cluster_resources:
equals:
- get_param: is_cluster_stack
- true
parameters:
# needs to become a list if we want to join master nodes?
existing_master_private_ip:
type: string
default: ""
is_cluster_stack:
type: boolean
default: false
role:
type: string
default: ""
existing_security_group:
type: string
default: ""
ssh_key_name:
type: string
description: name of ssh key to be provisioned on our server
@ -34,10 +75,16 @@ parameters:
master_image:
type: string
description: glance image used to boot the server
# When creating a new minion nodegroup this will not
# be provided by magnum. So make it default to ""
default: ""
minion_image:
type: string
description: glance image used to boot the server
# When creating a new master nodegroup this will not
# be provided by magnum. So make it default to ""
default: ""
master_flavor:
type: string
@ -693,6 +740,7 @@ resources:
#
network:
condition: create_cluster_resources
type: ../../common/templates/network.yaml
properties:
existing_network: {get_param: fixed_network}
@ -703,6 +751,7 @@ resources:
private_network_name: {get_param: fixed_network_name}
api_lb:
condition: create_cluster_resources
type: ../../common/templates/lb_api.yaml
properties:
fixed_subnet: {get_attr: [network, fixed_subnet]}
@ -711,6 +760,7 @@ resources:
port: {get_param: kubernetes_port}
etcd_lb:
condition: create_cluster_resources
type: ../../common/templates/lb_etcd.yaml
properties:
fixed_subnet: {get_attr: [network, fixed_subnet]}
@ -724,6 +774,7 @@ resources:
#
secgroup_kube_master:
condition: create_cluster_resources
type: OS::Neutron::SecurityGroup
properties:
rules:
@ -760,6 +811,7 @@ resources:
port_range_max: 8472
secgroup_kube_minion:
condition: create_cluster_resources
type: OS::Neutron::SecurityGroup
properties:
rules:
@ -789,6 +841,7 @@ resources:
# allow any traffic between worker nodes
secgroup_rule_tcp_kube_minion:
condition: create_cluster_resources
type: OS::Neutron::SecurityGroupRule
properties:
protocol: tcp
@ -797,6 +850,7 @@ resources:
security_group: {get_resource: secgroup_kube_minion}
remote_group: {get_resource: secgroup_kube_minion}
secgroup_rule_udp_kube_minion:
condition: create_cluster_resources
type: OS::Neutron::SecurityGroupRule
properties:
protocol: udp
@ -812,6 +866,7 @@ resources:
#
api_address_lb_switch:
condition: create_cluster_resources
type: Magnum::ApiGatewaySwitcher
properties:
pool_public_ip: {get_attr: [api_lb, floating_address]}
@ -820,6 +875,7 @@ resources:
master_private_ip: {get_attr: [kube_masters, resource.0.kube_master_ip]}
etcd_address_lb_switch:
condition: create_cluster_resources
type: Magnum::ApiGatewaySwitcher
properties:
pool_private_ip: {get_attr: [etcd_lb, address]}
@ -832,6 +888,7 @@ resources:
#
api_address_floating_switch:
condition: create_cluster_resources
type: Magnum::FloatingIPAddressSwitcher
properties:
public_ip: {get_attr: [api_address_lb_switch, public_ip]}
@ -844,11 +901,13 @@ resources:
#
master_nodes_server_group:
condition: master_only
type: OS::Nova::ServerGroup
properties:
policies: [{get_param: nodes_affinity_policy}]
worker_nodes_server_group:
condition: worker_only
type: OS::Nova::ServerGroup
properties:
policies: [{get_param: nodes_affinity_policy}]
@ -860,6 +919,7 @@ resources:
#
kube_masters:
condition: master_only
type: OS::Heat::ResourceGroup
depends_on:
- network
@ -983,6 +1043,7 @@ resources:
npd_enabled: {get_param: npd_enabled}
kube_cluster_config:
condition: create_cluster_resources
type: OS::Heat::SoftwareConfig
properties:
group: script
@ -1025,6 +1086,7 @@ resources:
- get_file: ../../common/templates/kubernetes/fragments/install-helm-modules.sh
kube_cluster_deploy:
condition: create_cluster_resources
type: OS::Heat::SoftwareDeployment
properties:
actions: ['CREATE']
@ -1042,6 +1104,7 @@ resources:
#
kube_minions:
condition: worker_only
type: OS::Heat::ResourceGroup
depends_on:
- network
@ -1061,12 +1124,28 @@ resources:
ssh_key_name: {get_param: ssh_key_name}
server_image: {get_param: minion_image}
minion_flavor: {get_param: minion_flavor}
fixed_network: {get_attr: [network, fixed_network]}
fixed_subnet: {get_attr: [network, fixed_subnet]}
fixed_network:
if:
- create_cluster_resources
- get_attr: [network, fixed_network]
- get_param: fixed_network
fixed_subnet:
if:
- create_cluster_resources
- get_attr: [network, fixed_subnet]
- get_param: fixed_subnet
network_driver: {get_param: network_driver}
flannel_network_cidr: {get_param: flannel_network_cidr}
kube_master_ip: {get_attr: [api_address_lb_switch, private_ip]}
etcd_server_ip: {get_attr: [etcd_address_lb_switch, private_ip]}
kube_master_ip:
if:
- create_cluster_resources
- get_attr: [api_address_lb_switch, private_ip]
- get_param: existing_master_private_ip
etcd_server_ip:
if:
- create_cluster_resources
- get_attr: [etcd_address_lb_switch, private_ip]
- get_param: existing_master_private_ip
external_network: {get_param: external_network}
kube_allow_priv: {get_param: kube_allow_priv}
boot_volume_size: {get_param: boot_volume_size}
@ -1092,7 +1171,11 @@ resources:
kubernetes_port: {get_param: kubernetes_port}
tls_disabled: {get_param: tls_disabled}
verify_ca: {get_param: verify_ca}
secgroup_kube_minion_id: {get_resource: secgroup_kube_minion}
secgroup_kube_minion_id:
if:
- create_cluster_resources
- get_resource: secgroup_kube_minion
- get_param: existing_security_group
http_proxy: {get_param: http_proxy}
https_proxy: {get_param: https_proxy}
no_proxy: {get_param: no_proxy}
@ -1123,6 +1206,7 @@ resources:
outputs:
api_address:
condition: create_cluster_resources
value:
str_replace:
template: api_ip_address
@ -1133,6 +1217,7 @@ outputs:
the Kubernetes API.
registry_address:
condition: create_cluster_resources
value:
str_replace:
template: localhost:port
@ -1143,22 +1228,26 @@ outputs:
images.
kube_masters_private:
condition: master_only
value: {get_attr: [kube_masters, kube_master_ip]}
description: >
This is a list of the "private" IP addresses of all the Kubernetes masters.
kube_masters:
condition: master_only
value: {get_attr: [kube_masters, kube_master_external_ip]}
description: >
This is a list of the "public" IP addresses of all the Kubernetes masters.
Use these IP addresses to log in to the Kubernetes masters via ssh.
kube_minions_private:
condition: worker_only
value: {get_attr: [kube_minions, kube_minion_ip]}
description: >
This is a list of the "private" IP addresses of all the Kubernetes minions.
kube_minions:
condition: worker_only
value: {get_attr: [kube_minions, kube_minion_external_ip]}
description: >
This is a list of the "public" IP addresses of all the Kubernetes minions.

4
magnum/drivers/mesos_ubuntu_v1/template_def.py

@ -101,8 +101,10 @@ class UbuntuMesosTemplateDefinition(template_def.BaseTemplateDefinition):
'mesos_slave_work_dir',
'mesos_slave_executor_env_variables']
labels = self._get_relevant_labels(cluster, kwargs)
for label in label_list:
extra_params[label] = cluster.labels.get(label)
extra_params[label] = labels.get(label)
return super(UbuntuMesosTemplateDefinition,
self).get_params(context, cluster_template, cluster,

10
magnum/tests/unit/conductor/handlers/test_cluster_conductor.py

@ -470,7 +470,8 @@ class TestHandler(db_base.DbTestCase):
timeout)
mock_extract_tmpl_def.assert_called_once_with(self.context,
cluster)
cluster,
nodegroups=None)
mock_get_template_contents.assert_called_once_with(
'the/template/path.yaml')
mock_process_mult.assert_called_once_with(
@ -487,7 +488,8 @@ class TestHandler(db_base.DbTestCase):
'file:///the/template/env_file_2':
'content of file:///the/template/env_file_2'
},
parameters={'heat_param_1': 'foo', 'heat_param_2': 'bar'},
parameters={'is_cluster_stack': True, 'heat_param_1': 'foo',
'heat_param_2': 'bar'},
stack_name=('%s-short_id' % cluster.name),
template='some template yaml',
timeout_mins=timeout)
@ -543,6 +545,8 @@ class TestHandler(db_base.DbTestCase):
osc = mock.MagicMock()
mock_openstack_client_class.return_value = osc
osc.heat.side_effect = exc.HTTPConflict
self.worker.create()
self.master.create()
self.assertRaises(exception.OperationInProgress,
self.handler.cluster_delete,
self.context,
@ -570,6 +574,8 @@ class TestHandler(db_base.DbTestCase):
mock_octavia.return_value = True
mock_driver.return_value = k8s_atomic_dr.Driver()
self.master.create()
self.worker.create()
self.handler.cluster_delete(self.context, self.cluster.uuid)
notifications = fake_notifier.NOTIFICATIONS

6
magnum/tests/unit/conductor/handlers/test_k8s_cluster_conductor.py

@ -1126,7 +1126,7 @@ class TestClusterConductorWithK8s(base.TestCase):
expected_args = {
'stack_name': expected_stack_name,
'parameters': {},
'parameters': {'is_cluster_stack': True},
'template': expected_template_contents,
'files': {},
'environment_files': [],
@ -1166,7 +1166,7 @@ class TestClusterConductorWithK8s(base.TestCase):
expected_args = {
'stack_name': expected_stack_name,
'parameters': {},
'parameters': {'is_cluster_stack': True},
'template': expected_template_contents,
'files': {},
'environment_files': [],
@ -1208,7 +1208,7 @@ class TestClusterConductorWithK8s(base.TestCase):
expected_args = {
'stack_name': expected_stack_name,
'parameters': {},
'parameters': {'is_cluster_stack': True},
'template': expected_template_contents,
'files': {},
'environment_files': [],

654
magnum/tests/unit/drivers/test_heat_driver.py

@ -13,6 +13,8 @@
import mock
from mock import patch
from heatclient import exc as heatexc
import magnum.conf
from magnum.drivers.heat import driver as heat_driver
from magnum.drivers.k8s_fedora_atomic_v1 import driver as k8s_atomic_dr
@ -26,25 +28,96 @@ CONF = magnum.conf.CONF
class TestHeatPoller(base.TestCase):
def setUp(self):
super(TestHeatPoller, self).setUp()
self.mock_stacks = dict()
self.def_ngs = list()
def _create_nodegroup(self, cluster, uuid, stack_id, role=None,
is_default=False, stack_status=None,
status_reason=None, stack_params=None,
stack_missing=False):
"""Create a new nodegroup
Util that creates a new non-default ng, adds it to the cluster
and creates the corresponding mock stack.
"""
role = 'worker' if role is None else role
ng = mock.MagicMock(uuid=uuid, role=role, is_default=is_default,
stack_id=stack_id)
cluster.nodegroups.append(ng)
if stack_status is None:
stack_status = cluster_status.CREATE_COMPLETE
if status_reason is None:
status_reason = 'stack created'
stack_params = dict() if stack_params is None else stack_params
stack = mock.MagicMock(stack_status=stack_status,
stack_status_reason=status_reason,
parameters=stack_params)
# In order to simulate a stack not found from osc we don't add the
# stack in the dict.
if not stack_missing:
self.mock_stacks.update({stack_id: stack})
else:
# In case the stack is missing we need
# to set the status to the ng, so that
# _sync_missing_heat_stack knows which
# was the previous state.
ng.status = stack_status
return ng
@patch('magnum.conductor.utils.retrieve_cluster_template')
@patch('oslo_config.cfg')
@patch('magnum.common.clients.OpenStackClients')
@patch('magnum.drivers.common.driver.Driver.get_driver')
def setup_poll_test(self, mock_driver, mock_openstack_client, cfg,
mock_retrieve_cluster_template):
mock_retrieve_cluster_template,
default_stack_status=None, status_reason=None,
stack_params=None, stack_missing=False):
cfg.CONF.cluster_heat.max_attempts = 10
worker_ng = mock.MagicMock(uuid='worker_ng', role='worker')
master_ng = mock.MagicMock(uuid='master_ng', role='master')
nodegroups = [worker_ng, master_ng]
cluster = mock.MagicMock(nodegroups=nodegroups,
default_ng_worker=worker_ng,
default_ng_master=master_ng)
if default_stack_status is None:
default_stack_status = cluster_status.CREATE_COMPLETE
cluster = mock.MagicMock(nodegroups=list())
def_worker = self._create_nodegroup(cluster, 'worker_ng', 'stack1',
role='worker', is_default=True,
stack_status=default_stack_status,
status_reason=status_reason,
stack_params=stack_params,
stack_missing=stack_missing)
def_master = self._create_nodegroup(cluster, 'master_ng', 'stack1',
role='master', is_default=True,
stack_status=default_stack_status,
status_reason=status_reason,
stack_params=stack_params,
stack_missing=stack_missing)
cluster.default_ng_worker = def_worker
cluster.default_ng_master = def_master
self.def_ngs = [def_worker, def_master]
def get_ng_stack(stack_id, resolve_outputs=False):
try:
return self.mock_stacks[stack_id]
except KeyError:
# In this case we intentionally didn't add the stack
# to the mock_stacks dict to simulte a not found error.
# For this reason raise heat NotFound exception.
raise heatexc.NotFound("stack not found")
cluster_template_dict = utils.get_test_cluster_template(
coe='kubernetes')
mock_heat_stack = mock.MagicMock()
mock_heat_client = mock.MagicMock()
mock_heat_client.stacks.get.return_value = mock_heat_stack
mock_heat_client.stacks.get = get_ng_stack
mock_openstack_client.heat.return_value = mock_heat_client
cluster_template = objects.ClusterTemplate(self.context,
**cluster_template_dict)
@ -54,174 +127,545 @@ class TestHeatPoller(base.TestCase):
mock.MagicMock(), cluster,
k8s_atomic_dr.Driver())
poller.get_version_info = mock.MagicMock()
return (mock_heat_stack, cluster, poller)
return (cluster, poller)
def test_poll_no_save(self):
mock_heat_stack, cluster, poller = self.setup_poll_test()
def test_poll_and_check_creating(self):
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.CREATE_IN_PROGRESS)
cluster.status = cluster_status.CREATE_IN_PROGRESS
mock_heat_stack.stack_status = cluster_status.CREATE_IN_PROGRESS
poller.poll_and_check()
self.assertEqual(0, cluster.save.call_count)
def test_poll_save(self):
mock_heat_stack, cluster, poller = self.setup_poll_test()
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.CREATE_IN_PROGRESS, ng.status)
self.assertEqual(cluster_status.CREATE_IN_PROGRESS, cluster.status)
self.assertEqual(1, cluster.save.call_count)
def test_poll_and_check_create_complete(self):
cluster, poller = self.setup_poll_test()
cluster.status = cluster_status.CREATE_IN_PROGRESS
poller.poll_and_check()
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.CREATE_COMPLETE, ng.status)
self.assertEqual('stack created', ng.status_reason)
self.assertEqual(1, ng.save.call_count)
self.assertEqual(cluster_status.CREATE_COMPLETE, cluster.status)
self.assertEqual(1, cluster.save.call_count)
def test_poll_and_check_create_failed(self):
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.CREATE_FAILED)
cluster.status = cluster_status.CREATE_IN_PROGRESS
mock_heat_stack.stack_status = cluster_status.CREATE_FAILED
mock_heat_stack.stack_status_reason = 'Create failed'
self.assertIsNone(poller.poll_and_check())
self.assertEqual(2, cluster.save.call_count)
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.CREATE_FAILED, ng.status)
# Two calls to save since the stack ouptputs are synced too.
self.assertEqual(2, ng.save.call_count)
self.assertEqual(cluster_status.CREATE_FAILED, cluster.status)
self.assertEqual('Create failed', cluster.status_reason)
self.assertEqual(1, cluster.save.call_count)
@patch('os.path.join')
def test_poll_done(self, mock_join):
mock_heat_stack, cluster, poller = self.setup_poll_test()
def test_poll_and_check_updating(self):
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.UPDATE_IN_PROGRESS)
mock_heat_stack.stack_status = cluster_status.DELETE_COMPLETE
self.assertIsNone(poller.poll_and_check())
cluster.status = cluster_status.UPDATE_IN_PROGRESS
poller.poll_and_check()
mock_heat_stack.stack_status = cluster_status.CREATE_FAILED
self.assertIsNone(poller.poll_and_check())
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.UPDATE_IN_PROGRESS, ng.status)
self.assertEqual(1, ng.save.call_count)
def test_poll_done_by_update(self):
mock_heat_stack, cluster, poller = self.setup_poll_test()
self.assertEqual(cluster_status.UPDATE_IN_PROGRESS, cluster.status)
self.assertEqual(1, cluster.save.call_count)
mock_heat_stack.stack_status = cluster_status.UPDATE_COMPLETE
mock_heat_stack.parameters = {
def test_poll_and_check_update_complete(self):
stack_params = {
'number_of_minions': 2,
'number_of_masters': 1
}
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.UPDATE_COMPLETE,
stack_params=stack_params)
cluster.status = cluster_status.UPDATE_IN_PROGRESS
self.assertIsNone(poller.poll_and_check())
self.assertEqual(1, cluster.save.call_count)
self.assertEqual(1, cluster.default_ng_worker.save.call_count)
self.assertEqual(1, cluster.default_ng_master.save.call_count)
self.assertEqual(cluster_status.UPDATE_COMPLETE, cluster.status)
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.UPDATE_COMPLETE, ng.status)
self.assertEqual(2, cluster.default_ng_worker.save.call_count)
self.assertEqual(2, cluster.default_ng_master.save.call_count)
self.assertEqual(2, cluster.default_ng_worker.node_count)
self.assertEqual(1, cluster.default_ng_master.node_count)
def test_poll_done_by_update_failed(self):
mock_heat_stack, cluster, poller = self.setup_poll_test()
self.assertEqual(cluster_status.UPDATE_COMPLETE, cluster.status)
self.assertEqual(1, cluster.save.call_count)
mock_heat_stack.stack_status = cluster_status.UPDATE_FAILED
mock_heat_stack.parameters = {
def test_poll_and_check_update_failed(self):
stack_params = {
'number_of_minions': 2,
'number_of_masters': 1
}
self.assertIsNone(poller.poll_and_check())
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.UPDATE_FAILED,
stack_params=stack_params)
cluster.status = cluster_status.UPDATE_IN_PROGRESS
poller.poll_and_check()
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.UPDATE_FAILED, ng.status)
# We have several calls to save because the stack outputs are
# stored too.
self.assertEqual(3, ng.save.call_count)
self.assertEqual(2, cluster.save.call_count)
self.assertEqual(cluster_status.UPDATE_FAILED, cluster.status)
self.assertEqual(2, cluster.default_ng_worker.node_count)
self.assertEqual(1, cluster.default_ng_master.node_count)
def test_poll_done_by_rollback_complete(self):
mock_heat_stack, cluster, poller = self.setup_poll_test()
self.assertEqual(cluster_status.UPDATE_FAILED, cluster.status)
self.assertEqual(1, cluster.save.call_count)
def test_poll_and_check_deleting(self):
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.DELETE_IN_PROGRESS)
cluster.status = cluster_status.DELETE_IN_PROGRESS
poller.poll_and_check()
for ng in cluster.nodegroups:
self.assertEqual(cluster_status.DELETE_IN_PROGRESS, ng.status)
# We have two calls to save because the stack outputs are
# stored too.
self.assertEqual(1, ng.save.call_count)
self.assertEqual(cluster_status.DELETE_IN_PROGRESS, cluster.status)
self.assertEqual(1, cluster.save.call_count)
def test_poll_and_check_deleted(self):
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.DELETE_COMPLETE)
cluster.status = cluster_status.DELETE_IN_PROGRESS
self.assertIsNone(poller.poll_and_check())
self.assertEqual(cluster_status.DELETE_COMPLETE,
cluster.default_ng_worker.status)
self.assertEqual(1, cluster.default_ng_worker.save.call_count)
self.assertEqual(0, cluster.default_ng_worker.destroy.call_count)
self.assertEqual(cluster_status.DELETE_COMPLETE,
cluster.default_ng_master.status)
self.assertEqual(1, cluster.default_ng_master.save.call_count)
self.assertEqual(0, cluster.default_ng_master.destroy.call_count)
self.assertEqual(cluster_status.DELETE_COMPLETE, cluster.status)
self.assertEqual(1, cluster.save.call_count)
self.assertEqual(0, cluster.destroy.call_count)
def test_poll_and_check_delete_failed(self):
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.DELETE_FAILED)
mock_heat_stack.stack_status = cluster_status.ROLLBACK_COMPLETE
mock_heat_stack.parameters = {
cluster.status = cluster_status.DELETE_IN_PROGRESS
poller.poll_and_check()
self.assertEqual(cluster_status.DELETE_FAILED,
cluster.default_ng_worker.status)
# We have two calls to save because the stack outputs are
# stored too.
self.assertEqual(2, cluster.default_ng_worker.save.call_count)
self.assertEqual(0, cluster.default_ng_worker.destroy.call_count)
self.assertEqual(cluster_status.DELETE_FAILED,
cluster.default_ng_master.status)
# We have two calls to save because the stack outputs are
# stored too.
self.assertEqual(2, cluster.default_ng_master.save.call_count)
self.assertEqual(0, cluster.default_ng_master.destroy.call_count)
self.assertEqual(cluster_status.DELETE_FAILED, cluster.status)
self.assertEqual(1, cluster.save.call_count)
self.assertEqual(0, cluster.destroy.call_count)
def test_poll_done_rollback_complete(self):
stack_params = {
'number_of_minions': 1,
'number_of_masters': 1
}
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.ROLLBACK_COMPLETE,
stack_params=stack_params)
self.assertIsNone(poller.poll_and_check())
self.assertEqual(2, cluster.save.call_count)
self.assertEqual(1, cluster.save.call_count)
self.assertEqual(cluster_status.ROLLBACK_COMPLETE, cluster.status)
self.assertEqual(1, cluster.default_ng_worker.node_count)
self.assertEqual(1, cluster.default_ng_master.node_count)
def test_poll_done_by_rollback_failed(self):
mock_heat_stack, cluster, poller = self.setup_poll_test()
mock_heat_stack.stack_status = cluster_status.ROLLBACK_FAILED
mock_heat_stack.parameters = {
def test_poll_done_rollback_failed(self):
stack_params = {
'number_of_minions': 1,
'number_of_masters': 1
}
cluster, poller = self.setup_poll_test(
default_stack_status=cluster_status.ROLLBACK_FAILED,
stack_params=stack_params)
self.assertIsNone(poller.poll_and_check())
self.assertEqual(2, cluster.save.call_count)
self.assertEqual(1, cluster.save.call_count)
self.assertEqual(cluster_status.ROLLBACK_FAILED, cluster.status)
self.assertEqual(1, cluster.default_ng_worker.node_count)
self.assertEqual(1, cluster.default_ng_master.node_count)
@patch('os.path.join')
def test_poll_destroy(self, mock_join):
mo