Add migrate steps for hosts without instances

During the patch strategy creation the migrate-instances step
only happens for hosts who have instances running at that moment.
As a consequence, if an instance is migrated, during patching
operation, to a host that didn't have any instances running
previously, the patch operation will fail as it will try to lock
the host directly, without migrating its instances previously.
This issue can happen either during patch application or removal.

This commit changes the patching build strategy adding the
migrate-instances-from-host step that will be applied to all
worker hosts unconditionally (given they are OpenStack compute
nodes). Because the previous step (migrate-instances) was built
for a list of instances, additional changes were needed to allow
building the new step for a list of hosts.

Test Plan
PASS: serial patch application runs successfully outside
      OpenStack context;
PASS: parallel patch application runs successfully outside
      OpenStack context;
PASS: serial patch application runs successfully with a host
      not having instances before patch operation begins and
      having an instance migrated to it during patch application;
PASS: parallel patch application runs successfully with a host
      not having instances before patch operation begins and
      having an instance migrated to it during patch application;

Closes-bug: 1960833
Change-Id: I99675ea0b5d0c75bc84c78864b118debc265ceb4
Signed-off-by: Heitor Matsui <HeitorVieira.Matsui@windriver.com>
Co-authored-by: Rafael Falcão <Rafael.VieiraFalcao@windriver.com>
This commit is contained in:
Heitor Matsui 2022-01-21 17:14:08 -03:00 committed by Rafael Falcão
parent d3be1d1d1d
commit 1c4e048465
5 changed files with 281 additions and 47 deletions

View File

@ -152,9 +152,12 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 3,
'stages': [
{'name': 'sw-patch-worker-hosts',
'total_steps': 6,
'total_steps': 8,
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances-from-host',
'entity_names': []},
{'name': 'lock-hosts',
'entity_names': ['compute-2', 'compute-3']},
{'name': 'sw-patch-hosts',
@ -172,7 +175,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_0']},
{'name': 'lock-hosts',
'entity_names': ['compute-0']},
@ -191,7 +194,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_1']},
{'name': 'lock-hosts',
'entity_names': ['compute-1']},
@ -262,9 +265,12 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 5,
'stages': [
{'name': 'sw-patch-worker-hosts',
'total_steps': 6,
'total_steps': 8,
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances-from-host',
'entity_names': []},
{'name': 'lock-hosts',
'entity_names': ['compute-1', 'compute-5']},
{'name': 'sw-patch-hosts',
@ -282,7 +288,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_0',
'test_instance_2']},
{'name': 'lock-hosts',
@ -302,7 +308,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_3',
'test_instance_4']},
{'name': 'lock-hosts',
@ -322,7 +328,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_6',
'test_instance_7']},
{'name': 'lock-hosts',
@ -342,7 +348,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_8',
'test_instance_9']},
{'name': 'lock-hosts',
@ -425,9 +431,12 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 5,
'stages': [
{'name': 'sw-patch-worker-hosts',
'total_steps': 6,
'total_steps': 8,
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances-from-host',
'entity_names': []},
{'name': 'lock-hosts',
'entity_names': ['compute-1', 'compute-5']},
{'name': 'sw-patch-hosts',
@ -445,7 +454,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_0',
'test_instance_6']},
{'name': 'lock-hosts',
@ -465,7 +474,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_2',
'test_instance_7']},
{'name': 'lock-hosts',
@ -485,7 +494,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_3',
'test_instance_8']},
{'name': 'lock-hosts',
@ -505,7 +514,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_4',
'test_instance_9']},
{'name': 'lock-hosts',
@ -598,9 +607,12 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 5,
'stages': [
{'name': 'sw-patch-worker-hosts',
'total_steps': 6,
'total_steps': 8,
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances-from-host',
'entity_names': []},
{'name': 'lock-hosts',
'entity_names': ['compute-1', 'compute-5']},
{'name': 'sw-patch-hosts',
@ -618,7 +630,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_0',
'test_instance_6']},
{'name': 'lock-hosts',
@ -638,7 +650,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_2',
'test_instance_7']},
{'name': 'lock-hosts',
@ -658,7 +670,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_3',
'test_instance_8']},
{'name': 'lock-hosts',
@ -678,7 +690,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_4',
'test_instance_9']},
{'name': 'lock-hosts',
@ -767,7 +779,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_0',
'test_instance_2']},
{'name': 'lock-hosts',
@ -787,7 +799,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_1',
'test_instance_3']},
{'name': 'lock-hosts',
@ -807,7 +819,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_4',
'test_instance_7']},
{'name': 'lock-hosts',
@ -827,7 +839,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_5',
'test_instance_8']},
{'name': 'lock-hosts',
@ -847,7 +859,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'steps': [
{'name': 'query-alarms'},
{'name': 'disable-host-services'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_6',
'test_instance_9']},
{'name': 'lock-hosts',
@ -2339,9 +2351,11 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 4,
'stages': [
{'name': 'sw-patch-worker-hosts',
'total_steps': 6,
'total_steps': 7,
'steps': [
{'name': 'query-alarms'},
{'name': 'migrate-instances-from-host',
'entity_names': []},
{'name': 'lock-hosts',
'entity_names': ['compute-2']},
{'name': 'sw-patch-hosts',
@ -2355,9 +2369,11 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
]
},
{'name': 'sw-patch-worker-hosts',
'total_steps': 6,
'total_steps': 7,
'steps': [
{'name': 'query-alarms'},
{'name': 'migrate-instances-from-host',
'entity_names': []},
{'name': 'lock-hosts',
'entity_names': ['compute-3']},
{'name': 'sw-patch-hosts',
@ -2374,7 +2390,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_steps': 7,
'steps': [
{'name': 'query-alarms'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_0']},
{'name': 'lock-hosts',
'entity_names': ['compute-0']},
@ -2392,7 +2408,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_steps': 7,
'steps': [
{'name': 'query-alarms'},
{'name': 'migrate-instances',
{'name': 'migrate-instances-from-host',
'entity_names': ['test_instance_1']},
{'name': 'lock-hosts',
'entity_names': ['compute-1']},

View File

@ -1889,6 +1889,76 @@ class InstanceDirector(object):
DLOG.info("Recover-Instances-Audit triggered by instance deletion.")
self.recover_instances()
def migrate_instances_from_hosts(self, hosts):
    """
    Migrate Instances out of specific hosts
    """
    host_table = tables.tables_get_host_table()
    instance_table = tables.tables_get_instance_table()

    # Re-query the instances currently running on each host; this set can
    # differ from the instances that were on these hosts when the patching
    # operation started.
    instance_uuids = [instance.uuid
                      for host in hosts
                      for instance in instance_table.on_host(host)]
    DLOG.info("Migrate instances uuids=%s." % instance_uuids)

    overall_operation = Operation(OPERATION_TYPE.MIGRATE_INSTANCES)
    per_host_operations = dict()

    for instance_uuid in instance_uuids:
        instance = instance_table.get(instance_uuid, None)
        if instance is None:
            reason = "Instance %s does not exist." % instance_uuid
            DLOG.info(reason)
            overall_operation.set_failed(reason)
            return overall_operation

        if host_table.get(instance.host_name, None) is None:
            reason = "Host %s does not exist." % instance.host_name
            DLOG.info(reason)
            overall_operation.set_failed(reason)
            return overall_operation

        # A previously tracked operation for this host blocks a new one
        # only while it is still in progress; a finished one is discarded.
        existing_operation = self._host_operations.get(instance.host_name, None)
        if existing_operation is not None:
            if existing_operation.is_inprogress():
                reason = ("Another host operation %s is already inprogress "
                          "for host %s." % (existing_operation.operation_type,
                                            instance.host_name))
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation
            del self._host_operations[instance.host_name]

        if instance.host_name not in per_host_operations:
            per_host_operations[instance.host_name] = \
                Operation(OPERATION_TYPE.MIGRATE_INSTANCES)

    # Kick off one migrate operation per host and roll each per-host state
    # up into the overall operation; stop at the first failure or timeout.
    for host_name, host_operation in per_host_operations.items():
        self._host_operations[host_name] = host_operation
        self._host_migrate_instances(host_table[host_name], host_operation)
        if host_operation.is_inprogress():
            overall_operation.add_host(host_name, OPERATION_STATE.INPROGRESS)
        elif host_operation.is_failed():
            overall_operation.add_host(host_name, OPERATION_STATE.FAILED)
            overall_operation.update_failure_reason(host_operation.reason)
            break
        elif host_operation.is_timed_out():
            overall_operation.add_host(host_name, OPERATION_STATE.TIMED_OUT)
            overall_operation.update_failure_reason(host_operation.reason)
            break
        else:
            overall_operation.add_host(host_name, OPERATION_STATE.COMPLETED)

    return overall_operation
def migrate_instances(self, instance_uuids):
"""
Migrate Instances

View File

@ -33,6 +33,7 @@ from nfv_vim.strategy._strategy_steps import KubeUpgradeDownloadImagesStep # no
from nfv_vim.strategy._strategy_steps import KubeUpgradeNetworkingStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeUpgradeStartStep # noqa: F401
from nfv_vim.strategy._strategy_steps import LockHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import MigrateInstancesFromHostStep # noqa: F401
from nfv_vim.strategy._strategy_steps import MigrateInstancesStep # noqa: F401
from nfv_vim.strategy._strategy_steps import QueryAlarmsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import QueryFwUpdateHostStep # noqa: F401

View File

@ -1109,8 +1109,11 @@ class UpdateWorkerHostsMixin(object):
for host_list in host_lists:
instance_list = list()
openstack_hosts = list()
for host in host_list:
if host.host_service_configured(HOST_SERVICES.COMPUTE):
openstack_hosts.append(host)
for instance in instance_table.on_host(host.name):
# Do not take action (migrate or stop-start) on an instance
# if it is locked (i.e. stopped).
@ -1135,28 +1138,23 @@ class UpdateWorkerHostsMixin(object):
# Swact controller before locking
stage.add_step(strategy.SwactHostsStep(host_list))
if 0 != len(instance_list):
# Migrate or stop instances as necessary
if SW_UPDATE_INSTANCE_ACTION.MIGRATE == \
self._default_instance_action:
if SW_UPDATE_APPLY_TYPE.PARALLEL == \
self._worker_apply_type:
# Migrate or stop instances as necessary
if SW_UPDATE_INSTANCE_ACTION.MIGRATE == self._default_instance_action:
if len(openstack_hosts):
if SW_UPDATE_APPLY_TYPE.PARALLEL == self._worker_apply_type:
# Disable host services before migrating to ensure
# instances do not migrate to worker hosts in the
# same set of hosts.
if host_list[0].host_service_configured(
HOST_SERVICES.COMPUTE):
stage.add_step(strategy.DisableHostServicesStep(
host_list, HOST_SERVICES.COMPUTE))
stage.add_step(strategy.DisableHostServicesStep(
openstack_hosts, HOST_SERVICES.COMPUTE))
# TODO(ksmith)
# When support is added for orchestration on
# non-OpenStack worker nodes, support for disabling
# kubernetes services will have to be added.
stage.add_step(strategy.MigrateInstancesStep(
instance_list))
else:
stage.add_step(strategy.StopInstancesStep(
instance_list))
stage.add_step(strategy.MigrateInstancesFromHostStep(
openstack_hosts, instance_list))
elif len(instance_list):
stage.add_step(strategy.StopInstancesStep(instance_list))
if hosts_to_lock:
wait_until_disabled = True
@ -1185,12 +1183,10 @@ class UpdateWorkerHostsMixin(object):
# Reboot hosts that were already locked
stage.add_step(strategy.RebootHostsStep(hosts_to_reboot))
if 0 != len(instance_list):
if len(instance_list):
# Start any instances that were stopped
if SW_UPDATE_INSTANCE_ACTION.MIGRATE != \
self._default_instance_action:
stage.add_step(strategy.StartInstancesStep(
instance_list))
if SW_UPDATE_INSTANCE_ACTION.MIGRATE != self._default_instance_action:
stage.add_step(strategy.StartInstancesStep(instance_list))
# After controller node(s) are unlocked, we need extra time to
# allow the OSDs to go back in sync and the storage related
# alarms to clear. Note: not all controller nodes will have

View File

@ -44,6 +44,7 @@ class StrategyStepNames(Constants):
FW_UPDATE_HOSTS = Constant('fw-update-hosts')
FW_UPDATE_ABORT_HOSTS = Constant('fw-update-abort-hosts')
MIGRATE_INSTANCES = Constant('migrate-instances')
MIGRATE_INSTANCES_FROM_HOST = Constant('migrate-instances-from-host')
STOP_INSTANCES = Constant('stop-instances')
START_INSTANCES = Constant('start-instances')
QUERY_ALARMS = Constant('query-alarms')
@ -1231,6 +1232,153 @@ class UpgradeCompleteStep(strategy.StrategyStep):
return data
class MigrateInstancesFromHostStep(strategy.StrategyStep):
    """
    Migrate Instances From Host - Strategy Step

    Added unconditionally for OpenStack compute hosts so that instances
    migrated onto a host after the strategy was built are still moved off
    before the host is locked.
    """
    def __init__(self, hosts, instances):
        # hosts: host objects the step migrates instances away from
        # instances: instance objects known to be on those hosts when the
        #            strategy was created (may be empty)
        super(MigrateInstancesFromHostStep, self).__init__(
            STRATEGY_STEP_NAME.MIGRATE_INSTANCES_FROM_HOST, timeout_in_secs=1800)
        self._hosts = hosts
        self._host_names = list()
        self._instances = instances
        self._instance_names = list()
        self._instance_uuids = list()
        # Remember which host each instance was on at strategy creation so
        # apply() can detect instances that moved in the meantime.
        self._instance_host_names = dict()
        for host in hosts:
            self._host_names.append(host.name)
        for instance in instances:
            self._instance_names.append(instance.name)
            self._instance_uuids.append(instance.uuid)
            self._instance_host_names[instance.uuid] = instance.host_name

    def _all_instances_migrated(self):
        """
        Returns true if all instances have migrated from the source hosts

        Returns a (success, reason) tuple; reason is currently always empty.
        """
        instance_table = tables.tables_get_instance_table()
        for host_name in self._host_names:
            if instance_table.exist_on_host(host_name):
                return False, ""
        return True, ""

    def apply(self):
        """
        Migrate all instances

        Returns (result, reason): SUCCESS when the source hosts are already
        empty, WAIT while migration is in progress, FAILED otherwise.
        """
        from nfv_vim import directors

        if self._instance_names:
            DLOG.info("Step (%s) apply for instances %s running on hosts %s." % (
                self._name,
                self._instance_names,
                self._host_names,
            ))
        else:
            DLOG.info("Step (%s) apply with no instances on hosts %s." % (
                self._name,
                self._host_names,
            ))

        migrate_complete, reason = self._all_instances_migrated()
        if migrate_complete:
            return strategy.STRATEGY_STEP_RESULT.SUCCESS, ""

        # Ensure none of the instances have moved since the strategy step was
        # created. migrate_instances_from_hosts will migrate ALL instances
        # found on the given hosts; we only want to migrate instances from
        # the host(s) they were originally located on.
        for instance in self._instances:
            if instance.host_name != self._instance_host_names[instance.uuid]:
                reason = ("instance %s has moved from %s to %s after strategy "
                          "created" %
                          (instance.name, self._instance_host_names[instance.uuid],
                           instance.host_name))
                return strategy.STRATEGY_STEP_RESULT.FAILED, reason

        instance_director = directors.get_instance_director()
        operation = instance_director.migrate_instances_from_hosts(self._host_names)
        if operation.is_inprogress():
            return strategy.STRATEGY_STEP_RESULT.WAIT, ""
        elif operation.is_failed():
            return strategy.STRATEGY_STEP_RESULT.FAILED, operation.reason

        return strategy.STRATEGY_STEP_RESULT.SUCCESS, ""

    def handle_event(self, event, event_data=None):
        """
        Handle Instance events

        Completes the step once the source hosts are empty; fails it on a
        migrate-instances-failed event. Returns True if the event was handled.
        """
        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))

        if event in [STRATEGY_EVENT.INSTANCE_STATE_CHANGED,
                     STRATEGY_EVENT.INSTANCE_AUDIT,
                     STRATEGY_EVENT.HOST_AUDIT]:
            migrate_complete, reason = self._all_instances_migrated()
            # NOTE(review): _all_instances_migrated never returns a non-empty
            # reason today, so this failure branch is currently unreachable.
            if not migrate_complete and reason:
                result = strategy.STRATEGY_STEP_RESULT.FAILED
                self.stage.step_complete(result, reason)
                return True

            if migrate_complete:
                result = strategy.STRATEGY_STEP_RESULT.SUCCESS
                self.stage.step_complete(result, '')
            return True

        elif STRATEGY_EVENT.MIGRATE_INSTANCES_FAILED == event:
            result = strategy.STRATEGY_STEP_RESULT.FAILED
            self.stage.step_complete(result, event_data)
            return True

        return False

    def from_dict(self, data):
        """
        Returns the migrate instances from hosts step object initialized using the given
        dictionary
        """
        super(MigrateInstancesFromHostStep, self).from_dict(data)
        self._hosts = list()
        self._host_names = data['host_names']
        self._instance_uuids = data['entity_uuids']
        self._instances = list()
        self._instance_names = list()
        self._instance_host_names = dict()

        # Hosts or instances that no longer exist are silently dropped; the
        # step re-evaluates the live tables when it is applied.
        host_table = tables.tables_get_host_table()
        for host_name in self._host_names:
            host = host_table.get(host_name, None)
            if host:
                self._hosts.append(host)

        instance_table = tables.tables_get_instance_table()
        for instance_uuid in self._instance_uuids:
            instance = instance_table.get(instance_uuid, None)
            if instance is not None:
                self._instances.append(instance)
                self._instance_names.append(instance.name)
                # Retrieve the host this instance was on when the step was
                # created.
                self._instance_host_names[instance.uuid] = \
                    data['instance_host_names'][instance.uuid]
        return self

    def as_dict(self):
        """
        Represent the migrate instances from hosts step as a dictionary
        """
        data = super(MigrateInstancesFromHostStep, self).as_dict()
        data['entity_type'] = 'instances'
        data['entity_names'] = self._instance_names
        data['entity_uuids'] = self._instance_uuids
        data['instance_host_names'] = self._instance_host_names
        data['host_names'] = self._host_names
        return data
class MigrateInstancesStep(strategy.StrategyStep):
"""
Migrate Instances - Strategy Step
@ -4306,6 +4454,9 @@ def strategy_step_rebuild_from_dict(data):
elif STRATEGY_STEP_NAME.MIGRATE_INSTANCES == data['name']:
step_obj = object.__new__(MigrateInstancesStep)
elif STRATEGY_STEP_NAME.MIGRATE_INSTANCES_FROM_HOST == data['name']:
step_obj = object.__new__(MigrateInstancesFromHostStep)
elif STRATEGY_STEP_NAME.START_INSTANCES == data['name']:
step_obj = object.__new__(StartInstancesStep)