Improve clustered instance failover handling

Instances can bounce between hosts many times within a short interval,
especially if the CSVs (Cluster Shared Volumes) go down — as many as
20 times in less than 2 minutes.

We're not handling this properly. The failover handling logic is
prone to race conditions: multiple hosts may attempt to claim the
same instance at the same time, leaving it in an inconsistent state.

We're introducing distributed locks to prevent races between hosts.
We're also validating failover events, as the instance may have moved
again by the time we process the event.
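
The lock is taken via a decorator on the failover handler, visible in
the diff below as @coordination.synchronized('failover-{instance_name}').
The coordination module itself is not part of this diff; the following is
a minimal, hypothetical sketch of how such a tooz-backed helper could
look. All names here (Coordinator, COORDINATOR, the synchronized template
shape, the file backend URL) are assumptions for illustration, not the
actual implementation:

    import functools

    from oslo_utils import uuidutils
    import tooz.coordination


    class Coordinator(object):
        """Tiny wrapper around a tooz coordinator (hypothetical)."""

        def __init__(self):
            self._coordinator = None

        def start(self):
            # The backend URL would come from configuration; a file based
            # backend is used here purely as an example.
            self._coordinator = tooz.coordination.get_coordinator(
                'file:///tmp/locks',
                uuidutils.generate_uuid().encode('ascii'))
            self._coordinator.start(start_heart=True)

        def get_lock(self, name):
            return self._coordinator.get_lock(name.encode('ascii'))


    COORDINATOR = Coordinator()


    def synchronized(lock_template):
        # 'failover-{instance_name}' style templates are filled in from
        # the decorated method's arguments, so each instance gets its own
        # distributed lock while allowing unrelated failovers to proceed.
        def wrapper(f):
            @functools.wraps(f)
            def inner(self, instance_name, *args, **kwargs):
                with COORDINATOR.get_lock(
                        lock_template.format(instance_name=instance_name)):
                    return f(self, instance_name, *args, **kwargs)
            return inner
        return wrapper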

A distributed lock backend will have to be configured.
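
For illustration only: assuming the module follows the usual tooz
pattern of a backend_url option (the exact option name and config group
are not shown in this diff), the compute node configuration might look
like:

    [coordination]
    # Any tooz backend shared by all compute nodes should work,
    # e.g. etcd3, mysql or a file share.
    backend_url = etcd3+http://192.168.42.10:2379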

We're also now waiting for "pending" cluster groups, which may not
even be registered in Hyper-V yet, in which case any action we take
on the VM would fail.

Closes-Bug: #1795299
Closes-Bug: #1796673

Change-Id: I3dbdcf208bb7a96bd516b41e4725a5fcb37280d6
Lucian Petrut 2018-10-09 17:20:16 +03:00
parent c96671885f
commit 9f5628e55b
7 changed files with 150 additions and 13 deletions

View File

@@ -16,6 +16,7 @@
 """Management class for Cluster VM operations."""

 import functools
+import time

 from nova.compute import power_state
 from nova.compute import task_states
@@ -26,11 +27,13 @@ from nova import objects
 from nova import utils
 from nova.virt import block_device
 from nova.virt import event as virtevent
+from os_win import constants as os_win_const
 from os_win import exceptions as os_win_exc
 from os_win import utilsfactory
 from oslo_log import log as logging

 import compute_hyperv.nova.conf
+from compute_hyperv.nova import coordination
 from compute_hyperv.nova import hostops
 from compute_hyperv.nova import serialconsoleops
 from compute_hyperv.nova import vmops
@@ -106,6 +109,7 @@ class ClusterOps(object):
                       instance.name, instance.host,
                       self._this_node)

+    @coordination.synchronized('failover-{instance_name}')
     def _failover_migrate(self, instance_name, new_host):
         """This method will check if the generated event is a legitimate
         failover to this node. If it is, it will proceed to prepare the
@@ -126,6 +130,31 @@ class ClusterOps(object):
                      'new_host': new_host,
                      'old_host': old_host})

+        # While the cluster group is in "pending" state, it may not even be
+        # registered in Hyper-V, so there's not much we can do. We'll have to
+        # wait for it to be handled by the Failover Cluster service.
+        self._wait_for_pending_instance(instance_name)
+
+        current_host = self._clustutils.get_vm_host(instance_name)
+        instance_moved_again = current_host.upper() != new_host.upper()
+        if instance_moved_again:
+            LOG.warning("While processing instance %(instance)s failover to "
+                        "%(host)s, it has moved to %(current_host)s.",
+                        dict(host=new_host,
+                             current_host=current_host,
+                             instance=instance_name))
+            new_host = current_host
+
+        host_changed = old_host.upper() != new_host.upper()
+        migrated_here = new_host.upper() == self._this_node.upper()
+        migrated_from_here = old_host.upper() == self._this_node.upper()
+
+        if not host_changed:
+            LOG.warning("The source node is the same as the destination "
+                        "node: %(host)s. The instance %(instance)s may have "
+                        "bounced between hosts due to a failure.",
+                        dict(host=old_host, instance=instance_name))
+
         if instance.task_state == task_states.MIGRATING:
             # In case of live migration triggered by the user, we get the
             # event that the instance changed host but we do not want
@@ -135,11 +164,11 @@ class ClusterOps(object):
         nw_info = self._network_api.get_instance_nw_info(self._context,
                                                          instance)
-        if old_host and old_host.upper() == self._this_node.upper():
-            LOG.debug('Actions at source node.')
+        if host_changed and migrated_from_here:
+            LOG.debug('Cleaning up moved instance: %s.', instance_name)
             self._vmops.unplug_vifs(instance, nw_info)
             return
-        elif new_host.upper() != self._this_node.upper():
+        if not migrated_here:
             LOG.debug('Instance %s did not failover to this node.',
                       instance_name)
             return
@@ -148,10 +177,26 @@ class ClusterOps(object):
                     {'instance': instance_name})

         self._nova_failover_server(instance, new_host)
-        self._failover_migrate_networks(instance, old_host)
+        if host_changed:
+            self._failover_migrate_networks(instance, old_host)
         self._vmops.plug_vifs(instance, nw_info)
         self._serial_console_ops.start_console_handler(instance_name)

+    def _wait_for_pending_instance(self, instance_name):
+        # TODO(lpetrut): switch to an event listener. We'd probably want to
+        # avoid having one event listener per failed over instance, as there
+        # can be many of them.
+        group_state = self._clustutils.get_cluster_group_state_info(
+            instance_name)['state']
+        while group_state == os_win_const.CLUSTER_GROUP_PENDING:
+            LOG.debug("Waiting for pending instance cluster group: %s",
+                      instance_name)
+            time.sleep(2)
+
+            group_state = self._clustutils.get_cluster_group_state_info(
+                instance_name)['state']
+
     def _failover_migrate_networks(self, instance, source):
         """This is called after a VM failovered to this node.

         This will change the owner of the neutron ports to this node.

View File

@@ -21,6 +21,8 @@ from compute_hyperv.nova import driver

 class HyperVClusterDriver(driver.HyperVDriver):
+    use_coordination = True
+
     def __init__(self, virtapi):
         super(HyperVClusterDriver, self).__init__(virtapi)

View File

@@ -30,6 +30,7 @@ from os_win import utilsfactory
 from oslo_log import log as logging
 import six

+from compute_hyperv.nova import coordination
 from compute_hyperv.nova import eventhandler
 from compute_hyperv.nova import hostops
 from compute_hyperv.nova import imagecache
@@ -108,6 +109,8 @@ class HyperVDriver(driver.ComputeDriver):
         "supports_trusted_certs": True,
     }

+    use_coordination = False
+
     def __init__(self, virtapi):
         # check if the current version of Windows is supported before any
         # further driver initialisation.
@@ -151,6 +154,9 @@ class HyperVDriver(driver.ComputeDriver):
         return False

     def init_host(self, host):
+        if self.use_coordination:
+            coordination.COORDINATOR.start()
+
         self._serialconsoleops.start_console_handlers()
         self._set_event_handler_callbacks()

View File

@@ -85,6 +85,7 @@ class NoDBTestCase(base.BaseTestCase):
     """
     TIMEOUT_SCALING_FACTOR = 1
+    MOCK_TOOZ = True

     def setUp(self):
         """Run before each test method to initialize test environment."""
@@ -111,6 +112,11 @@ class NoDBTestCase(base.BaseTestCase):
         self.useFixture(nova_fixtures.PoisonFunctions())

+        if self.MOCK_TOOZ:
+            self.patch('compute_hyperv.nova.coordination.Coordinator.start')
+            self.patch('compute_hyperv.nova.coordination.Coordinator.stop')
+            self.patch('compute_hyperv.nova.coordination.Coordinator.get_lock')
+
     def _clear_attrs(self):
         # Delete attributes that don't start with _ so they don't pin
         # memory around unnecessarily for the duration of the test
@@ -124,6 +130,12 @@ class NoDBTestCase(base.BaseTestCase):
         for k, v in six.iteritems(kw):
             CONF.set_override(k, v, group)

+    def patch(self, path, *args, **kwargs):
+        patcher = mock.patch(path, *args, **kwargs)
+        result = patcher.start()
+        self.addCleanup(patcher.stop)
+        return result
+
     def assertPublicAPISignatures(self, baseinst, inst):
         def get_public_apis(inst):
             methods = {}

View File

@@ -21,6 +21,7 @@ from nova.compute import vm_states
 from nova.network.neutronv2 import api as network_api
 from nova import objects
 from nova.virt import event as virtevent
+from os_win import constants as os_win_const
 from os_win import exceptions as os_win_exc

 from compute_hyperv.nova.cluster import clusterops
@@ -144,10 +145,12 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
             mock_instance1.name, mock_instance1.host,
             self.clusterops._this_node)

+    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
     @mock.patch.object(clusterops, 'LOG')
     @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
     def test_failover_migrate_no_instance(self, mock_get_instance_by_name,
-                                          mock_LOG):
+                                          mock_LOG,
+                                          mock_wait_pending_instance):
         mock_get_instance_by_name.return_value = None

         self.clusterops._failover_migrate(mock.sentinel.instance_name,
@@ -159,35 +162,40 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
         self.assertFalse(
             self.clusterops._network_api.get_instance_nw_info.called)

+    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
     @mock.patch.object(clusterops, 'LOG')
     @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
     def test_failover_migrate_migrating(self, mock_get_instance_by_name,
-                                        mock_LOG):
+                                        mock_LOG, mock_wait_pending_instance):
         instance = mock_get_instance_by_name.return_value
         instance.task_state = task_states.MIGRATING

         self.clusterops._failover_migrate(mock.sentinel.instance_name,
-                                          mock.sentinel.new_host)
+                                          'new_host')

         mock_LOG.debug.assert_called_once_with(
             'Instance %s is live migrating.', mock.sentinel.instance_name)

+    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
     @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
-    def test_failover_migrate_at_source_node(self, mock_get_instance_by_name):
+    def test_failover_migrate_at_source_node(self, mock_get_instance_by_name,
+                                             mock_wait_pending_instance):
         instance = mock_get_instance_by_name.return_value
         instance.host = 'old_host'
         self.clusterops._this_node = instance.host

         self.clusterops._failover_migrate(mock.sentinel.instance_name,
-                                          mock.sentinel.new_host)
+                                          'new_host')

         self.clusterops._vmops.unplug_vifs.assert_called_once_with(instance,
             self.clusterops._network_api.get_instance_nw_info.return_value)

+    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
     @mock.patch.object(clusterops, 'LOG')
     @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
     def test_failover_migrate_not_this_node(self, mock_get_instance_by_name,
-                                            mock_LOG):
+                                            mock_LOG,
+                                            mock_wait_pending_instance):
         self.clusterops._this_node = 'new_host'

         self.clusterops._failover_migrate(mock.sentinel.instance_name,
@@ -197,21 +205,28 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
             'Instance %s did not failover to this node.',
             mock.sentinel.instance_name)

+    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
     @mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks')
     @mock.patch.object(clusterops.ClusterOps, '_nova_failover_server')
     @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
-    def test_failover_migrate_this_node(self, mock_get_instance_by_name,
-                                        mock_nova_failover_server,
-                                        mock_failover_migrate_networks):
+    def test_failover_migrate_changed_host(self, mock_get_instance_by_name,
+                                           mock_nova_failover_server,
+                                           mock_failover_migrate_networks,
+                                           mock_wait_pending_instance):
         instance = mock_get_instance_by_name.return_value
         old_host = 'old_host'
         new_host = 'new_host'
         instance.host = old_host
         self.clusterops._this_node = new_host
+        self._clustutils.get_vm_host.return_value = new_host

         self.clusterops._failover_migrate(mock.sentinel.instance_name,
                                           new_host)

+        mock_wait_pending_instance.assert_called_once_with(
+            mock.sentinel.instance_name)
+        self._clustutils.get_vm_host.assert_called_once_with(
+            mock.sentinel.instance_name)
+
         mock_get_instance_by_name.assert_called_once_with(
             mock.sentinel.instance_name)
         get_inst_nw_info = self.clusterops._network_api.get_instance_nw_info
@@ -225,6 +240,50 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
         c_handler = self.clusterops._serial_console_ops.start_console_handler
         c_handler.assert_called_once_with(mock.sentinel.instance_name)

+    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
+    @mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks')
+    @mock.patch.object(clusterops.ClusterOps, '_nova_failover_server')
+    @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
+    def test_failover_same_node(self, mock_get_instance_by_name,
+                                mock_nova_failover_server,
+                                mock_failover_migrate_networks,
+                                mock_wait_pending_instance):
+        # In some cases, the instances may bounce between hosts. We're testing
+        # the case in which the instance is actually returning to the initial
+        # host during the time in which we're processing events.
+        instance = mock_get_instance_by_name.return_value
+        old_host = 'old_host'
+        new_host = 'new_host'
+        instance.host = old_host
+        self.clusterops._this_node = old_host
+        self._clustutils.get_vm_host.return_value = old_host
+
+        self.clusterops._failover_migrate(mock.sentinel.instance_name,
+                                          new_host)
+
+        get_inst_nw_info = self.clusterops._network_api.get_instance_nw_info
+        get_inst_nw_info.assert_called_once_with(self.clusterops._context,
+                                                 instance)
+        mock_nova_failover_server.assert_called_once_with(instance, old_host)
+        self.clusterops._vmops.unplug_vifs.assert_not_called()
+        self.clusterops._vmops.plug_vifs.assert_called_once_with(
+            instance, get_inst_nw_info.return_value)
+        mock_failover_migrate_networks.assert_not_called()
+        c_handler = self.clusterops._serial_console_ops.start_console_handler
+        c_handler.assert_called_once_with(mock.sentinel.instance_name)
+
+    @mock.patch('time.sleep')
+    def test_wait_for_pending_instance(self, mock_sleep):
+        self._clustutils.get_cluster_group_state_info.side_effect = [
+            dict(state=os_win_const.CLUSTER_GROUP_PENDING),
+            dict(state=os_win_const.CLUSTER_GROUP_ONLINE)]
+
+        self.clusterops._wait_for_pending_instance(mock.sentinel.instance_name)
+
+        self._clustutils.get_cluster_group_state_info.assert_has_calls(
+            [mock.call(mock.sentinel.instance_name)] * 2)
+        mock_sleep.assert_called_once_with(2)
+
     def test_failover_migrate_networks(self):
         mock_instance = fake_instance.fake_instance_obj(self.context)
         fake_source = mock.MagicMock()

View File

@@ -50,6 +50,8 @@ class MockToozLock(tooz.locking.Lock):

 @mock.patch('tooz.coordination.get_coordinator')
 class CoordinatorTestCase(test_base.HyperVBaseTestCase):
+    MOCK_TOOZ = False
+
     def test_coordinator_start(self, get_coordinator):
         crd = get_coordinator.return_value

View File

@@ -0,0 +1,11 @@
+---
+upgrade:
+  - |
+    When using the cluster driver, a distributed lock backend will have to be
+    configured.
+fixes:
+  - |
+    In order to fix race conditions that can occur when handling instance
+    failovers, the cluster driver is now using distributed locks. A
+    distributed lock backend (e.g. etcd, mysql, file based, etc) will have to
+    be configured.