Improve clustered instance failover handling

Instances can bounce between hosts many times in a short interval,
especially if the CSVs (Cluster Shared Volumes) go down (as many as
20 times in less than 2 minutes).

We're not handling this properly. The failover handling logic is
prone to race conditions: multiple hosts may attempt to claim the
same instance, leaving it in an inconsistent state.

We're introducing distributed locks, preventing races between hosts.
We're also validating the failover events, as the instance may have
moved again by the time we process the event.
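
As a rough illustration, a cross-host lock decorator such as
@coordination.synchronized('failover-{instance_name}') can be built on
top of tooz along the following lines. This is a simplified sketch, not
the actual compute_hyperv.nova.coordination module: the backend URL,
member id handling and error handling shown here are assumptions.

    import functools
    import inspect
    import uuid

    from tooz import coordination


    class Coordinator(object):
        """Thin wrapper around a tooz coordinator (illustrative only)."""

        def __init__(self, backend_url='file:///tmp/compute-hyperv-locks'):
            # Assumed default; in practice the backend URL would come from
            # configuration (e.g. an etcd, mysql or file based backend).
            self._backend_url = backend_url
            self._coordinator = None

        def start(self):
            member_id = uuid.uuid4().hex.encode('ascii')
            self._coordinator = coordination.get_coordinator(
                self._backend_url, member_id)
            self._coordinator.start()

        def stop(self):
            if self._coordinator:
                self._coordinator.stop()
                self._coordinator = None

        def get_lock(self, name):
            # Every host contending for the same instance uses the same
            # lock name, making the lock effectively cluster wide.
            return self._coordinator.get_lock(name.encode('ascii'))


    COORDINATOR = Coordinator()


    def synchronized(lock_name_template):
        """Serialize calls across hosts, naming the lock after the args."""
        def wrapper(func):
            @functools.wraps(func)
            def inner(*args, **kwargs):
                # Build the lock name from the decorated function's call
                # arguments, e.g. 'failover-instance-00000001'.
                call_args = inspect.getcallargs(func, *args, **kwargs)
                lock_name = lock_name_template.format(**call_args)
                with COORDINATOR.get_lock(lock_name):
                    return func(*args, **kwargs)
            return inner
        return wrapper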

A distributed lock backend will have to be configured.
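
For example, assuming the lock options follow the pattern used by other
OpenStack projects' coordination modules (a [coordination] config section
with a backend_url option, which is an assumption here rather than
something spelled out in this patch), an etcd based setup could look like:

    [coordination]
    # Any tooz supported backend may be used, e.g. etcd, mysql, zookeeper
    # or file based locks (the latter only being suitable when all the
    # contending services run on the same node).
    backend_url = etcd3+http://127.0.0.1:2379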

We're also now waiting for "pending" cluster groups: while a group is
in this state, the VM may not even be registered in Hyper-V, so any
action we take on it would fail.

Closes-Bug: #1795299
Closes-Bug: #1796673

Change-Id: I3dbdcf208bb7a96bd516b41e4725a5fcb37280d6
Lucian Petrut 2018-10-09 17:20:16 +03:00
parent c96671885f
commit 9f5628e55b
7 changed files with 150 additions and 13 deletions


@ -16,6 +16,7 @@
"""Management class for Cluster VM operations."""
import functools
import time
from nova.compute import power_state
from nova.compute import task_states
@ -26,11 +27,13 @@ from nova import objects
from nova import utils
from nova.virt import block_device
from nova.virt import event as virtevent
from os_win import constants as os_win_const
from os_win import exceptions as os_win_exc
from os_win import utilsfactory
from oslo_log import log as logging
import compute_hyperv.nova.conf
from compute_hyperv.nova import coordination
from compute_hyperv.nova import hostops
from compute_hyperv.nova import serialconsoleops
from compute_hyperv.nova import vmops
@ -106,6 +109,7 @@ class ClusterOps(object):
                instance.name, instance.host,
                self._this_node)

    @coordination.synchronized('failover-{instance_name}')
    def _failover_migrate(self, instance_name, new_host):
        """This method will check if the generated event is a legitimate
        failover to this node. If it is, it will proceed to prepare the
@ -126,6 +130,31 @@ class ClusterOps(object):
                     'new_host': new_host,
                     'old_host': old_host})

        # While the cluster group is in "pending" state, it may not even be
        # registered in Hyper-V, so there's not much we can do. We'll have to
        # wait for it to be handled by the Failover Cluster service.
        self._wait_for_pending_instance(instance_name)

        current_host = self._clustutils.get_vm_host(instance_name)
        instance_moved_again = current_host.upper() != new_host.upper()
        if instance_moved_again:
            LOG.warning("While processing instance %(instance)s failover to "
                        "%(host)s, it has moved to %(current_host)s.",
                        dict(host=new_host,
                             current_host=current_host,
                             instance=instance_name))
            new_host = current_host

        host_changed = old_host.upper() != new_host.upper()
        migrated_here = new_host.upper() == self._this_node.upper()
        migrated_from_here = old_host.upper() == self._this_node.upper()

        if not host_changed:
            LOG.warning("The source node is the same as the destination "
                        "node: %(host)s. The instance %(instance)s may have "
                        "bounced between hosts due to a failure.",
                        dict(host=old_host, instance=instance_name))

        if instance.task_state == task_states.MIGRATING:
            # In case of live migration triggered by the user, we get the
            # event that the instance changed host but we do not want
@ -135,11 +164,11 @@ class ClusterOps(object):
        nw_info = self._network_api.get_instance_nw_info(self._context,
                                                         instance)
        if old_host and old_host.upper() == self._this_node.upper():
            LOG.debug('Actions at source node.')
        if host_changed and migrated_from_here:
            LOG.debug('Cleaning up moved instance: %s.', instance_name)
            self._vmops.unplug_vifs(instance, nw_info)
            return
        elif new_host.upper() != self._this_node.upper():
        if not migrated_here:
            LOG.debug('Instance %s did not failover to this node.',
                      instance_name)
            return
@ -148,10 +177,26 @@ class ClusterOps(object):
                 {'instance': instance_name})
        self._nova_failover_server(instance, new_host)
        self._failover_migrate_networks(instance, old_host)
        if host_changed:
            self._failover_migrate_networks(instance, old_host)
        self._vmops.plug_vifs(instance, nw_info)
        self._serial_console_ops.start_console_handler(instance_name)

    def _wait_for_pending_instance(self, instance_name):
        # TODO(lpetrut): switch to an event listener. We'd probably want to
        # avoid having one event listener per failed over instance, as there
        # can be many of them.
        group_state = self._clustutils.get_cluster_group_state_info(
            instance_name)['state']
        while group_state == os_win_const.CLUSTER_GROUP_PENDING:
            LOG.debug("Waiting for pending instance cluster group: %s",
                      instance_name)
            time.sleep(2)

            group_state = self._clustutils.get_cluster_group_state_info(
                instance_name)['state']

    def _failover_migrate_networks(self, instance, source):
        """This is called after a VM failovered to this node.

        This will change the owner of the neutron ports to this node.


@ -21,6 +21,8 @@ from compute_hyperv.nova import driver
class HyperVClusterDriver(driver.HyperVDriver):
    use_coordination = True

    def __init__(self, virtapi):
        super(HyperVClusterDriver, self).__init__(virtapi)


@ -30,6 +30,7 @@ from os_win import utilsfactory
from oslo_log import log as logging
import six
from compute_hyperv.nova import coordination
from compute_hyperv.nova import eventhandler
from compute_hyperv.nova import hostops
from compute_hyperv.nova import imagecache
@ -108,6 +109,8 @@ class HyperVDriver(driver.ComputeDriver):
"supports_trusted_certs": True,
}
use_coordination = False
def __init__(self, virtapi):
# check if the current version of Windows is supported before any
# further driver initialisation.
@ -151,6 +154,9 @@ class HyperVDriver(driver.ComputeDriver):
        return False

    def init_host(self, host):
        if self.use_coordination:
            coordination.COORDINATOR.start()

        self._serialconsoleops.start_console_handlers()
        self._set_event_handler_callbacks()


@ -85,6 +85,7 @@ class NoDBTestCase(base.BaseTestCase):
"""
TIMEOUT_SCALING_FACTOR = 1
MOCK_TOOZ = True
def setUp(self):
"""Run before each test method to initialize test environment."""
@ -111,6 +112,11 @@ class NoDBTestCase(base.BaseTestCase):
self.useFixture(nova_fixtures.PoisonFunctions())
if self.MOCK_TOOZ:
self.patch('compute_hyperv.nova.coordination.Coordinator.start')
self.patch('compute_hyperv.nova.coordination.Coordinator.stop')
self.patch('compute_hyperv.nova.coordination.Coordinator.get_lock')
def _clear_attrs(self):
# Delete attributes that don't start with _ so they don't pin
# memory around unnecessarily for the duration of the test
@ -124,6 +130,12 @@ class NoDBTestCase(base.BaseTestCase):
for k, v in six.iteritems(kw):
CONF.set_override(k, v, group)
def patch(self, path, *args, **kwargs):
patcher = mock.patch(path, *args, **kwargs)
result = patcher.start()
self.addCleanup(patcher.stop)
return result
def assertPublicAPISignatures(self, baseinst, inst):
def get_public_apis(inst):
methods = {}


@ -21,6 +21,7 @@ from nova.compute import vm_states
from nova.network.neutronv2 import api as network_api
from nova import objects
from nova.virt import event as virtevent
from os_win import constants as os_win_const
from os_win import exceptions as os_win_exc
from compute_hyperv.nova.cluster import clusterops
@ -144,10 +145,12 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
mock_instance1.name, mock_instance1.host,
self.clusterops._this_node)
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops, 'LOG')
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_failover_migrate_no_instance(self, mock_get_instance_by_name,
mock_LOG):
mock_LOG,
mock_wait_pending_instance):
mock_get_instance_by_name.return_value = None
self.clusterops._failover_migrate(mock.sentinel.instance_name,
@ -159,35 +162,40 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
self.assertFalse(
self.clusterops._network_api.get_instance_nw_info.called)
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops, 'LOG')
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_failover_migrate_migrating(self, mock_get_instance_by_name,
mock_LOG):
mock_LOG, mock_wait_pending_instance):
instance = mock_get_instance_by_name.return_value
instance.task_state = task_states.MIGRATING
self.clusterops._failover_migrate(mock.sentinel.instance_name,
mock.sentinel.new_host)
'new_host')
mock_LOG.debug.assert_called_once_with(
'Instance %s is live migrating.', mock.sentinel.instance_name)
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_failover_migrate_at_source_node(self, mock_get_instance_by_name):
def test_failover_migrate_at_source_node(self, mock_get_instance_by_name,
mock_wait_pending_instance):
instance = mock_get_instance_by_name.return_value
instance.host = 'old_host'
self.clusterops._this_node = instance.host
self.clusterops._failover_migrate(mock.sentinel.instance_name,
mock.sentinel.new_host)
'new_host')
self.clusterops._vmops.unplug_vifs.assert_called_once_with(instance,
self.clusterops._network_api.get_instance_nw_info.return_value)
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops, 'LOG')
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_failover_migrate_not_this_node(self, mock_get_instance_by_name,
mock_LOG):
mock_LOG,
mock_wait_pending_instance):
self.clusterops._this_node = 'new_host'
self.clusterops._failover_migrate(mock.sentinel.instance_name,
@ -197,21 +205,28 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
'Instance %s did not failover to this node.',
mock.sentinel.instance_name)
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks')
@mock.patch.object(clusterops.ClusterOps, '_nova_failover_server')
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_failover_migrate_this_node(self, mock_get_instance_by_name,
mock_nova_failover_server,
mock_failover_migrate_networks):
def test_failover_migrate_changed_host(self, mock_get_instance_by_name,
mock_nova_failover_server,
mock_failover_migrate_networks,
mock_wait_pending_instance):
instance = mock_get_instance_by_name.return_value
old_host = 'old_host'
new_host = 'new_host'
instance.host = old_host
self.clusterops._this_node = new_host
self._clustutils.get_vm_host.return_value = new_host
self.clusterops._failover_migrate(mock.sentinel.instance_name,
new_host)
mock_wait_pending_instance.assert_called_once_with(
mock.sentinel.instance_name)
self._clustutils.get_vm_host.assert_called_once_with(
mock.sentinel.instance_name)
mock_get_instance_by_name.assert_called_once_with(
mock.sentinel.instance_name)
get_inst_nw_info = self.clusterops._network_api.get_instance_nw_info
@ -225,6 +240,50 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
c_handler = self.clusterops._serial_console_ops.start_console_handler
c_handler.assert_called_once_with(mock.sentinel.instance_name)
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks')
@mock.patch.object(clusterops.ClusterOps, '_nova_failover_server')
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_failover_same_node(self, mock_get_instance_by_name,
mock_nova_failover_server,
mock_failover_migrate_networks,
mock_wait_pending_instance):
# In some cases, the instances may bounce between hosts. We're testing
# the case in which the instance is actually returning to the initial
# host during the time in which we're processing events.
instance = mock_get_instance_by_name.return_value
old_host = 'old_host'
new_host = 'new_host'
instance.host = old_host
self.clusterops._this_node = old_host
self._clustutils.get_vm_host.return_value = old_host
self.clusterops._failover_migrate(mock.sentinel.instance_name,
new_host)
get_inst_nw_info = self.clusterops._network_api.get_instance_nw_info
get_inst_nw_info.assert_called_once_with(self.clusterops._context,
instance)
mock_nova_failover_server.assert_called_once_with(instance, old_host)
self.clusterops._vmops.unplug_vifs.assert_not_called()
self.clusterops._vmops.plug_vifs.assert_called_once_with(
instance, get_inst_nw_info.return_value)
mock_failover_migrate_networks.assert_not_called()
c_handler = self.clusterops._serial_console_ops.start_console_handler
c_handler.assert_called_once_with(mock.sentinel.instance_name)
@mock.patch('time.sleep')
def test_wait_for_pending_instance(self, mock_sleep):
self._clustutils.get_cluster_group_state_info.side_effect = [
dict(state=os_win_const.CLUSTER_GROUP_PENDING),
dict(state=os_win_const.CLUSTER_GROUP_ONLINE)]
self.clusterops._wait_for_pending_instance(mock.sentinel.instance_name)
self._clustutils.get_cluster_group_state_info.assert_has_calls(
[mock.call(mock.sentinel.instance_name)] * 2)
mock_sleep.assert_called_once_with(2)
def test_failover_migrate_networks(self):
mock_instance = fake_instance.fake_instance_obj(self.context)
fake_source = mock.MagicMock()


@ -50,6 +50,8 @@ class MockToozLock(tooz.locking.Lock):
@mock.patch('tooz.coordination.get_coordinator')
class CoordinatorTestCase(test_base.HyperVBaseTestCase):
MOCK_TOOZ = False
def test_coordinator_start(self, get_coordinator):
crd = get_coordinator.return_value


@ -0,0 +1,11 @@
---
upgrade:
  - |
    When using the cluster driver, a distributed lock backend will have to be
    configured.
fixes:
  - |
    In order to fix race conditions that can occur when handling instance
    failovers, the cluster driver is now using distributed locks. A
    distributed lock backend (e.g. etcd, mysql, file based, etc) will have to
    be configured.