diff --git a/nova/tests/virt/xenapi/test_vm_utils.py b/nova/tests/virt/xenapi/test_vm_utils.py index 6006b695d795..84e494bede04 100644 --- a/nova/tests/virt/xenapi/test_vm_utils.py +++ b/nova/tests/virt/xenapi/test_vm_utils.py @@ -18,7 +18,9 @@ import contextlib import uuid +from eventlet import greenthread import fixtures +import mock import mox from oslo.config import cfg @@ -1280,3 +1282,67 @@ class CreateKernelRamdiskTestCase(test.NoDBTestCase): result = vm_utils.create_kernel_and_ramdisk(self.context, self.session, self.instance, self.name_label) self.assertEqual(("k", None), result) + + +class ScanSrTestCase(test.NoDBTestCase): + @mock.patch.object(vm_utils, "_scan_sr") + @mock.patch.object(vm_utils, "safe_find_sr") + def test_scan_default_sr(self, mock_safe_find_sr, mock_scan_sr): + mock_safe_find_sr.return_value = "sr_ref" + + self.assertEqual("sr_ref", vm_utils.scan_default_sr("fake_session")) + + mock_scan_sr.assert_called_once_with("fake_session", "sr_ref") + + def test_scan_sr_works(self): + session = mock.Mock() + vm_utils._scan_sr(session, "sr_ref") + session.call_xenapi.assert_called_once_with('SR.scan', "sr_ref") + + def test_scan_sr_unknown_error_fails_once(self): + session = mock.Mock() + session.call_xenapi.side_effect = test.TestingException + self.assertRaises(test.TestingException, + vm_utils._scan_sr, session, "sr_ref") + session.call_xenapi.assert_called_once_with('SR.scan', "sr_ref") + + @mock.patch.object(greenthread, 'sleep') + def test_scan_sr_known_error_retries_then_throws(self, mock_sleep): + session = mock.Mock() + + class FakeException(Exception): + details = ['SR_BACKEND_FAILURE_40', "", "", ""] + + session.XenAPI.Failure = FakeException + session.call_xenapi.side_effect = FakeException + + self.assertRaises(FakeException, + vm_utils._scan_sr, session, "sr_ref") + + session.call_xenapi.assert_called_with('SR.scan', "sr_ref") + self.assertEqual(4, session.call_xenapi.call_count) + mock_sleep.assert_has_calls([mock.call(2), mock.call(4), mock.call(8)]) + + @mock.patch.object(greenthread, 'sleep') + def test_scan_sr_known_error_retries_then_succeeds(self, mock_sleep): + session = mock.Mock() + + class FakeException(Exception): + details = ['SR_BACKEND_FAILURE_40', "", "", ""] + + session.XenAPI.Failure = FakeException + sr_scan_call_count = 0 + + def fake_call_xenapi(*args): + fake_call_xenapi.count += 1 + if fake_call_xenapi.count != 2: + raise FakeException() + + fake_call_xenapi.count = 0 + session.call_xenapi.side_effect = fake_call_xenapi + + vm_utils._scan_sr(session, "sr_ref") + + session.call_xenapi.assert_called_with('SR.scan', "sr_ref") + self.assertEqual(2, session.call_xenapi.call_count) + mock_sleep.assert_called_once_with(2) diff --git a/nova/tests/virt/xenapi/test_xenapi.py b/nova/tests/virt/xenapi/test_xenapi.py index 534e3175ae73..160fb22ef80b 100644 --- a/nova/tests/virt/xenapi/test_xenapi.py +++ b/nova/tests/virt/xenapi/test_xenapi.py @@ -2163,7 +2163,7 @@ class XenAPIHostTestCase(stubs.XenAPITestBase): def test_update_stats_caches_hostname(self): self.mox.StubOutWithMock(host, 'call_xenhost') - self.mox.StubOutWithMock(vm_utils, 'safe_find_sr') + self.mox.StubOutWithMock(vm_utils, 'scan_default_sr') self.mox.StubOutWithMock(self.conn._session, 'call_xenapi') data = {'disk_total': 0, 'disk_used': 0, @@ -2179,9 +2179,8 @@ class XenAPIHostTestCase(stubs.XenAPITestBase): for i in range(3): host.call_xenhost(mox.IgnoreArg(), 'host_data', {}).AndReturn(data) - vm_utils.safe_find_sr(self.conn._session).AndReturn(None) - self.conn._session.call_xenapi('SR.scan', None) - self.conn._session.call_xenapi('SR.get_record', None).AndReturn( + vm_utils.scan_default_sr(self.conn._session).AndReturn("ref") + self.conn._session.call_xenapi('SR.get_record', "ref").AndReturn( sr_rec) if i == 2: # On the third call (the second below) change the hostname diff --git a/nova/virt/xenapi/host.py b/nova/virt/xenapi/host.py index 904911d3b32c..53156729b370 100644 --- a/nova/virt/xenapi/host.py +++ b/nova/virt/xenapi/host.py @@ -167,8 +167,7 @@ class HostState(object): LOG.debug(_("Updating host stats")) data = call_xenhost(self._session, "host_data", {}) if data: - sr_ref = vm_utils.safe_find_sr(self._session) - self._session.call_xenapi("SR.scan", sr_ref) + sr_ref = vm_utils.scan_default_sr(self._session) sr_rec = self._session.call_xenapi("SR.get_record", sr_ref) total = int(sr_rec["physical_size"]) used = int(sr_rec["physical_utilisation"]) diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py index 248f3cfa066f..29b48909e8c9 100644 --- a/nova/virt/xenapi/vm_utils.py +++ b/nova/virt/xenapi/vm_utils.py @@ -487,7 +487,7 @@ def get_vdi_uuid_for_volume(session, connection_data): vdi_uuid = None if 'vdi_uuid' in connection_data: - session.call_xenapi("SR.scan", sr_ref) + _scan_sr(session, sr_ref) vdi_uuid = connection_data['vdi_uuid'] else: try: @@ -586,9 +586,7 @@ def _safe_copy_vdi(session, sr_ref, instance, vdi_to_copy_ref): root_uuid = imported_vhds['root']['uuid'] - # TODO(sirp): for safety, we should probably re-scan the SR after every - # call to a dom0 plugin, since there is a possibility that the underlying - # VHDs changed + # rescan to discover new VHDs scan_default_sr(session) vdi_ref = session.call_xenapi('VDI.get_by_uuid', root_uuid) return vdi_ref @@ -1258,8 +1256,8 @@ def _fetch_vhd_image(context, session, instance, image_id): vdis = default_handler.download_image( context, session, instance, image_id) - sr_ref = safe_find_sr(session) - _scan_sr(session, sr_ref) + # Ensure we can see the import VHDs as VDIs + scan_default_sr(session) try: _check_vdi_size(context, session, instance, vdis['root']['uuid']) @@ -1628,16 +1626,39 @@ def fetch_bandwidth(session): return bw -def _scan_sr(session, sr_ref=None): - """Scans the SR specified by sr_ref.""" +def _scan_sr(session, sr_ref=None, max_attempts=4): if sr_ref: - LOG.debug(_("Re-scanning SR %s"), sr_ref) - session.call_xenapi('SR.scan', sr_ref) + # NOTE(johngarbutt) xenapi will collapse any duplicate requests + # for SR.scan if there is already a scan in progress. + # However, we don't want that, because the scan may have started + # before we modified the underlying VHDs on disk through a plugin. + # Using our own mutex will reduce cases where our periodic SR scan + # in host.update_status starts racing the sr.scan after a plugin call. + @utils.synchronized('sr-scan-' + sr_ref) + def do_scan(sr_ref): + LOG.debug(_("Scanning SR %s"), sr_ref) + + attempt = 1 + while True: + try: + return session.call_xenapi('SR.scan', sr_ref) + except session.XenAPI.Failure as exc: + with excutils.save_and_reraise_exception() as ctxt: + if exc.details[0] == 'SR_BACKEND_FAILURE_40': + if attempt < max_attempts: + ctxt.reraise = False + LOG.warn(_("Retry SR scan due to error: %s") + % exc) + greenthread.sleep(2 ** attempt) + attempt += 1 + do_scan(sr_ref) def scan_default_sr(session): """Looks for the system default SR and triggers a re-scan.""" - _scan_sr(session, _find_sr(session)) + sr_ref = safe_find_sr(session) + _scan_sr(session, sr_ref) + return sr_ref def safe_find_sr(session): @@ -1860,12 +1881,10 @@ def _wait_for_vhd_coalesce(session, instance, sr_ref, vdi_ref, base_uuid = _get_vhd_parent_uuid(session, parent_ref) return parent_uuid, base_uuid - # NOTE(sirp): This rescan is necessary to ensure the VM's `sm_config` - # matches the underlying VHDs. - _scan_sr(session, sr_ref) - max_attempts = CONF.xenapi_vhd_coalesce_max_attempts for i in xrange(max_attempts): + # NOTE(sirp): This rescan is necessary to ensure the VM's `sm_config` + # matches the underlying VHDs. _scan_sr(session, sr_ref) parent_uuid = _get_vhd_parent_uuid(session, vdi_ref) if parent_uuid and (parent_uuid != original_parent_uuid):