Add service version check workaround for FFU
We recently added a hard failure to nova service startup for the case where computes were more than one version old (as indicated by their service record). This helps to prevent starting up new control services when a very old compute is still running.

However, during an FFU, control services that have skipped multiple versions will be started and find the older compute records (which could not be updated yet due to their reliance on the control services being up) and refuse to start. This creates a cross-dependency which is not resolvable without hacking the database.

This patch adds a workaround flag to allow turning that hard fail into a warning to proceed past the issue. This less-than-ideal solution is simple and backportable, but perhaps a better solution can be implemented for the future.

Related-Bug: #1958883
Change-Id: Iddbc9b2a13f19cea9a996aeadfe891f4ef3b0264
This commit is contained in:
parent
52b974acb7
commit
7d2e481589
@ -47,7 +47,13 @@ def _get_config_files(env=None):
|
||||
|
||||
|
||||
def _setup_service(host, name):
|
||||
utils.raise_if_old_compute()
|
||||
try:
|
||||
utils.raise_if_old_compute()
|
||||
except exception.TooOldComputeService as e:
|
||||
if CONF.workarounds.disable_compute_service_check_for_ffu:
|
||||
LOG.warning(str(e))
|
||||
else:
|
||||
raise
|
||||
|
||||
binary = name if name.startswith('nova-') else "nova-%s" % name
|
||||
|
||||
|
@ -369,6 +369,16 @@ to update network switches in the post live migration phase on the destination.
|
||||
Related options:
|
||||
|
||||
* :oslo.config:option:`DEFAULT.compute_driver` (libvirt)
|
||||
"""),
|
||||
cfg.BoolOpt('disable_compute_service_check_for_ffu',
|
||||
default=False,
|
||||
help="""
|
||||
If this is set, the normal safety check for old compute services will be
|
||||
treated as a warning instead of an error. This is only to be enabled to
|
||||
facilitate a Fast-Forward upgrade where new control services are being started
|
||||
before compute nodes have been able to update their service record. In an FFU,
|
||||
the service records in the database will be more than one version old until
|
||||
the compute nodes start up, but control services need to be online first.
|
||||
"""),
|
||||
]
|
||||
|
||||
|
@ -261,7 +261,13 @@ class Service(service.Service):
|
||||
# up before it allows the service to be created. The
|
||||
# raise_if_old_compute() depends on the RPC to be up and does not
|
||||
# implement its own retry mechanism to connect to the conductor.
|
||||
utils.raise_if_old_compute()
|
||||
try:
|
||||
utils.raise_if_old_compute()
|
||||
except exception.TooOldComputeService as e:
|
||||
if CONF.workarounds.disable_compute_service_check_for_ffu:
|
||||
LOG.warning(str(e))
|
||||
else:
|
||||
raise
|
||||
|
||||
return service_obj
|
||||
|
||||
|
@ -18,6 +18,7 @@ from oslo_config import fixture as config_fixture
|
||||
from oslotest import base
|
||||
|
||||
from nova.api.openstack import wsgi_app
|
||||
from nova import exception
|
||||
from nova import test
|
||||
from nova.tests import fixtures as nova_fixtures
|
||||
|
||||
@ -87,3 +88,19 @@ document_root = /tmp
|
||||
wsgi_app.init_application('nova-api')
|
||||
self.assertIn('Global data already initialized, not re-initializing.',
|
||||
self.stdlog.logger.output)
|
||||
|
||||
@mock.patch('nova.objects.Service.get_by_host_and_binary')
@mock.patch('nova.utils.raise_if_old_compute')
def test_setup_service_version_workaround(self, mock_check_old, mock_get):
    """Check the FFU workaround flag around the old-compute check.

    While the flag has not been overridden, TooOldComputeService raised by
    the version check must escape _setup_service(); once the flag is
    overridden to True, the same call must return without raising.
    """
    # Make every invocation of the (mocked) version check fail.
    too_old = exception.TooOldComputeService(
        oldest_supported_version='2',
        scope='scope',
        min_service_level=2,
        oldest_supported_service=1)
    mock_check_old.side_effect = too_old

    # First pass: the workaround has not been turned on, so the error
    # is expected to propagate to the caller.
    self.assertRaises(
        exception.TooOldComputeService,
        wsgi_app._setup_service, 'myhost', 'api')

    # Second pass: flip the workaround on and repeat the identical call,
    # which must now complete without raising.
    wsgi_app.CONF.set_override(
        'disable_compute_service_check_for_ffu', True, group='workarounds')
    wsgi_app._setup_service('myhost', 'api')
|
||||
|
@ -287,6 +287,29 @@ class ServiceTestCase(test.NoDBTestCase):
|
||||
mock_check_old.assert_called_once_with()
|
||||
mock_wait.assert_called_once_with(mock.ANY)
|
||||
|
||||
@mock.patch('nova.utils.raise_if_old_compute')
def test_old_compute_version_check_workaround(self, mock_check_old):
    """Check the FFU workaround flag in Service.create().

    The mocked version check always raises TooOldComputeService. Before
    the flag is overridden the error must escape Service.create(); after
    overriding it to True the same call must succeed. The check itself is
    expected to have been invoked on both attempts.
    """
    # Make every invocation of the (mocked) version check fail.
    too_old = exception.TooOldComputeService(
        oldest_supported_version='2',
        scope='scope',
        min_service_level=2,
        oldest_supported_service=1)
    mock_check_old.side_effect = too_old

    # Both attempts create the very same service, so share the arguments.
    create_args = (
        self.host,
        'nova-conductor',
        self.topic,
        'nova.tests.unit.test_service.FakeManager',
    )

    # First attempt: workaround not enabled, the error must propagate.
    self.assertRaises(
        exception.TooOldComputeService,
        service.Service.create, *create_args)

    # Second attempt: workaround enabled, creation must not raise.
    CONF.set_override(
        'disable_compute_service_check_for_ffu', True, group='workarounds')
    service.Service.create(*create_args)

    # One check per attempt, each called without arguments.
    mock_check_old.assert_has_calls([mock.call(), mock.call()])
|
||||
|
||||
|
||||
class TestWSGIService(test.NoDBTestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user