Get instances from conductor in init_host.
Update compute's init_host() to get the list of instances on this host via the conductor service. This removes a db read from compute. Most of the test fixes are to ensure conductor is running anywhere that compute is running. The EC2 availability zones test change (from 13 to 15) is because this output includes info on each service that's running, so running nova-conductor in this test suite added more info here. Note that this uses the previously-added ping() call in conductor to determine when the service is available. The compute manager pings the conductor every ten seconds for ten attempts and then falls back to the default configured RPC timeout. This should be a reasonable compromise between requiring strict service startup ordering and extreme delays. Part of blueprint no-db-compute. Change-Id: Ie2953f7ae79819a1b6e24e8997ed4332fd4d2356
This commit is contained in:
parent
e1c7b18c7f
commit
60965a50bc
|
@ -66,6 +66,7 @@ from nova.openstack.common import lockutils
|
|||
from nova.openstack.common import log as logging
|
||||
from nova.openstack.common.notifier import api as notifier
|
||||
from nova.openstack.common import rpc
|
||||
from nova.openstack.common.rpc import common as rpc_common
|
||||
from nova.openstack.common import timeutils
|
||||
from nova import quota
|
||||
from nova.scheduler import rpcapi as scheduler_rpcapi
|
||||
|
@ -359,6 +360,32 @@ class ComputeManager(manager.SchedulerDependentManager):
|
|||
'trying to set it to ERROR'),
|
||||
instance_uuid=instance_uuid)
|
||||
|
||||
def _get_instances_at_startup(self, context):
    """Return all instances on this host, waiting for nova-conductor.

    Runs during service init.  nova-conductor may not be up and
    fielding requests yet when nova-compute starts, so ping it first:
    ten attempts with a short 10-second timeout, then fall back to the
    default configured RPC timeout (timeout=None).  Timeouts are
    logged and retried rather than raised, because an unhandled
    exception here would shut the service down.

    :param context: request context used for the conductor RPC calls
    :returns: list of instances for self.host from the conductor
    """
    attempt = 0
    timeout = 10
    while True:
        # NOTE(danms): Try ten times with a short timeout, and then punt
        # to the configured RPC timeout after that
        if attempt == 10:
            timeout = None
        attempt += 1

        # NOTE(russellb): This is running during service startup. If we
        # allow an exception to be raised, the service will shut down.
        # This may fail the first time around if nova-conductor wasn't
        # running when nova-compute started.
        try:
            self.conductor_api.ping(context, '1.21 GigaWatts',
                                    timeout=timeout)
            break
        except rpc_common.Timeout:
            # The exception variable was unused; LOG.exception already
            # records the active exception and its traceback.
            LOG.exception(_('Timed out waiting for nova-conductor. '
                            'Is it running? Or did nova-compute start '
                            'before nova-conductor?'))

    return self.conductor_api.instance_get_all_by_host(context, self.host)
|
||||
|
||||
def _init_instance(self, context, instance):
|
||||
'''Initialize this instance during service init.'''
|
||||
db_state = instance['power_state']
|
||||
|
@ -417,10 +444,7 @@ class ComputeManager(manager.SchedulerDependentManager):
|
|||
"""Initialization for a standalone compute service."""
|
||||
self.driver.init_host(host=self.host)
|
||||
context = nova.context.get_admin_context()
|
||||
|
||||
# NOTE(danms): this requires some care since conductor
|
||||
# may not be up and fielding requests by the time compute is
|
||||
instances = self.db.instance_get_all_by_host(context, self.host)
|
||||
instances = self._get_instances_at_startup(context)
|
||||
|
||||
if CONF.defer_iptables_apply:
|
||||
self.driver.filter_defer_apply_on()
|
||||
|
|
|
@ -124,6 +124,8 @@ class CinderCloudTestCase(test.TestCase):
|
|||
self.flags(use_local=True, group='conductor')
|
||||
|
||||
# set up services
|
||||
self.conductor = self.start_service('conductor',
|
||||
manager=CONF.conductor.manager)
|
||||
self.compute = self.start_service('compute')
|
||||
self.scheduler = self.start_service('scheduler')
|
||||
self.network = self.start_service('network')
|
||||
|
|
|
@ -139,6 +139,8 @@ class CloudTestCase(test.TestCase):
|
|||
self.flags(use_local=True, group='conductor')
|
||||
|
||||
# set up services
|
||||
self.conductor = self.start_service('conductor',
|
||||
manager=CONF.conductor.manager)
|
||||
self.compute = self.start_service('compute')
|
||||
self.scheduler = self.start_service('scheduler')
|
||||
self.network = self.start_service('network')
|
||||
|
@ -730,7 +732,7 @@ class CloudTestCase(test.TestCase):
|
|||
result = self.cloud.describe_availability_zones(admin_ctxt,
|
||||
zone_name='verbose')
|
||||
|
||||
self.assertEqual(len(result['availabilityZoneInfo']), 13)
|
||||
self.assertEqual(len(result['availabilityZoneInfo']), 15)
|
||||
db.service_destroy(self.context, service1['id'])
|
||||
db.service_destroy(self.context, service2['id'])
|
||||
|
||||
|
|
|
@ -52,6 +52,8 @@ class EC2ValidateTestCase(test.TestCase):
|
|||
self.cloud = cloud.CloudController()
|
||||
|
||||
# set up services
|
||||
self.conductor = self.start_service('conductor',
|
||||
manager=CONF.conductor.manager)
|
||||
self.compute = self.start_service('compute')
|
||||
self.scheduter = self.start_service('scheduler')
|
||||
self.network = self.start_service('network')
|
||||
|
|
|
@ -3095,6 +3095,21 @@ class ComputeTestCase(BaseTestCase):
|
|||
instance = self._create_fake_instance(params)
|
||||
self.compute._instance_update(self.context, instance['uuid'])
|
||||
|
||||
def test_startup_conductor_ping(self):
    """Startup ping retries: ten short (10s) timeouts are tried, then
    the default RPC timeout (None) is used until conductor answers."""
    seen_timeouts = []
    attempts = [0]

    def fake_ping(context, message, timeout):
        # Record each timeout value used and fail the first 14 calls
        # so the retry loop has to fall back past the short timeouts.
        seen_timeouts.append(timeout)
        attempts[0] += 1
        if attempts[0] < 15:
            raise rpc_common.Timeout("fake")

    self.stubs.Set(self.compute.conductor_api, 'ping', fake_ping)
    self.compute._get_instances_at_startup(self.context)
    self.assertEqual(seen_timeouts.count(10), 10)
    self.assertTrue(None in seen_timeouts)
|
||||
|
||||
|
||||
class ComputeAPITestCase(BaseTestCase):
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import string
|
|||
import uuid
|
||||
|
||||
import nova.image.glance
|
||||
from nova.openstack.common import cfg
|
||||
from nova.openstack.common.log import logging
|
||||
from nova import service
|
||||
from nova import test # For the flags
|
||||
|
@ -32,6 +33,7 @@ import nova.tests.image.fake
|
|||
from nova.tests.integrated.api import client
|
||||
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -73,12 +75,12 @@ class _IntegratedTestBase(test.TestCase):
|
|||
'chance.ChanceScheduler')
|
||||
|
||||
# set up services
|
||||
self.conductor = self.start_service('conductor',
|
||||
manager=CONF.conductor.manager)
|
||||
self.compute = self.start_service('compute')
|
||||
self.scheduler = self.start_service('cert')
|
||||
self.network = self.start_service('network')
|
||||
self.scheduler = self.start_service('scheduler')
|
||||
self.conductor = self.start_service(
|
||||
'conductor', manager='nova.conductor.manager.ConductorManager')
|
||||
|
||||
self._start_api_service()
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ from nova import exception
|
|||
from nova import ipv6
|
||||
from nova.network import linux_net
|
||||
from nova.network import manager as network_manager
|
||||
from nova.openstack.common import cfg
|
||||
from nova.openstack.common import importutils
|
||||
from nova.openstack.common import log as logging
|
||||
from nova.openstack.common import rpc
|
||||
|
@ -39,6 +40,7 @@ from nova.tests import matchers
|
|||
from nova import utils
|
||||
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -1585,6 +1587,8 @@ class AllocateTestCase(test.TestCase):
|
|||
def test_allocate_for_instance(self):
|
||||
address = "10.10.10.10"
|
||||
self.flags(auto_assign_floating_ip=True)
|
||||
self.conductor = self.start_service(
|
||||
'conductor', manager=CONF.conductor.manager)
|
||||
self.compute = self.start_service('compute')
|
||||
self.network = self.start_service('network')
|
||||
|
||||
|
|
|
@ -18,10 +18,15 @@
|
|||
|
||||
"""Tests for the testing base code."""
|
||||
|
||||
from nova.openstack.common import cfg
|
||||
from nova.openstack.common import rpc
|
||||
from nova import test
|
||||
|
||||
|
||||
CONF = cfg.CONF
|
||||
CONF.import_opt('use_local', 'nova.conductor.api', group='conductor')
|
||||
|
||||
|
||||
class IsolationTestCase(test.TestCase):
|
||||
"""Ensure that things are cleaned up after failed tests.
|
||||
|
||||
|
@ -30,6 +35,7 @@ class IsolationTestCase(test.TestCase):
|
|||
|
||||
"""
|
||||
def test_service_isolation(self):
|
||||
self.flags(use_local=True, group='conductor')
|
||||
self.useFixture(test.ServiceFixture('compute'))
|
||||
|
||||
def test_rpc_consumer_isolation(self):
|
||||
|
|
Loading…
Reference in New Issue