Get instances from conductor in init_host.

Update compute's init_host() to get the list of instances on this host
via the conductor service.  This removes a db read from compute.

Most of the test fixes ensure that conductor is running anywhere that
compute is running.  The EC2 availability zones test change (from 13 to
15) is because that output includes information on each service that is
running, so starting nova-conductor in this test suite adds to it.

Note that this uses the previously-added ping() call in conductor to
determine when the service is available. The compute manager pings the
conductor with a ten-second timeout for up to ten attempts and then
falls back to the configured default RPC timeout. This should be a
reasonable compromise between requiring strict service startup ordering
and extreme delays.
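
The retry loop added below boils down to the following pattern (a
minimal, self-contained sketch; wait_for_service, ping, and timeout_exc
are illustrative names, not part of this change):

    def wait_for_service(ping, timeout_exc, short_timeout=10,
                         short_attempts=10):
        # Make a few attempts with a short timeout, then keep retrying
        # with timeout=None so the configured RPC default applies.
        attempt = 0
        while True:
            timeout = short_timeout if attempt < short_attempts else None
            attempt += 1
            try:
                ping(timeout=timeout)
                return
            except timeout_exc:
                # Swallow the timeout and retry; letting it propagate
                # during init_host() would abort service startup.
                pass

In the manager below, ping is conductor_api.ping(context,
'1.21 GigaWatts', timeout=timeout) and timeout_exc is
rpc_common.Timeout.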

Part of blueprint no-db-compute.

Change-Id: Ie2953f7ae79819a1b6e24e8997ed4332fd4d2356
Author: Russell Bryant, 2012-11-29 22:03:34 -05:00 (committed by Dan Smith)
Parent: e1c7b18c7f
Commit: 60965a50bc
8 changed files with 64 additions and 7 deletions


@@ -66,6 +66,7 @@ from nova.openstack.common import lockutils
from nova.openstack.common import log as logging
from nova.openstack.common.notifier import api as notifier
from nova.openstack.common import rpc
from nova.openstack.common.rpc import common as rpc_common
from nova.openstack.common import timeutils
from nova import quota
from nova.scheduler import rpcapi as scheduler_rpcapi
@@ -359,6 +360,32 @@ class ComputeManager(manager.SchedulerDependentManager):
                        'trying to set it to ERROR'),
                      instance_uuid=instance_uuid)

    def _get_instances_at_startup(self, context):
        '''Get instances for this host during service init.'''
        attempt = 0
        timeout = 10
        while True:
            # NOTE(danms): Try ten times with a short timeout, and then punt
            # to the configured RPC timeout after that
            if attempt == 10:
                timeout = None
            attempt += 1
            # NOTE(russellb): This is running during service startup. If we
            # allow an exception to be raised, the service will shut down.
            # This may fail the first time around if nova-conductor wasn't
            # running when nova-compute started.
            try:
                self.conductor_api.ping(context, '1.21 GigaWatts',
                                        timeout=timeout)
                break
            except rpc_common.Timeout as e:
                LOG.exception(_('Timed out waiting for nova-conductor. '
                                'Is it running? Or did nova-compute start '
                                'before nova-conductor?'))
        return self.conductor_api.instance_get_all_by_host(context, self.host)

    def _init_instance(self, context, instance):
        '''Initialize this instance during service init.'''
        db_state = instance['power_state']
@@ -417,10 +444,7 @@ class ComputeManager(manager.SchedulerDependentManager):
        """Initialization for a standalone compute service."""
        self.driver.init_host(host=self.host)
        context = nova.context.get_admin_context()
        # NOTE(danms): this requires some care since conductor
        # may not be up and fielding requests by the time compute is
        instances = self.db.instance_get_all_by_host(context, self.host)
        instances = self._get_instances_at_startup(context)
        if CONF.defer_iptables_apply:
            self.driver.filter_defer_apply_on()


@@ -124,6 +124,8 @@ class CinderCloudTestCase(test.TestCase):
        self.flags(use_local=True, group='conductor')
        # set up services
        self.conductor = self.start_service('conductor',
            manager=CONF.conductor.manager)
        self.compute = self.start_service('compute')
        self.scheduler = self.start_service('scheduler')
        self.network = self.start_service('network')


@@ -139,6 +139,8 @@ class CloudTestCase(test.TestCase):
        self.flags(use_local=True, group='conductor')
        # set up services
        self.conductor = self.start_service('conductor',
            manager=CONF.conductor.manager)
        self.compute = self.start_service('compute')
        self.scheduler = self.start_service('scheduler')
        self.network = self.start_service('network')
@@ -730,7 +732,7 @@ class CloudTestCase(test.TestCase):
        result = self.cloud.describe_availability_zones(admin_ctxt,
                                                        zone_name='verbose')
        self.assertEqual(len(result['availabilityZoneInfo']), 13)
        self.assertEqual(len(result['availabilityZoneInfo']), 15)
        db.service_destroy(self.context, service1['id'])
        db.service_destroy(self.context, service2['id'])


@@ -52,6 +52,8 @@ class EC2ValidateTestCase(test.TestCase):
        self.cloud = cloud.CloudController()
        # set up services
        self.conductor = self.start_service('conductor',
            manager=CONF.conductor.manager)
        self.compute = self.start_service('compute')
        self.scheduter = self.start_service('scheduler')
        self.network = self.start_service('network')


@@ -3095,6 +3095,21 @@ class ComputeTestCase(BaseTestCase):
        instance = self._create_fake_instance(params)
        self.compute._instance_update(self.context, instance['uuid'])

    def test_startup_conductor_ping(self):
        timeouts = []
        calls = dict(count=0)

        def fake_ping(context, message, timeout):
            timeouts.append(timeout)
            calls['count'] += 1
            if calls['count'] < 15:
                raise rpc_common.Timeout("fake")

        self.stubs.Set(self.compute.conductor_api, 'ping', fake_ping)
        self.compute._get_instances_at_startup(self.context)
        self.assertEqual(timeouts.count(10), 10)
        self.assertTrue(None in timeouts)
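
The fake ping above raises rpc_common.Timeout on the first fourteen
calls, so _get_instances_at_startup should make ten attempts with the
short ten-second timeout (timeouts.count(10) == 10) and then fall back
to timeout=None, the configured RPC default, until the fifteenth call
succeeds (hence None in timeouts).
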
class ComputeAPITestCase(BaseTestCase):


@@ -24,6 +24,7 @@ import string
import uuid
import nova.image.glance
from nova.openstack.common import cfg
from nova.openstack.common import log as logging
from nova import service
from nova import test # For the flags
@@ -32,6 +33,7 @@ import nova.tests.image.fake
from nova.tests.integrated.api import client

CONF = cfg.CONF
LOG = logging.getLogger(__name__)
@@ -73,12 +75,12 @@ class _IntegratedTestBase(test.TestCase):
                   'chance.ChanceScheduler')
        # set up services
        self.conductor = self.start_service('conductor',
            manager=CONF.conductor.manager)
        self.compute = self.start_service('compute')
        self.scheduler = self.start_service('cert')
        self.network = self.start_service('network')
        self.scheduler = self.start_service('scheduler')
        self.conductor = self.start_service(
            'conductor', manager='nova.conductor.manager.ConductorManager')
        self._start_api_service()


@@ -27,6 +27,7 @@ from nova import exception
from nova import ipv6
from nova.network import linux_net
from nova.network import manager as network_manager
from nova.openstack.common import cfg
from nova.openstack.common import importutils
from nova.openstack.common import log as logging
from nova.openstack.common import rpc
@@ -39,6 +40,7 @@ from nova.tests import matchers
from nova import utils

CONF = cfg.CONF
LOG = logging.getLogger(__name__)
@@ -1585,6 +1587,8 @@ class AllocateTestCase(test.TestCase):
    def test_allocate_for_instance(self):
        address = "10.10.10.10"
        self.flags(auto_assign_floating_ip=True)
        self.conductor = self.start_service(
            'conductor', manager=CONF.conductor.manager)
        self.compute = self.start_service('compute')
        self.network = self.start_service('network')


@@ -18,10 +18,15 @@
"""Tests for the testing base code."""

from nova.openstack.common import cfg
from nova.openstack.common import rpc
from nova import test

CONF = cfg.CONF
CONF.import_opt('use_local', 'nova.conductor.api', group='conductor')


class IsolationTestCase(test.TestCase):
    """Ensure that things are cleaned up after failed tests.
@@ -30,6 +35,7 @@ class IsolationTestCase(test.TestCase):
    """

    def test_service_isolation(self):
        self.flags(use_local=True, group='conductor')
        self.useFixture(test.ServiceFixture('compute'))

    def test_rpc_consumer_isolation(self):