Added an instance state update notification

Added an instance update notification (compute.instance.update) that
reports changes to vm_state and task_state. The goal is to provide
useful insight into instance state transitions (e.g. BUILDING -> ACTIVE).

The new notification has minimal dependencies and is intended for
wide use across the different layers/packages within nova.  Calls
in compute api/manager, scheduler, and the virt layer that modify
the instance state have been instrumented with this notification.
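
The calling pattern used throughout this change looks roughly like the
sketch below (context, instance_uuid, and the updates dict are assumed
to be supplied by the caller); the scheduler hunks further down follow
this same pattern:

    from nova.compute import vm_states
    from nova import db
    from nova import notifications

    # Apply the DB update and get back both the old and the new instance
    # records, then hand them to the notifications module, which decides
    # whether to emit compute.instance.update based on the
    # notify_on_state_change flag.
    updates = {"vm_state": vm_states.ACTIVE, "task_state": None}
    (old_ref, new_ref) = db.instance_update_and_get_original(
            context, instance_uuid, updates)
    notifications.send_update(context, old_ref, new_ref)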

Change-Id: I223eb7eccc8aa079b782f6bb17727cd0b71d18ed
Brian Elliott
2012-05-13 21:06:29 +00:00
parent 59fc07c8c9
commit 4555dda7ae
5 changed files with 295 additions and 19 deletions

nova/notifications.py (new file, 242 lines)

@@ -0,0 +1,242 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright (c) 2012 OpenStack, LLC.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Functionality related to notifications common to multiple layers of
the system.
"""
import nova.context
from nova import db
from nova import exception
from nova import flags
from nova import log
from nova import network
from nova.network import model as network_model
from nova.notifier import api as notifier_api
from nova.openstack.common import cfg
from nova import utils

LOG = log.getLogger(__name__)

notify_state_opt = cfg.StrOpt('notify_on_state_change', default=None,
        help='If set, send compute.instance.update notifications on instance '
             'state changes. Valid values are None for no notifications, '
             '"vm_state" for notifications on VM state changes, or '
             '"vm_and_task_state" for notifications on VM and task state '
             'changes.')

FLAGS = flags.FLAGS
FLAGS.register_opt(notify_state_opt)


def send_update(context, old_instance, new_instance, host=None):
    """Send compute.instance.update notification to report changes
    in vm state and (optionally) task state
    """

    send_update_with_states(context, new_instance, old_instance["vm_state"],
            new_instance["vm_state"], old_instance["task_state"],
            new_instance["task_state"], host)


def send_update_with_states(context, instance, old_vm_state, new_vm_state,
        old_task_state, new_task_state, host=None):
    """Send compute.instance.update notification to report changes
    in vm state and (optionally) task state
    """

    if not FLAGS.notify_on_state_change:
        # skip all this if state updates are disabled
        return

    fire_update = False

    if old_vm_state != new_vm_state:
        # yes, the vm state is changing:
        fire_update = True
    elif FLAGS.notify_on_state_change.lower() == "vm_and_task_state" and \
            old_task_state != new_task_state:
        # yes, the task state is changing:
        fire_update = True

    if fire_update:
        try:
            _send_instance_update_notification(context, instance, old_vm_state,
                    old_task_state, new_vm_state, new_task_state, host)
        except Exception:
            LOG.exception(_("Failed to send state update notification"),
                    instance=instance)


def _send_instance_update_notification(context, instance, old_vm_state,
        old_task_state, new_vm_state, new_task_state, host=None):
    """Send 'compute.instance.update' notification to inform observers
    about instance state changes"""

    payload = usage_from_instance(context, instance, None, None)

    states_payload = {
        "old_state": old_vm_state,
        "state": new_vm_state,
        "old_task_state": old_task_state,
        "new_task_state": new_task_state,
    }

    payload.update(states_payload)

    # add audit fields:
    (audit_start, audit_end) = audit_period_bounds(current_period=True)
    payload["audit_period_beginning"] = audit_start
    payload["audit_period_ending"] = audit_end

    # add bw usage info:
    bw = bandwidth_usage(instance, audit_start)
    payload["bandwidth"] = bw

    try:
        system_metadata = db.instance_system_metadata_get(
                context, instance.uuid)
    except exception.NotFound:
        system_metadata = {}

    # add image metadata
    image_meta_props = image_meta(system_metadata)
    payload["image_meta"] = image_meta_props

    if not host:
        host = FLAGS.host

    notifier_api.notify(context, host, 'compute.instance.update',
            notifier_api.INFO, payload)


def audit_period_bounds(current_period=False):
    """Get the start and end of the relevant audit usage period

    :param current_period: if True, this will generate a usage for the
        current usage period; if False, this will generate a usage for the
        previous audit period.
    """

    begin, end = utils.last_completed_audit_period()
    if current_period:
        audit_start = end
        audit_end = utils.utcnow()
    else:
        audit_start = begin
        audit_end = end

    return (audit_start, audit_end)


def bandwidth_usage(instance_ref, audit_start,
        ignore_missing_network_data=True):
    """Get bandwidth usage information for the instance for the
    specified audit period.
    """

    admin_context = nova.context.get_admin_context(read_deleted='yes')

    if (instance_ref.get('info_cache') and
            instance_ref['info_cache'].get('network_info')):
        cached_info = instance_ref['info_cache']['network_info']
        nw_info = network_model.NetworkInfo.hydrate(cached_info)
    else:
        try:
            nw_info = network.API().get_instance_nw_info(admin_context,
                    instance_ref)
        except Exception:
            LOG.exception('Failed to get nw_info', instance=instance_ref)
            if ignore_missing_network_data:
                return
            raise

    macs = [vif['address'] for vif in nw_info]
    uuids = [instance_ref["uuid"]]

    bw_usages = db.bw_usage_get_by_uuids(admin_context, uuids, audit_start)
    bw_usages = [b for b in bw_usages if b.mac in macs]

    bw = {}

    for b in bw_usages:
        label = 'net-name-not-found-%s' % b['mac']
        for vif in nw_info:
            if vif['address'] == b['mac']:
                label = vif['network']['label']
                break

        bw[label] = dict(bw_in=b.bw_in, bw_out=b.bw_out)

    return bw


def image_meta(system_metadata):
    """Format image metadata for use in notifications from the instance
    system metadata.
    """

    image_meta = {}
    for md_key, md_value in system_metadata.iteritems():
        if md_key.startswith('image_'):
            image_meta[md_key[6:]] = md_value

    return image_meta


def usage_from_instance(context, instance_ref, network_info,
        system_metadata, **kw):
    """Get usage information for an instance which is common to all
    notifications.

    :param network_info: network_info provided if not None
    :param system_metadata: system_metadata DB entries for the instance,
        if not None. *NOTE*: Currently unused here in trunk, but needed for
        potential custom modifications.
    """

    def null_safe_str(s):
        return str(s) if s else ''

    image_ref_url = utils.generate_image_url(instance_ref['image_ref'])

    instance_type_name = instance_ref.get('instance_type', {}).get('name', '')

    usage_info = dict(
        tenant_id=instance_ref['project_id'],
        user_id=instance_ref['user_id'],
        instance_id=instance_ref['uuid'],
        instance_type=instance_type_name,
        instance_type_id=instance_ref['instance_type_id'],
        memory_mb=instance_ref['memory_mb'],
        disk_gb=instance_ref['root_gb'] + instance_ref['ephemeral_gb'],
        display_name=instance_ref['display_name'],
        created_at=str(instance_ref['created_at']),
        # Nova's deleted vs terminated instance terminology is confusing,
        # this should be when the instance was deleted (i.e. terminated_at),
        # not when the db record was deleted. (mdragon)
        deleted_at=null_safe_str(instance_ref.get('terminated_at')),
        launched_at=null_safe_str(instance_ref.get('launched_at')),
        image_ref_url=image_ref_url,
        state=instance_ref['vm_state'],
        state_description=null_safe_str(instance_ref.get('task_state')))

    if network_info is not None:
        usage_info['fixed_ips'] = network_info.fixed_ips()

    usage_info.update(kw)
    return usage_info
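
With notify_on_state_change enabled (e.g. set to "vm_and_task_state"),
consumers of compute.instance.update receive a payload shaped roughly
like the sketch below, assembled from usage_from_instance() plus the
state, audit, bandwidth, and image fields added above. All values here
are illustrative placeholders, including the network label and the
image_meta keys:

    payload = {
        # common usage fields from usage_from_instance():
        'tenant_id': '...', 'user_id': '...', 'instance_id': '...',
        'instance_type': '...', 'instance_type_id': '...',
        'memory_mb': '...', 'disk_gb': '...', 'display_name': '...',
        'created_at': '...', 'deleted_at': '', 'launched_at': '',
        'image_ref_url': '...', 'state_description': '',
        # state transition fields:
        'old_state': 'building', 'state': 'active',
        'old_task_state': 'spawning', 'new_task_state': None,
        # audit period and bandwidth usage:
        'audit_period_beginning': '...', 'audit_period_ending': '...',
        'bandwidth': {'public': {'bw_in': 0, 'bw_out': 0}},
        # image metadata drawn from the instance system_metadata:
        'image_meta': {'min_ram': '...', 'min_disk': '...'},
    }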


@@ -28,6 +28,7 @@ from nova import db
from nova import exception
from nova import flags
from nova import log as logging
from nova import notifications
from nova.openstack.common import cfg
from nova.openstack.common import importutils
from nova.openstack.common import jsonutils
@@ -226,7 +227,11 @@ class Scheduler(object):
        # Changing instance_state.
        values = {"vm_state": vm_states.MIGRATING}
        db.instance_update(context, instance_id, values)
        # update instance state and notify
        (old_ref, new_instance_ref) = db.instance_update_and_get_original(
                context, instance_id, values)
        notifications.send_update(context, old_ref, new_instance_ref)

        src = instance_ref['host']
        cast_to_compute_host(context, src, 'live_migration',


@@ -29,6 +29,7 @@ from nova import exception
from nova import flags
from nova import log as logging
from nova import manager
from nova import notifications
from nova.notifier import api as notifier
from nova.openstack.common import cfg
from nova.openstack.common import excutils
@@ -173,7 +174,11 @@ class SchedulerManager(manager.Manager):
            state = vm_state.upper()
            LOG.warning(_('Setting instance to %(state)s state.'), locals(),
                        instance_uuid=instance_uuid)
            db.instance_update(context, instance_uuid, updates)
            # update instance state and notify on the transition
            (old_ref, new_ref) = db.instance_update_and_get_original(context,
                    instance_uuid, updates)
            notifications.send_update(context, old_ref, new_ref)

        payload = dict(request_spec=request_spec,
                       instance_properties=properties,


@@ -28,6 +28,7 @@ from nova import context
from nova import db
from nova import exception
from nova import flags
from nova import notifications
from nova import rpc
from nova.rpc import common as rpc_common
from nova.scheduler import driver
@@ -232,9 +233,10 @@ class SchedulerManagerTestCase(test.TestCase):
"""
fake_instance_uuid = 'fake-instance-id'
inst = {"vm_state": "", "task_state": ""}
self._mox_schedule_method_helper('schedule_run_instance')
self.mox.StubOutWithMock(db, 'instance_update')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
request_spec = {'instance_properties':
{'uuid': fake_instance_uuid}}
@@ -243,8 +245,8 @@ class SchedulerManagerTestCase(test.TestCase):
        self.manager.driver.schedule_run_instance(self.context,
                *self.fake_args, **self.fake_kwargs).AndRaise(
                exception.NoValidHost(reason=""))
        db.instance_update(self.context, fake_instance_uuid,
                {'vm_state': vm_states.ERROR})
        db.instance_update_and_get_original(self.context, fake_instance_uuid,
                {"vm_state": vm_states.ERROR}).AndReturn((inst, inst))

        self.mox.ReplayAll()
        self.manager.run_instance(self.context, self.topic,
@@ -255,10 +257,11 @@ class SchedulerManagerTestCase(test.TestCase):
        the instance in ACTIVE state
        """
        fake_instance_uuid = 'fake-instance-id'
        inst = {"vm_state": "", "task_state": ""}

        self._mox_schedule_method_helper('schedule_prep_resize')
        self.mox.StubOutWithMock(db, 'instance_update')
        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')

        request_spec = {'instance_properties':
                {'uuid': fake_instance_uuid}}
@@ -267,9 +270,9 @@ class SchedulerManagerTestCase(test.TestCase):
        self.manager.driver.schedule_prep_resize(self.context,
                *self.fake_args, **self.fake_kwargs).AndRaise(
                exception.NoValidHost(reason=""))
        db.instance_update(self.context, fake_instance_uuid,
                {'vm_state': vm_states.ACTIVE,
                 'task_state': None})
        db.instance_update_and_get_original(self.context, fake_instance_uuid,
                {"vm_state": vm_states.ACTIVE, "task_state": None}).AndReturn(
                (inst, inst))

        self.mox.ReplayAll()
        self.manager.prep_resize(self.context, self.topic,
@@ -283,7 +286,7 @@ class SchedulerManagerTestCase(test.TestCase):
        self._mox_schedule_method_helper('schedule_prep_resize')
        self.mox.StubOutWithMock(db, 'instance_update')
        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')

        request_spec = {'instance_properties':
                {'uuid': fake_instance_uuid}}
@@ -292,8 +295,13 @@ class SchedulerManagerTestCase(test.TestCase):
        self.manager.driver.schedule_prep_resize(self.context,
                *self.fake_args, **self.fake_kwargs).AndRaise(
                self.AnException('something happened'))
        db.instance_update(self.context, fake_instance_uuid,
                {'vm_state': vm_states.ERROR})
        inst = {
            "vm_state": "",
            "task_state": "",
        }
        db.instance_update_and_get_original(self.context, fake_instance_uuid,
                {"vm_state": vm_states.ERROR}).AndReturn((inst, inst))

        self.mox.ReplayAll()
@@ -421,7 +429,9 @@ class SchedulerTestCase(test.TestCase):
                'power_state': power_state.RUNNING,
                'memory_mb': 1024,
                'root_gb': 1024,
                'ephemeral_gb': 0}
                'ephemeral_gb': 0,
                'vm_state': '',
                'task_state': ''}

    def test_live_migration_basic(self):
        """Test basic schedule_live_migration functionality"""
@@ -429,7 +439,7 @@ class SchedulerTestCase(test.TestCase):
        self.mox.StubOutWithMock(self.driver, '_live_migration_src_check')
        self.mox.StubOutWithMock(self.driver, '_live_migration_dest_check')
        self.mox.StubOutWithMock(self.driver, '_live_migration_common_check')
        self.mox.StubOutWithMock(db, 'instance_update')
        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
        self.mox.StubOutWithMock(driver, 'cast_to_compute_host')

        dest = 'fake_host2'
@@ -443,8 +453,9 @@ class SchedulerTestCase(test.TestCase):
                dest, block_migration, disk_over_commit)
        self.driver._live_migration_common_check(self.context, instance,
                dest, block_migration, disk_over_commit)
        db.instance_update(self.context, instance['id'],
                {'vm_state': vm_states.MIGRATING})
        db.instance_update_and_get_original(self.context, instance['id'],
                {"vm_state": vm_states.MIGRATING}).AndReturn(
                (instance, instance))
        driver.cast_to_compute_host(self.context, instance['host'],
                'live_migration', update_db=False,
@@ -468,7 +479,7 @@ class SchedulerTestCase(test.TestCase):
        self.mox.StubOutWithMock(db, 'queue_get_for')
        self.mox.StubOutWithMock(rpc, 'call')
        self.mox.StubOutWithMock(rpc, 'cast')
        self.mox.StubOutWithMock(db, 'instance_update')
        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
        self.mox.StubOutWithMock(driver, 'cast_to_compute_host')

        dest = 'fake_host2'
@@ -530,8 +541,9 @@ class SchedulerTestCase(test.TestCase):
                {'method': 'compare_cpu',
                 'args': {'cpu_info': 'fake_cpu_info'}}).AndReturn(True)
        db.instance_update(self.context, instance['id'],
                {'vm_state': vm_states.MIGRATING})
        db.instance_update_and_get_original(self.context, instance['id'],
                {"vm_state": vm_states.MIGRATING}).AndReturn(
                (instance, instance))
        driver.cast_to_compute_host(self.context, instance['host'],
                'live_migration', update_db=False,


@@ -211,6 +211,18 @@ class DbApiTestCase(test.TestCase):
        system_meta = db.instance_system_metadata_get(ctxt, instance.uuid)
        self.assertEqual('baz', system_meta['original_image_ref'])

    def test_instance_update_with_and_get_original(self):
        ctxt = context.get_admin_context()

        # Create an instance with an initial vm_state
        values = {'vm_state': 'building'}
        instance = db.instance_create(ctxt, values)

        (old_ref, new_ref) = db.instance_update_and_get_original(ctxt,
                instance['id'], {'vm_state': 'needscoffee'})
        self.assertEquals("building", old_ref["vm_state"])
        self.assertEquals("needscoffee", new_ref["vm_state"])

    def test_instance_fault_create(self):
        """Ensure we can create an instance fault"""
        ctxt = context.get_admin_context()