From 2a9fa44364cccfb3535535d41cdeaa18ef1d7a33 Mon Sep 17 00:00:00 2001 From: Peter Stachowski Date: Thu, 5 May 2016 11:37:30 -0400 Subject: [PATCH] Persist error messages and display on 'show' When an error occurs in Trove, it is very difficult to determine the cause without access to the server logs. To make these errors available to the end user, they are now persisted in the database and can be viewed using the standard 'show' command. Also fixed TESTS_USE_INSTANCE_ID test path, as it somehow got broken over time. Change-Id: I84ed28ee73a24a2dd6bdbf895662d26e406e9fae Depends-On: I5d3339e9cbfd6aeb0c3ff6936fefa8dbe9e841f8 Implements: blueprint persist-error-message --- ...ersist-error-message-fb69ddf885bcde84.yaml | 5 + trove/cmd/api.py | 4 + trove/cmd/conductor.py | 4 + trove/cmd/taskmanager.py | 4 + trove/common/notification.py | 13 +- trove/common/utils.py | 39 ++++++ trove/conductor/api.py | 1 + trove/conductor/manager.py | 8 +- trove/db/sqlalchemy/mappers.py | 2 + .../versions/038_instance_faults.py | 56 ++++++++ trove/instance/models.py | 75 +++++++++++ trove/instance/views.py | 10 ++ trove/taskmanager/models.py | 34 ++++- trove/tests/api/instances.py | 8 +- trove/tests/fakes/guestagent.py | 2 +- .../scenario/groups/database_actions_group.py | 4 +- .../scenario/groups/instance_create_group.py | 41 +++++- .../scenario/groups/user_actions_group.py | 4 +- .../runners/instance_create_runners.py | 120 ++++++++++++++++-- trove/tests/scenario/runners/test_runners.py | 37 +++++- .../unittests/common/test_notification.py | 26 ++++ trove/tests/unittests/common/test_utils.py | 24 +++- .../instance/test_instance_models.py | 18 +++ .../unittests/instance/test_instance_views.py | 17 +++ 24 files changed, 508 insertions(+), 48 deletions(-) create mode 100644 releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml create mode 100644 trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py diff --git 
a/releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml b/releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml new file mode 100644 index 0000000000..3510c085b6 --- /dev/null +++ b/releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml @@ -0,0 +1,5 @@ +--- +features: + - Errors that occur in Trove are now persisted in + the database and are returned in the standard + 'show' command. diff --git a/trove/cmd/api.py b/trove/cmd/api.py index 60f78c3e58..0c0756b65e 100644 --- a/trove/cmd/api.py +++ b/trove/cmd/api.py @@ -20,8 +20,12 @@ from trove.common import profile @with_initialize def main(CONF): from trove.common import cfg + from trove.common import notification from trove.common import wsgi + from trove.instance import models as inst_models + notification.DBaaSAPINotification.register_notify_callback( + inst_models.persist_instance_fault) cfg.set_api_config_defaults() profile.setup_profiler('api', CONF.host) conf_file = CONF.find_file(CONF.api_paste_config) diff --git a/trove/cmd/conductor.py b/trove/cmd/conductor.py index 62fd76894b..66499190fb 100644 --- a/trove/cmd/conductor.py +++ b/trove/cmd/conductor.py @@ -20,9 +20,13 @@ from trove.cmd.common import with_initialize @with_initialize def main(conf): + from trove.common import notification from trove.common.rpc import service as rpc_service from trove.common.rpc import version as rpc_version + from trove.instance import models as inst_models + notification.DBaaSAPINotification.register_notify_callback( + inst_models.persist_instance_fault) topic = conf.conductor_queue server = rpc_service.RpcService( manager=conf.conductor_manager, topic=topic, diff --git a/trove/cmd/taskmanager.py b/trove/cmd/taskmanager.py index 3aa4ef97d3..58f7ed125a 100644 --- a/trove/cmd/taskmanager.py +++ b/trove/cmd/taskmanager.py @@ -22,9 +22,13 @@ extra_opts = [openstack_cfg.StrOpt('taskmanager_manager')] def startup(conf, topic): + from trove.common import notification from trove.common.rpc import service 
as rpc_service from trove.common.rpc import version as rpc_version + from trove.instance import models as inst_models + notification.DBaaSAPINotification.register_notify_callback( + inst_models.persist_instance_fault) server = rpc_service.RpcService( manager=conf.taskmanager_manager, topic=topic, rpc_api_version=rpc_version.RPC_API_VERSION) diff --git a/trove/common/notification.py b/trove/common/notification.py index 0760bf1ba5..ee702adc3e 100644 --- a/trove/common/notification.py +++ b/trove/common/notification.py @@ -295,6 +295,15 @@ class DBaaSAPINotification(object): ''' event_type_format = 'dbaas.%s.%s' + notify_callback = None + + @classmethod + def register_notify_callback(cls, callback): + """A callback registered here will be fired whenever + a notification is sent out. The callback should + take a notification object, and event_qualifier. + """ + cls.notify_callback = callback @abc.abstractmethod def event_type(self): @@ -324,7 +333,7 @@ class DBaaSAPINotification(object): def optional_error_traits(self): 'Returns list of optional traits for error notification' - return [] + return ['instance_id'] def required_base_traits(self): return ['tenant_id', 'client_ip', 'server_ip', 'server_type', @@ -395,6 +404,8 @@ class DBaaSAPINotification(object): del context.notification notifier = rpc.get_notifier(service=self.payload['server_type']) notifier.info(context, qualified_event_type, self.payload) + if self.notify_callback: + self.notify_callback(event_qualifier) def notify_start(self, **kwargs): self._notify('start', self.required_start_traits(), diff --git a/trove/common/utils.py b/trove/common/utils.py index 79ce5ad9b7..402663c19e 100644 --- a/trove/common/utils.py +++ b/trove/common/utils.py @@ -331,3 +331,42 @@ def is_collection(item): """ return (isinstance(item, collections.Iterable) and not isinstance(item, (bytes, six.text_type))) + + +def format_output(message, format_len=79, truncate_len=None, replace_index=0): + """Recursive function to try and keep 
line lengths below a certain amount, + so they can be displayed nicely on the command-line or UI. + Tries replacement patterns one at a time (in round-robin fashion) + that insert \n at strategic spots. + """ + replacements = [['. ', '.\n'], [' (', '\n('], [': ', ':\n ']] + replace_index %= len(replacements) + if not isinstance(message, list): + message = message.splitlines(1) + msg_list = [] + for line in message: + if len(line) > format_len: + ok_to_split_again = False + for count in range(0, len(replacements)): + lines = line.replace( + replacements[replace_index][0], + replacements[replace_index][1], + 1 + ).splitlines(1) + replace_index = (replace_index + 1) % len(replacements) + if len(lines) > 1: + ok_to_split_again = True + break + for item in lines: + # If we split, but a line is still too long, do it again + if ok_to_split_again and len(item) > format_len: + item = format_output(item, format_len=format_len, + replace_index=replace_index) + msg_list.append(item) + else: + msg_list.append(line) + + msg_str = "".join(msg_list) + if truncate_len and len(msg_str) > truncate_len: + msg_str = msg_str[:truncate_len - 3] + '...' 
+ return msg_str diff --git a/trove/conductor/api.py b/trove/conductor/api.py index 617ccb5e7f..d83aef5c73 100644 --- a/trove/conductor/api.py +++ b/trove/conductor/api.py @@ -90,6 +90,7 @@ class API(object): context = self.context serialized = SerializableNotification.serialize(context, context.notification) + serialized.update({'instance_id': CONF.guest_id}) cctxt.cast(self.context, "notify_exc_info", serialized_notification=serialized, message=message, exception=exception) diff --git a/trove/conductor/manager.py b/trove/conductor/manager.py index 3519eb25cc..1d0e7f8968 100644 --- a/trove/conductor/manager.py +++ b/trove/conductor/manager.py @@ -18,14 +18,14 @@ from oslo_service import periodic_task from trove.backup import models as bkup_models from trove.common import cfg -from trove.common import exception +from trove.common import exception as trove_exception from trove.common.i18n import _ from trove.common.instance import ServiceStatus from trove.common.rpc import version as rpc_version from trove.common.serializable_notification import SerializableNotification from trove.conductor.models import LastSeen from trove.extensions.mysql import models as mysql_models -from trove.instance import models as t_models +from trove.instance import models as inst_models LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -57,7 +57,7 @@ class Manager(periodic_task.PeriodicTasks): try: seen = LastSeen.load(instance_id=instance_id, method_name=method_name) - except exception.NotFound: + except trove_exception.NotFound: # This is fine. 
pass @@ -86,7 +86,7 @@ class Manager(periodic_task.PeriodicTasks): LOG.debug("Instance ID: %(instance)s, Payload: %(payload)s" % {"instance": str(instance_id), "payload": str(payload)}) - status = t_models.InstanceServiceStatus.find_by( + status = inst_models.InstanceServiceStatus.find_by( instance_id=instance_id) if self._message_too_old(instance_id, 'heartbeat', sent): return diff --git a/trove/db/sqlalchemy/mappers.py b/trove/db/sqlalchemy/mappers.py index 68063ba976..ff90e262fd 100644 --- a/trove/db/sqlalchemy/mappers.py +++ b/trove/db/sqlalchemy/mappers.py @@ -26,6 +26,8 @@ def map(engine, models): return orm.mapper(models['instance'], Table('instances', meta, autoload=True)) + orm.mapper(models['instance_faults'], + Table('instance_faults', meta, autoload=True)) orm.mapper(models['root_enabled_history'], Table('root_enabled_history', meta, autoload=True)) orm.mapper(models['datastore'], diff --git a/trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py b/trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py new file mode 100644 index 0000000000..49b4570cf0 --- /dev/null +++ b/trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py @@ -0,0 +1,56 @@ +# Copyright 2016 Tesora, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + +from sqlalchemy import ForeignKey +from sqlalchemy.schema import Column +from sqlalchemy.schema import MetaData + +from trove.db.sqlalchemy.migrate_repo.schema import Boolean +from trove.db.sqlalchemy.migrate_repo.schema import create_tables +from trove.db.sqlalchemy.migrate_repo.schema import DateTime +from trove.db.sqlalchemy.migrate_repo.schema import drop_tables +from trove.db.sqlalchemy.migrate_repo.schema import String +from trove.db.sqlalchemy.migrate_repo.schema import Table +from trove.db.sqlalchemy.migrate_repo.schema import Text + + +meta = MetaData() + +instance_faults = Table( + 'instance_faults', + meta, + Column('id', String(length=64), primary_key=True, nullable=False), + Column('instance_id', String(length=64), + ForeignKey('instances.id', ondelete="CASCADE", + onupdate="CASCADE"), nullable=False), + Column('message', String(length=255), nullable=False), + Column('details', Text(length=65535), nullable=False), + Column('created', DateTime(), nullable=False), + Column('updated', DateTime(), nullable=False), + Column('deleted', Boolean(), default=0, nullable=False), + Column('deleted_at', DateTime()), +) + + +def upgrade(migrate_engine): + meta.bind = migrate_engine + Table('instances', meta, autoload=True) + create_tables([instance_faults]) + + +def downgrade(migrate_engine): + meta.bind = migrate_engine + drop_tables([instance_faults]) diff --git a/trove/instance/models.py b/trove/instance/models.py index c4bcf9656d..383b2ce189 100644 --- a/trove/instance/models.py +++ b/trove/instance/models.py @@ -165,6 +165,8 @@ class SimpleInstance(object): self.db_info = db_info self.datastore_status = datastore_status self.root_pass = root_password + self._fault = None + self._fault_loaded = False if ds_version is None: self.ds_version = (datastore_models.DatastoreVersion. 
load_by_uuid(self.db_info.datastore_version_id)) @@ -375,6 +377,20 @@ class SimpleInstance(object): def root_password(self): return self.root_pass + @property + def fault(self): + # Fault can be non-existent, so we have a loaded flag + if not self._fault_loaded: + try: + self._fault = DBInstanceFault.find_by(instance_id=self.id) + # Get rid of the stack trace if we're not admin + if not self.context.is_admin: + self._fault.details = None + except exception.ModelNotFoundError: + pass + self._fault_loaded = True + return self._fault + @property def configuration(self): if self.db_info.configuration_id is not None: @@ -612,6 +628,7 @@ class BaseInstance(SimpleInstance): self.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE) self.set_servicestatus_deleted() + self.set_instance_fault_deleted() # Delete associated security group if CONF.trove_security_groups_support: SecurityGroup.delete_for_instance(self.db_info.id, @@ -640,6 +657,15 @@ class BaseInstance(SimpleInstance): del_instance.set_status(tr_instance.ServiceStatuses.DELETED) del_instance.save() + def set_instance_fault_deleted(self): + try: + del_fault = DBInstanceFault.find_by(instance_id=self.id) + del_fault.deleted = True + del_fault.deleted_at = datetime.utcnow() + del_fault.save() + except exception.ModelNotFoundError: + pass + @property def volume_client(self): if not self._volume_client: @@ -1355,6 +1381,54 @@ class DBInstance(dbmodels.DatabaseModelBase): task_status = property(get_task_status, set_task_status) +def persist_instance_fault(notification, event_qualifier): + """This callback is registered to be fired whenever a + notification is sent out. 
+ """ + if "error" == event_qualifier: + instance_id = notification.payload.get('instance_id') + message = notification.payload.get( + 'message', 'Missing notification message') + details = notification.payload.get('exception', []) + server_type = notification.server_type + if server_type: + details.insert(0, "Server type: %s\n" % server_type) + save_instance_fault(instance_id, message, details) + + +def save_instance_fault(instance_id, message, details): + if instance_id: + try: + # Make sure it's a valid id - sometimes the error is related + # to an invalid id and we can't save those + DBInstance.find_by(id=instance_id, deleted=False) + msg = utils.format_output(message, truncate_len=255) + det = utils.format_output(details) + try: + fault = DBInstanceFault.find_by(instance_id=instance_id) + fault.set_info(msg, det) + fault.save() + except exception.ModelNotFoundError: + DBInstanceFault.create( + instance_id=instance_id, + message=msg, details=det) + except exception.ModelNotFoundError: + # We don't need to save anything if the instance id isn't valid + pass + + +class DBInstanceFault(dbmodels.DatabaseModelBase): + _data_fields = ['instance_id', 'message', 'details', + 'created', 'updated', 'deleted', 'deleted_at'] + + def __init__(self, **kwargs): + super(DBInstanceFault, self).__init__(**kwargs) + + def set_info(self, message, details): + self.message = message + self.details = details + + class InstanceServiceStatus(dbmodels.DatabaseModelBase): _data_fields = ['instance_id', 'status_id', 'status_description', 'updated_at'] @@ -1400,6 +1474,7 @@ class InstanceServiceStatus(dbmodels.DatabaseModelBase): def persisted_models(): return { 'instance': DBInstance, + 'instance_faults': DBInstanceFault, 'service_statuses': InstanceServiceStatus, } diff --git a/trove/instance/views.py b/trove/instance/views.py index 5292311ab9..83f96d5878 100644 --- a/trove/instance/views.py +++ b/trove/instance/views.py @@ -92,6 +92,9 @@ class InstanceDetailView(InstanceView): 
result['instance']['datastore']['version'] = (self.instance. datastore_version.name) + if self.instance.fault: + result['instance']['fault'] = self._build_fault_info() + if self.instance.slaves: result['instance']['replicas'] = self._build_slaves_info() @@ -122,6 +125,13 @@ class InstanceDetailView(InstanceView): return result + def _build_fault_info(self): + return { + "message": self.instance.fault.message, + "created": self.instance.fault.updated, + "details": self.instance.fault.details, + } + def _build_slaves_info(self): data = [] for slave in self.instance.slaves: diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py index 568a6fef27..dfb496af46 100644 --- a/trove/taskmanager/models.py +++ b/trove/taskmanager/models.py @@ -348,6 +348,8 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): # Make sure the service becomes active before sending a usage # record to avoid over billing a customer for an instance that # fails to build properly. + error_message = '' + error_details = '' try: utils.poll_until(self._service_is_active, sleep_time=USAGE_SLEEP_TIME, @@ -355,14 +357,22 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): LOG.info(_("Created instance %s successfully.") % self.id) TroveInstanceCreate(instance=self, instance_size=flavor['ram']).notify() - except PollTimeOut: + except PollTimeOut as ex: LOG.error(_("Failed to create instance %s. " "Timeout waiting for instance to become active. 
" "No usage create-event was sent.") % self.id) self.update_statuses_on_time_out() - except Exception: + error_message = "%s" % ex + error_details = traceback.format_exc() + except Exception as ex: LOG.exception(_("Failed to send usage create-event for " "instance %s.") % self.id) + error_message = "%s" % ex + error_details = traceback.format_exc() + finally: + if error_message: + inst_models.save_instance_fault( + self.id, error_message, error_details) def create_instance(self, flavor, image_id, databases, users, datastore_manager, packages, volume_size, @@ -621,10 +631,18 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): raise TroveError(_("Service not active, status: %s") % status) c_id = self.db_info.compute_instance_id - nova_status = self.nova_client.servers.get(c_id).status - if nova_status in [InstanceStatus.ERROR, - InstanceStatus.FAILED]: - raise TroveError(_("Server not active, status: %s") % nova_status) + server = self.nova_client.servers.get(c_id) + server_status = server.status + if server_status in [InstanceStatus.ERROR, + InstanceStatus.FAILED]: + server_message = '' + if server.fault: + server_message = "\nServer error: %s" % ( + server.fault.get('message', 'Unknown')) + raise TroveError(_("Server not active, status: %(status)s" + "%(srv_msg)s") % + {'status': server_status, + 'srv_msg': server_message}) return False def _create_server_volume(self, flavor_id, image_id, security_groups, @@ -844,7 +862,9 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): "exc": exc, "trace": traceback.format_exc()}) self.update_db(task_status=task_status) - raise TroveError(message=message) + exc_message = '\n%s' % exc if exc else '' + full_message = "%s%s" % (message, exc_message) + raise TroveError(message=full_message) def _create_volume(self, volume_size, volume_type, datastore_manager): LOG.debug("Begin _create_volume for id: %s" % self.id) diff --git a/trove/tests/api/instances.py 
b/trove/tests/api/instances.py index 494c05ed42..219c1ff3da 100644 --- a/trove/tests/api/instances.py +++ b/trove/tests/api/instances.py @@ -481,7 +481,7 @@ class CreateInstanceFail(object): 'hostname', 'id', 'name', 'datastore', 'server_state_description', 'status', 'updated', 'users', 'volume', 'root_enabled_at', - 'root_enabled_by'] + 'root_enabled_by', 'fault'] with CheckInstance(result._info) as check: check.contains_allowed_attrs( result._info, allowed_attrs, @@ -693,7 +693,7 @@ class CreateInstance(object): # Check these attrs only are returned in create response allowed_attrs = ['created', 'flavor', 'addresses', 'id', 'links', - 'name', 'status', 'updated', 'datastore'] + 'name', 'status', 'updated', 'datastore', 'fault'] if ROOT_ON_CREATE: allowed_attrs.append('password') if VOLUME_SUPPORT: @@ -1156,7 +1156,7 @@ class TestInstanceListing(object): def test_get_instance(self): allowed_attrs = ['created', 'databases', 'flavor', 'hostname', 'id', 'links', 'name', 'status', 'updated', 'ip', - 'datastore'] + 'datastore', 'fault'] if VOLUME_SUPPORT: allowed_attrs.append('volume') else: @@ -1244,7 +1244,7 @@ class TestInstanceListing(object): 'flavor', 'guest_status', 'host', 'hostname', 'id', 'name', 'root_enabled_at', 'root_enabled_by', 'server_state_description', 'status', 'datastore', - 'updated', 'users', 'volume'] + 'updated', 'users', 'volume', 'fault'] with CheckInstance(result._info) as check: check.contains_allowed_attrs( result._info, allowed_attrs, diff --git a/trove/tests/fakes/guestagent.py b/trove/tests/fakes/guestagent.py index 42996383b3..79e0a02d70 100644 --- a/trove/tests/fakes/guestagent.py +++ b/trove/tests/fakes/guestagent.py @@ -241,7 +241,7 @@ class FakeGuest(object): status.status = rd_instance.ServiceStatuses.RUNNING status.save() AgentHeartBeat.create(instance_id=self.id) - eventlet.spawn_after(3.0, update_db) + eventlet.spawn_after(3.5, update_db) def _set_task_status(self, new_status='RUNNING'): from trove.instance.models import 
InstanceServiceStatus diff --git a/trove/tests/scenario/groups/database_actions_group.py b/trove/tests/scenario/groups/database_actions_group.py index 660e42c4b1..1262d055ca 100644 --- a/trove/tests/scenario/groups/database_actions_group.py +++ b/trove/tests/scenario/groups/database_actions_group.py @@ -137,7 +137,7 @@ class DatabaseActionsInstCreateWaitGroup(TestGroup): @test def wait_for_instances(self): """Waiting for all instances to become active.""" - self.instance_create_runner.wait_for_created_instances() + self.instance_create_runner.run_wait_for_created_instances() @test(depends_on=[wait_for_instances]) def add_initialized_instance_data(self): @@ -180,4 +180,4 @@ class DatabaseActionsInstDeleteWaitGroup(TestGroup): @test def wait_for_delete_initialized_instance(self): """Wait for the initialized instance to delete.""" - self.instance_create_runner.run_wait_for_initialized_instance_delete() + self.instance_create_runner.run_wait_for_error_init_delete() diff --git a/trove/tests/scenario/groups/instance_create_group.py b/trove/tests/scenario/groups/instance_create_group.py index 0cd2140cf0..958a925180 100644 --- a/trove/tests/scenario/groups/instance_create_group.py +++ b/trove/tests/scenario/groups/instance_create_group.py @@ -55,6 +55,16 @@ class InstanceCreateGroup(TestGroup): """Create an instance with initial properties.""" self.test_runner.run_initialized_instance_create() + @test(runs_after=[create_initialized_instance]) + def create_error_instance(self): + """Create an instance in error state.""" + self.test_runner.run_create_error_instance() + + @test(runs_after=[create_error_instance]) + def create_error2_instance(self): + """Create another instance in error state.""" + self.test_runner.run_create_error2_instance() + @test(depends_on_groups=[groups.INST_CREATE], groups=[GROUP, groups.INST_CREATE_WAIT], @@ -67,9 +77,30 @@ class InstanceCreateWaitGroup(TestGroup): InstanceCreateRunnerFactory.instance()) @test + def wait_for_error_instances(self): + 
"""Wait for the error instances to fail.""" + self.test_runner.run_wait_for_error_instances() + + @test(depends_on=[wait_for_error_instances]) + def validate_error_instance(self): + """Validate the error instance fault message.""" + self.test_runner.run_validate_error_instance() + + @test(depends_on=[wait_for_error_instances], + runs_after=[validate_error_instance]) + def validate_error2_instance(self): + """Validate the error2 instance fault message as admin.""" + self.test_runner.run_validate_error2_instance() + + @test(runs_after=[validate_error_instance, validate_error2_instance]) + def delete_error_instances(self): + """Delete the error instances.""" + self.test_runner.run_delete_error_instances() + + @test(runs_after=[delete_error_instances]) def wait_for_instances(self): """Waiting for all instances to become active.""" - self.test_runner.wait_for_created_instances() + self.test_runner.run_wait_for_created_instances() @test(depends_on=[wait_for_instances]) def add_initialized_instance_data(self): @@ -107,11 +138,11 @@ class InstanceInitDeleteWaitGroup(TestGroup): InstanceCreateRunnerFactory.instance()) @test - def wait_for_initialized_instance_delete(self): - """Wait for the initialized instance to be deleted.""" - self.test_runner.run_wait_for_initialized_instance_delete() + def wait_for_error_init_delete(self): + """Wait for the initialized and error instances to be gone.""" + self.test_runner.run_wait_for_error_init_delete() - @test(runs_after=[wait_for_initialized_instance_delete]) + @test(runs_after=[wait_for_error_init_delete]) def delete_initial_configuration(self): """Delete the initial configuration group.""" self.test_runner.run_initial_configuration_delete() diff --git a/trove/tests/scenario/groups/user_actions_group.py b/trove/tests/scenario/groups/user_actions_group.py index 686ac0d449..fb18caf6a4 100644 --- a/trove/tests/scenario/groups/user_actions_group.py +++ b/trove/tests/scenario/groups/user_actions_group.py @@ -225,7 +225,7 @@ class 
UserActionsInstCreateWaitGroup(TestGroup): @test def wait_for_instances(self): """Waiting for all instances to become active.""" - self.instance_create_runner.wait_for_created_instances() + self.instance_create_runner.run_wait_for_created_instances() @test(depends_on=[wait_for_instances]) def validate_initialized_instance(self): @@ -264,4 +264,4 @@ class UserActionsInstDeleteWaitGroup(TestGroup): @test def wait_for_delete_initialized_instance(self): """Wait for the initialized instance to delete.""" - self.instance_create_runner.run_wait_for_initialized_instance_delete() + self.instance_create_runner.run_wait_for_error_init_delete() diff --git a/trove/tests/scenario/runners/instance_create_runners.py b/trove/tests/scenario/runners/instance_create_runners.py index 7490800bdf..832f675fe0 100644 --- a/trove/tests/scenario/runners/instance_create_runners.py +++ b/trove/tests/scenario/runners/instance_create_runners.py @@ -28,6 +28,8 @@ class InstanceCreateRunner(TestRunner): def __init__(self): super(InstanceCreateRunner, self).__init__() + self.error_inst_id = None + self.error2_inst_id = None self.init_inst_id = None self.init_inst_dbs = None self.init_inst_users = None @@ -40,10 +42,10 @@ class InstanceCreateRunner(TestRunner): self, expected_states=['BUILD', 'ACTIVE'], expected_http_code=200): name = self.instance_info.name flavor = self._get_instance_flavor() - trove_volume_size = CONFIG.get('trove_volume_size', 1) + volume_size = self.instance_info.volume_size instance_info = self.assert_instance_create( - name, flavor, trove_volume_size, [], [], None, None, + name, flavor, volume_size, [], [], None, None, CONFIG.dbaas_datastore, CONFIG.dbaas_datastore_version, expected_states, expected_http_code, create_helper_user=True, locality='affinity') @@ -92,7 +94,7 @@ class InstanceCreateRunner(TestRunner): configuration_id = configuration_id or self.config_group_id name = self.instance_info.name + name_suffix flavor = self._get_instance_flavor() - trove_volume_size = 
CONFIG.get('trove_volume_size', 1) + volume_size = self.instance_info.volume_size self.init_inst_dbs = (self.test_helper.get_valid_database_definitions() if with_dbs else []) self.init_inst_users = (self.test_helper.get_valid_user_definitions() @@ -100,7 +102,7 @@ class InstanceCreateRunner(TestRunner): self.init_inst_config_group_id = configuration_id if (self.init_inst_dbs or self.init_inst_users or configuration_id): info = self.assert_instance_create( - name, flavor, trove_volume_size, + name, flavor, volume_size, self.init_inst_dbs, self.init_inst_users, configuration_id, None, CONFIG.dbaas_datastore, CONFIG.dbaas_datastore_version, @@ -113,12 +115,19 @@ class InstanceCreateRunner(TestRunner): # the empty instance test. raise SkipTest("No testable initial properties provided.") - def _get_instance_flavor(self): + def _get_instance_flavor(self, fault_num=None): + name_format = 'instance%s%s_flavor_name' + default = 'm1.tiny' + fault_str = '' + eph_str = '' + if fault_num: + fault_str = '_fault_%d' % fault_num if self.EPHEMERAL_SUPPORT: - flavor_name = CONFIG.values.get('instance_eph_flavor_name', - 'eph.rd-tiny') - else: - flavor_name = CONFIG.values.get('instance_flavor_name', 'm1.tiny') + eph_str = '_eph' + default = 'eph.rd-tiny' + + name = name_format % (fault_str, eph_str) + flavor_name = CONFIG.values.get(name, default) return self.get_flavor(flavor_name) @@ -238,7 +247,86 @@ class InstanceCreateRunner(TestRunner): return instance_info - def wait_for_created_instances(self, expected_states=['BUILD', 'ACTIVE']): + def run_create_error_instance( + self, expected_states=['BUILD', 'ERROR'], expected_http_code=200): + if self.is_using_existing_instance: + raise SkipTest("Using an existing instance.") + + name = self.instance_info.name + '_error' + flavor = self._get_instance_flavor(fault_num=1) + volume_size = self.instance_info.volume_size + + inst = self.assert_instance_create( + name, flavor, volume_size, [], [], None, None, + CONFIG.dbaas_datastore, 
CONFIG.dbaas_datastore_version, + expected_states, expected_http_code, create_helper_user=False) + self.assert_client_code(expected_http_code) + self.error_inst_id = inst.id + + def run_create_error2_instance( + self, expected_states=['BUILD', 'ERROR'], expected_http_code=200): + if self.is_using_existing_instance: + raise SkipTest("Using an existing instance.") + + name = self.instance_info.name + '_error2' + flavor = self._get_instance_flavor(fault_num=2) + volume_size = self.instance_info.volume_size + + inst = self.assert_instance_create( + name, flavor, volume_size, [], [], None, None, + CONFIG.dbaas_datastore, CONFIG.dbaas_datastore_version, + expected_states, expected_http_code, create_helper_user=False) + self.assert_client_code(expected_http_code) + self.error2_inst_id = inst.id + + def run_wait_for_error_instances(self, expected_states=['ERROR']): + error_ids = [] + if self.error_inst_id: + error_ids.append(self.error_inst_id) + if self.error2_inst_id: + error_ids.append(self.error2_inst_id) + + if error_ids: + self.assert_all_instance_states( + error_ids, expected_states, fast_fail_status=[]) + + def run_validate_error_instance(self): + if not self.error_inst_id: + raise SkipTest("No error instance created.") + + instance = self.get_instance(self.error_inst_id) + with CheckInstance(instance._info) as check: + check.fault() + + err_msg = "disk is too small for requested image" + self.assert_true(err_msg in instance.fault['message'], + "Message '%s' does not contain '%s'" % + (instance.fault['message'], err_msg)) + + def run_validate_error2_instance(self): + if not self.error2_inst_id: + raise SkipTest("No error2 instance created.") + + instance = self.get_instance( + self.error2_inst_id, client=self.admin_client) + with CheckInstance(instance._info) as check: + check.fault(is_admin=True) + + err_msg = "Quota exceeded for ram" + self.assert_true(err_msg in instance.fault['message'], + "Message '%s' does not contain '%s'" % + (instance.fault['message'], 
err_msg)) + + def run_delete_error_instances(self, expected_http_code=202): + if self.error_inst_id: + self.auth_client.instances.delete(self.error_inst_id) + self.assert_client_code(expected_http_code) + if self.error2_inst_id: + self.auth_client.instances.delete(self.error2_inst_id) + self.assert_client_code(expected_http_code) + + def run_wait_for_created_instances( + self, expected_states=['BUILD', 'ACTIVE']): instances = [self.instance_info.id] if self.init_inst_id: instances.append(self.init_inst_id) @@ -324,10 +412,16 @@ class InstanceCreateRunner(TestRunner): else: raise SkipTest("Cleanup is not required.") - def run_wait_for_initialized_instance_delete(self, - expected_states=['SHUTDOWN']): + def run_wait_for_error_init_delete(self, expected_states=['SHUTDOWN']): + delete_ids = [] + if self.error_inst_id: + delete_ids.append(self.error_inst_id) + if self.error2_inst_id: + delete_ids.append(self.error2_inst_id) if self.init_inst_id: - self.assert_all_gone(self.init_inst_id, expected_states[-1]) + delete_ids.append(self.init_inst_id) + if delete_ids: + self.assert_all_gone(delete_ids, expected_states[-1]) else: raise SkipTest("Cleanup is not required.") self.init_inst_id = None diff --git a/trove/tests/scenario/runners/test_runners.py b/trove/tests/scenario/runners/test_runners.py index 9e2d282ac6..6a4718895c 100644 --- a/trove/tests/scenario/runners/test_runners.py +++ b/trove/tests/scenario/runners/test_runners.py @@ -153,10 +153,12 @@ class InstanceTestInfo(object): self.dbaas_flavor_href = None # The flavor of the instance. self.dbaas_datastore = None # The datastore id self.dbaas_datastore_version = None # The datastore version id + self.volume_size = None # The size of volume the instance will have. self.volume = None # The volume the instance will have. self.nics = None # The dict of type/id for nics used on the intance. self.user = None # The user instance who owns the instance. self.users = None # The users created on the instance. 
+ self.databases = None # The databases created on the instance. class TestRunner(object): @@ -207,9 +209,11 @@ class TestRunner(object): CONFIG.dbaas_datastore_version) self.instance_info.user = CONFIG.users.find_user_by_name('alt_demo') if self.VOLUME_SUPPORT: + self.instance_info.volume_size = CONFIG.get('trove_volume_size', 1) self.instance_info.volume = { - 'size': CONFIG.get('trove_volume_size', 1)} + 'size': self.instance_info.volume_size} else: + self.instance_info.volume_size = None self.instance_info.volume = None self._auth_client = None @@ -418,13 +422,17 @@ class TestRunner(object): self.assert_equal(expected_http_code, client.last_http_code, "Unexpected client status code") - def assert_all_instance_states(self, instance_ids, expected_states): + def assert_all_instance_states(self, instance_ids, expected_states, + fast_fail_status=None, + require_all_states=False): self.report.log("Waiting for states (%s) for instances: %s" % (expected_states, instance_ids)) def _make_fn(inst_id): return lambda: self._assert_instance_states( - inst_id, expected_states) + inst_id, expected_states, + fast_fail_status=fast_fail_status, + require_all_states=require_all_states) tasks = [build_polling_task(_make_fn(instance_id), sleep_time=self.def_sleep_time, time_out=self.def_timeout) @@ -441,7 +449,7 @@ class TestRunner(object): self.fail(str(task.poll_exception())) def _assert_instance_states(self, instance_id, expected_states, - fast_fail_status=['ERROR', 'FAILED'], + fast_fail_status=None, require_all_states=False): """Keep polling for the expected instance states until the instance acquires either the last or fast-fail state. 
@@ -454,6 +462,9 @@ class TestRunner(object): self.report.log("Waiting for states (%s) for instance: %s" % (expected_states, instance_id)) + + if fast_fail_status is None: + fast_fail_status = ['ERROR', 'FAILED'] found = False for status in expected_states: if require_all_states or found or self._has_status( @@ -595,8 +606,9 @@ class TestRunner(object): if server_group: self.fail("Found left-over server group: %s" % server_group) - def get_instance(self, instance_id): - return self.auth_client.instances.get(instance_id) + def get_instance(self, instance_id, client=None): + client = client or self.auth_client + return client.instances.get(instance_id) def get_instance_host(self, instance_id=None): instance_id = instance_id or self.instance_info.id @@ -782,3 +794,16 @@ class CheckInstance(AttrCheck): slave, allowed_attrs, msg="Replica links not found") self.links(slave['links']) + + def fault(self, is_admin=False): + if 'fault' not in self.instance: + self.fail("'fault' not found in instance.") + else: + allowed_attrs = ['message', 'created', 'details'] + self.contains_allowed_attrs( + self.instance['fault'], allowed_attrs, + msg="Fault") + if is_admin and not self.instance['fault']['details']: + self.fail("Missing fault details") + if not is_admin and self.instance['fault']['details']: + self.fail("Fault details provided for non-admin") diff --git a/trove/tests/unittests/common/test_notification.py b/trove/tests/unittests/common/test_notification.py index cf01024b3f..cdbc09cc57 100644 --- a/trove/tests/unittests/common/test_notification.py +++ b/trove/tests/unittests/common/test_notification.py @@ -383,3 +383,29 @@ class TestDBaaSNotification(trove_testtools.TestCase): a, _ = notifier().info.call_args payload = a[2] self.assertTrue('instance_id' in payload) + + def _test_notify_callback(self, fn, *args, **kwargs): + with patch.object(rpc, 'get_notifier') as notifier: + mock_callback = Mock() + self.test_n.register_notify_callback(mock_callback) + mock_context = 
Mock() + mock_context.notification = Mock() + self.test_n.context = mock_context + fn(*args, **kwargs) + self.assertTrue(notifier().info.called) + self.assertTrue(mock_callback.called) + self.test_n.register_notify_callback(None) + + def test_notify_callback(self): + required_keys = { + 'datastore': 'ds', + 'name': 'name', + 'flavor_id': 'flav_id', + 'instance_id': 'inst_id', + } + self._test_notify_callback(self.test_n.notify_start, + **required_keys) + self._test_notify_callback(self.test_n.notify_end, + **required_keys) + self._test_notify_callback(self.test_n.notify_exc_info, + 'error', 'exc') diff --git a/trove/tests/unittests/common/test_utils.py b/trove/tests/unittests/common/test_utils.py index aaf83bdfe8..4b3de75b07 100644 --- a/trove/tests/unittests/common/test_utils.py +++ b/trove/tests/unittests/common/test_utils.py @@ -22,15 +22,15 @@ from trove.common import utils from trove.tests.unittests import trove_testtools -class TestTroveExecuteWithTimeout(trove_testtools.TestCase): +class TestUtils(trove_testtools.TestCase): def setUp(self): - super(TestTroveExecuteWithTimeout, self).setUp() + super(TestUtils, self).setUp() self.orig_utils_execute = utils.execute self.orig_utils_log_error = utils.LOG.error def tearDown(self): - super(TestTroveExecuteWithTimeout, self).tearDown() + super(TestUtils, self).tearDown() utils.execute = self.orig_utils_execute utils.LOG.error = self.orig_utils_log_error @@ -81,3 +81,21 @@ class TestTroveExecuteWithTimeout(trove_testtools.TestCase): def test_pagination_limit(self): self.assertEqual(5, utils.pagination_limit(5, 9)) self.assertEqual(5, utils.pagination_limit(9, 5)) + + def test_format_output(self): + data = [ + ['', ''], + ['Single line', 'Single line'], + ['Long line no breaks ' * 10, 'Long line no breaks ' * 10], + ['Long line. Has breaks ' * 5, + 'Long line.\nHas breaks ' * 2 + 'Long line. 
Has breaks ' * 3], + ['Long line with semi: ' * 4, + 'Long line with semi:\n ' + + 'Long line with semi: ' * 3], + ['Long line with brack (' * 4, + 'Long line with brack\n(' + + 'Long line with brack (' * 3], + ] + for index, datum in enumerate(data): + self.assertEqual(datum[1], utils.format_output(datum[0]), + "Error formatting line %d of data" % index) diff --git a/trove/tests/unittests/instance/test_instance_models.py b/trove/tests/unittests/instance/test_instance_models.py index af31025308..f39b9e8dfe 100644 --- a/trove/tests/unittests/instance/test_instance_models.py +++ b/trove/tests/unittests/instance/test_instance_models.py @@ -22,6 +22,7 @@ from trove.common.instance import ServiceStatuses from trove.datastore import models as datastore_models from trove.instance import models from trove.instance.models import DBInstance +from trove.instance.models import DBInstanceFault from trove.instance.models import filter_ips from trove.instance.models import Instance from trove.instance.models import InstanceServiceStatus @@ -39,12 +40,14 @@ class SimpleInstanceTest(trove_testtools.TestCase): def setUp(self): super(SimpleInstanceTest, self).setUp() + self.context = trove_testtools.TroveTestContext(self, is_admin=True) db_info = DBInstance( InstanceTasks.BUILDING, name="TestInstance") self.instance = SimpleInstance( None, db_info, InstanceServiceStatus( ServiceStatuses.BUILDING), ds_version=Mock(), ds=Mock(), locality='affinity') + self.instance.context = self.context db_info.addresses = {"private": [{"addr": "123.123.123.123"}], "internal": [{"addr": "10.123.123.123"}], "public": [{"addr": "15.123.123.123"}]} @@ -106,6 +109,21 @@ class SimpleInstanceTest(trove_testtools.TestCase): def test_locality(self): self.assertEqual('affinity', self.instance.locality) + def test_fault(self): + fault_message = 'Error' + fault_details = 'details' + fault_date = 'now' + temp_fault = Mock() + temp_fault.message = fault_message + temp_fault.details = fault_details + 
temp_fault.updated = fault_date + fault_mock = Mock(return_value=temp_fault) + with patch.object(DBInstanceFault, 'find_by', fault_mock): + fault = self.instance.fault + self.assertEqual(fault_message, fault.message) + self.assertEqual(fault_details, fault.details) + self.assertEqual(fault_date, fault.updated) + class CreateInstanceTest(trove_testtools.TestCase): diff --git a/trove/tests/unittests/instance/test_instance_views.py b/trove/tests/unittests/instance/test_instance_views.py index 948185ec50..e8458c42f0 100644 --- a/trove/tests/unittests/instance/test_instance_views.py +++ b/trove/tests/unittests/instance/test_instance_views.py @@ -63,6 +63,13 @@ class InstanceDetailViewTest(trove_testtools.TestCase): self.instance.slave_of_id = None self.instance.slaves = [] self.instance.locality = 'affinity' + self.fault_message = 'Error' + self.fault_details = 'details' + self.fault_date = 'now' + self.instance.fault = Mock() + self.instance.fault.message = self.fault_message + self.instance.fault.details = self.fault_details + self.instance.fault.updated = self.fault_date def tearDown(self): super(InstanceDetailViewTest, self).tearDown() @@ -98,3 +105,13 @@ class InstanceDetailViewTest(trove_testtools.TestCase): result = view.data() self.assertEqual(self.instance.locality, result['instance']['locality']) + + def test_fault(self): + view = InstanceDetailView(self.instance, Mock()) + result = view.data() + self.assertEqual(self.fault_message, + result['instance']['fault']['message']) + self.assertEqual(self.fault_details, + result['instance']['fault']['details']) + self.assertEqual(self.fault_date, + result['instance']['fault']['created'])