diff --git a/releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml b/releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml new file mode 100644 index 0000000000..3510c085b6 --- /dev/null +++ b/releasenotes/notes/persist-error-message-fb69ddf885bcde84.yaml @@ -0,0 +1,5 @@ +--- +features: + - Errors that occur in Trove are now persisted in + the database and are returned in the standard + 'show' command. diff --git a/trove/cmd/api.py b/trove/cmd/api.py index 60f78c3e58..0c0756b65e 100644 --- a/trove/cmd/api.py +++ b/trove/cmd/api.py @@ -20,8 +20,12 @@ from trove.common import profile @with_initialize def main(CONF): from trove.common import cfg + from trove.common import notification from trove.common import wsgi + from trove.instance import models as inst_models + notification.DBaaSAPINotification.register_notify_callback( + inst_models.persist_instance_fault) cfg.set_api_config_defaults() profile.setup_profiler('api', CONF.host) conf_file = CONF.find_file(CONF.api_paste_config) diff --git a/trove/cmd/conductor.py b/trove/cmd/conductor.py index 62fd76894b..66499190fb 100644 --- a/trove/cmd/conductor.py +++ b/trove/cmd/conductor.py @@ -20,9 +20,13 @@ from trove.cmd.common import with_initialize @with_initialize def main(conf): + from trove.common import notification from trove.common.rpc import service as rpc_service from trove.common.rpc import version as rpc_version + from trove.instance import models as inst_models + notification.DBaaSAPINotification.register_notify_callback( + inst_models.persist_instance_fault) topic = conf.conductor_queue server = rpc_service.RpcService( manager=conf.conductor_manager, topic=topic, diff --git a/trove/cmd/taskmanager.py b/trove/cmd/taskmanager.py index 3aa4ef97d3..58f7ed125a 100644 --- a/trove/cmd/taskmanager.py +++ b/trove/cmd/taskmanager.py @@ -22,9 +22,13 @@ extra_opts = [openstack_cfg.StrOpt('taskmanager_manager')] def startup(conf, topic): + from trove.common import notification from trove.common.rpc 
import service as rpc_service from trove.common.rpc import version as rpc_version + from trove.instance import models as inst_models + notification.DBaaSAPINotification.register_notify_callback( + inst_models.persist_instance_fault) server = rpc_service.RpcService( manager=conf.taskmanager_manager, topic=topic, rpc_api_version=rpc_version.RPC_API_VERSION) diff --git a/trove/common/notification.py b/trove/common/notification.py index 0760bf1ba5..ee702adc3e 100644 --- a/trove/common/notification.py +++ b/trove/common/notification.py @@ -295,6 +295,15 @@ class DBaaSAPINotification(object): ''' event_type_format = 'dbaas.%s.%s' + notify_callback = None + + @classmethod + def register_notify_callback(cls, callback): + """A callback registered here will be fired whenever + a notification is sent out. The callback should + take a notification object, and event_qualifier. + """ + cls.notify_callback = callback @abc.abstractmethod def event_type(self): @@ -324,7 +333,7 @@ class DBaaSAPINotification(object): def optional_error_traits(self): 'Returns list of optional traits for error notification' - return [] + return ['instance_id'] def required_base_traits(self): return ['tenant_id', 'client_ip', 'server_ip', 'server_type', @@ -395,6 +404,8 @@ class DBaaSAPINotification(object): del context.notification notifier = rpc.get_notifier(service=self.payload['server_type']) notifier.info(context, qualified_event_type, self.payload) + if self.notify_callback: + self.notify_callback(event_qualifier) def notify_start(self, **kwargs): self._notify('start', self.required_start_traits(), diff --git a/trove/common/utils.py b/trove/common/utils.py index 79ce5ad9b7..402663c19e 100644 --- a/trove/common/utils.py +++ b/trove/common/utils.py @@ -331,3 +331,42 @@ def is_collection(item): """ return (isinstance(item, collections.Iterable) and not isinstance(item, (bytes, six.text_type))) + + +def format_output(message, format_len=79, truncate_len=None, replace_index=0): + """Recursive function 
to try and keep line lengths below a certain amount, + so they can be displayed nicely on the command-line or UI. + Tries replacement patterns one at a time (in round-robin fashion) + that insert \n at strategic spots. + """ + replacements = [['. ', '.\n'], [' (', '\n('], [': ', ':\n ']] + replace_index %= len(replacements) + if not isinstance(message, list): + message = message.splitlines(1) + msg_list = [] + for line in message: + if len(line) > format_len: + ok_to_split_again = False + for count in range(0, len(replacements)): + lines = line.replace( + replacements[replace_index][0], + replacements[replace_index][1], + 1 + ).splitlines(1) + replace_index = (replace_index + 1) % len(replacements) + if len(lines) > 1: + ok_to_split_again = True + break + for item in lines: + # If we split, but a line is still too long, do it again + if ok_to_split_again and len(item) > format_len: + item = format_output(item, format_len=format_len, + replace_index=replace_index) + msg_list.append(item) + else: + msg_list.append(line) + + msg_str = "".join(msg_list) + if truncate_len and len(msg_str) > truncate_len: + msg_str = msg_str[:truncate_len - 3] + '...' 
+ return msg_str diff --git a/trove/conductor/api.py b/trove/conductor/api.py index 617ccb5e7f..d83aef5c73 100644 --- a/trove/conductor/api.py +++ b/trove/conductor/api.py @@ -90,6 +90,7 @@ class API(object): context = self.context serialized = SerializableNotification.serialize(context, context.notification) + serialized.update({'instance_id': CONF.guest_id}) cctxt.cast(self.context, "notify_exc_info", serialized_notification=serialized, message=message, exception=exception) diff --git a/trove/conductor/manager.py b/trove/conductor/manager.py index 3519eb25cc..1d0e7f8968 100644 --- a/trove/conductor/manager.py +++ b/trove/conductor/manager.py @@ -18,14 +18,14 @@ from oslo_service import periodic_task from trove.backup import models as bkup_models from trove.common import cfg -from trove.common import exception +from trove.common import exception as trove_exception from trove.common.i18n import _ from trove.common.instance import ServiceStatus from trove.common.rpc import version as rpc_version from trove.common.serializable_notification import SerializableNotification from trove.conductor.models import LastSeen from trove.extensions.mysql import models as mysql_models -from trove.instance import models as t_models +from trove.instance import models as inst_models LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -57,7 +57,7 @@ class Manager(periodic_task.PeriodicTasks): try: seen = LastSeen.load(instance_id=instance_id, method_name=method_name) - except exception.NotFound: + except trove_exception.NotFound: # This is fine. 
pass @@ -86,7 +86,7 @@ class Manager(periodic_task.PeriodicTasks): LOG.debug("Instance ID: %(instance)s, Payload: %(payload)s" % {"instance": str(instance_id), "payload": str(payload)}) - status = t_models.InstanceServiceStatus.find_by( + status = inst_models.InstanceServiceStatus.find_by( instance_id=instance_id) if self._message_too_old(instance_id, 'heartbeat', sent): return diff --git a/trove/db/sqlalchemy/mappers.py b/trove/db/sqlalchemy/mappers.py index 68063ba976..ff90e262fd 100644 --- a/trove/db/sqlalchemy/mappers.py +++ b/trove/db/sqlalchemy/mappers.py @@ -26,6 +26,8 @@ def map(engine, models): return orm.mapper(models['instance'], Table('instances', meta, autoload=True)) + orm.mapper(models['instance_faults'], + Table('instance_faults', meta, autoload=True)) orm.mapper(models['root_enabled_history'], Table('root_enabled_history', meta, autoload=True)) orm.mapper(models['datastore'], diff --git a/trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py b/trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py new file mode 100644 index 0000000000..49b4570cf0 --- /dev/null +++ b/trove/db/sqlalchemy/migrate_repo/versions/038_instance_faults.py @@ -0,0 +1,56 @@ +# Copyright 2016 Tesora, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + +from sqlalchemy import ForeignKey +from sqlalchemy.schema import Column +from sqlalchemy.schema import MetaData + +from trove.db.sqlalchemy.migrate_repo.schema import Boolean +from trove.db.sqlalchemy.migrate_repo.schema import create_tables +from trove.db.sqlalchemy.migrate_repo.schema import DateTime +from trove.db.sqlalchemy.migrate_repo.schema import drop_tables +from trove.db.sqlalchemy.migrate_repo.schema import String +from trove.db.sqlalchemy.migrate_repo.schema import Table +from trove.db.sqlalchemy.migrate_repo.schema import Text + + +meta = MetaData() + +instance_faults = Table( + 'instance_faults', + meta, + Column('id', String(length=64), primary_key=True, nullable=False), + Column('instance_id', String(length=64), + ForeignKey('instances.id', ondelete="CASCADE", + onupdate="CASCADE"), nullable=False), + Column('message', String(length=255), nullable=False), + Column('details', Text(length=65535), nullable=False), + Column('created', DateTime(), nullable=False), + Column('updated', DateTime(), nullable=False), + Column('deleted', Boolean(), default=0, nullable=False), + Column('deleted_at', DateTime()), +) + + +def upgrade(migrate_engine): + meta.bind = migrate_engine + Table('instances', meta, autoload=True) + create_tables([instance_faults]) + + +def downgrade(migrate_engine): + meta.bind = migrate_engine + drop_tables([instance_faults]) diff --git a/trove/instance/models.py b/trove/instance/models.py index c4bcf9656d..383b2ce189 100644 --- a/trove/instance/models.py +++ b/trove/instance/models.py @@ -165,6 +165,8 @@ class SimpleInstance(object): self.db_info = db_info self.datastore_status = datastore_status self.root_pass = root_password + self._fault = None + self._fault_loaded = False if ds_version is None: self.ds_version = (datastore_models.DatastoreVersion. 
load_by_uuid(self.db_info.datastore_version_id)) @@ -375,6 +377,20 @@ class SimpleInstance(object): def root_password(self): return self.root_pass + @property + def fault(self): + # Fault can be non-existent, so we have a loaded flag + if not self._fault_loaded: + try: + self._fault = DBInstanceFault.find_by(instance_id=self.id) + # Get rid of the stack trace if we're not admin + if not self.context.is_admin: + self._fault.details = None + except exception.ModelNotFoundError: + pass + self._fault_loaded = True + return self._fault + @property def configuration(self): if self.db_info.configuration_id is not None: @@ -612,6 +628,7 @@ class BaseInstance(SimpleInstance): self.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE) self.set_servicestatus_deleted() + self.set_instance_fault_deleted() # Delete associated security group if CONF.trove_security_groups_support: SecurityGroup.delete_for_instance(self.db_info.id, @@ -640,6 +657,15 @@ class BaseInstance(SimpleInstance): del_instance.set_status(tr_instance.ServiceStatuses.DELETED) del_instance.save() + def set_instance_fault_deleted(self): + try: + del_fault = DBInstanceFault.find_by(instance_id=self.id) + del_fault.deleted = True + del_fault.deleted_at = datetime.utcnow() + del_fault.save() + except exception.ModelNotFoundError: + pass + @property def volume_client(self): if not self._volume_client: @@ -1355,6 +1381,54 @@ class DBInstance(dbmodels.DatabaseModelBase): task_status = property(get_task_status, set_task_status) +def persist_instance_fault(notification, event_qualifier): + """This callback is registered to be fired whenever a + notification is sent out. 
+ """ + if "error" == event_qualifier: + instance_id = notification.payload.get('instance_id') + message = notification.payload.get( + 'message', 'Missing notification message') + details = notification.payload.get('exception', []) + server_type = notification.server_type + if server_type: + details.insert(0, "Server type: %s\n" % server_type) + save_instance_fault(instance_id, message, details) + + +def save_instance_fault(instance_id, message, details): + if instance_id: + try: + # Make sure it's a valid id - sometimes the error is related + # to an invalid id and we can't save those + DBInstance.find_by(id=instance_id, deleted=False) + msg = utils.format_output(message, truncate_len=255) + det = utils.format_output(details) + try: + fault = DBInstanceFault.find_by(instance_id=instance_id) + fault.set_info(msg, det) + fault.save() + except exception.ModelNotFoundError: + DBInstanceFault.create( + instance_id=instance_id, + message=msg, details=det) + except exception.ModelNotFoundError: + # We don't need to save anything if the instance id isn't valid + pass + + +class DBInstanceFault(dbmodels.DatabaseModelBase): + _data_fields = ['instance_id', 'message', 'details', + 'created', 'updated', 'deleted', 'deleted_at'] + + def __init__(self, **kwargs): + super(DBInstanceFault, self).__init__(**kwargs) + + def set_info(self, message, details): + self.message = message + self.details = details + + class InstanceServiceStatus(dbmodels.DatabaseModelBase): _data_fields = ['instance_id', 'status_id', 'status_description', 'updated_at'] @@ -1400,6 +1474,7 @@ class InstanceServiceStatus(dbmodels.DatabaseModelBase): def persisted_models(): return { 'instance': DBInstance, + 'instance_faults': DBInstanceFault, 'service_statuses': InstanceServiceStatus, } diff --git a/trove/instance/views.py b/trove/instance/views.py index 5292311ab9..83f96d5878 100644 --- a/trove/instance/views.py +++ b/trove/instance/views.py @@ -92,6 +92,9 @@ class InstanceDetailView(InstanceView): 
result['instance']['datastore']['version'] = (self.instance. datastore_version.name) + if self.instance.fault: + result['instance']['fault'] = self._build_fault_info() + if self.instance.slaves: result['instance']['replicas'] = self._build_slaves_info() @@ -122,6 +125,13 @@ class InstanceDetailView(InstanceView): return result + def _build_fault_info(self): + return { + "message": self.instance.fault.message, + "created": self.instance.fault.updated, + "details": self.instance.fault.details, + } + def _build_slaves_info(self): data = [] for slave in self.instance.slaves: diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py index 568a6fef27..dfb496af46 100644 --- a/trove/taskmanager/models.py +++ b/trove/taskmanager/models.py @@ -348,6 +348,8 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): # Make sure the service becomes active before sending a usage # record to avoid over billing a customer for an instance that # fails to build properly. + error_message = '' + error_details = '' try: utils.poll_until(self._service_is_active, sleep_time=USAGE_SLEEP_TIME, @@ -355,14 +357,22 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): LOG.info(_("Created instance %s successfully.") % self.id) TroveInstanceCreate(instance=self, instance_size=flavor['ram']).notify() - except PollTimeOut: + except PollTimeOut as ex: LOG.error(_("Failed to create instance %s. " "Timeout waiting for instance to become active. 
" "No usage create-event was sent.") % self.id) self.update_statuses_on_time_out() - except Exception: + error_message = "%s" % ex + error_details = traceback.format_exc() + except Exception as ex: LOG.exception(_("Failed to send usage create-event for " "instance %s.") % self.id) + error_message = "%s" % ex + error_details = traceback.format_exc() + finally: + if error_message: + inst_models.save_instance_fault( + self.id, error_message, error_details) def create_instance(self, flavor, image_id, databases, users, datastore_manager, packages, volume_size, @@ -621,10 +631,18 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): raise TroveError(_("Service not active, status: %s") % status) c_id = self.db_info.compute_instance_id - nova_status = self.nova_client.servers.get(c_id).status - if nova_status in [InstanceStatus.ERROR, - InstanceStatus.FAILED]: - raise TroveError(_("Server not active, status: %s") % nova_status) + server = self.nova_client.servers.get(c_id) + server_status = server.status + if server_status in [InstanceStatus.ERROR, + InstanceStatus.FAILED]: + server_message = '' + if server.fault: + server_message = "\nServer error: %s" % ( + server.fault.get('message', 'Unknown')) + raise TroveError(_("Server not active, status: %(status)s" + "%(srv_msg)s") % + {'status': server_status, + 'srv_msg': server_message}) return False def _create_server_volume(self, flavor_id, image_id, security_groups, @@ -844,7 +862,9 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): "exc": exc, "trace": traceback.format_exc()}) self.update_db(task_status=task_status) - raise TroveError(message=message) + exc_message = '\n%s' % exc if exc else '' + full_message = "%s%s" % (message, exc_message) + raise TroveError(message=full_message) def _create_volume(self, volume_size, volume_type, datastore_manager): LOG.debug("Begin _create_volume for id: %s" % self.id) diff --git a/trove/tests/api/instances.py 
b/trove/tests/api/instances.py index 494c05ed42..219c1ff3da 100644 --- a/trove/tests/api/instances.py +++ b/trove/tests/api/instances.py @@ -481,7 +481,7 @@ class CreateInstanceFail(object): 'hostname', 'id', 'name', 'datastore', 'server_state_description', 'status', 'updated', 'users', 'volume', 'root_enabled_at', - 'root_enabled_by'] + 'root_enabled_by', 'fault'] with CheckInstance(result._info) as check: check.contains_allowed_attrs( result._info, allowed_attrs, @@ -693,7 +693,7 @@ class CreateInstance(object): # Check these attrs only are returned in create response allowed_attrs = ['created', 'flavor', 'addresses', 'id', 'links', - 'name', 'status', 'updated', 'datastore'] + 'name', 'status', 'updated', 'datastore', 'fault'] if ROOT_ON_CREATE: allowed_attrs.append('password') if VOLUME_SUPPORT: @@ -1156,7 +1156,7 @@ class TestInstanceListing(object): def test_get_instance(self): allowed_attrs = ['created', 'databases', 'flavor', 'hostname', 'id', 'links', 'name', 'status', 'updated', 'ip', - 'datastore'] + 'datastore', 'fault'] if VOLUME_SUPPORT: allowed_attrs.append('volume') else: @@ -1244,7 +1244,7 @@ class TestInstanceListing(object): 'flavor', 'guest_status', 'host', 'hostname', 'id', 'name', 'root_enabled_at', 'root_enabled_by', 'server_state_description', 'status', 'datastore', - 'updated', 'users', 'volume'] + 'updated', 'users', 'volume', 'fault'] with CheckInstance(result._info) as check: check.contains_allowed_attrs( result._info, allowed_attrs, diff --git a/trove/tests/fakes/guestagent.py b/trove/tests/fakes/guestagent.py index 42996383b3..79e0a02d70 100644 --- a/trove/tests/fakes/guestagent.py +++ b/trove/tests/fakes/guestagent.py @@ -241,7 +241,7 @@ class FakeGuest(object): status.status = rd_instance.ServiceStatuses.RUNNING status.save() AgentHeartBeat.create(instance_id=self.id) - eventlet.spawn_after(3.0, update_db) + eventlet.spawn_after(3.5, update_db) def _set_task_status(self, new_status='RUNNING'): from trove.instance.models import 
InstanceServiceStatus diff --git a/trove/tests/scenario/groups/database_actions_group.py b/trove/tests/scenario/groups/database_actions_group.py index 660e42c4b1..1262d055ca 100644 --- a/trove/tests/scenario/groups/database_actions_group.py +++ b/trove/tests/scenario/groups/database_actions_group.py @@ -137,7 +137,7 @@ class DatabaseActionsInstCreateWaitGroup(TestGroup): @test def wait_for_instances(self): """Waiting for all instances to become active.""" - self.instance_create_runner.wait_for_created_instances() + self.instance_create_runner.run_wait_for_created_instances() @test(depends_on=[wait_for_instances]) def add_initialized_instance_data(self): @@ -180,4 +180,4 @@ class DatabaseActionsInstDeleteWaitGroup(TestGroup): @test def wait_for_delete_initialized_instance(self): """Wait for the initialized instance to delete.""" - self.instance_create_runner.run_wait_for_initialized_instance_delete() + self.instance_create_runner.run_wait_for_error_init_delete() diff --git a/trove/tests/scenario/groups/instance_create_group.py b/trove/tests/scenario/groups/instance_create_group.py index 0cd2140cf0..958a925180 100644 --- a/trove/tests/scenario/groups/instance_create_group.py +++ b/trove/tests/scenario/groups/instance_create_group.py @@ -55,6 +55,16 @@ class InstanceCreateGroup(TestGroup): """Create an instance with initial properties.""" self.test_runner.run_initialized_instance_create() + @test(runs_after=[create_initialized_instance]) + def create_error_instance(self): + """Create an instance in error state.""" + self.test_runner.run_create_error_instance() + + @test(runs_after=[create_error_instance]) + def create_error2_instance(self): + """Create another instance in error state.""" + self.test_runner.run_create_error2_instance() + @test(depends_on_groups=[groups.INST_CREATE], groups=[GROUP, groups.INST_CREATE_WAIT], @@ -67,9 +77,30 @@ class InstanceCreateWaitGroup(TestGroup): InstanceCreateRunnerFactory.instance()) @test + def wait_for_error_instances(self): + 
"""Wait for the error instances to fail.""" + self.test_runner.run_wait_for_error_instances() + + @test(depends_on=[wait_for_error_instances]) + def validate_error_instance(self): + """Validate the error instance fault message.""" + self.test_runner.run_validate_error_instance() + + @test(depends_on=[wait_for_error_instances], + runs_after=[validate_error_instance]) + def validate_error2_instance(self): + """Validate the error2 instance fault message as admin.""" + self.test_runner.run_validate_error2_instance() + + @test(runs_after=[validate_error_instance, validate_error2_instance]) + def delete_error_instances(self): + """Delete the error instances.""" + self.test_runner.run_delete_error_instances() + + @test(runs_after=[delete_error_instances]) def wait_for_instances(self): """Waiting for all instances to become active.""" - self.test_runner.wait_for_created_instances() + self.test_runner.run_wait_for_created_instances() @test(depends_on=[wait_for_instances]) def add_initialized_instance_data(self): @@ -107,11 +138,11 @@ class InstanceInitDeleteWaitGroup(TestGroup): InstanceCreateRunnerFactory.instance()) @test - def wait_for_initialized_instance_delete(self): - """Wait for the initialized instance to be deleted.""" - self.test_runner.run_wait_for_initialized_instance_delete() + def wait_for_error_init_delete(self): + """Wait for the initialized and error instances to be gone.""" + self.test_runner.run_wait_for_error_init_delete() - @test(runs_after=[wait_for_initialized_instance_delete]) + @test(runs_after=[wait_for_error_init_delete]) def delete_initial_configuration(self): """Delete the initial configuration group.""" self.test_runner.run_initial_configuration_delete() diff --git a/trove/tests/scenario/groups/user_actions_group.py b/trove/tests/scenario/groups/user_actions_group.py index 686ac0d449..fb18caf6a4 100644 --- a/trove/tests/scenario/groups/user_actions_group.py +++ b/trove/tests/scenario/groups/user_actions_group.py @@ -225,7 +225,7 @@ class 
UserActionsInstCreateWaitGroup(TestGroup): @test def wait_for_instances(self): """Waiting for all instances to become active.""" - self.instance_create_runner.wait_for_created_instances() + self.instance_create_runner.run_wait_for_created_instances() @test(depends_on=[wait_for_instances]) def validate_initialized_instance(self): @@ -264,4 +264,4 @@ class UserActionsInstDeleteWaitGroup(TestGroup): @test def wait_for_delete_initialized_instance(self): """Wait for the initialized instance to delete.""" - self.instance_create_runner.run_wait_for_initialized_instance_delete() + self.instance_create_runner.run_wait_for_error_init_delete() diff --git a/trove/tests/scenario/runners/instance_create_runners.py b/trove/tests/scenario/runners/instance_create_runners.py index 7490800bdf..832f675fe0 100644 --- a/trove/tests/scenario/runners/instance_create_runners.py +++ b/trove/tests/scenario/runners/instance_create_runners.py @@ -28,6 +28,8 @@ class InstanceCreateRunner(TestRunner): def __init__(self): super(InstanceCreateRunner, self).__init__() + self.error_inst_id = None + self.error2_inst_id = None self.init_inst_id = None self.init_inst_dbs = None self.init_inst_users = None @@ -40,10 +42,10 @@ class InstanceCreateRunner(TestRunner): self, expected_states=['BUILD', 'ACTIVE'], expected_http_code=200): name = self.instance_info.name flavor = self._get_instance_flavor() - trove_volume_size = CONFIG.get('trove_volume_size', 1) + volume_size = self.instance_info.volume_size instance_info = self.assert_instance_create( - name, flavor, trove_volume_size, [], [], None, None, + name, flavor, volume_size, [], [], None, None, CONFIG.dbaas_datastore, CONFIG.dbaas_datastore_version, expected_states, expected_http_code, create_helper_user=True, locality='affinity') @@ -92,7 +94,7 @@ class InstanceCreateRunner(TestRunner): configuration_id = configuration_id or self.config_group_id name = self.instance_info.name + name_suffix flavor = self._get_instance_flavor() - trove_volume_size = 
CONFIG.get('trove_volume_size', 1) + volume_size = self.instance_info.volume_size self.init_inst_dbs = (self.test_helper.get_valid_database_definitions() if with_dbs else []) self.init_inst_users = (self.test_helper.get_valid_user_definitions() @@ -100,7 +102,7 @@ class InstanceCreateRunner(TestRunner): self.init_inst_config_group_id = configuration_id if (self.init_inst_dbs or self.init_inst_users or configuration_id): info = self.assert_instance_create( - name, flavor, trove_volume_size, + name, flavor, volume_size, self.init_inst_dbs, self.init_inst_users, configuration_id, None, CONFIG.dbaas_datastore, CONFIG.dbaas_datastore_version, @@ -113,12 +115,19 @@ class InstanceCreateRunner(TestRunner): # the empty instance test. raise SkipTest("No testable initial properties provided.") - def _get_instance_flavor(self): + def _get_instance_flavor(self, fault_num=None): + name_format = 'instance%s%s_flavor_name' + default = 'm1.tiny' + fault_str = '' + eph_str = '' + if fault_num: + fault_str = '_fault_%d' % fault_num if self.EPHEMERAL_SUPPORT: - flavor_name = CONFIG.values.get('instance_eph_flavor_name', - 'eph.rd-tiny') - else: - flavor_name = CONFIG.values.get('instance_flavor_name', 'm1.tiny') + eph_str = '_eph' + default = 'eph.rd-tiny' + + name = name_format % (fault_str, eph_str) + flavor_name = CONFIG.values.get(name, default) return self.get_flavor(flavor_name) @@ -238,7 +247,86 @@ class InstanceCreateRunner(TestRunner): return instance_info - def wait_for_created_instances(self, expected_states=['BUILD', 'ACTIVE']): + def run_create_error_instance( + self, expected_states=['BUILD', 'ERROR'], expected_http_code=200): + if self.is_using_existing_instance: + raise SkipTest("Using an existing instance.") + + name = self.instance_info.name + '_error' + flavor = self._get_instance_flavor(fault_num=1) + volume_size = self.instance_info.volume_size + + inst = self.assert_instance_create( + name, flavor, volume_size, [], [], None, None, + CONFIG.dbaas_datastore, 
CONFIG.dbaas_datastore_version, + expected_states, expected_http_code, create_helper_user=False) + self.assert_client_code(expected_http_code) + self.error_inst_id = inst.id + + def run_create_error2_instance( + self, expected_states=['BUILD', 'ERROR'], expected_http_code=200): + if self.is_using_existing_instance: + raise SkipTest("Using an existing instance.") + + name = self.instance_info.name + '_error2' + flavor = self._get_instance_flavor(fault_num=2) + volume_size = self.instance_info.volume_size + + inst = self.assert_instance_create( + name, flavor, volume_size, [], [], None, None, + CONFIG.dbaas_datastore, CONFIG.dbaas_datastore_version, + expected_states, expected_http_code, create_helper_user=False) + self.assert_client_code(expected_http_code) + self.error2_inst_id = inst.id + + def run_wait_for_error_instances(self, expected_states=['ERROR']): + error_ids = [] + if self.error_inst_id: + error_ids.append(self.error_inst_id) + if self.error2_inst_id: + error_ids.append(self.error2_inst_id) + + if error_ids: + self.assert_all_instance_states( + error_ids, expected_states, fast_fail_status=[]) + + def run_validate_error_instance(self): + if not self.error_inst_id: + raise SkipTest("No error instance created.") + + instance = self.get_instance(self.error_inst_id) + with CheckInstance(instance._info) as check: + check.fault() + + err_msg = "disk is too small for requested image" + self.assert_true(err_msg in instance.fault['message'], + "Message '%s' does not contain '%s'" % + (instance.fault['message'], err_msg)) + + def run_validate_error2_instance(self): + if not self.error2_inst_id: + raise SkipTest("No error2 instance created.") + + instance = self.get_instance( + self.error2_inst_id, client=self.admin_client) + with CheckInstance(instance._info) as check: + check.fault(is_admin=True) + + err_msg = "Quota exceeded for ram" + self.assert_true(err_msg in instance.fault['message'], + "Message '%s' does not contain '%s'" % + (instance.fault['message'], 
err_msg)) + + def run_delete_error_instances(self, expected_http_code=202): + if self.error_inst_id: + self.auth_client.instances.delete(self.error_inst_id) + self.assert_client_code(expected_http_code) + if self.error2_inst_id: + self.auth_client.instances.delete(self.error2_inst_id) + self.assert_client_code(expected_http_code) + + def run_wait_for_created_instances( + self, expected_states=['BUILD', 'ACTIVE']): instances = [self.instance_info.id] if self.init_inst_id: instances.append(self.init_inst_id) @@ -324,10 +412,16 @@ class InstanceCreateRunner(TestRunner): else: raise SkipTest("Cleanup is not required.") - def run_wait_for_initialized_instance_delete(self, - expected_states=['SHUTDOWN']): + def run_wait_for_error_init_delete(self, expected_states=['SHUTDOWN']): + delete_ids = [] + if self.error_inst_id: + delete_ids.append(self.error_inst_id) + if self.error2_inst_id: + delete_ids.append(self.error2_inst_id) if self.init_inst_id: - self.assert_all_gone(self.init_inst_id, expected_states[-1]) + delete_ids.append(self.init_inst_id) + if delete_ids: + self.assert_all_gone(delete_ids, expected_states[-1]) else: raise SkipTest("Cleanup is not required.") self.init_inst_id = None diff --git a/trove/tests/scenario/runners/test_runners.py b/trove/tests/scenario/runners/test_runners.py index 9e2d282ac6..6a4718895c 100644 --- a/trove/tests/scenario/runners/test_runners.py +++ b/trove/tests/scenario/runners/test_runners.py @@ -153,10 +153,12 @@ class InstanceTestInfo(object): self.dbaas_flavor_href = None # The flavor of the instance. self.dbaas_datastore = None # The datastore id self.dbaas_datastore_version = None # The datastore version id + self.volume_size = None # The size of volume the instance will have. self.volume = None # The volume the instance will have. self.nics = None # The dict of type/id for nics used on the intance. self.user = None # The user instance who owns the instance. self.users = None # The users created on the instance. 
+ self.databases = None # The databases created on the instance. class TestRunner(object): @@ -207,9 +209,11 @@ class TestRunner(object): CONFIG.dbaas_datastore_version) self.instance_info.user = CONFIG.users.find_user_by_name('alt_demo') if self.VOLUME_SUPPORT: + self.instance_info.volume_size = CONFIG.get('trove_volume_size', 1) self.instance_info.volume = { - 'size': CONFIG.get('trove_volume_size', 1)} + 'size': self.instance_info.volume_size} else: + self.instance_info.volume_size = None self.instance_info.volume = None self._auth_client = None @@ -418,13 +422,17 @@ class TestRunner(object): self.assert_equal(expected_http_code, client.last_http_code, "Unexpected client status code") - def assert_all_instance_states(self, instance_ids, expected_states): + def assert_all_instance_states(self, instance_ids, expected_states, + fast_fail_status=None, + require_all_states=False): self.report.log("Waiting for states (%s) for instances: %s" % (expected_states, instance_ids)) def _make_fn(inst_id): return lambda: self._assert_instance_states( - inst_id, expected_states) + inst_id, expected_states, + fast_fail_status=fast_fail_status, + require_all_states=require_all_states) tasks = [build_polling_task(_make_fn(instance_id), sleep_time=self.def_sleep_time, time_out=self.def_timeout) @@ -441,7 +449,7 @@ class TestRunner(object): self.fail(str(task.poll_exception())) def _assert_instance_states(self, instance_id, expected_states, - fast_fail_status=['ERROR', 'FAILED'], + fast_fail_status=None, require_all_states=False): """Keep polling for the expected instance states until the instance acquires either the last or fast-fail state. 
@@ -454,6 +462,9 @@ class TestRunner(object): self.report.log("Waiting for states (%s) for instance: %s" % (expected_states, instance_id)) + + if fast_fail_status is None: + fast_fail_status = ['ERROR', 'FAILED'] found = False for status in expected_states: if require_all_states or found or self._has_status( @@ -595,8 +606,9 @@ class TestRunner(object): if server_group: self.fail("Found left-over server group: %s" % server_group) - def get_instance(self, instance_id): - return self.auth_client.instances.get(instance_id) + def get_instance(self, instance_id, client=None): + client = client or self.auth_client + return client.instances.get(instance_id) def get_instance_host(self, instance_id=None): instance_id = instance_id or self.instance_info.id @@ -782,3 +794,16 @@ class CheckInstance(AttrCheck): slave, allowed_attrs, msg="Replica links not found") self.links(slave['links']) + + def fault(self, is_admin=False): + if 'fault' not in self.instance: + self.fail("'fault' not found in instance.") + else: + allowed_attrs = ['message', 'created', 'details'] + self.contains_allowed_attrs( + self.instance['fault'], allowed_attrs, + msg="Fault") + if is_admin and not self.instance['fault']['details']: + self.fail("Missing fault details") + if not is_admin and self.instance['fault']['details']: + self.fail("Fault details provided for non-admin") diff --git a/trove/tests/unittests/common/test_notification.py b/trove/tests/unittests/common/test_notification.py index cf01024b3f..cdbc09cc57 100644 --- a/trove/tests/unittests/common/test_notification.py +++ b/trove/tests/unittests/common/test_notification.py @@ -383,3 +383,29 @@ class TestDBaaSNotification(trove_testtools.TestCase): a, _ = notifier().info.call_args payload = a[2] self.assertTrue('instance_id' in payload) + + def _test_notify_callback(self, fn, *args, **kwargs): + with patch.object(rpc, 'get_notifier') as notifier: + mock_callback = Mock() + self.test_n.register_notify_callback(mock_callback) + mock_context = 
Mock() + mock_context.notification = Mock() + self.test_n.context = mock_context + fn(*args, **kwargs) + self.assertTrue(notifier().info.called) + self.assertTrue(mock_callback.called) + self.test_n.register_notify_callback(None) + + def test_notify_callback(self): + required_keys = { + 'datastore': 'ds', + 'name': 'name', + 'flavor_id': 'flav_id', + 'instance_id': 'inst_id', + } + self._test_notify_callback(self.test_n.notify_start, + **required_keys) + self._test_notify_callback(self.test_n.notify_end, + **required_keys) + self._test_notify_callback(self.test_n.notify_exc_info, + 'error', 'exc') diff --git a/trove/tests/unittests/common/test_utils.py b/trove/tests/unittests/common/test_utils.py index aaf83bdfe8..4b3de75b07 100644 --- a/trove/tests/unittests/common/test_utils.py +++ b/trove/tests/unittests/common/test_utils.py @@ -22,15 +22,15 @@ from trove.common import utils from trove.tests.unittests import trove_testtools -class TestTroveExecuteWithTimeout(trove_testtools.TestCase): +class TestUtils(trove_testtools.TestCase): def setUp(self): - super(TestTroveExecuteWithTimeout, self).setUp() + super(TestUtils, self).setUp() self.orig_utils_execute = utils.execute self.orig_utils_log_error = utils.LOG.error def tearDown(self): - super(TestTroveExecuteWithTimeout, self).tearDown() + super(TestUtils, self).tearDown() utils.execute = self.orig_utils_execute utils.LOG.error = self.orig_utils_log_error @@ -81,3 +81,21 @@ class TestTroveExecuteWithTimeout(trove_testtools.TestCase): def test_pagination_limit(self): self.assertEqual(5, utils.pagination_limit(5, 9)) self.assertEqual(5, utils.pagination_limit(9, 5)) + + def test_format_output(self): + data = [ + ['', ''], + ['Single line', 'Single line'], + ['Long line no breaks ' * 10, 'Long line no breaks ' * 10], + ['Long line. Has breaks ' * 5, + 'Long line.\nHas breaks ' * 2 + 'Long line. 
Has breaks ' * 3], + ['Long line with semi: ' * 4, + 'Long line with semi:\n ' + + 'Long line with semi: ' * 3], + ['Long line with brack (' * 4, + 'Long line with brack\n(' + + 'Long line with brack (' * 3], + ] + for index, datum in enumerate(data): + self.assertEqual(datum[1], utils.format_output(datum[0]), + "Error formatting line %d of data" % index) diff --git a/trove/tests/unittests/instance/test_instance_models.py b/trove/tests/unittests/instance/test_instance_models.py index af31025308..f39b9e8dfe 100644 --- a/trove/tests/unittests/instance/test_instance_models.py +++ b/trove/tests/unittests/instance/test_instance_models.py @@ -22,6 +22,7 @@ from trove.common.instance import ServiceStatuses from trove.datastore import models as datastore_models from trove.instance import models from trove.instance.models import DBInstance +from trove.instance.models import DBInstanceFault from trove.instance.models import filter_ips from trove.instance.models import Instance from trove.instance.models import InstanceServiceStatus @@ -39,12 +40,14 @@ class SimpleInstanceTest(trove_testtools.TestCase): def setUp(self): super(SimpleInstanceTest, self).setUp() + self.context = trove_testtools.TroveTestContext(self, is_admin=True) db_info = DBInstance( InstanceTasks.BUILDING, name="TestInstance") self.instance = SimpleInstance( None, db_info, InstanceServiceStatus( ServiceStatuses.BUILDING), ds_version=Mock(), ds=Mock(), locality='affinity') + self.instance.context = self.context db_info.addresses = {"private": [{"addr": "123.123.123.123"}], "internal": [{"addr": "10.123.123.123"}], "public": [{"addr": "15.123.123.123"}]} @@ -106,6 +109,21 @@ class SimpleInstanceTest(trove_testtools.TestCase): def test_locality(self): self.assertEqual('affinity', self.instance.locality) + def test_fault(self): + fault_message = 'Error' + fault_details = 'details' + fault_date = 'now' + temp_fault = Mock() + temp_fault.message = fault_message + temp_fault.details = fault_details + 
temp_fault.updated = fault_date + fault_mock = Mock(return_value=temp_fault) + with patch.object(DBInstanceFault, 'find_by', fault_mock): + fault = self.instance.fault + self.assertEqual(fault_message, fault.message) + self.assertEqual(fault_details, fault.details) + self.assertEqual(fault_date, fault.updated) + class CreateInstanceTest(trove_testtools.TestCase): diff --git a/trove/tests/unittests/instance/test_instance_views.py b/trove/tests/unittests/instance/test_instance_views.py index 948185ec50..e8458c42f0 100644 --- a/trove/tests/unittests/instance/test_instance_views.py +++ b/trove/tests/unittests/instance/test_instance_views.py @@ -63,6 +63,13 @@ class InstanceDetailViewTest(trove_testtools.TestCase): self.instance.slave_of_id = None self.instance.slaves = [] self.instance.locality = 'affinity' + self.fault_message = 'Error' + self.fault_details = 'details' + self.fault_date = 'now' + self.instance.fault = Mock() + self.instance.fault.message = self.fault_message + self.instance.fault.details = self.fault_details + self.instance.fault.updated = self.fault_date def tearDown(self): super(InstanceDetailViewTest, self).tearDown() @@ -98,3 +105,13 @@ class InstanceDetailViewTest(trove_testtools.TestCase): result = view.data() self.assertEqual(self.instance.locality, result['instance']['locality']) + + def test_fault(self): + view = InstanceDetailView(self.instance, Mock()) + result = view.data() + self.assertEqual(self.fault_message, + result['instance']['fault']['message']) + self.assertEqual(self.fault_details, + result['instance']['fault']['details']) + self.assertEqual(self.fault_date, + result['instance']['fault']['created'])