diff --git a/api-ref/source/instance-actions.inc b/api-ref/source/instance-actions.inc index 15787b13ad..14715f202a 100644 --- a/api-ref/source/instance-actions.inc +++ b/api-ref/source/instance-actions.inc @@ -311,4 +311,34 @@ Request Example --------------- .. literalinclude:: samples/instance-mgmt-action-reset-task-status-request.json + :language: javascript + + +Rebuild instance +~~~~~~~~~~~~~~~~ + +.. rest_method:: POST /v1.0/{project_id}/mgmt/instances/{instanceId}/action + +Admin only API. Rebuild the Nova server's operating system for the database +instance. The rebuild operation is mainly for Trove upgrade, especially when +the interface between Trove controller and guest agent changes. After Trove +controller is upgraded, the cloud administrator needs to send a rebuild request +with the new guest image ID. Communication with the end users is needed as the +database service goes offline during the process. User data in the database +is not affected. + +Normal response codes: 202 + +Request +------- + +.. rest_parameters:: parameters.yaml + + - project_id: project_id + - instanceId: instanceId + +Request Example +--------------- + +.. 
literalinclude:: samples/instance-mgmt-action-rebuild-instance-request.json :language: javascript \ No newline at end of file diff --git a/api-ref/source/samples/instance-mgmt-action-rebuild-instance-request.json b/api-ref/source/samples/instance-mgmt-action-rebuild-instance-request.json new file mode 100644 index 0000000000..99befd1fde --- /dev/null +++ b/api-ref/source/samples/instance-mgmt-action-rebuild-instance-request.json @@ -0,0 +1,5 @@ +{ + "rebuild": { + "image_id": "3e50414a-8532-4646-982c-a66fe8f0411b" + } +} \ No newline at end of file diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 8bbcbac766..0e133e182c 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -218,6 +218,7 @@ function configure_trove { iniset_conditional $TROVE_CONF DEFAULT usage_timeout $TROVE_USAGE_TIMEOUT iniset_conditional $TROVE_CONF DEFAULT state_change_wait_time $TROVE_STATE_CHANGE_WAIT_TIME iniset_conditional $TROVE_CONF DEFAULT reboot_time_out 300 + iniset $TROVE_CONF DEFAULT controller_address ${SERVICE_HOST} configure_keystone_authtoken_middleware $TROVE_CONF trove iniset $TROVE_CONF service_credentials username trove diff --git a/integration/scripts/functions_qemu b/integration/scripts/functions_qemu index 395756fca0..e6c00c91db 100644 --- a/integration/scripts/functions_qemu +++ b/integration/scripts/functions_qemu @@ -41,6 +41,7 @@ function build_guest_image() { export DIB_RELEASE=${guest_release} export DIB_CLOUD_INIT_DATASOURCES="ConfigDrive" + export DIB_CLOUD_INIT_ETC_HOSTS="localhost" # https://cloud-images.ubuntu.com/releases is more stable than the daily # builds (https://cloud-images.ubuntu.com/xenial/current/), diff --git a/releasenotes/notes/victoria-rebuild-instance.yaml b/releasenotes/notes/victoria-rebuild-instance.yaml new file mode 100644 index 0000000000..28445f52bc --- /dev/null +++ b/releasenotes/notes/victoria-rebuild-instance.yaml @@ -0,0 +1,8 @@ +--- +features: + - Support db instance rebuild. 
The rebuild operation is mainly for Trove + upgrade, especially when the interface between Trove controller and guest + agent changes. After Trove controller is upgraded, the cloud administrator + needs to send a rebuild request with the new guest image ID. Communication + with the end users is needed as the database service is offline during the + process. User data in the database is not affected. diff --git a/trove/cmd/guest.py b/trove/cmd/guest.py index ff86e4f30d..e5946c632d 100644 --- a/trove/cmd/guest.py +++ b/trove/cmd/guest.py @@ -23,6 +23,7 @@ from trove.common import debug_utils from trove.common.i18n import _ from trove.guestagent import api as guest_api from trove.guestagent.common import operating_system +from trove.guestagent import volume CONF = cfg.CONF # The guest_id opt definition must match the one in common/cfg.py @@ -64,6 +65,21 @@ def main(): uid = cfg.get_configuration_property('database_service_uid') operating_system.create_user('database', uid) + # Mount device if needed. + # When doing rebuild, the device should be already formatted but not + # mounted. + device_path = CONF.get(CONF.datastore_manager).device_path + mount_point = CONF.get(CONF.datastore_manager).mount_point + device = volume.VolumeDevice(device_path) + if not device.mount_points(device_path): + LOG.info('Preparing the storage for %s, mount path %s', + device_path, mount_point) + device.format() + device.mount(mount_point) + operating_system.chown(mount_point, CONF.database_service_uid, + CONF.database_service_uid, + recursive=True, as_root=True) + # rpc module must be loaded after decision about thread monkeypatching # because if thread module is not monkeypatched we can't use eventlet # executor from oslo_messaging library. 
diff --git a/trove/common/apischema.py b/trove/common/apischema.py index 0388faceb1..5b7594f167 100644 --- a/trove/common/apischema.py +++ b/trove/common/apischema.py @@ -548,6 +548,21 @@ mgmt_instance = { "type": "object" } } + }, + "rebuild": { + "type": "object", + "required": ["rebuild"], + "additionalProperties": True, + "properties": { + "rebuild": { + "type": "object", + "required": ["image_id"], + "additionalProperties": False, + "properties": { + "image_id": uuid + } + } + } } } } diff --git a/trove/common/cfg.py b/trove/common/cfg.py index ed099961ba..b173d9d8b2 100644 --- a/trove/common/cfg.py +++ b/trove/common/cfg.py @@ -188,6 +188,10 @@ common_opts = [ 'commands to complete.'), # The guest_id opt definition must match the one in cmd/guest.py cfg.StrOpt('guest_id', default=None, help="ID of the Guest Instance."), + cfg.StrOpt('controller_address', + help='The address used to download Trove code by guest agent ' + 'in developer mode. This address is inserted into the ' + 'file /etc/trove/controller.conf inside the guest.'), cfg.IntOpt('state_change_wait_time', default=180, help='Maximum time (in seconds) to wait for database state ' 'change.'), diff --git a/trove/extensions/mgmt/instances/service.py b/trove/extensions/mgmt/instances/service.py index 7c4f95dcef..0d14b7781d 100644 --- a/trove/extensions/mgmt/instances/service.py +++ b/trove/extensions/mgmt/instances/service.py @@ -106,7 +106,8 @@ class MgmtInstanceController(InstanceController): 'stop': self._action_stop, 'reboot': self._action_reboot, 'migrate': self._action_migrate, - 'reset-task-status': self._action_reset_task_status + 'reset-task-status': self._action_reset_task_status, + 'rebuild': self._action_rebuild } selected_action = None for key in body: @@ -161,6 +162,14 @@ class MgmtInstanceController(InstanceController): return wsgi.Result(None, 202) + def _action_rebuild(self, context, instance, req, body): + LOG.info("Rebuild instance %s.", instance.id) + req_body = body['rebuild'] + 
+ image_id = req_body['image_id'] + + instance.rebuild(image_id) + return wsgi.Result(None, 202) + @admin_context def root(self, req, tenant_id, id): """Return the date and time root was enabled on an instance, diff --git a/trove/guestagent/api.py b/trove/guestagent/api.py index 329980b48d..19870b989b 100644 --- a/trove/guestagent/api.py +++ b/trove/guestagent/api.py @@ -396,7 +396,7 @@ class API(object): self._call("restart", self.agent_high_timeout, version=version) - def start_db_with_conf_changes(self, config_contents): + def start_db_with_conf_changes(self, config_contents, ds_version): """Start the database server.""" LOG.debug("Sending the call to start the database process on " "the Guest with a timeout of %s.", @@ -404,7 +404,8 @@ class API(object): version = self.API_BASE_VERSION self._call("start_db_with_conf_changes", self.agent_high_timeout, - version=version, config_contents=config_contents) + version=version, config_contents=config_contents, + ds_version=ds_version) def reset_configuration(self, configuration): """Ignore running state of the database server; just change @@ -650,3 +651,18 @@ class API(object): return self._call("module_remove", self.agent_high_timeout, version=version, module=module) + + def rebuild(self, ds_version, config_contents=None, config_overrides=None): + """Make an asynchronous call to rebuild the database service.""" + LOG.debug("Sending the call to rebuild database service in the guest.") + version = self.API_BASE_VERSION + + # Taskmanager is a publisher, guestagent is a consumer. Usually + # consumer creates a queue, but in this case we have to make sure + # "rebuild" doesn't get lost if for some reason guest was delayed and + # didn't create a queue on time. 
+ self._create_guest_queue() + + self._cast("rebuild", version=version, + ds_version=ds_version, config_contents=config_contents, + config_overrides=config_overrides) diff --git a/trove/guestagent/common/guestagent_utils.py b/trove/guestagent/common/guestagent_utils.py index c5c6b7e8e8..4bbc960fc7 100644 --- a/trove/guestagent/common/guestagent_utils.py +++ b/trove/guestagent/common/guestagent_utils.py @@ -19,8 +19,12 @@ import re import six +from trove.common import cfg from trove.common import pagination from trove.common import utils +from trove.guestagent.common import operating_system + +CONF = cfg.CONF def update_dict(updates, target): @@ -164,3 +168,16 @@ def get_filesystem_volume_stats(fs_path): 'used': used_gb } return output + + +def get_conf_dir(): + """Get the config directory for the database related settings. + + For now, the files inside the config dir are mainly for instance rebuild. + """ + mount_point = CONF.get(CONF.datastore_manager).mount_point + conf_dir = os.path.join(mount_point, 'conf.d') + if not operating_system.exists(conf_dir, is_directory=True, as_root=True): + operating_system.create_directory(conf_dir, as_root=True) + + return conf_dir diff --git a/trove/guestagent/datastore/manager.py b/trove/guestagent/datastore/manager.py index f1bec02e07..e66cf25368 100644 --- a/trove/guestagent/datastore/manager.py +++ b/trove/guestagent/datastore/manager.py @@ -895,3 +895,8 @@ class Manager(periodic_task.PeriodicTasks): LOG.debug("Waiting for transaction.") raise exception.DatastoreOperationNotSupported( operation='wait_for_txn', datastore=self.manager) + + def rebuild(self, context, ds_version, config_contents=None, + config_overrides=None): + raise exception.DatastoreOperationNotSupported( + operation='rebuild', datastore=self.manager) diff --git a/trove/guestagent/datastore/mysql_common/manager.py b/trove/guestagent/datastore/mysql_common/manager.py index eeb1707a16..2c277fc061 100644 --- a/trove/guestagent/datastore/mysql_common/manager.py 
+++ b/trove/guestagent/datastore/mysql_common/manager.py @@ -25,7 +25,6 @@ from trove.common import exception from trove.common import utils from trove.common.notification import EndNotification from trove.guestagent import guest_log -from trove.guestagent import volume from trove.guestagent.common import operating_system from trove.guestagent.datastore import manager from trove.guestagent.strategies import replication as repl_strategy @@ -137,31 +136,13 @@ class MySqlManager(manager.Manager): cluster_config, snapshot, ds_version=None): """This is called from prepare in the base class.""" data_dir = mount_point + '/data' - if device_path: - LOG.info('Preparing the storage for %s, mount path %s', - device_path, mount_point) - - self.app.stop_db() - - device = volume.VolumeDevice(device_path) - # unmount if device is already mounted - device.unmount_device(device_path) - device.format() - if operating_system.list_files_in_directory(mount_point): - # rsync existing data to a "data" sub-directory - # on the new volume - device.migrate_data(mount_point, target_subdir="data") - # mount the volume - device.mount(mount_point) - operating_system.chown(mount_point, CONF.database_service_uid, - CONF.database_service_uid, - recursive=True, as_root=True) - - operating_system.create_directory(data_dir, - user=CONF.database_service_uid, - group=CONF.database_service_uid, - as_root=True) - self.app.set_data_dir(data_dir) + self.app.stop_db() + operating_system.create_directory(data_dir, + user=CONF.database_service_uid, + group=CONF.database_service_uid, + as_root=True) + # This makes sure the include dir is created. + self.app.set_data_dir(data_dir) # Prepare mysql configuration LOG.info('Preparing database configuration') @@ -177,7 +158,11 @@ class MySqlManager(manager.Manager): # Start database service. 
# Cinder volume initialization(after formatted) may leave a # lost+found folder - command = f'--ignore-db-dir=lost+found --datadir={data_dir}' + # The --ignore-db-dir option is deprecated in MySQL 5.7. With the + # introduction of the data dictionary in MySQL 8.0, it became + # superfluous and was removed in that version. + command = (f'--ignore-db-dir=lost+found --ignore-db-dir=conf.d ' + f'--datadir={data_dir}') self.app.start_db(ds_version=ds_version, command=command) self.app.secure() @@ -212,8 +197,8 @@ class MySqlManager(manager.Manager): def restart(self, context): self.app.restart() - def start_db_with_conf_changes(self, context, config_contents): - self.app.start_db_with_conf_changes(config_contents) + def start_db_with_conf_changes(self, context, config_contents, ds_version): + self.app.start_db_with_conf_changes(config_contents, ds_version) def get_datastore_log_defs(self): owner = cfg.get_configuration_property('database_service_uid') @@ -437,3 +422,41 @@ class MySqlManager(manager.Manager): LOG.info('Starting to upgrade database, upgrade_info: %s', upgrade_info) self.app.upgrade(upgrade_info) + + def rebuild(self, context, ds_version, config_contents=None, + config_overrides=None): + """Restore datastore service after instance rebuild.""" + LOG.info("Starting to restore database service") + self.status.begin_install() + + mount_point = CONF.get(CONF.datastore_manager).mount_point + data_dir = mount_point + '/data' + operating_system.create_directory(data_dir, + user=CONF.database_service_uid, + group=CONF.database_service_uid, + as_root=True) + # This makes sure the include dir is created. + self.app.set_data_dir(data_dir) + + try: + # Prepare mysql configuration + LOG.debug('Preparing database configuration') + self.app.configuration_manager.save_configuration(config_contents) + self.app.update_overrides(config_overrides) + + # Start database service. 
+ # Cinder volume initialization(after formatted) may leave a + # lost+found folder + # The --ignore-db-dir option is deprecated in MySQL 5.7. With the + # introduction of the data dictionary in MySQL 8.0, it became + # superfluous and was removed in that version. + command = (f'--ignore-db-dir=lost+found --ignore-db-dir=conf.d ' + f'--datadir={data_dir}') + self.app.start_db(ds_version=ds_version, command=command) + except Exception as e: + LOG.error(f"Failed to restore database service after rebuild, " + f"error: {str(e)}") + self.prepare_error = True + raise + finally: + self.status.end_install(error_occurred=self.prepare_error) diff --git a/trove/guestagent/datastore/mysql_common/service.py b/trove/guestagent/datastore/mysql_common/service.py index 937405df25..1f51aa395d 100644 --- a/trove/guestagent/datastore/mysql_common/service.py +++ b/trove/guestagent/datastore/mysql_common/service.py @@ -473,7 +473,7 @@ class BaseMySqlApp(object): @classmethod def get_auth_password(cls, file="os_admin.cnf"): auth_config = operating_system.read_file( - cls.get_client_auth_file(file), codec=cls.CFG_CODEC) + cls.get_client_auth_file(file), codec=cls.CFG_CODEC, as_root=True) return auth_config['client']['password'] @classmethod @@ -488,7 +488,10 @@ class BaseMySqlApp(object): @classmethod def get_client_auth_file(cls, file="os_admin.cnf"): - return guestagent_utils.build_file_path("/opt/trove-guestagent", file) + # Save the password inside the mount point directory so we could + # restore everything when rebuilding the instance. 
+ conf_dir = guestagent_utils.get_conf_dir() + return guestagent_utils.build_file_path(conf_dir, file) def _create_admin_user(self, client, password): """ @@ -522,8 +525,10 @@ class BaseMySqlApp(object): content = {'client': {'user': user, 'password': password, 'host': "localhost"}} - operating_system.write_file('/opt/trove-guestagent/%s.cnf' % user, - content, codec=IniCodec()) + + conf_dir = guestagent_utils.get_conf_dir() + operating_system.write_file( + f'{conf_dir}/{user}.cnf', content, codec=IniCodec(), as_root=True) def secure(self): LOG.info("Securing MySQL now.") @@ -587,6 +592,7 @@ class BaseMySqlApp(object): def start_db(self, update_db=False, ds_version=None, command=None, extra_volumes=None): + """Start and wait for database service.""" docker_image = CONF.get(CONF.datastore_manager).docker_image image = (f'{docker_image}:latest' if not ds_version else f'{docker_image}:{ds_version}') @@ -644,15 +650,16 @@ class BaseMySqlApp(object): ): raise exception.TroveError(_("Failed to start mysql")) - def start_db_with_conf_changes(self, config_contents): + def start_db_with_conf_changes(self, config_contents, ds_version): + LOG.info(f"Starting database service with new configuration and " + f"datastore version {ds_version}.") + if self.status.is_running: LOG.info("Stopping MySQL before applying changes.") self.stop_db() - LOG.info("Resetting configuration.") self._reset_configuration(config_contents) - - self.start_db(update_db=True) + self.start_db(update_db=True, ds_version=ds_version) def stop_db(self, update_db=False): LOG.info("Stopping MySQL.") diff --git a/trove/guestagent/volume.py b/trove/guestagent/volume.py index 0239607345..4b9442b44a 100644 --- a/trove/guestagent/volume.py +++ b/trove/guestagent/volume.py @@ -239,8 +239,14 @@ class VolumeDevice(object): def format(self): """Formats the device at device_path and checks the filesystem.""" self._check_device_exists() - self._format() - self._check_format() + + try: + self._check_format() + 
LOG.debug(f"Device {self.device_path} already formatted.") + return + except exception.GuestError: + self._format() + self._check_format() def mount(self, mount_point, write_to_fstab=True): """Mounts, and writes to fstab.""" diff --git a/trove/instance/models.py b/trove/instance/models.py index 19daf98052..e1624df0bb 100644 --- a/trove/instance/models.py +++ b/trove/instance/models.py @@ -934,12 +934,15 @@ class BaseInstance(SimpleInstance): guest_info_file = os.path.join(injected_config_location, guest_info) - files = {guest_info_file: ( - "[DEFAULT]\n" - "guest_id=%s\n" - "datastore_manager=%s\n" - "tenant_id=%s\n" - % (self.id, datastore_manager, self.tenant_id))} + files = { + guest_info_file: ( + "[DEFAULT]\n" + "guest_id=%s\n" + "datastore_manager=%s\n" + "tenant_id=%s\n" + % (self.id, datastore_manager, self.tenant_id) + ) + } instance_key = get_instance_encryption_key(self.id) if instance_key: @@ -953,6 +956,14 @@ class BaseInstance(SimpleInstance): files[os.path.join(injected_config_location, "trove-guestagent.conf")] = f.read() + # For trove guest agent service init in dev mode + # Before Nova version 2.57, userdata is not supported when doing + # rebuild, have to use injected files instead. 
+ if CONF.controller_address: + files['/etc/trove/controller.conf'] = ( + f"CONTROLLER={CONF.controller_address}" + ) + return files def reset_status(self): @@ -969,6 +980,15 @@ class BaseInstance(SimpleInstance): reset_instance.set_status(status) reset_instance.save() + def prepare_userdata(self, datastore_manager): + userdata = None + cloudinit = os.path.join(CONF.get('cloudinit_location'), + "%s.cloudinit" % datastore_manager) + if os.path.isfile(cloudinit): + with open(cloudinit, "r") as f: + userdata = f.read() + return userdata + class FreshInstance(BaseInstance): @classmethod @@ -1667,6 +1687,10 @@ class Instance(BuiltInstance): task_api.API(self.context).upgrade(self.id, datastore_version.id) + def rebuild(self, image_id): + self.update_db(task_status=InstanceTasks.BUILDING) + task_api.API(self.context).rebuild(self.id, image_id) + def create_server_list_matcher(server_list): # Returns a method which finds a server from the given list. diff --git a/trove/taskmanager/api.py b/trove/taskmanager/api.py index 025495e6df..61f71f4a76 100644 --- a/trove/taskmanager/api.py +++ b/trove/taskmanager/api.py @@ -165,6 +165,13 @@ class API(object): self._cast("migrate", version=version, instance_id=instance_id, host=host) + def rebuild(self, instance_id, image_id): + LOG.debug("Making async call to rebuild instance: %s", instance_id) + version = self.API_BASE_VERSION + + self._cast("rebuild", version=version, instance_id=instance_id, + image_id=image_id) + def delete_instance(self, instance_id): LOG.debug("Making async call to delete instance: %s", instance_id) version = self.API_BASE_VERSION diff --git a/trove/taskmanager/manager.py b/trove/taskmanager/manager.py index 01eaa741c7..2ac5de8734 100644 --- a/trove/taskmanager/manager.py +++ b/trove/taskmanager/manager.py @@ -290,6 +290,10 @@ class Manager(periodic_task.PeriodicTasks): instance_id) instance_tasks.migrate(host) + def rebuild(self, context, instance_id, image_id): + instance_tasks = 
models.BuiltInstanceTasks.load(context, instance_id) + instance_tasks.rebuild(image_id) + def delete_instance(self, context, instance_id): with EndNotification(context): try: diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py index 44e6577a38..6200744557 100755 --- a/trove/taskmanager/models.py +++ b/trove/taskmanager/models.py @@ -13,7 +13,6 @@ # under the License. import copy -import os.path import time import traceback @@ -952,19 +951,10 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): 'mount_point': mount_point} return volume_info - def _prepare_userdata(self, datastore_manager): - userdata = None - cloudinit = os.path.join(CONF.get('cloudinit_location'), - "%s.cloudinit" % datastore_manager) - if os.path.isfile(cloudinit): - with open(cloudinit, "r") as f: - userdata = f.read() - return userdata - def _create_server(self, flavor_id, image_id, datastore_manager, block_device_mapping_v2, availability_zone, nics, files={}, scheduler_hints=None): - userdata = self._prepare_userdata(datastore_manager) + userdata = self.prepare_userdata(datastore_manager) name = self.hostname or self.name bdmap_v2 = block_device_mapping_v2 config_drive = CONF.use_nova_server_config_drive @@ -1126,6 +1116,11 @@ class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin): action = MigrateAction(self, host) action.execute() + def rebuild(self, image_id): + LOG.info(f"Rebuilding instance {self.id}, new image {image_id}") + action = RebuildAction(self, image_id) + action.execute() + def create_backup(self, backup_info): LOG.info("Initiating backup for instance %s, backup_info: %s", self.id, backup_info) @@ -1747,6 +1742,8 @@ class ResizeActionBase(object): :type instance: trove.taskmanager.models.BuiltInstanceTasks """ self.instance = instance + self.wait_status = ['VERIFY_RESIZE'] + self.ignore_stop_error = False def _assert_guest_is_ok(self): # The guest will never set the status to PAUSED. 
@@ -1767,6 +1764,9 @@ class ResizeActionBase(object): "exp_status": 'VERIFY_RESIZE'} raise TroveError(msg) + def _assert_nova_action_was_successful(self): + pass + def _assert_datastore_is_ok(self): self._start_datastore() @@ -1797,11 +1797,22 @@ class ResizeActionBase(object): self.instance.id) self.instance.server.revert_resize() + def _record_action_success(self): + pass + def execute(self): """Initiates the action.""" try: LOG.debug("Instance %s calling stop_db...", self.instance.id) self.instance.guest.stop_db() + except Exception as e: + if self.ignore_stop_error: + LOG.warning(f"Failed to stop db {self.instance.id}, error: " + f"{str(e)}") + else: + raise + + try: self._perform_nova_action() finally: if self.instance.db_info.task_status != ( @@ -1855,15 +1866,20 @@ class ResizeActionBase(object): self.instance.id) def _wait_for_nova_action(self): - # Wait for the flavor to change. + LOG.info(f"Waiting for Nova server status changed to " + f"{self.wait_status}") + def update_server_info(): self.instance.refresh_compute_server_info() - return not self.instance.server_status_matches(['RESIZE']) + if self.instance.server.status.upper() == 'ERROR': + raise TroveError("Nova server is in ERROR status") + return self.instance.server_status_matches(self.wait_status) utils.poll_until( update_server_info, - sleep_time=3, - time_out=CONF.resize_time_out) + sleep_time=5, + time_out=CONF.resize_time_out, + initial_delay=10) def _wait_for_revert_nova_action(self): # Wait for the server to return to ACTIVE after revert. 
@@ -1926,7 +1942,9 @@ class ResizeAction(ResizeActionBase): def _start_datastore(self): config = self.instance._render_config(self.new_flavor) - self.instance.guest.start_db_with_conf_changes(config.config_contents) + self.instance.guest.start_db_with_conf_changes( + config.config_contents, + self.instance.datastore_version.name) class MigrateAction(ResizeActionBase): @@ -1956,6 +1974,59 @@ class MigrateAction(ResizeActionBase): self.instance.guest.restart() +class RebuildAction(ResizeActionBase): + def __init__(self, instance, image_id): + super(RebuildAction, self).__init__(instance) + self.image_id = image_id + self.ignore_stop_error = True + self.wait_status = ['ACTIVE'] + + def _initiate_nova_action(self): + files = self.instance.get_injected_files(self.instance.datastore.name) + + LOG.debug(f"Rebuilding Nova server {self.instance.server.id}") + # Before Nova version 2.57, userdata is not supported when doing + # rebuild, have to use injected files instead. + self.instance.server.rebuild( + self.image_id, + files=files, + ) + + def _assert_nova_status_is_ok(self): + pass + + def _assert_nova_action_was_successful(self): + if self.instance.server.image['id'] != self.image_id: + msg = (f"Assertion failed! The service image ID is " + f"{self.instance.server.image['id']} not {self.image_id}") + raise TroveError(msg) + + def _assert_processes_are_ok(self): + pass + + def _revert_nova_action(self): + pass + + def _wait_for_revert_nova_action(self): + pass + + def _confirm_nova_action(self): + """Send rebuild async request to the guest.""" + flavor = self.instance.nova_client.flavors.get(self.instance.flavor_id) + config = self.instance._render_config(flavor) + config_contents = config.config_contents + + overrides = {} + if self.instance.configuration: + overrides = self.instance.configuration. 
\ + get_configuration_overrides() + + LOG.info(f"Sending rebuild request to the instance {self.instance.id}") + self.instance.guest.rebuild( + self.instance.datastore_version.name, + config_contents=config_contents, config_overrides=overrides) + + def load_cluster_tasks(context, cluster_id): manager = Cluster.manager_from_cluster_id(context, cluster_id) strat = strategy.load_taskmanager_strategy(manager) diff --git a/trove/tests/api/instances_resize.py b/trove/tests/api/instances_resize.py index f9bec976fe..d0225c1b3a 100644 --- a/trove/tests/api/instances_resize.py +++ b/trove/tests/api/instances_resize.py @@ -114,7 +114,8 @@ class ResizeTests(ResizeTestBase): datastore.manager = 'mysql' config = template.SingleInstanceConfigTemplate( datastore, NEW_FLAVOR.__dict__, self.instance.id) - self.instance.guest.start_db_with_conf_changes(config.render()) + self.instance.guest.start_db_with_conf_changes(config.render(), + datastore.name) def test_guest_wont_stop_mysql(self): self.guest.stop_db.side_effect = RPCException("Could not stop MySQL!") diff --git a/trove/tests/api/replication.py b/trove/tests/api/replication.py index 0d4e341aee..2f5157fc63 100644 --- a/trove/tests/api/replication.py +++ b/trove/tests/api/replication.py @@ -50,7 +50,7 @@ backup_count = None def _get_user_count(server_info): cmd = ( - 'docker exec -e MYSQL_PWD=$(sudo cat /opt/trove-guestagent/root.cnf | ' + 'docker exec -e MYSQL_PWD=$(sudo cat /var/lib/mysql/conf.d/root.cnf | ' 'grep password | awk "{print \$3}") database mysql -uroot -N -e ' '"select count(*) from mysql.user where user like \\"slave_%\\""' ) @@ -68,7 +68,7 @@ def slave_is_running(running=True): server = create_server_connection(slave_instance.id) cmd = ( 'docker exec -e MYSQL_PWD=$(sudo cat ' - '/opt/trove-guestagent/root.cnf | grep password ' + '/var/lib/mysql/conf.d/root.cnf | grep password ' '| awk "{print \$3}") database mysql -uroot -N -e ' '"SELECT SERVICE_STATE FROM ' 'performance_schema.replication_connection_status"' @@ 
-198,7 +198,7 @@ class VerifySlave(object): """test_slave_is_read_only""" cmd = ( 'docker exec -e MYSQL_PWD=$(sudo cat ' - '/opt/trove-guestagent/root.cnf | grep password | ' + '/var/lib/mysql/conf.d/root.cnf | grep password | ' 'awk "{print \$3}") database mysql -uroot -NBq -e ' '"select @@read_only"' ) @@ -403,7 +403,7 @@ class DetachReplica(object): def check_not_read_only(): cmd = ( 'docker exec -e MYSQL_PWD=$(sudo cat ' - '/opt/trove-guestagent/root.cnf | grep password | ' + '/var/lib/mysql/conf.d/root.cnf | grep password | ' 'awk "{print \$3}") database mysql -uroot -NBq -e ' '"select @@read_only"' ) diff --git a/trove/tests/fakes/guestagent.py b/trove/tests/fakes/guestagent.py index dc22bb536d..554aff9a13 100644 --- a/trove/tests/fakes/guestagent.py +++ b/trove/tests/fakes/guestagent.py @@ -265,7 +265,7 @@ class FakeGuest(object): # There's nothing to do here, since there is no config to update. pass - def start_db_with_conf_changes(self, config_contents): + def start_db_with_conf_changes(self, config_contents, ds_version): time.sleep(2) self._set_task_status('HEALTHY') diff --git a/trove/tests/unittests/taskmanager/test_models.py b/trove/tests/unittests/taskmanager/test_models.py index d542b34bec..3c09876656 100644 --- a/trove/tests/unittests/taskmanager/test_models.py +++ b/trove/tests/unittests/taskmanager/test_models.py @@ -301,7 +301,7 @@ class FreshInstanceTasksTest(BaseFreshInstanceTasksTest): new_callable=PropertyMock, return_value='fake-hostname') def test_servers_create_block_device_mapping_v2(self, mock_hostname): - self.freshinstancetasks._prepare_userdata = Mock(return_value=None) + self.freshinstancetasks.prepare_userdata = Mock(return_value=None) mock_nova_client = self.freshinstancetasks.nova_client = Mock() mock_servers_create = mock_nova_client.servers.create self.freshinstancetasks._create_server('fake-flavor', 'fake-image', @@ -698,7 +698,7 @@ class BuiltInstanceTasksTest(trove_testtools.TestCase): def setUp(self): 
super(BuiltInstanceTasksTest, self).setUp() self.new_flavor = {'id': 8, 'ram': 768, 'name': 'bigger_flavor'} - stub_nova_server = MagicMock() + stub_nova_server = MagicMock(id='fake_id') self.rpc_patches = patch.multiple( rpc, get_notifier=MagicMock(), get_client=MagicMock()) self.rpc_mocks = self.rpc_patches.start() @@ -749,6 +749,7 @@ class BuiltInstanceTasksTest(trove_testtools.TestCase): self.stub_running_server.flavor = {'id': 6, 'ram': 512} self.stub_verifying_server = MagicMock( spec=novaclient.v2.servers.Server) + self.stub_verifying_server.id = 'fake_id' self.stub_verifying_server.status = 'VERIFY_RESIZE' self.stub_verifying_server.flavor = {'id': 8, 'ram': 768} self.stub_server_mgr.get = MagicMock(