Keep user-defined configuration after resizing an instance

* Never remove user-defined config. To reflect this, the function
  save_configuration is renamed to reset_configuration in trove-guestagent.
* Improve some log messages
* Do not remove the InnoDB log files after resize; removing them causes
  the error: Can't open and lock privilege tables: Table './mysql/user'
  is marked as crashed and should be repaired

Story: 2009033
Task: 42773
Change-Id: I9e3165ed9b38b15714542e35456415e65d438497
This commit is contained in:
Lingxian Kong 2021-07-07 09:34:17 +12:00
parent b050996b9f
commit c274ab9f1a
10 changed files with 58 additions and 49 deletions

View File

@ -0,0 +1,4 @@
---
fixes:
- Fixed an issue where the replication configuration was lost after resizing
an instance.

View File

@ -408,7 +408,10 @@ class API(object):
self._call("restart", self.agent_high_timeout, version=version)
def start_db_with_conf_changes(self, config_contents, ds_version):
"""Start the database server."""
"""Start the database with given configuration.
This method is called after resize.
"""
LOG.debug("Sending the call to start the database process on "
"the Guest with a timeout of %s.",
self.agent_high_timeout)
@ -424,8 +427,10 @@ class API(object):
version=version, **start_args)
def reset_configuration(self, configuration):
"""Ignore running state of the database server; just change
the config file to a new flavor.
"""Reset the database base configuration.
Ignore running state of the database server, just change the config
file to a new flavor.
"""
LOG.debug("Sending the call to change the database conf file on the "
"Guest with a timeout of %s.",

View File

@ -132,20 +132,23 @@ class ConfigurationManager(object):
return base_options
def save_configuration(self, options):
def reset_configuration(self, options, remove_overrides=False):
"""Write given contents to the base configuration file.
Remove all existing overrides (both system and user).
:param options Contents of the configuration file.
:type options string or dict
Remove all existing overrides (both system and user) as required.
:param options: Contents of the configuration file (string or dict).
:param remove_overrides: Remove the overrides or not.
"""
if isinstance(options, dict):
# Serialize a dict of options for writing.
self.save_configuration(self._codec.serialize(options))
self.reset_configuration(self._codec.serialize(options),
remove_overrides=remove_overrides)
else:
self._override_strategy.remove(self.USER_GROUP)
self._override_strategy.remove(self.SYSTEM_PRE_USER_GROUP)
self._override_strategy.remove(self.SYSTEM_POST_USER_GROUP)
if remove_overrides:
self._override_strategy.remove(self.USER_GROUP)
self._override_strategy.remove(self.SYSTEM_PRE_USER_GROUP)
self._override_strategy.remove(self.SYSTEM_POST_USER_GROUP)
operating_system.write_file(
self._base_config_path, options, as_root=self._requires_root)

View File

@ -328,6 +328,10 @@ class Manager(periodic_task.PeriodicTasks):
pass
def start_db_with_conf_changes(self, context, config_contents, ds_version):
"""Start the database with given configuration.
This method is called after resize.
"""
self.app.start_db_with_conf_changes(config_contents, ds_version)
def stop_db(self, context):
@ -392,15 +396,17 @@ class Manager(periodic_task.PeriodicTasks):
# Configuration
###############
def reset_configuration(self, context, configuration):
"""The default implementation should be sufficient if a
"""Reset database base configuration.
The default implementation should be sufficient if a
configuration_manager is provided. Even if one is not, this
method needs to be implemented to allow the rollback of
flavor-resize on the guestagent side.
"""
LOG.info("Resetting configuration.")
if self.configuration_manager:
LOG.info("Resetting configuration.")
config_contents = configuration['config_contents']
self.configuration_manager.save_configuration(config_contents)
self.configuration_manager.reset_configuration(config_contents)
def apply_overrides_on_prepare(self, context, overrides):
self.update_overrides(context, overrides)

View File

@ -79,7 +79,7 @@ class MySqlManager(manager.Manager):
# Prepare mysql configuration
LOG.info('Preparing database configuration')
self.app.configuration_manager.save_configuration(config_contents)
self.app.configuration_manager.reset_configuration(config_contents)
self.app.update_overrides(overrides)
# Restore data from backup and reset root password
@ -310,7 +310,7 @@ class MySqlManager(manager.Manager):
try:
# Prepare mysql configuration
LOG.debug('Preparing database configuration')
self.app.configuration_manager.save_configuration(config_contents)
self.app.configuration_manager.reset_configuration(config_contents)
self.app.update_overrides(config_overrides)
# Start database service.

View File

@ -641,11 +641,6 @@ class BaseMySqlApp(service.BaseDbApp):
LOG.exception("Could not delete logfile.")
raise
def reset_configuration(self, configuration):
LOG.info("Resetting configuration.")
self.configuration_manager.save_configuration(configuration)
self.wipe_ib_logfiles()
def restart(self):
LOG.info("Restarting mysql")

View File

@ -54,7 +54,7 @@ class PostgresManager(manager.Manager):
as_root=True)
LOG.info('Preparing database config files')
self.app.configuration_manager.save_configuration(config_contents)
self.app.configuration_manager.reset_configuration(config_contents)
self.app.set_data_dir(self.app.datadir)
self.app.update_overrides(overrides)

View File

@ -147,9 +147,6 @@ class PgSqlApp(service.BaseDbApp):
if overrides:
self.configuration_manager.apply_user_override(overrides)
def reset_configuration(self, configuration):
self.configuration_manager.save_configuration(configuration)
def apply_overrides(self, overrides):
"""Reload config."""
cmd = "pg_ctl reload"

View File

@ -378,7 +378,8 @@ class BaseDbApp(object):
self.configuration_manager.remove_user_override()
def reset_configuration(self, configuration):
pass
LOG.info("Resetting configuration.")
self.configuration_manager.reset_configuration(configuration)
def stop_db(self, update_db=False):
LOG.info("Stopping database.")
@ -399,6 +400,10 @@ class BaseDbApp(object):
pass
def start_db_with_conf_changes(self, config_contents, ds_version):
"""Start the database with given configuration.
This method is called after resize.
"""
LOG.info(f"Starting database service with new configuration and "
f"datastore version {ds_version}.")

View File

@ -58,7 +58,6 @@ from trove.common.utils import try_recover
from trove.extensions.mysql import models as mysql_models
from trove.instance import models as inst_models
from trove.instance import service_status as srvstatus
from trove.instance.models import BuiltInstance
from trove.instance.models import DBInstance
from trove.instance.models import FreshInstance
from trove.instance.models import Instance
@ -1093,7 +1092,7 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin):
return sg_id
class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin):
class BuiltInstanceTasks(Instance, NotifyMixin, ConfigurationMixin):
"""
BuiltInstanceTasks contains the tasks related an instance that already
associated with a compute server.
@ -1193,7 +1192,7 @@ class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin):
self.reset_task_status()
def attach_replica(self, master, restart=False):
LOG.info("Attaching replica %s to master %s", self.id, master.id)
LOG.info("Attaching replica %s to primary %s", self.id, master.id)
try:
replica_info = master.guest.get_replica_context()
flavor = self.nova_client.flavors.get(self.flavor_id)
@ -1856,15 +1855,17 @@ class ResizeActionBase(object):
pass
def _assert_datastore_is_ok(self):
LOG.info(f"Re-config database for instance {self.instance.id} after "
f"resize")
self._start_datastore()
def _assert_processes_are_ok(self):
"""Checks the procs; if anything is wrong, reverts the operation."""
# Tell the guest to turn back on, and make sure it can start.
LOG.info(f"Waiting for database status changed after resizing "
f"{self.instance.id}")
self._assert_guest_is_ok()
LOG.debug("Nova guest is ok.")
self._assert_datastore_is_ok()
LOG.debug("Datastore is ok.")
def _confirm_nova_action(self):
LOG.debug("Instance %s calling Compute confirm resize...",
@ -1893,7 +1894,7 @@ class ResizeActionBase(object):
def execute(self):
"""Initiates the action."""
try:
LOG.debug("Instance %s calling stop_db...", self.instance.id)
LOG.info(f"Stopping database service for {self.instance.id}")
self.instance.guest.stop_db(do_not_start_on_reboot=True)
except Exception as e:
if self.ignore_stop_error:
@ -1927,25 +1928,17 @@ class ResizeActionBase(object):
def _perform_nova_action(self):
"""Calls Nova to resize or migrate an instance, and confirms."""
LOG.debug("Begin resize method _perform_nova_action instance: %s",
self.instance.id)
need_to_revert = False
try:
LOG.debug("Initiating nova action")
self._initiate_nova_action()
LOG.debug("Waiting for nova action completed")
self._wait_for_nova_action()
LOG.debug("Asserting nova status is ok")
self._assert_nova_status_is_ok()
need_to_revert = True
LOG.debug("Asserting nova action success")
self._assert_nova_action_was_successful()
LOG.debug("Asserting processes are OK")
self._assert_processes_are_ok()
LOG.debug("Confirming nova action")
self._confirm_nova_action()
except Exception:
LOG.exception("Exception during nova action.")
LOG.exception(f"Failed to resize instance {self.instance.id}")
if need_to_revert:
LOG.error("Reverting action for instance %s",
self.instance.id)
@ -1953,13 +1946,12 @@ class ResizeActionBase(object):
self._wait_for_revert_nova_action()
if self.instance.server_status_matches(['ACTIVE']):
LOG.error("Restarting datastore.")
LOG.error(f"Restarting instance {self.instance.id}")
self.instance.guest.restart()
else:
LOG.error("Cannot restart datastore because "
"Nova server status is not ACTIVE")
LOG.error(f"Cannot restart instance {self.instance.id} "
f"because Nova server status is not ACTIVE")
LOG.error("Error resizing instance %s.", self.instance.id)
raise
self._record_action_success()
@ -1967,8 +1959,8 @@ class ResizeActionBase(object):
self.instance.id)
def _wait_for_nova_action(self):
LOG.info(f"Waiting for Nova server status changed to "
f"{self.wait_status}")
LOG.debug(f"Waiting for Nova server status changed to "
f"{self.wait_status} for {self.instance.id}")
def update_server_info():
self.instance.refresh_compute_server_info()
@ -2014,6 +2006,7 @@ class ResizeAction(ResizeActionBase):
raise TroveError(msg)
def _initiate_nova_action(self):
LOG.info(f"Resizing Nova server for instance {self.instance.id}")
self.instance.server.resize(self.new_flavor_id)
def _revert_nova_action(self):
@ -2087,7 +2080,7 @@ class RebuildAction(ResizeActionBase):
self.instance.datastore.name,
self.instance.datastore_version.name)
LOG.debug(f"Rebuilding Nova server {self.instance.server.id}")
LOG.info(f"Rebuilding Nova server for instance {self.instance.id}")
# Before Nova version 2.57, userdata is not supported when doing
# rebuild, have to use injected files instead.
self.instance.server.rebuild(
@ -2120,7 +2113,8 @@ class RebuildAction(ResizeActionBase):
self.instance.datastore_version.version,
config_contents=config_contents, config_overrides=overrides)
LOG.info(f"Waiting for instance {self.instance.id} healthy")
LOG.info(f"Waiting for instance {self.instance.id} healthy after "
f"rebuild")
self._assert_guest_is_ok()
self.wait_for_healthy()
LOG.info(f"Finished to rebuild {self.instance.id}")