Disable nova-compute on compute hosts while a maintenance session runs.

* db: add a nullable `details` column to the hosts table and to the
  MaintenanceHost model, and declare `disabled` on the model.
* BaseWorkflow: add get_disabled_hosts() and get_host_by_name(). The
  host/project/instance lookups now raise a descriptive Exception when
  the key is missing or duplicated; the `[...][0]` form they replace
  raised IndexError on a miss and took the first of several matches.
* default Workflow: __init__ raises when the Nova API version is below
  2.53; _init_update_hosts raises for a host whose nova-compute is
  already disabled and keeps `match[0].id` in host.details; nova-compute
  is disabled on every compute host when no host has been maintained
  yet and re-enabled per host after its MAINTENANCE_COMPLETE notify;
  cleanup() removes the session through db_api.

Review notes:
* The version gate compares integer (major, minor) pairs. The float()
  comparison it replaces misorders microversions: "2.9" ranks above
  "2.53" and "2.100" ranks below it.
* Leading whitespace and blank context lines were rebuilt from the
  Python structure and the hunk line counts; check them against the
  target tree before applying.
* The blob ids on the default.py `index` line predate the version-gate
  change.

diff --git a/fenix/db/migration/alembic_migrations/versions/001_initial.py b/fenix/db/migration/alembic_migrations/versions/001_initial.py
index 59b87f5..b38ef79 100644
--- a/fenix/db/migration/alembic_migrations/versions/001_initial.py
+++ b/fenix/db/migration/alembic_migrations/versions/001_initial.py
@@ -56,6 +56,7 @@ def upgrade():
         sa.Column('type', sa.String(length=32), nullable=True),
         sa.Column('maintained', sa.Boolean, default=False),
         sa.Column('disabled', sa.Boolean, default=False),
+        sa.Column('details', sa.String(length=255), nullable=True),
         sa.UniqueConstraint('session_id', 'hostname', name='_session_host_uc'),
         sa.PrimaryKeyConstraint('id'))
 
diff --git a/fenix/db/sqlalchemy/models.py b/fenix/db/sqlalchemy/models.py
index b6575e7..152e540 100644
--- a/fenix/db/sqlalchemy/models.py
+++ b/fenix/db/sqlalchemy/models.py
@@ -81,6 +81,8 @@ class MaintenanceHost(mb.FenixBase):
     hostname = sa.Column(sa.String(length=255), primary_key=True)
     type = sa.Column(sa.String(length=32), nullable=True)
     maintained = sa.Column(sa.Boolean, default=False)
+    disabled = sa.Column(sa.Boolean, default=False)
+    details = sa.Column(sa.String(length=255), nullable=True)
 
     def to_dict(self):
         return super(MaintenanceHost, self).to_dict()
diff --git a/fenix/workflow/workflow.py b/fenix/workflow/workflow.py
index 49b206e..9ebacdf 100644
--- a/fenix/workflow/workflow.py
+++ b/fenix/workflow/workflow.py
@@ -46,7 +46,6 @@ class BaseWorkflow(Thread):
             self.hosts = self.init_hosts(self.convert(data['hosts']))
         else:
             self.hosts = []
-        LOG.info('%s: hosts %s' % (self.session_id, self.hosts))
         # TBD API to support action plugins
         # self.actions =
         self.projects = []
@@ -122,10 +121,32 @@ class BaseWorkflow(Thread):
     def get_maintained_hosts(self):
         return [host.hostname for host in self.hosts if host.maintained]
 
+    def get_disabled_hosts(self):
+        return [host for host in self.hosts if host.disabled]
+
+    def get_host_by_name(self, hostname):
+        host_obj = [host for host in self.hosts if
+                    host.hostname == hostname]
+        if host_obj:
+            if len(host_obj) == 1:
+                return host_obj[0]
+            else:
+                raise Exception('get_host_by_name: %s has duplicate entries' %
+                                hostname)
+        else:
+            raise Exception('get_host_by_name: %s not found' % hostname)
+
     def host_maintained(self, hostname):
         host_obj = [host for host in self.hosts if
-                    host.hostname == hostname][0]
-        host_obj.maintained = True
+                    host.hostname == hostname]
+        if host_obj:
+            if len(host_obj) == 1:
+                host_obj[0].maintained = True
+            else:
+                raise Exception('host_maintained: %s has duplicate entries' %
+                                hostname)
+        else:
+            raise Exception('host_maintained: %s not found' % hostname)
 
     def add_instance(self, instance):
         return db_api.create_instance(instance)
@@ -139,8 +160,16 @@ class BaseWorkflow(Thread):
         db_api.remove_instance(self.session_id, instance_id)
 
     def project(self, project_id):
-        return ([project for project in self.projects if
-                 project.project_id == project_id][0])
+        project = ([project for project in self.projects if
+                    project.project_id == project_id])
+        if project:
+            if len(project) == 1:
+                return project[0]
+            else:
+                raise Exception('project: %s has duplicate entries' %
+                                project_id)
+        else:
+            raise Exception('project: %s not found' % project_id)
 
     def project_names(self):
         return [project.project_id for project in self.projects]
@@ -233,13 +262,27 @@ class BaseWorkflow(Thread):
 
     def instance_by_name(self, instance_name):
         instance = [instance for instance in self.instances if
-                    instance.instance_name == instance_name][0]
-        return instance
+                    instance.instance_name == instance_name]
+        if instance:
+            if len(instance) == 1:
+                return instance[0]
+            else:
+                raise Exception('instance_by_name: %s has duplicate entries' %
+                                instance_name)
+        else:
+            raise Exception('instance_by_name: %s not found' % instance_name)
 
     def instance_by_id(self, instance_id):
         instance = [instance for instance in self.instances if
-                    instance.instance_id == instance_id][0]
-        return instance
+                    instance.instance_id == instance_id]
+        if instance:
+            if len(instance) == 1:
+                return instance[0]
+            else:
+                raise Exception('instance_by_id: %s has duplicate entries' %
+                                instance_id)
+        else:
+            raise Exception('instance_by_id: %s not found' % instance_id)
 
     def __str__(self):
         info = 'Instance info:\n'
diff --git a/fenix/workflow/workflows/default.py b/fenix/workflow/workflows/default.py
index 7e05f63..8c0d2d8 100644
--- a/fenix/workflow/workflows/default.py
+++ b/fenix/workflow/workflows/default.py
@@ -21,6 +21,7 @@ from novaclient.exceptions import BadRequest
 from oslo_log import log as logging
 import time
 
+from fenix.db import api as db_api
 from fenix.utils.time import datetime_to_str
 from fenix.utils.time import is_time_after_time
 from fenix.utils.time import reply_time_str
@@ -36,7 +37,15 @@ class Workflow(BaseWorkflow):
 
     def __init__(self, conf, session_id, data):
         super(Workflow, self).__init__(conf, session_id, data)
-        self.nova = novaclient.Client(nova_max_version.get_string(),
+        nova_version = nova_max_version.get_string()
+        # Microversions are "major.minor" integer pairs, not decimals:
+        # float() would rank "2.9" above "2.53" and "2.100" below it.
+        if tuple(int(n) for n in nova_version.split('.')) < (2, 53):
+            LOG.error("%s: initialize failed. Nova version %s too old" %
+                      (self.session_id, nova_version))
+            raise Exception("%s: initialize failed. Nova version too old" %
+                            self.session_id)
+        self.nova = novaclient.Client(nova_version,
                                       session=self.auth_session)
         self._init_update_hosts()
         LOG.info("%s: initialized" % self.session_id)
@@ -51,8 +60,11 @@ class Workflow(BaseWorkflow):
             if match:
                 host.type = 'compute'
                 if match[0].status == 'disabled':
-                    LOG.info("compute status from services")
-                    host.disabled = True
+                    LOG.error("%s: %s nova-compute disabled before maintenance"
+                              % (self.session_id, hostname))
+                    raise Exception("%s: %s already disabled"
+                                    % (self.session_id, hostname))
+                host.details = match[0].id
                 continue
             if ([controller for controller in controllers if
                  hostname == controller.host]):
@@ -60,6 +72,20 @@ class Workflow(BaseWorkflow):
                 continue
             host.type = 'other'
 
+    def disable_host_nova_compute(self, hostname):
+        LOG.info('%s: disable nova-compute on host %s' % (self.session_id,
+                                                          hostname))
+        host = self.get_host_by_name(hostname)
+        self.nova.services.disable_log_reason(host.details, 'maintenance')
+        host.disabled = True
+
+    def enable_host_nova_compute(self, hostname):
+        LOG.info('%s: enable nova-compute on host %s' % (self.session_id,
+                                                         hostname))
+        host = self.get_host_by_name(hostname)
+        self.nova.services.enable(host.details)
+        host.disabled = False
+
     def get_compute_hosts(self):
         return [host.hostname for host in self.hosts if host.type == 'compute']
 
@@ -558,6 +584,12 @@ class Workflow(BaseWorkflow):
             return
         maintained_hosts = self.get_maintained_hosts()
         if not maintained_hosts:
+            computes = self.get_compute_hosts()
+            for compute in computes:
+                # When we start to maintain compute hosts, all these hosts
+                # nova-compute service is disabled, so projects cannot have
+                # instances scheduled to not maintained hosts
+                self.disable_host_nova_compute(compute)
             # First we maintain all empty hosts
             for host in empty_hosts:
                 # TBD we wait host VCPUs to report right, but this is not
@@ -573,6 +605,8 @@ class Workflow(BaseWorkflow):
                 self._admin_notify(self.conf.workflow_project, host,
                                    'MAINTENANCE_COMPLETE',
                                    self.session_id)
+
+                self.enable_host_nova_compute(host)
                 LOG.info('MAINTENANCE_COMPLETE host %s' % host)
                 self.host_maintained(host)
         else:
@@ -590,8 +624,9 @@ class Workflow(BaseWorkflow):
                 self._admin_notify(self.conf.workflow_project, host,
                                    'MAINTENANCE_COMPLETE',
                                    self.session_id)
-                LOG.info('MAINTENANCE_COMPLETE host %s' % host)
 
+                self.enable_host_nova_compute(host)
+                LOG.info('MAINTENANCE_COMPLETE host %s' % host)
                 self.host_maintained(host)
         maintained_hosts = self.get_maintained_hosts()
         if len(maintained_hosts) != len(self.hosts):
@@ -635,3 +670,7 @@ class Workflow(BaseWorkflow):
 
     def maintenance_failed(self):
         LOG.info("%s: maintenance_failed called" % self.session_id)
+
+    def cleanup(self):
+        LOG.info("%s: cleanup" % self.session_id)
+        db_api.remove_session(self.session_id)