Browse Source

Disable nova-compute for not maintained hosts

By disabling nova-compute, all instances that are created or migrated will
land only on maintained hosts, which have nova-compute enabled.
In Nova terms, a disabled nova-compute also means the host is in maintenance.

Story: 2003848
Task: #26641

Change-Id: I8b05de9bd3653187fc210700b5b39faeb2fffc03
Signed-off-by: Tomi Juvonen <tomi.juvonen@nokia.com>
changes/91/620891/4
Tomi Juvonen 4 years ago
parent
commit
9eba5512ea
  1. 1
      fenix/db/migration/alembic_migrations/versions/001_initial.py
  2. 2
      fenix/db/sqlalchemy/models.py
  3. 61
      fenix/workflow/workflow.py
  4. 45
      fenix/workflow/workflows/default.py

1
fenix/db/migration/alembic_migrations/versions/001_initial.py

@ -56,6 +56,7 @@ def upgrade():
sa.Column('type', sa.String(length=32), nullable=True),
sa.Column('maintained', sa.Boolean, default=False),
sa.Column('disabled', sa.Boolean, default=False),
sa.Column('details', sa.String(length=255), nullable=True),
sa.UniqueConstraint('session_id', 'hostname', name='_session_host_uc'),
sa.PrimaryKeyConstraint('id'))

2
fenix/db/sqlalchemy/models.py

@ -81,6 +81,8 @@ class MaintenanceHost(mb.FenixBase):
hostname = sa.Column(sa.String(length=255), primary_key=True)
type = sa.Column(sa.String(length=32), nullable=True)
maintained = sa.Column(sa.Boolean, default=False)
disabled = sa.Column(sa.Boolean, default=False)
details = sa.Column(sa.String(length=255), nullable=True)
def to_dict(self):
    """Return the dictionary form produced by the parent class."""
    parent = super(MaintenanceHost, self)
    return parent.to_dict()

61
fenix/workflow/workflow.py

@ -46,7 +46,6 @@ class BaseWorkflow(Thread):
self.hosts = self.init_hosts(self.convert(data['hosts']))
else:
self.hosts = []
LOG.info('%s: hosts %s' % (self.session_id, self.hosts))
# TBD API to support action plugins
# self.actions =
self.projects = []
@ -122,10 +121,32 @@ class BaseWorkflow(Thread):
def get_maintained_hosts(self):
    """Return the hostnames of every host whose ``maintained`` flag is set."""
    names = []
    for entry in self.hosts:
        if entry.maintained:
            names.append(entry.hostname)
    return names
def get_disabled_hosts(self):
    """Return the host objects (not hostnames) whose ``disabled`` flag is set."""
    disabled = []
    for entry in self.hosts:
        if entry.disabled:
            disabled.append(entry)
    return disabled
def get_host_by_name(self, hostname):
    """Return the single host object whose hostname equals *hostname*.

    Raises:
        Exception: if no host matches, or if more than one host matches.
    """
    matches = [entry for entry in self.hosts if entry.hostname == hostname]
    if not matches:
        raise Exception('get_host_by_name: %s not found' % hostname)
    if len(matches) != 1:
        raise Exception('get_host_by_name: %s has duplicate entries' %
                        hostname)
    return matches[0]
def host_maintained(self, hostname):
    """Flag the host named *hostname* as maintained.

    Exactly one entry in ``self.hosts`` must carry that hostname; the
    unchecked ``[0]`` lookup of the superseded implementation is gone, so
    a missing host no longer surfaces as a bare IndexError.

    Raises:
        Exception: if no host matches, or if more than one host matches.
    """
    matches = [entry for entry in self.hosts if entry.hostname == hostname]
    if not matches:
        raise Exception('host_maintained: %s not found' % hostname)
    if len(matches) != 1:
        raise Exception('host_maintained: %s has duplicate entries' %
                        hostname)
    matches[0].maintained = True
def add_instance(self, instance):
    """Pass *instance* to ``db_api.create_instance`` and return its result."""
    created = db_api.create_instance(instance)
    return created
@ -139,8 +160,16 @@ class BaseWorkflow(Thread):
db_api.remove_instance(self.session_id, instance_id)
def project(self, project_id):
    """Return the single project whose ``project_id`` equals *project_id*.

    Exactly one entry in ``self.projects`` must match; the superseded
    unchecked ``[0]`` lookup is removed so a missing project is reported
    with a descriptive message instead of an IndexError.

    Raises:
        Exception: if no project matches, or if more than one matches.
    """
    found = [candidate for candidate in self.projects
             if candidate.project_id == project_id]
    if not found:
        raise Exception('project: %s not found' % project_id)
    if len(found) != 1:
        raise Exception('project: %s has duplicate entries' % project_id)
    return found[0]
def project_names(self):
    """Return the ``project_id`` of every entry in ``self.projects``."""
    identifiers = []
    for entry in self.projects:
        identifiers.append(entry.project_id)
    return identifiers
@ -233,13 +262,27 @@ class BaseWorkflow(Thread):
def instance_by_name(self, instance_name):
    """Return the single instance whose ``instance_name`` matches.

    Exactly one entry in ``self.instances`` must match; the superseded
    unchecked ``[0]`` lookup is removed so a missing instance is reported
    with a descriptive message instead of an IndexError.

    Raises:
        Exception: if no instance matches, or if more than one matches.
    """
    found = [candidate for candidate in self.instances
             if candidate.instance_name == instance_name]
    if not found:
        raise Exception('instance_by_name: %s not found' % instance_name)
    if len(found) != 1:
        raise Exception('instance_by_name: %s has duplicate entries' %
                        instance_name)
    return found[0]
def instance_by_id(self, instance_id):
    """Return the single instance whose ``instance_id`` matches.

    Exactly one entry in ``self.instances`` must match; the superseded
    unchecked ``[0]`` lookup is removed so a missing instance is reported
    with a descriptive message instead of an IndexError.

    Raises:
        Exception: if no instance matches, or if more than one matches.
    """
    found = [candidate for candidate in self.instances
             if candidate.instance_id == instance_id]
    if not found:
        raise Exception('instance_by_id: %s not found' % instance_id)
    if len(found) != 1:
        raise Exception('instance_by_id: %s has duplicate entries' %
                        instance_id)
    return found[0]
def __str__(self):
info = 'Instance info:\n'

45
fenix/workflow/workflows/default.py

@ -21,6 +21,7 @@ from novaclient.exceptions import BadRequest
from oslo_log import log as logging
import time
from fenix.db import api as db_api
from fenix.utils.time import datetime_to_str
from fenix.utils.time import is_time_after_time
from fenix.utils.time import reply_time_str
@ -36,7 +37,13 @@ class Workflow(BaseWorkflow):
def __init__(self, conf, session_id, data):
    """Initialize the workflow and its Nova client.

    Calls the base-class initializer, refuses to continue when the
    maximum Nova API microversion known to the client is below 2.53,
    creates the Nova client on ``self.auth_session`` and then runs
    ``self._init_update_hosts()``.

    Raises:
        Exception: if the Nova API microversion is older than 2.53.
    """
    super(Workflow, self).__init__(conf, session_id, data)
    nova_version = nova_max_version.get_string()
    # Microversions are "<major>.<minor>" integer pairs, not decimals:
    # float("2.100") is 2.1 and would wrongly compare below 2.53, while
    # float("2.6") would wrongly compare above it. Compare as int tuples.
    # NOTE(review): 2.53 is presumably needed for enabling/disabling the
    # compute service by its service id - confirm against Nova API docs.
    version_key = tuple(int(part) for part in nova_version.split('.'))
    if version_key < (2, 53):
        LOG.error("%s: initialize failed. Nova version %s too old" %
                  (self.session_id, nova_version))
        raise Exception("%s: initialize failed. Nova version too old" %
                        self.session_id)
    self.nova = novaclient.Client(nova_version,
                                  session=self.auth_session)
    self._init_update_hosts()
    LOG.info("%s: initialized" % self.session_id)
@ -51,8 +58,11 @@ class Workflow(BaseWorkflow):
if match:
host.type = 'compute'
if match[0].status == 'disabled':
LOG.info("compute status from services")
host.disabled = True
LOG.error("%s: %s nova-compute disabled before maintenance"
% (self.session_id, hostname))
raise Exception("%s: %s already disabled"
% (self.session_id, hostname))
host.details = match[0].id
continue
if ([controller for controller in controllers if
hostname == controller.host]):
@ -60,6 +70,20 @@ class Workflow(BaseWorkflow):
continue
host.type = 'other'
def disable_host_nova_compute(self, hostname):
    """Disable the nova-compute service of *hostname*.

    Looks the host up with ``get_host_by_name``, disables the service
    identified by ``host.details`` with the reason 'maintenance', and
    records the new state in ``host.disabled``.
    """
    message = '%s: disable nova-compute on host %s' % (self.session_id,
                                                       hostname)
    LOG.info(message)
    target = self.get_host_by_name(hostname)
    service_ref = target.details
    self.nova.services.disable_log_reason(service_ref, 'maintenance')
    target.disabled = True
def enable_host_nova_compute(self, hostname):
    """Enable the nova-compute service of *hostname*.

    Looks the host up with ``get_host_by_name``, enables the service
    identified by ``host.details``, and clears ``host.disabled``.
    """
    message = '%s: enable nova-compute on host %s' % (self.session_id,
                                                      hostname)
    LOG.info(message)
    target = self.get_host_by_name(hostname)
    service_ref = target.details
    self.nova.services.enable(service_ref)
    target.disabled = False
def get_compute_hosts(self):
    """Return the hostnames of every host whose ``type`` is 'compute'."""
    names = []
    for entry in self.hosts:
        if entry.type == 'compute':
            names.append(entry.hostname)
    return names
@ -558,6 +582,12 @@ class Workflow(BaseWorkflow):
return
maintained_hosts = self.get_maintained_hosts()
if not maintained_hosts:
computes = self.get_compute_hosts()
for compute in computes:
# When we start to maintain compute hosts, all these hosts
# nova-compute service is disabled, so projects cannot have
# instances scheduled to not maintained hosts
self.disable_host_nova_compute(compute)
# First we maintain all empty hosts
for host in empty_hosts:
# TBD we wait host VCPUs to report right, but this is not
@ -573,6 +603,8 @@ class Workflow(BaseWorkflow):
self._admin_notify(self.conf.workflow_project, host,
'MAINTENANCE_COMPLETE',
self.session_id)
self.enable_host_nova_compute(host)
LOG.info('MAINTENANCE_COMPLETE host %s' % host)
self.host_maintained(host)
else:
@ -590,8 +622,9 @@ class Workflow(BaseWorkflow):
self._admin_notify(self.conf.workflow_project, host,
'MAINTENANCE_COMPLETE',
self.session_id)
LOG.info('MAINTENANCE_COMPLETE host %s' % host)
self.enable_host_nova_compute(host)
LOG.info('MAINTENANCE_COMPLETE host %s' % host)
self.host_maintained(host)
maintained_hosts = self.get_maintained_hosts()
if len(maintained_hosts) != len(self.hosts):
@ -635,3 +668,7 @@ class Workflow(BaseWorkflow):
def maintenance_failed(self):
    """Log that the maintenance_failed handler was called for this session."""
    message = "%s: maintenance_failed called" % self.session_id
    LOG.info(message)
def cleanup(self):
    """Log the cleanup and remove this session through ``db_api``."""
    session = self.session_id
    LOG.info("%s: cleanup" % session)
    db_api.remove_session(session)

Loading…
Cancel
Save