Adding Fencing pluggable system

Change-Id: I23988cc1c09b08a8e72215bffbb55aa1a7ae4afe
This commit is contained in:
Saad Zaher 2015-12-23 14:51:09 +00:00
parent f911b2965f
commit 788b98dee5
19 changed files with 386 additions and 84 deletions

View File

View File

View File

@ -0,0 +1,5 @@
[DEFAULT]
output_file = etc/osha.conf.sample
wrap_width = 79
namespace = osha
namespace = oslo.log

View File

@ -11,49 +11,48 @@
# From oslo.log
#
# Print debugging output (set logging level to DEBUG instead of
# default WARNING level). (boolean value)
# Print debugging output (set logging level to DEBUG instead of default WARNING
# level). (boolean value)
#debug = false
# Print more verbose output (set logging level to INFO instead of
# default WARNING level). (boolean value)
# Print more verbose output (set logging level to INFO instead of default
# WARNING level). (boolean value)
#verbose = false
# The name of a logging configuration file. This file is appended to
# any existing logging configuration files. For details about logging
# configuration files, see the Python logging module documentation.
# (string value)
# The name of a logging configuration file. This file is appended to any
# existing logging configuration files. For details about logging configuration
# files, see the Python logging module documentation. (string value)
# Deprecated group/name - [DEFAULT]/log_config
#log_config_append = <None>
# DEPRECATED. A logging.Formatter log message format string which may
# use any of the available logging.LogRecord attributes. This option
# is deprecated. Please use logging_context_format_string and
# logging_default_format_string instead. (string value)
# DEPRECATED. A logging.Formatter log message format string which may use any
# of the available logging.LogRecord attributes. This option is deprecated.
# Please use logging_context_format_string and logging_default_format_string
# instead. (string value)
#log_format = <None>
# Format string for %%(asctime)s in log records. Default: %(default)s
# . (string value)
# Format string for %%(asctime)s in log records. Default: %(default)s . (string
# value)
#log_date_format = %Y-%m-%d %H:%M:%S
# (Optional) Name of log file to output to. If no default is set,
# logging will go to stdout. (string value)
# (Optional) Name of log file to output to. If no default is set, logging will
# go to stdout. (string value)
# Deprecated group/name - [DEFAULT]/logfile
#log_file = <None>
# (Optional) The base directory used for relative --log-file paths.
# (string value)
# (Optional) The base directory used for relative --log-file paths. (string
# value)
# Deprecated group/name - [DEFAULT]/logdir
#log_dir = <None>
# Use syslog for logging. Existing syslog format is DEPRECATED during
# I, and will change in J to honor RFC5424. (boolean value)
# Use syslog for logging. Existing syslog format is DEPRECATED during I, and
# will change in J to honor RFC5424. (boolean value)
#use_syslog = false
# (Optional) Enables or disables syslog rfc5424 format for logging. If
# enabled, prefixes the MSG part of the syslog message with APP-NAME
# (RFC5424). The format without the APP-NAME is deprecated in I, and
# will be removed in J. (boolean value)
# (Optional) Enables or disables syslog rfc5424 format for logging. If enabled,
# prefixes the MSG part of the syslog message with APP-NAME (RFC5424). The
# format without the APP-NAME is deprecated in I, and will be removed in J.
# (boolean value)
#use_syslog_rfc_format = false
# Syslog facility to receive log lines. (string value)
@ -65,15 +64,13 @@
# Format string to use for log messages with context. (string value)
#logging_context_format_string = %(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s
# Format string to use for log messages without context. (string
# value)
# Format string to use for log messages without context. (string value)
#logging_default_format_string = %(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s
# Data to append to log format when level is DEBUG. (string value)
#logging_debug_format_suffix = %(funcName)s %(pathname)s:%(lineno)d
# Prefix each line of exception output with this format. (string
# value)
# Prefix each line of exception output with this format. (string value)
#logging_exception_prefix = %(asctime)s.%(msecs)03d %(process)d TRACE %(name)s %(instance)s
# List of logger=LEVEL pairs. (list value)
@ -85,47 +82,68 @@
# Enables or disables fatal status of deprecations. (boolean value)
#fatal_deprecations = false
# The format for an instance that is passed with the log message.
# (string value)
# The format for an instance that is passed with the log message. (string
# value)
#instance_format = "[instance: %(uuid)s] "
# The format for an instance UUID that is passed with the log message.
# (string value)
# The format for an instance UUID that is passed with the log message. (string
# value)
#instance_uuid_format = "[instance: %(uuid)s] "
[fencor]
#
# From osha
#
# YAML file containing the required credentials for compute nodes (string
# value)
#credentials_file = <None>
# Number of retries to fence each compute node. (integer value)
#retries = 1
# Time in seconds to wait between retries (integer value)
#hold_period = 10
# Choose the best fencor driver, e.g. ipmi, libvirt, ... (string value)
#driver = osha.fencors.drivers.ipmi.driver.IpmiDriver
# List of kwargs to customize the fencor operation. Your fencor driver should
# support these options. Options should be in key:value format (dict value)
#options =
[keystone]
#
# From osha
#
# Name used for authentication with the OpenStack Identity service.
# Defaults to env[OS_USERNAME]. (string value)
# Name used for authentication with the OpenStack Identity service. Defaults to
# env[OS_USERNAME]. (string value)
#os_username =
# Password used for authentication with the OpenStack Identity
# service. Defaults to env[OS_PASSWORD]. (string value)
# Password used for authentication with the OpenStack Identity service.
# Defaults to env[OS_PASSWORD]. (string value)
#os_password =
# Project name to scope to. Defaults to env[OS_PROJECT_NAME]. (string
# value)
# Project name to scope to. Defaults to env[OS_PROJECT_NAME]. (string value)
#os_project_name =
# Domain name containing project. Defaults to
# env[OS_PROJECT_DOMAIN_NAME]. (string value)
# Domain name containing project. Defaults to env[OS_PROJECT_DOMAIN_NAME].
# (string value)
#os_project_domain_name =
# User's domain name. Defaults to env[OS_USER_DOMAIN_NAME]. (string
# value)
# User's domain name. Defaults to env[OS_USER_DOMAIN_NAME]. (string value)
#os_user_domain_name =
# Tenant to request authorization on. Defaults to env[OS_TENANT_NAME].
# (string value)
# Tenant to request authorization on. Defaults to env[OS_TENANT_NAME]. (string
# value)
#os_tenant_name =
# Tenant to request authorization on. Defaults to env[OS_TENANT_ID].
# (string value)
# Tenant to request authorization on. Defaults to env[OS_TENANT_ID]. (string
# value)
#os_tenant_id =
# Specify the Identity endpoint to use for authentication. Defaults to
@ -136,22 +154,20 @@
# env[OS_BACKUP_URL]. (string value)
#os_backup_url =
# Specify the region to use. Defaults to env[OS_REGION_NAME]. (string
# value)
# Specify the region to use. Defaults to env[OS_REGION_NAME]. (string value)
#os_region_name =
# Specify an existing token to use instead of retrieving one via
# authentication (e.g. with username & password). Defaults to
# env[OS_TOKEN]. (string value)
# Specify an existing token to use instead of retrieving one via authentication
# (e.g. with username & password). Defaults to env[OS_TOKEN]. (string value)
#os_token =
# Identity API version: 2.0 or 3. Defaults to
# env[OS_IDENTITY_API_VERSION] (string value)
# Identity API version: 2.0 or 3. Defaults to env[OS_IDENTITY_API_VERSION]
# (string value)
#os_identity_api_version =
# Endpoint type to select. Valid endpoint types: "public" or
# "publicURL", "internal" or "internalURL", "admin" or "adminURL".
# Defaults to env[OS_ENDPOINT_TYPE] or "public" (string value)
# Endpoint type to select. Valid endpoint types: "public" or "publicURL",
# "internal" or "internalURL", "admin" or "adminURL". Defaults to
# env[OS_ENDPOINT_TYPE] or "public" (string value)
# Allowed values: public, publicURL, internal, internalURL, admin, adminURL
#os_endpoint_type = public
@ -163,19 +179,17 @@
#
# Driver used to get status updates of compute nodes (string value)
#driver = osha.monitors.plugins.osha.OshaDriver
#driver = osha.monitors.drivers.osha.driver.OshaDriver
# username to be used to initialize the monitoring driver (string
# value)
# username to be used to initialize the monitoring driver (string value)
#username = <None>
# Password to be used for initializing monitoring driver (string
# value)
# Password to be used for initializing monitoring driver (string value)
#password = <None>
# Monitoring system API endpoint (string value)
#endpoint = <None>
# List of kwargs if you want to pass it to initialize the monitoring
# driver (dict value)
#kwargs = <None>
# List of kwargs to pass when initializing the monitoring driver. Should be
# provided in key:value format (dict value)
#kwargs =

View File

@ -21,7 +21,7 @@ CONF = cfg.CONF
_MONITORS = [
cfg.StrOpt('driver',
default='osha.monitors.plugins.osha.OshaDriver',
default='osha.monitors.drivers.osha.driver.OshaDriver',
help='Driver used to get a status updates of compute nodes'),
cfg.StrOpt('username',
help='username to be used to initialize the monitoring driver'),
@ -32,7 +32,8 @@ _MONITORS = [
cfg.DictOpt('kwargs',
default={},
help='List of kwargs if you want to pass it to initialize'
' the monitoring driver')
' the monitoring driver. should be provided in key:value '
'format')
]
@ -42,6 +43,29 @@ _COMMON = [
help='Time to wait between different operations')
]
# Options registered under the [fencor] group. They select the fencing
# driver and control how a compute node is shut down.
_FENCOR = [
    cfg.StrOpt('credentials-file',
               help='YAML file containing the required credentials for '
                    'compute nodes'),
    cfg.IntOpt('retries',
               default=1,
               help='Number of retries to fence each compute node. Must be'
                    ' at least 1 to try first the soft shutdown'),
    cfg.IntOpt('hold-period',
               default=10,
               help='Time in seconds to wait between retries. Should be '
                    'reasonable amount of time as different servers take '
                    'different times to shut off'),
    cfg.StrOpt('driver',
               default='osha.fencors.drivers.ipmi.driver.IpmiDriver',
               help='Choose the best fencor driver, e.g. ipmi, libvirt, ...'),
    cfg.DictOpt('options',
                default={},
                help='List of kwargs to customize the fencor operation. Your '
                     'fencor driver should support these options. Options '
                     'should be in key:value format')
]
def build_os_options():
osclient_opts = [
@ -130,6 +154,13 @@ def configure():
CONF.register_group(monitors_grp)
CONF.register_opts(_MONITORS, group='monitoring')
fencors_grp = cfg.OptGroup('fencor',
title='Fencor Options',
help='Fencor Driver/plugin to be used to '
'fence compute nodes')
CONF.register_group(fencors_grp)
CONF.register_opts(_FENCOR, group='fencor')
default_conf = cfg.find_config_files('osha', 'osha',
'.conf')
log.register_options(CONF)
@ -160,7 +191,8 @@ def list_opts():
_OPTS = {
None: _COMMON,
'monitoring': _MONITORS,
'keystone': build_os_options()
'keystone': build_os_options(),
'fencor': _FENCOR
}
return _OPTS.items()

View File

@ -1,4 +1,16 @@
# __author__ = 'saad'
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
import os
@ -20,9 +32,9 @@ class YamlParser(object):
def parse(self):
if not self.file:
raise "No file specified !"
raise Exception('No file specified !')
if not os.path.exists(self.file) or not os.path.isfile(self.file):
raise "File desn't exists"
raise Exception('File desnot exists')
stream = file(self.file, 'r')
data = yaml.load(stream)
@ -57,4 +69,4 @@ class YamlParser(object):
if server.get(key) == value:
return server
return None
return None

1
osha/fencors/__init__.py Normal file
View File

@ -0,0 +1 @@
__author__ = 'saad'

View File

@ -0,0 +1 @@
__author__ = 'saad'

View File

@ -0,0 +1,66 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class FencorBaseDriver(object):
    """
    Common interface for fencor plugins.

    Every fencor driver derives from this abstract class so that all
    drivers can be used interchangeably.
    """

    def __init__(self, node_ip, node_username, node_password, **kwargs):
        """
        Store the connection details of the node to be fenced.

        :param node_ip: address the driver uses to reach the node
        :param node_username: user name used by the driver
        :param node_password: password used by the driver
        :param kwargs: any additional driver-specific parameters; kept
            as-is on the instance
        """
        self.ip = node_ip
        self.username = node_username
        self.password = node_password
        self.kwargs = kwargs

    @abc.abstractmethod
    def graceful_shutdown(self):
        """
        Shut the compute node down gracefully so it can be evacuated.
        """

    @abc.abstractmethod
    def force_shutdown(self):
        """
        Forcibly shut the compute node down so it can be evacuated.
        Intended as the fallback when the graceful shutdown failed.
        """

    @abc.abstractmethod
    def status(self):
        """
        Report the power status of the compute node. Implementations
        should return 1 if the node is on, 0 if it is off and -1 on
        error or unknown power status.
        """

    @abc.abstractmethod
    def get_info(self):
        """
        Describe the driver.
        :return: dict of name, version, author, ...
        """

View File

@ -0,0 +1,76 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_config import cfg
from oslo_log import log
from oslo_utils import importutils
from osha.common.yaml_parser import YamlParser
from time import sleep
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class FencorManager(object):
    """
    Fence (shut down) a list of compute nodes using the driver configured
    in the [fencor] group.
    """

    def __init__(self, nodes):
        """
        :param nodes: iterable of dicts; each must provide an 'ip' key that
            is used to look the node up in the credentials file
        """
        self.fencor = CONF.get('fencor')
        self.nodes = nodes
        self.parser = YamlParser(self.fencor.get('credentials_file'))

    def fence(self):
        """
        Try to shutdown nodes and wait for configurable amount of times
        :return: list of nodes; each node gets a 'status' key that is True
            when the node was shut down and False otherwise
        """
        processed_nodes = []
        for node in self.nodes:
            node_details = self.parser.find_server_by_ip(node.get('ip'))
            if not node_details:
                # The lookup returns None for unknown servers; without
                # credentials the node cannot be fenced, so report failure
                # instead of crashing on None.get(...).
                LOG.error('No fencing credentials found for node %s, '
                          'skipping it', node.get('ip'))
                node['status'] = False
                processed_nodes.append(node)
                continue
            driver = importutils.import_object(
                self.fencor.get('driver'),
                node_details.get('fencor-ip'),
                node_details.get('fencor-user'),
                node_details.get('fencor-password'),
                **self.fencor.get('options')
            )
            node['status'] = self.do_shutdown_procedure(driver)
            LOG.info('Fencing of node %s finished, node shut down: %s',
                     node.get('ip'), node['status'])
            processed_nodes.append(node)
        return processed_nodes

    def do_shutdown_procedure(self, driver):
        """
        Ask the driver for a graceful shutdown up to the configured number
        of retries, waiting the configured hold period after each attempt,
        then fall back to a forced shutdown.

        Any truthy driver.status() value is treated as "node still on".

        :param driver: fencor driver instance for the node
        :return: True if the node is reported off, False otherwise
        """
        hold_period = self.fencor.get('hold_period', 10)
        for retry in range(0, self.fencor.get('retries', 1)):
            if not driver.status():
                # Node is already off, nothing left to do.
                return True
            try:
                driver.graceful_shutdown()
            except Exception as e:
                LOG.error(e)
            # Give the node a pre-configured amount of time to shut off
            # before checking it again.
            LOG.info('wait for %d seconds before retrying to gracefully '
                     'shutdown', hold_period)
            sleep(hold_period)

        LOG.info('Graceful shutdown retries exhausted, forcing shutdown of '
                 'the node.')
        try:
            driver.force_shutdown()
        except Exception as e:
            LOG.error(e)

        return not driver.status()

View File

@ -0,0 +1 @@
__author__ = 'saad'

View File

@ -0,0 +1 @@
__author__ = 'saad'

View File

@ -0,0 +1,56 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from osha.fencors.common.driver import FencorBaseDriver
from osha.fencors.drivers.ipmi.ipmitool import IpmiInterface
from oslo_log import log
from oslo_config import cfg
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class IpmiDriver(FencorBaseDriver):
    """
    Fencor driver that delegates power operations to an IpmiInterface.
    """

    def __init__(self, node_ip, node_username, node_password, **kwargs):
        """
        Initialize the base driver and build the IPMI interface for the
        node; the interface verbosity follows CONF.debug.
        """
        super(IpmiDriver, self).__init__(
            node_ip, node_username, node_password, **kwargs)
        self.interface = IpmiInterface(
            node_ip, node_username, node_password, verbose=CONF.debug)

    def status(self):
        """Return the power status reported by the IPMI interface."""
        power_state = self.interface.get_power_status()
        return power_state

    def graceful_shutdown(self):
        """Request a soft power-off; failures are logged, not raised."""
        try:
            self.interface.power_soft()
        except Exception as err:
            LOG.error(err)

    def force_shutdown(self):
        """Request a hard power-off; failures are logged, not raised."""
        try:
            self.interface.power_down()
        except Exception as err:
            LOG.error(err)

    # @todo remove this fn as it's for testing purposes only :)
    def power_on(self):
        """Power the node on through the IPMI interface."""
        self.interface.power_on()

    def get_info(self):
        """Return a dict with the driver name, version and author."""
        return dict(
            name='IPMI Interface driver',
            version=1.0,
            author='Hewlett-Packard Development Company, L.P',
        )

View File

@ -1,7 +1,23 @@
# __author__ = 'saad'
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import subprocess
from distutils import spawn
from oslo_log import log
LOG = log.getLogger(__name__)
class IpmiInterface:
@ -31,6 +47,7 @@ class IpmiInterface:
password=password,
interface=interface
)
LOG.debug('IPMI Interface initialized')
def _update_cmd_credentials(self, host, username, password, interface):
"""
@ -58,7 +75,7 @@ class IpmiInterface:
cmd = self._cmd + ' chassis power status'
output = self._process_request(cmd)
if self._verbose:
print "[Debug]: ", output
LOG.debug(output)
if 'is on'.lower() in output.lower():
return 1
elif 'is off'.lower() in output.lower():
@ -67,10 +84,22 @@ class IpmiInterface:
def power_down(self):
"""
shutdown the machine
Force shutdown the machine
"""
cmd = self._cmd + ' chassis power down'
output = self._process_request(cmd)
LOG.info('IPMI interface force shutdown node: %s, output: %s' %
(self._host, output))
return output
def power_soft(self):
    """
    Softly shutdown the machine
    :return: output of the ipmitool command
    """
    # Leading space matches the sibling 'chassis power status' and
    # 'chassis power down' commands, so the sub-command cannot be glued
    # to the last credential argument in self._cmd.
    cmd = self._cmd + ' chassis power soft'
    output = self._process_request(cmd)
    LOG.info('IPMI interface soft shutdown node: %s, output: %s',
             self._host, output)
    return output
def power_reset(self):
@ -89,7 +118,7 @@ class IpmiInterface:
def _process_request(self, cmd):
if self._verbose:
print "Executing IPMI command: ", cmd
LOG.debug('Executing IPMI command:', cmd)
process = subprocess.Popen(cmd, shell=True,
stdout=subprocess.PIPE,
@ -97,10 +126,11 @@ class IpmiInterface:
output, error = process.communicate()
if self._verbose:
print "[Debug] Process Output: ", output
print "[Debug] Process Error: ", error
LOG.debug('IPMI Output: ', output)
LOG.debug('IPMI Error', error)
if process.returncode:
LOG.error(cmd)
raise Exception(error)
return output
@ -112,4 +142,7 @@ class IpmiInterface:
:return: output of the command you sent or raise error
"""
cmd = self._cmd + cmd
return self._process_request(cmd)
output = self._process_request(cmd)
LOG.info('Executing IPMI custom command: %s with output: %s' %
(cmd, output))
return output

View File

@ -14,8 +14,8 @@
from osha.common import config
from oslo_config import cfg
from oslo_log import log
from oslo_utils import importutils
from osha.monitors.common.manager import MonitorManager
from osha.fencors.common.manager import FencorManager
CONF = cfg.CONF
LOG = log.getLogger(__name__)
@ -30,7 +30,11 @@ def main():
# Do the monitoring procedure
# Monitor, analyse, nodes down ?, wait, double check ? evacuate ..
nodes = monitor.monitor()
print "Evacuate those nodes:> ", nodes
if nodes:
# evacuate process goes here !
pass
# @todo put node in maintenance mode :) Not working with virtual
# deployments
# Load Fence driver
# Shutdown the node
fencor = FencorManager(nodes)
nodes = fencor.fence()
print "Fenced nodes are", nodes

View File

@ -23,7 +23,7 @@ class MonitorBaseDriver(object):
unified interface and as many plugins as we want...
"""
def __init__(self, username, password, endpoint, kwargs):
def __init__(self, username, password, endpoint, **kwargs):
"""
Initializing the driver. Any monitoring system requires the following
parameters to call it's api. All these parameters can be passed from the

View File

@ -26,7 +26,7 @@ LOG = log.getLogger(__name__)
class OshaDriver(MonitorBaseDriver):
def __init__(self, username, password, endpoint, **kwargs):
super(OshaDriver, self).__init__(username, password, endpoint, kwargs)
super(OshaDriver, self).__init__(username, password, endpoint, **kwargs)
client = OSClient(
authurl=endpoint,
username=username,