[PoC] Call masakari APIs from a resource agent
This is a PoC of a resource agent that calls the masakari APIs when a failure of an instance's host occurs. The purposes of this patch are as follows: - Show how to call the masakari APIs from nova-host-alerter. - Indicate the implementation of the masakari driver. This is just a PoC based on an idea called "Modular architecture". In this patch, we assume that nova-host-alerter has a 'driver' param in the primitive definition, and that 'masakari' or 'mistral' is set for the driver param. We plan to place masakari_driver.py and masakari_driver.conf in the same directory as nova-host-alerter. reference: * The idea of modular architecture https://aspiers.github.io/openstack-day-israel-2017-compute-ha/#/nova-host-alerter https://aspiers.github.io/openstack-day-israel-2017-compute-ha/#/modular * Specs of a method to recover all virtual machines https://github.com/openstack/openstack-resource-agents-specs/blob/master/specs/newton/approved/newton-instance-ha-host-monitoring-spec.rst https://github.com/openstack/openstack-resource-agents-specs/blob/master/specs/newton/approved/newton-instance-ha-host-recovery.rst https://review.openstack.org/#/c/406659/ Change-Id: I6768a1822ed5f19bc66f0d6d6887194bbc32abad Co-Authored-By: Kengo Takahara <takahara-kn@njk.co.jp> Story: 2002124 Task: 19803
This commit is contained in:
parent
42bb0c53e3
commit
5701a26a63
39
ocf/masakari_driver.conf
Normal file
39
ocf/masakari_driver.conf
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
[DEFAULT]
|
||||||
|
# Name of log file. (string value)
|
||||||
|
log_file = /var/tmp/masakari_driver.log
|
||||||
|
|
||||||
|
[api]
|
||||||
|
# Authentication URL (string value)
|
||||||
|
#auth_url = <None>
|
||||||
|
auth_url = http://192.168.10.20/identity
|
||||||
|
|
||||||
|
# Project name to scope to (string value)
|
||||||
|
# Deprecated group/name - [api]/tenant_name
|
||||||
|
#project_name = <None>
|
||||||
|
project_name = service
|
||||||
|
|
||||||
|
# Domain ID containing project (string value)
|
||||||
|
#project_domain_id = <None>
|
||||||
|
project_domain_id = default
|
||||||
|
|
||||||
|
# Username (string value)
|
||||||
|
# Deprecated group/name - [api]/user_name
|
||||||
|
#username = <None>
|
||||||
|
username = masakari
|
||||||
|
|
||||||
|
# User's domain id (string value)
|
||||||
|
#user_domain_id = <None>
|
||||||
|
user_domain_id = default
|
||||||
|
|
||||||
|
# User's password (string value)
|
||||||
|
#password = <None>
|
||||||
|
password = masakari
|
||||||
|
|
||||||
|
# Number of retries for sending a notification. (integer value)
|
||||||
|
#api_retry_max = 12
|
||||||
|
api_retry_max = 3
|
||||||
|
|
||||||
|
# Interval between retries when sending a notification fails (in seconds).
|
||||||
|
# (integer value)
|
||||||
|
#api_retry_interval = 10
|
||||||
|
api_retry_interval = 1
|
177
ocf/masakari_driver.py
Normal file
177
ocf/masakari_driver.py
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import ConfigParser
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import eventlet
|
||||||
|
from keystoneauth1.identity.generic import password as ks_password
|
||||||
|
from keystoneauth1 import session as ks_session
|
||||||
|
from openstack import connection
|
||||||
|
from openstack import exceptions
|
||||||
|
from openstack import service_description
|
||||||
|
from oslo_config import cfg
|
||||||
|
from oslo_log import log
|
||||||
|
from oslo_utils import timeutils
|
||||||
|
|
||||||
|
from masakariclient.sdk.ha.v1 import _proxy
|
||||||
|
|
||||||
|
# Module-level logger and the global oslo.config object; MasakariDriver
# uses CONF when it sets up logging.
LOG = log.getLogger(__name__)
CONF = cfg.CONF
# Name passed to log.setup() in MasakariDriver._setup_log().
DOMAIN = "masakari_driver"

# Absolute path of the directory that contains this script.
script_dir = os.path.dirname(os.path.abspath(__file__))

# NOTE: The config file (masakari_driver.conf) is assumed to exist
# in the same directory as this program file.
CONFIG_FILE = script_dir + "/masakari_driver.conf"
|
||||||
|
|
||||||
|
# Fallback option values handed to SafeConfigParser in
# MasakariDriver._read_config().
#
# Every non-None value must be a string: SafeConfigParser interpolates the
# value it returns and calls string methods on it, so a non-string default
# (e.g. the integer 12) fails as soon as the option is missing from the
# config file and the default is actually used. The numeric options are
# converted with int() by the reader.
default_config = {
    'log_file': None,
    'auth_url': None,
    'project_name': None,
    'project_domain_id': None,
    'username': None,
    'user_domain_id': None,
    'password': None,
    'api_retry_max': '12',
    'api_retry_interval': '10',
}
|
||||||
|
|
||||||
|
# Field values of the notification built by
# MasakariDriver.send_notification().
TYPE_COMPUTE_HOST = "COMPUTE_HOST"
EVENT_STOPPED = "STOPPED"
CLUSTER_STATUS_OFFLINE = "OFFLINE"
HOST_STATUS_NORMAL = "NORMAL"
# Per-host result of send_notification(); the script entry point adds these
# up, so the process exit code is the number of failed notifications.
SUCCESS = 0
FAILURE = 1
|
||||||
|
|
||||||
|
|
||||||
|
class MasakariDriver(object):
    """Send COMPUTE_HOST failure notifications through the masakari API.

    Construction reads masakari_driver.conf and configures logging; each
    call to send_notification() then reports one failed host.
    """

    def __init__(self):
        self._read_config()
        self._setup_log()

    def _read_config(self):
        """Load the settings from CONFIG_FILE into instance attributes."""

        # NOTE: oslo.config was tried first, but it required either
        # '[--config-dir DIR]' or '[--config-file PATH]' as an argument and
        # the hostname could not be passed as an argument, so ConfigParser
        # is used instead.
        parser = ConfigParser.SafeConfigParser(default_config)
        parser.read(CONFIG_FILE)

        self.log_file = parser.get('DEFAULT', 'log_file')

        # Plain string options of the [api] section, stored under the same
        # attribute name as the option.
        for option in ('auth_url', 'project_name', 'project_domain_id',
                       'username', 'user_domain_id', 'password'):
            setattr(self, option, parser.get('api', option))

        # Integer options of the [api] section.
        for option in ('api_retry_max', 'api_retry_interval'):
            setattr(self, option, int(parser.get('api', option)))

    def _setup_log(self):
        """Configure oslo.log, writing to self.log_file when it is set."""
        if self.log_file is not None:
            CONF.log_file = self.log_file

        log.register_options(CONF)
        log.setup(CONF, DOMAIN)

    def _make_client(self):
        """Build and return the client used to create notifications."""

        # NOTE: This function uses masakari-monitors's code as reference.

        credentials = dict(
            auth_url=self.auth_url,
            username=self.username,
            password=self.password,
            user_domain_id=self.user_domain_id,
            project_name=self.project_name,
            project_domain_id=self.project_domain_id,
        )
        keystone = ks_session.Session(
            auth=ks_password.Password(**credentials))

        # Register the 'ha' service on the connection with masakari's proxy.
        ha_service = service_description.ServiceDescription(
            service_type='ha', proxy_class=_proxy.Proxy)
        sdk = connection.Connection(
            session=keystone, extra_services=[ha_service])
        sdk.add_service(ha_service)

        return sdk.ha.proxy_class(session=keystone, service_type='ha')

    def send_notification(self, failure_host):
        """Notify masakari that ``failure_host`` has stopped.

        The request is retried up to ``self.api_retry_max`` times, sleeping
        ``self.api_retry_interval`` seconds between attempts. An HTTP 409
        response ends the attempts and is reported as success.

        :param failure_host: hostname put into the notification.
        :returns: SUCCESS when the notification was accepted or answered
                  with HTTP 409, FAILURE once the retries are exhausted.
        """

        # NOTE: This function uses masakari-monitors's code as reference.

        # The timestamp is taken once, so every retry sends the same event.
        event = {
            'notification': {
                'type': TYPE_COMPUTE_HOST,
                # Hostname which was passed as argument.
                'hostname': failure_host,
                'generated_time': timeutils.utcnow(),
                'payload': {
                    'event': EVENT_STOPPED,
                    'cluster_status': CLUSTER_STATUS_OFFLINE,
                    'host_status': HOST_STATUS_NORMAL
                }
            }
        }
        LOG.info("Send a notification. %s", event)

        client = self._make_client()
        body = event['notification']

        failed_attempts = 0
        while True:
            try:
                response = client.create_notification(
                    type=body['type'],
                    hostname=body['hostname'],
                    generated_time=body['generated_time'],
                    payload=body['payload'])
                LOG.info("Response: %s", response)
                return SUCCESS
            except Exception as e:
                # HTTP 409: stop here without retrying and without
                # counting the attempt as a failure.
                if (isinstance(e, exceptions.HttpException) and
                        e.status_code == 409):
                    msg = ("Stop retrying to send a notification because "
                           "same notification have been already sent.")
                    LOG.info("%s", msg)
                    return SUCCESS

                if failed_attempts >= self.api_retry_max:
                    LOG.exception("Exception caught: %s", e)
                    return FAILURE

                LOG.warning("Retry sending a notification. (%s)", e)
                failed_attempts += 1
                eventlet.greenthread.sleep(self.api_retry_interval)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
retval = 0
|
||||||
|
hosts = sys.stdin.read().splitlines()
|
||||||
|
|
||||||
|
if len(hosts) > 0:
|
||||||
|
masakari_driver = MasakariDriver()
|
||||||
|
for host in hosts:
|
||||||
|
retval += masakari_driver.send_notification(host)
|
||||||
|
|
||||||
|
# Exit code is number of notification failure.
|
||||||
|
sys.exit(retval)
|
115
ocf/nova-host-alerter
Normal file
115
ocf/nova-host-alerter
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
#!/bin/bash
#
# bash is required: nova_host_alerter_monitor uses process substitution
# (">(...)"), which plain POSIX sh does not provide.

#######################################################################
# Initialization:

# Load the OCF helper functions (ocf_log) and return-code variables
# (OCF_SUCCESS, ...) used throughout this agent.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
||||||
|
|
||||||
|
# Define 'driver' parameter in metadata.
|
||||||
|
# Print the resource agent metadata (XML) on stdout.
# Returns OCF_SUCCESS.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nova-host-alerter">
<version>1.0</version>

<longdesc lang="en">
Resource agent that reports failed compute hosts to a recovery driver.
On every monitor operation, the hosts found through the "evacuate" node
attribute are passed to the configured driver, which sends a host failure
notification for each of them.
</longdesc>
<shortdesc lang="en">Notifies a recovery driver of compute host failures.</shortdesc>

<parameters>
<parameter name="driver" unique="1" required="1">
<longdesc lang="en">
Specify the driver of recovery. Currently, only "masakari" is supported.
</longdesc>
<shortdesc lang="en">Driver of recovery.</shortdesc>
<content type="string" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="status" timeout="60" />
<action name="monitor" interval="60" timeout="60" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
    return $OCF_SUCCESS
}
|
||||||
|
|
||||||
|
SERVICE=nova-host-alerter
# Requested action (start, stop, monitor, ...), first command-line argument.
OP=$1

# NOTE: It is assumed that masakari_driver.py exists in same directory with
# nova-host-alerter.
# Quoted so that an installation path containing whitespace still resolves;
# "&&" keeps pwd from running in the wrong directory if cd fails.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
MASAKARI_DRIVER="${SCRIPT_DIR}/masakari_driver.py"
|
||||||
|
|
||||||
|
# Start action: mark the resource as active by creating the state file.
# Returns the exit status of touch.
nova_host_alerter_start() {
    # Do not involve monitor here so that the start timeout can be low.
    # touch is the last command, so its status is the function's status.
    touch "$statefile"
}
|
||||||
|
|
||||||
|
# Stop action: remove the state file created by the start action.
# Always returns OCF_SUCCESS; the result of rm is not checked.
nova_host_alerter_stop() {
    rm -f "$statefile"
    return $OCF_SUCCESS
}
|
||||||
|
|
||||||
|
# Monitor action: hand the failed hosts to the configured recovery driver.
#
# Returns:
#   OCF_NOT_RUNNING     the resource has not been started (no state file)
#   OCF_ERR_CONFIGURED  the 'driver' parameter names an unsupported driver
#   OCF_SUCCESS         otherwise; failed notifications are logged but do
#                       not fail the monitor
nova_host_alerter_monitor() {
    # A resource that is not started must report "not running". Without
    # this check a probe would see the resource as active and would also
    # send notifications from nodes where it was never started.
    if [ ! -f "$statefile" ]; then
        return $OCF_NOT_RUNNING
    fi

    case "$OCF_RESKEY_driver" in
        "masakari")
            # Pass the list of failure hostnames to MASAKARI_DRIVER, one
            # hostname per line.
            # attrd_updater is expected to print one line per node of the
            # form: name="evacuate" host="HOST" value="VALUE". After tr,
            # field 4 is the host and field 6 its value. Only hosts whose
            # value is "yes" are forwarded; the value itself is dropped
            # because the driver treats each whole input line as a hostname.
            attrd_updater -n evacuate -A \
                2> >(grep -v "attribute does not exist" 1>&2) |
                sed 's/ value=""/ value="no"/' |
                tr '="' ' ' |
                awk '$6 == "yes" {print $4}' |
                "$MASAKARI_DRIVER"
            # Exit status of the driver = number of failed notifications.
            retval=$?
            if [ $retval -eq 0 ]; then
                ocf_log info "Succeeded in sending a notification."
            else
                ocf_log err "$retval host failure notification(s) failed."
            fi
            ;;
        *)
            ocf_log err "Driver type '$OCF_RESKEY_driver' is not supported."
            return $OCF_ERR_CONFIGURED
            ;;
    esac
    return $OCF_SUCCESS
}
|
||||||
|
|
||||||
|
# File whose existence marks the resource as started on this node.
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"

# Dispatch the requested action; RC becomes the agent's exit code.
case "$OP" in
    start)
        nova_host_alerter_start
        RC=$?
        ;;
    stop)
        nova_host_alerter_stop
        RC=$?
        ;;
    # "status" is advertised in the metadata, so it is served by the same
    # handler as monitor instead of being reported as unimplemented.
    monitor|status)
        nova_host_alerter_monitor
        RC=$?
        ;;
    meta-data)
        meta_data
        RC=$?
        ;;
    validate-all)
        RC=$OCF_SUCCESS
        ;;
    *)
        RC=$OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $RC
|
Loading…
Reference in New Issue
Block a user