# fuel-octane/octane/util/maintenance.py

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import re
import time
from xml.etree import ElementTree
from octane import magic_consts
from octane.util import env as env_util
from octane.util import ssh
from octane.util import subprocess
def disable_apis(env):
    """Switch HAProxy on all controllers of *env* into maintenance mode.

    On every controller: rewrite /etc/haproxy/haproxy.cfg to grant admin
    level on the stats socket and ensure a 'backend maintenance' section
    exists, append a 'use_backend maintenance' rule to every file under
    /etc/haproxy/conf.d that contains 'mode tcp' (other files are skipped
    via DontUpdateException), then restart the p_haproxy resource.
    """
    controllers = list(env_util.get_controllers(env))
    maintenance_line = 'backend maintenance'
    # Fix: raw strings so '\s' stays a regex escape instead of a deprecated
    # (SyntaxWarning on modern Python) string escape.
    stats_socket_re = re.compile(r'stats\s+socket\s+/var/lib/haproxy/stats'
                                 r'(?!.*level admin)')
    mode_tcp_re = re.compile(r'mode\s+tcp')
    use_backend_line = ' use_backend maintenance if TRUE'
    for node in controllers:
        sftp = ssh.sftp(node)
        sftp.chdir('/etc/haproxy')
        with ssh.update_file(sftp, 'haproxy.cfg') as (old, new):
            found_maint_line = False
            for line in old:
                if maintenance_line in line:
                    found_maint_line = True
                # Add 'level admin' to the stats socket line unless already
                # present (negative lookahead in the pattern).
                line = stats_socket_re.sub(r'\g<0> level admin', line)
                new.write(line)
            if not found_maint_line:
                # Fix: terminate the appended section with a newline so the
                # rewritten config stays line-oriented.
                new.write(maintenance_line + '\n')
        sftp.chdir('/etc/haproxy/conf.d')
        for f in sftp.listdir():
            with ssh.update_file(sftp, f) as (old, new):
                contents = old.read()
                if not mode_tcp_re.search(contents):
                    # Only files with a 'mode tcp' backend are redirected to
                    # the maintenance backend; leave the rest untouched.
                    raise subprocess.DontUpdateException
                new.write(contents)
                if not contents.endswith('\n'):
                    new.write('\n')
                new.write(use_backend_line)
        ssh.call(['crm', 'resource', 'restart', 'p_haproxy'], node=node)
def enable_apis(env):
    """Undo disable_apis: strip maintenance lines and restart HAProxy.

    Removes the 'backend maintenance' line from haproxy.cfg and the
    'use_backend maintenance' rule from every conf.d file on each
    controller, then restarts the p_haproxy resource.
    """
    maintenance_line = 'backend maintenance'
    use_backend_line = ' use_backend maintenance if TRUE'

    def _strip_marker(sftp, fname, marker):
        # Rewrite *fname* in place, dropping every line containing *marker*.
        with ssh.update_file(sftp, fname) as (old, new):
            for line in old:
                if marker not in line:
                    new.write(line)

    for node in list(env_util.get_controllers(env)):
        sftp = ssh.sftp(node)
        sftp.chdir('/etc/haproxy')
        _strip_marker(sftp, 'haproxy.cfg', maintenance_line)
        sftp.chdir('/etc/haproxy/conf.d')
        for fname in sftp.listdir():
            _strip_marker(sftp, fname, use_backend_line)
        ssh.call(['crm', 'resource', 'restart', 'p_haproxy'], node=node)
# Pacemaker resources that the corosync start/stop helpers must never touch
# (matched by substring against resource ids); they stay under cluster
# control while the remaining resources are stopped or started.
_default_exclude_services = ('p_mysql', 'p_haproxy', 'p_dns', 'p_ntp', 'vip',
                             'p_conntrackd', 'p_rabbitmq-server',
                             'clone_p_vrouter')
def get_crm_services(status_out):
    """Yield the id of every top-level resource in a cibadmin XML dump.

    :param status_out: XML text, e.g. output of
        ``cibadmin --query --scope resources``.
    """
    for child in ElementTree.fromstring(status_out):
        yield child.get('id')
def start_corosync_services(env):
    """Start all non-excluded Pacemaker resources of *env*."""
    manage_corosync_services(env, 'start')
def stop_corosync_services(env):
    """Stop all non-excluded Pacemaker resources of *env*."""
    manage_corosync_services(env, 'stop')
def manage_corosync_services(env, status):
    """Run ``crm resource <status>`` for every non-excluded resource.

    Each crm call is retried until it succeeds; afterwards we block until
    the cluster reports every touched resource in the requested state.

    :param status: 'start' or 'stop'.
    """
    node = env_util.get_one_controller(env)
    status_out = ssh.call_output(['cibadmin', '--query', '--scope',
                                  'resources'], node=node)
    services_list = [
        res for res in get_crm_services(status_out)
        if not any(excl in res for excl in _default_exclude_services)
    ]
    for service in services_list:
        while True:
            try:
                ssh.call(['crm', 'resource', status, service], node=node)
            except subprocess.CalledProcessError:
                # Pacemaker sometimes rejects requests it cannot process
                # yet; back off briefly and retry.
                time.sleep(1)
            else:
                break
    wait_for_corosync_services_sync(env, services_list, status)
def wait_for_corosync_services_sync(env, resource_list, status,
                                    timeout=1200, check_freq=20):
    """Poll crm_mon until every listed resource reaches *status*.

    :param resource_list: resource ids to check.
    :param status: 'start' (wait for active) or anything else (inactive).
    :param timeout: seconds before giving up.
    :param check_freq: seconds between polls.
    :raises Exception: when the cluster has not converged within *timeout*.
    """
    want_active = (status == 'start')
    node = env_util.get_one_controller(env)
    deadline = time.time() + timeout
    while True:
        crm_out = ssh.call_output(['crm_mon', '--as-xml'], node=node)
        if is_resources_synced(resource_list, crm_out, want_active):
            return
        if time.time() >= deadline:
            raise Exception("Timeout waiting for corosync cluster for env %s"
                            " to be synced" % env.id)
        time.sleep(check_freq)
def is_resources_synced(resources, crm_out, status):
    """Check whether every id in *resources* matches the wanted activity.

    :param resources: iterable of resource ids to verify.
    :param crm_out: XML output of ``crm_mon --as-xml``.
    :param status: desired activity as a bool (True means active).
    """
    mon_resources = ElementTree.fromstring(crm_out).find('resources')

    def _lookup(resource_id):
        # Find the monitored element with this id; None when absent.
        for candidate in mon_resources:
            if candidate.get('id') == resource_id:
                return candidate
        return None

    return all(
        is_resource_active(_lookup(res_id)) is status
        for res_id in resources
    )
# Resources are read from 'crm_mon' output; that command omits resources
# that are not started, so an absent (None) resource is treated as stopped.
def is_resource_active(resource):
    """Return True when a crm_mon XML element is fully active."""
    if resource is None:
        # Not reported by crm_mon at all -> considered disabled.
        return False
    if resource.tag == 'resource':
        # A primitive resource: active iff it reports so itself.
        return is_primitive_active(resource)
    # A clone/group: active only when every child primitive is active.
    return all(is_primitive_active(child) for child in resource)
def is_primitive_active(resource):
    """Return True when a crm_mon element carries active="true"."""
    return resource.get('active') == 'true'
def stop_upstart_services(env):
    """Stop OpenStack upstart services (and apache2) on every controller.

    The list of running services is persisted to /root/services_list on
    each node so start_upstart_services can later restart exactly the same
    set; if the file already exists it is reused unchanged.
    """
    controllers = list(env_util.get_controllers(env))
    # Fix: raw string keeps '\s' a regex escape rather than a deprecated
    # string escape (SyntaxWarning on modern Python).
    service_re = re.compile(r"^((?:%s)[^\s]*).*start/running" %
                            ("|".join(magic_consts.OS_SERVICES),),
                            re.MULTILINE)
    for node in controllers:
        sftp = ssh.sftp(node)
        try:
            svc_file = sftp.open('/root/services_list')
        except IOError:
            # First run on this node: discover running services via initctl
            # and record them atomically (write .tmp, then rename).
            with sftp.open('/root/services_list.tmp', 'w') as svc_file:
                initctl_out = ssh.call_output(['initctl', 'list'], node=node)
                to_stop = []
                for match in service_re.finditer(initctl_out):
                    service = match.group(1)
                    to_stop.append(service)
                    svc_file.write(service + '\n')
            sftp.rename('/root/services_list.tmp', '/root/services_list')
        else:
            # A previous (possibly interrupted) run left the list behind;
            # stop the recorded services again.
            with svc_file:
                to_stop = svc_file.read().splitlines()
        for service in to_stop:
            ssh.call(['stop', service], node=node)
        ssh.call(['service', 'apache2', 'stop'], node=node)
def start_upstart_services(env):
    """Start the services recorded in /root/services_list on each controller.

    :raises IOError: when the list file is missing on a node, i.e.
        stop_upstart_services was never run there.
    """
    controllers = list(env_util.get_controllers(env))
    for node in controllers:
        sftp = ssh.sftp(node)
        # Fix: dropped the dead 'except IOError: raise' clause — a bare
        # re-raise is a no-op, so a missing file still propagates IOError.
        with sftp.open('/root/services_list') as svc_file:
            to_start = svc_file.read().splitlines()
        for service in to_start:
            ssh.call(['start', service], node=node)
        ssh.call(['service', 'apache2', 'start'], node=node)
def stop_cluster(env):
    """Kill the Pacemaker/pcs cluster on every controller of *env*."""
    for node in list(env_util.get_controllers(env)):
        ssh.call(['pcs', 'cluster', 'kill'], node=node)
def start_cluster(env):
    """Start the cluster stack on all controllers and wait for core services.

    Fuel releases older than 6 start plain corosync via its init script;
    newer releases use pcs. After starting, block until the always-managed
    resources (those matching _default_exclude_services) are up, because
    the corosync start/stop helpers deliberately never touch them.
    """
    major_version = int(env.data['fuel_version'].split('.')[0])
    # Fix: removed the redundant 'cmds = []' that was immediately
    # overwritten by both branches.
    if major_version < 6:
        cmds = [['service', 'corosync', 'start']]
    else:
        cmds = [['pcs', 'cluster', 'start']]
    controllers = list(env_util.get_controllers(env))
    for node in controllers:
        for cmd in cmds:
            ssh.call(cmd, node=node)
    # When we start the cluster we should wait until the resources from
    # `_default_exclude_services` become up and running; they are never
    # touched by the corosync stop/start helpers at all.
    node = env_util.get_one_controller(env)
    status_out = ssh.call_output(['cibadmin', '--query', '--scope',
                                  'resources'], node=node)
    services_list = [
        res for res in get_crm_services(status_out)
        if any(excl in res for excl in _default_exclude_services)
    ]
    wait_for_corosync_services_sync(env, services_list, 'start')