ce7f21eeb0
So far the TripleO reboot method tried to re-activate the affected servers (which are supposed to be shut off due to the compute reboot) before they were in the SHUTOFF status (and hence no action would occur). This patch fixes the TripleO reboot method by making nova wait for the servers to be in the SHUTOFF status before re-activating them. Change-Id: Ic8d71fa0bf5f08ef15a53b6e500e3905e5886d26
260 lines
9.5 KiB
Python
260 lines
9.5 KiB
Python
# Copyright 2019 Red Hat
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
from __future__ import absolute_import
|
|
|
|
import re
|
|
import typing # noqa
|
|
|
|
from oslo_log import log
|
|
|
|
from tobiko.openstack import neutron
|
|
from tobiko.openstack import nova
|
|
from tobiko.openstack import topology
|
|
from tobiko.shell import files
|
|
from tobiko.shell import sh
|
|
from tobiko.tripleo import _overcloud
|
|
from tobiko.tripleo import _undercloud
|
|
|
|
|
|
# Module-level logger, named after this module
LOG = log.getLogger(__name__)
|
|
|
|
|
|
class TripleoTopology(topology.OpenStackTopology):
    """OpenStack topology specialization used for TripleO deployments.

    Node discovery covers the SSH proxy jump node, the undercloud host and
    every overcloud server; each overcloud node is also bound to the
    subgroups that can be derived from its name.
    """

    # Neutron agent -> name of the matching service
    agent_to_service_name_mappings = {
        neutron.DHCP_AGENT: 'tripleo_neutron_dhcp',
        neutron.L3_AGENT: 'tripleo_neutron_l3_agent',
        neutron.OPENVSWITCH_AGENT: 'tripleo_neutron_ovs_agent',
        neutron.METADATA_AGENT: 'tripleo_neutron_metadata_agent',
        neutron.OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent',
        neutron.NEUTRON_OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent',
        neutron.OVN_CONTROLLER: 'tripleo_ovn_controller',
    }

    # Neutron agent -> name of the matching container
    agent_to_container_name_mappings = {
        neutron.DHCP_AGENT: 'neutron_dhcp',
        neutron.L3_AGENT: 'neutron_l3_agent',
        neutron.OPENVSWITCH_AGENT: 'neutron_ovs_agent',
        neutron.METADATA_AGENT: 'neutron_metadata_agent',
        neutron.OVN_METADATA_AGENT: 'ovn_metadata_agent',
        neutron.NEUTRON_OVN_METADATA_AGENT: 'ovn_metadata_agent',
        neutron.OVN_CONTROLLER: 'ovn_controller',
    }

    has_containers = True

    # TODO: add more known subgroups here
    known_subgroups: typing.List[str] = ['controller', 'compute']

    # In TripleO log files have to be parsed directly
    file_digger_class = files.LogFileDigger

    # Mapping of OpenStack services to their log file and (if needed)
    # systemd unit. The documented entry format is:
    #     service_name: (log_filename, systemd_unit_name)
    # NOTE(review): the only entry below is a plain log file glob rather
    # than a tuple - confirm which format the consumers expect.
    log_names_mappings = {
        neutron.SERVER: '/var/log/containers/neutron/server.log*',
    }

    def create_node(self, name, ssh_client, **kwargs):
        """Build the TripleO flavoured node object bound to this topology.

        :param name: name given to the new node
        :param ssh_client: SSH client handed over to the new node
        :param kwargs: extra keyword arguments forwarded unchanged to
            TripleoTopologyNode
        :returns: the new TripleoTopologyNode instance
        """
        node = TripleoTopologyNode(topology=self,
                                   name=name,
                                   ssh_client=ssh_client,
                                   **kwargs)
        return node

    def discover_nodes(self):
        """Discover SSH proxy jump, undercloud and overcloud nodes, in
        that order.
        """
        self.discover_ssh_proxy_jump_node()
        self.discover_undercloud_nodes()
        self.discover_overcloud_nodes()

    def discover_undercloud_nodes(self):
        """Register the undercloud host in the 'undercloud' group.

        Nothing is done when no undercloud is available.
        """
        if not _undercloud.has_undercloud():
            return

        host_config = _undercloud.undercloud_host_config()
        client = _undercloud.undercloud_ssh_client()
        self.add_node(address=host_config.hostname,
                      group='undercloud',
                      ssh_client=client)

    def discover_overcloud_nodes(self):
        """Register every overcloud server in the 'overcloud' group.

        Each server is first asked to be powered on (failures are only
        logged), then added as a node, linked to its overcloud server and
        bound to the subgroups derived from its name. Nothing is done when
        no overcloud is available.
        """
        if not _overcloud.has_overcloud():
            return

        for overcloud_server in _overcloud.list_overcloud_nodes():
            try:
                _overcloud.power_on_overcloud_node(overcloud_server)
            except Exception:
                # Best effort: discovery goes on even if power on failed
                LOG.exception("Error ensuring overcloud node power "
                              "status is on")

            host_config = _overcloud.overcloud_host_config(
                server=overcloud_server)
            client = _overcloud.overcloud_ssh_client(
                hostname=overcloud_server.name,
                host_config=host_config)
            overcloud_node = self.add_node(address=host_config.hostname,
                                           hostname=overcloud_server.name,
                                           group='overcloud',
                                           ssh_client=client)
            assert isinstance(overcloud_node, TripleoTopologyNode)
            overcloud_node.overcloud_server = overcloud_server
            self.discover_overcloud_node_subgroups(overcloud_node)

    def discover_overcloud_node_subgroups(self, node):
        """Bind a node to every subgroup that can be derived from its name.

        :param node: node whose name is inspected
        :returns: the set of subgroup names found (possibly empty)
        """
        # valid group names among the '-' separated pieces of the node name
        from_name_parts = {
            part
            for part in node.name.split('-')
            if is_valid_overcloud_group_name(group_name=part,
                                             node_name=node.name)}

        # known subgroup names contained in the node name
        # (controller, compute, ...)
        from_known_names = {known
                            for known in self.known_subgroups
                            if known in node.name}

        subgroups: typing.Set[str] = from_name_parts | from_known_names
        if not subgroups:
            LOG.warning("Unable to obtain any node subgroup from node "
                        "name: '%s'", node.name)
            return subgroups

        # bind the node to the discovered subgroups
        for subgroup in sorted(subgroups):
            LOG.debug("Add node '%s' to subgroup '%s'", node.name,
                      subgroup)
            self.add_node(hostname=node.name, group=subgroup)
        return subgroups
|
|
|
|
|
|
class TripleoTopologyNode(topology.OpenStackTopologyNode):
    """Topology node of a TripleO deployment."""

    # Overcloud server bound to this node; it stays None unless something
    # (e.g. the overcloud nodes discovery) assigns it.
    overcloud_server: typing.Optional[nova.NovaServer] = None

    def reboot_overcloud_node(self, reactivate_servers=True):
        """Reboot overcloud node

        The node is powered off and then powered on again. When
        reactivate_servers is true, the servers returned by
        get_running_servers() before the power off are, after the power on,
        waited to reach the SHUTOFF status and then activated again.

        :param reactivate_servers (bool): whether or not to re-activate the
            servers which were hosted on the node before the reboot
        :raises TypeError: if the node is not bound to an overcloud server
        """
        # The servers list has to be taken before the node goes down
        servers_to_restart = (self.get_running_servers()
                              if reactivate_servers
                              else [])

        self.power_off_overcloud_node()
        self.power_on_overcloud_node()

        for server in servers_to_restart:
            # Activating a server before it is SHUTOFF would be a no-op
            nova.wait_for_server_status(server=server.id,
                                        status='SHUTOFF')
            LOG.debug(f'Server {server.name} with ID {server.id} '
                      f'had a SHUTOFF status before being '
                      f'restarted')
            nova.activate_server(server)
            # NOTE(review): 'server' was fetched before the reboot and is
            # not refreshed here, so the status logged below may be stale -
            # confirm whether the result of activate_server should be used.
            LOG.debug(f'Server {server.name} with ID {server.id} '
                      f'has a {server.status} status after being '
                      f'restarted')

    def get_running_servers(self):
        """List the servers hosted on this node which are not SHUTOFF.

        A server is considered hosted on this node when the short name
        (the part before the first dot) of its hypervisor hostname equals
        the node name. Servers exposing no hypervisor hostname (attribute
        missing or empty) cannot be hosted on this node and are skipped
        instead of raising AttributeError.

        :returns: list of the matching servers
        """
        servers_to_reactivate = []
        for server in nova.list_servers():
            hypervisor_hostname = getattr(
                server, 'OS-EXT-SRV-ATTR:hypervisor_hostname', None)
            if not hypervisor_hostname:
                continue
            server_hyp = hypervisor_hostname.split('.', 1)[0]
            if self.name == server_hyp and server.status != 'SHUTOFF':
                servers_to_reactivate.append(server)
        LOG.info(f'Servers to restart after reboot: {servers_to_reactivate}')
        return servers_to_reactivate

    def _get_overcloud_server(self):
        """Return the overcloud server of this node or raise TypeError."""
        server = self.overcloud_server
        if server is None:
            raise TypeError(f"Node {self.name} is not and Overcloud server")
        return server

    def power_on_overcloud_node(self):
        """Ensure the overcloud server of this node is powered on.

        The SSH client is closed before powering on the server, and the
        hostname is then fetched again through it.

        :raises TypeError: if the node is not bound to an overcloud server
        """
        server = self._get_overcloud_server()
        self.ssh_client.close()
        LOG.debug(f"Ensuring overcloud node {self.name} power is on...")
        _overcloud.power_on_overcloud_node(server)
        # presumably this also proves the node is reachable again via SSH
        hostname = sh.get_hostname(ssh_client=self.ssh_client)
        LOG.debug(f"Overcloud node {self.name} power is on ("
                  f"hostname={hostname})")

    def power_off_overcloud_node(self):
        """Ensure the overcloud server of this node is powered off.

        The SSH client is closed before powering off the server.

        :raises TypeError: if the node is not bound to an overcloud server
        """
        server = self._get_overcloud_server()
        self.ssh_client.close()
        LOG.debug(f"Ensuring overcloud node {self.name} power is off...")
        _overcloud.power_off_overcloud_node(server)
        LOG.debug(f"Overcloud server node {self.name} power is off.")
|
|
|
|
|
|
def is_valid_overcloud_group_name(
        group_name: str,
        node_name: typing.Optional[str] = None) -> bool:
    """Tell whether a name can be used as an overcloud node subgroup name.

    :param group_name: candidate group name
    :param node_name: name of the node the candidate comes from, if any;
        a candidate equal to it is rejected
    :returns: False when the candidate is empty, is 'overcloud', equals
        node_name or is a number; True otherwise
    """
    if not group_name:
        return False
    if group_name in ('overcloud', node_name):
        return False
    return not is_number(group_name)
|
|
|
|
|
|
def is_number(text: str):
    """Tell whether the given text can be converted to a float.

    :param text: text to check
    :returns: True if float(text) succeeds, False if it raises ValueError
    """
    try:
        float(text)
    except ValueError:
        return False
    return True
|
|
|
|
|
|
def setup_tripleo_topology():
    """Make TripleoTopology the default OpenStack topology class.

    The default is changed only when an undercloud or an overcloud is
    available.
    """
    tripleo_available = (_undercloud.has_undercloud() or
                         _overcloud.has_overcloud())
    if tripleo_available:
        topology.set_default_openstack_topology_class(TripleoTopology)
|
|
|
|
|
|
def get_ip_to_nodes_dict(openstack_nodes=None):
    """Map the public IP of each node to the node name.

    :param openstack_nodes: nodes to map; when empty or None the nodes of
        the 'overcloud' group are used instead
    :returns: dict with str(node.public_ip) as key and node.name as value
    """
    nodes = openstack_nodes or topology.list_openstack_nodes(
        group='overcloud')
    names_by_ip = {}
    for node in nodes:
        ip_address = str(node.public_ip)
        names_by_ip[ip_address] = node.name
    return names_by_ip
|
|
|
|
|
|
def str_is_not_ip(check_str):
    """Tell whether the given string is something other than an IP address.

    The string (with surrounding whitespace removed) is parsed as an IPv4
    or IPv6 address. Looking only at the first character is not enough: a
    host name may start with a digit and an IPv6 address may start with a
    letter.

    :param check_str: string to check (typically a host name or an IP)
    :returns: False if the string is a valid IP address, True otherwise
        (the empty string included)
    """
    # local import: this module does not import ipaddress at file level
    import ipaddress

    try:
        ipaddress.ip_address(check_str.strip())
    except ValueError:
        return True
    return False
|
|
|
|
|
|
def ip_to_hostname(oc_ip):
    """Return the name of the node having the given public IP.

    :param oc_ip: IP used as key of the get_ip_to_nodes_dict() mapping
    :raises KeyError: if the mapping has no such key
    """
    names_by_ip = get_ip_to_nodes_dict()
    return names_by_ip[oc_ip]
|
|
|
|
|
|
def actual_node_groups(groups):
    """Return the set of the given groups which are existing node groups"""
    requested_groups = set(groups)
    return requested_groups.intersection(
        topology.list_openstack_node_groups())
|
|
|
|
|
|
def get_node(node_name):
    """Return the first OpenStack node matching the given host name.

    Only the part of node_name before the first dot is compared with the
    node names.

    :param node_name: short or fully qualified name of the node
    :raises IndexError: if no node has such name
    """
    short_name = node_name.split('.')[0]
    matching_nodes = [candidate
                      for candidate in topology.list_openstack_nodes()
                      if candidate.name == short_name]
    return matching_nodes[0]
|
|
|
|
|
|
def is_composable_roles_env():
    """Tell whether any composable role group exists in the topology.

    :returns: True if 'messaging', 'database' or 'networker' is one of
        the OpenStack node groups, False otherwise
    """
    composable_groups = ('messaging', 'database', 'networker')
    return any(group in topology.list_openstack_node_groups()
               for group in composable_groups)
|