virtual-deployment/virtualbox/pybox/install_vbox.py

2705 lines
94 KiB
Python
Executable File

# pylint: disable=too-many-lines
# !/usr/bin/python3
#
# SPDX-License-Identifier: Apache-2.0
#
"""
This tool is an automated installer to allow users to easily install
StarlingX on VirtualBox.
"""
import functools
import getpass
import re
import signal
import subprocess
import sys
import tempfile
import time

import paramiko
import ruamel.yaml
import streamexpect

from utils import kpi, serial
from utils.install_log import init_logging, get_log_dir, LOG
from utils.sftp import sftp_send, send_dir
from helper import vboxmanage
from helper import install_lab
from helper import host_helper
from helper.install_lab import exec_cmd, fault_tolerant
from consts.node import Nodes
from consts.networking import NICs, OAM, MGMT, Serial
from consts.timeout import HostTimeout
from consts import env
from exceptions import InvalidSSHConnection
from Parser import handle_args
# Global vars
# Options object read by the functions below (labname, password, vboxnet_type, ...).
# Starts as None; presumably assigned once the command line is parsed - TODO confirm.
V_BOX_OPTIONS = None
# Open SSH clients keyed by node name; _connect_to_ssh() stores into it and
# ssh_handler() reads from it.
SSH_CONNECTIONS = {}
# Network
# Every non-dunder attribute of the OAM / MGMT config classes, in dir() order.
# Entry 0 of OAM_CONFIG is indexed with ['device'] by the networking helpers.
OAM_CONFIG = [getattr(OAM, attr) for attr in dir(OAM) if not attr.startswith('__')]
MGMT_CONFIG = [getattr(MGMT, attr) for attr in dir(MGMT) if not attr.startswith('__')]
def menu_selector(stream, setup_type,
                  securityprofile, lowlatency, install_mode='serial'):
    """
    Walk the installer boot menus on the serial stream.

    Three menus are answered in order: install type, serial/graphical
    console and security profile. Each one is answered by optionally moving
    the cursor down and then confirming with a newline.

    Args:
        stream: Serial console stream of the node being installed.
        setup_type: Lab setup type; AIO_SX / AIO_DX pick the all-in-one entry.
        securityprofile: "extended" moves down one entry in the last menu.
        lowlatency: When exactly True (and all-in-one), moves down once more.
        install_mode: "graphical" moves down one entry; anything else keeps
            the serial entry.
    """
    cursor_down = "\033[B"
    confirm = "\n"

    def press(key):
        # One keystroke followed by a pause so the choice is visible on screen.
        serial.send_bytes(stream, key, expect_prompt=False, send=False, log=False)
        time.sleep(2)

    # Wait for menu to load (add sleep so we can see what is picked)
    serial.expect_bytes(stream, "Press", log=False)
    time.sleep(2)

    # Install type menu
    if setup_type not in [AIO_SX, AIO_DX]:
        LOG.info("Selecting Standard Install")
    else:
        LOG.info("Selecting All-in-one Install")
        press(cursor_down)
        if lowlatency is True:
            LOG.info("Selecting All-in-one (lowlatency) Install")
            press(cursor_down)
    press(confirm)

    # Serial or Graphical menu (Serial is the default entry)
    if install_mode != "graphical":
        LOG.info("Selecting Serial menu")
    else:
        LOG.info("Selecting Graphical menu")
        press(cursor_down)
    press(confirm)

    # Security profile menu
    if securityprofile == "extended":
        LOG.info("Selecting extended security profile")
        press(cursor_down)
    press(confirm)
def setup_networking(stream, ctrlr0_ip, gateway_ip, password):
    """
    Setup initial networking so we can transfer files.

    The first OAM device is given ctrlr0_ip/24, brought up and a default
    route through gateway_ip is added. Nothing is done when
    `/sbin/ip address list` already shows ctrlr0_ip. For 'hostonly' labs the
    host then pings the controller until it answers.

    Args:
        stream: Serial console stream of controller-0.
        ctrlr0_ip (str): IP address to assign to controller-0.
        gateway_ip (str): Default gateway to configure.
        password (str): Password supplied to the sudo prompts.

    Raises:
        ConnectionError: If controller-0 never answers the ping.
    """
    ip_addr = ctrlr0_ip
    interface = OAM_CONFIG[0]['device']
    ret = serial.send_bytes(
        stream,
        "/sbin/ip address list",
        prompt=ctrlr0_ip,
        fail_ok=True,
        timeout=10)
    if ret != 0:
        LOG.info("Setting networking up.")
    else:
        LOG.info("Skipping networking setup")
        return

    LOG.info("%s being set up with ip %s", interface, ip_addr)
    serial.send_bytes(stream,
                      f"sudo /sbin/ip addr add {ip_addr}/24 dev {interface}",
                      expect_prompt=False)
    host_helper.check_password(stream, password=password)
    time.sleep(2)
    serial.send_bytes(stream,
                      f"sudo /sbin/ip link set {interface} up",
                      expect_prompt=False)
    host_helper.check_password(stream, password=password)
    time.sleep(2)
    serial.send_bytes(stream,
                      f"sudo route add default gw {gateway_ip}",
                      expect_prompt=False)
    host_helper.check_password(stream, password=password)

    if V_BOX_OPTIONS.vboxnet_type == 'hostonly':
        LOG.info("Pinging controller-0 at: %s...", ip_addr)
        # Remember the budget: the loop counter is 0 by the time we report.
        max_attempts = HostTimeout.NORMAL_OP
        tmout = max_attempts
        while tmout:
            # Ping from machine hosting virtual box to virtual machine
            return_code = subprocess.call(['ping', '-c', '1', ip_addr])
            if return_code == 0:
                break
            tmout -= 1
        else:
            # Both literals must be f-strings, otherwise {ip_addr} is printed as-is.
            raise ConnectionError(
                f"Failed to establish connection in {max_attempts} attempts "
                f"to controller-0 at: {ip_addr}!")
        LOG.info("Ping succeeded!")
def fix_networking(stream, release, password):
    """
    Bounce the OAM interface of a VM whose networking did not come back.

    Works around a Vbox/linux issue where networking sometimes fails to come
    up after a VM is resumed: the link is set down and then up again.

    Args:
        stream: Serial console stream of the VM.
        release (str): "R2" selects "eth0"; otherwise the first OAM device.
        password (str): Password supplied to the sudo prompts.
    """
    nic = "eth0" if release == "R2" else OAM_CONFIG[0]['device']

    LOG.info("Fixing networking ...")
    # (link state, seconds to wait afterwards)
    for link_state, pause in (("down", 1), ("up", 2)):
        serial.send_bytes(
            stream,
            f"sudo /sbin/ip link set {nic} {link_state}",
            expect_prompt=False)
        host_helper.check_password(stream, password=password)
        time.sleep(pause)
def install_controller_0(cont0_stream, menu_select_dict, network_dict):
    """
    Drive the installation of controller-0 over its serial console.

    Steps, in order:
        1. Answer the boot menus through menu_selector.
        2. Wait for the "login:" prompt (retried once within the remaining
           install timeout if the first wait raises).
        3. Change the password on first login.
        4. Disable user logout.
        5. Bring up basic networking.

    Args:
        cont0_stream: Serial console stream of controller-0.
        menu_select_dict (dict): Must hold "setup_type", "securityprofile",
            "lowlatency" and "install_mode"; passed on to menu_selector.
        network_dict (dict): Must hold "ctrlr0_ip" and "gateway_ip"; may hold
            "username" and "password" (None when absent).

    Raises:
        KeyError: If a required dictionary key is missing.
        Exception: Whatever the second wait for "login:" raises.
    """
    login_user = network_dict.get("username")
    login_password = network_dict.get("password")

    LOG.info("Starting installation of controller-0")
    began_at = time.time()
    menu_answers = [
        menu_select_dict[key]
        for key in ("setup_type", "securityprofile", "lowlatency", "install_mode")
    ]
    menu_selector(cont0_stream, *menu_answers)

    try:
        serial.expect_bytes(
            cont0_stream,
            "login:",
            timeout=HostTimeout.INSTALL)
    except Exception as exception:  # pylint: disable=E0012, W0703
        LOG.exception("Connection failed for controller-0 with %s", exception)
        # The installer output sometimes triggers UnicodeDecodeError, so the
        # wait is repeated once with whatever is left of the install timeout.
        LOG.info("So ignore the exception and wait for controller-0 to be installed again.")
        if (time.time() - began_at) < HostTimeout.INSTALL:
            serial.expect_bytes(
                cont0_stream,
                "login:",
                timeout=HostTimeout.INSTALL - (time.time() - began_at))

    LOG.info("Completed installation of controller-0.")

    # Change password on initial login
    time.sleep(2)
    host_helper.change_password(
        cont0_stream,
        username=login_user,
        password=login_password)

    # Disable user logout
    time.sleep(2)
    host_helper.disable_logout(cont0_stream)

    # Setup basic networking
    time.sleep(1)
    setup_networking(
        cont0_stream,
        network_dict["ctrlr0_ip"],
        network_dict["gateway_ip"],
        password=login_password
    )
def delete_lab(labname):
    """
    Power off and delete every VM that belongs to an existing lab.

    Nothing happens when the lab has no VMs. Otherwise the user is asked to
    confirm; declining terminates the program with exit status 1.

    Args:
        labname (str): Name of the lab whose VMs are removed.
    """
    lab_vms = vboxmanage.get_all_vms(labname, option="vms")
    if len(lab_vms) == 0:
        return

    LOG.warning("This will delete lab %s with vms: %s", labname, lab_vms)
    if not yes_no_prompt("Delete lab?"):
        LOG.info("Aborting!")
        sys.exit(1)

    LOG.info("#### Deleting lab %s.", labname)
    LOG.info("VMs in lab: %s.", lab_vms)
    vboxmanage.vboxmanage_controlvms(lab_vms, "poweroff")
    # Short pause between powering off and deleting.
    time.sleep(2)
    vboxmanage.vboxmanage_deletevms(lab_vms)
def get_disk_sizes(comma_list):
    """
    Validate and split the disk sizes taken from the command line.

    Args:
        comma_list (str): Comma separated disk sizes, e.g. "600,26".

    Returns:
        list: The individual entries, still as strings, in the given order.

    Raises:
        ValueError: If any entry is not an integer greater than zero.
    """
    message = "Disk sizes must be a comma separated list of positive integers."
    sizes = comma_list.split(',')
    for size in sizes:
        try:
            # Zero is rejected too: the contract is *positive* integers.
            is_valid = int(size) > 0
        except ValueError:
            is_valid = False
        if not is_valid:
            LOG.error(message)
            raise ValueError(message)
    return sizes
def yes_no_prompt(message):
    """
    Ask the user a yes/no question.

    When the yes-to-all option (V_BOX_OPTIONS.y) is set, the question is
    logged and answered with yes without reading any input.

    Args:
        message (str): Message to be displayed.

    Returns:
        bool: True only if the answer, lower-cased, is exactly 'y'.
    """
    prompt_text = message + " (y/n)"

    if V_BOX_OPTIONS.y is not True:
        LOG.info("%s", prompt_text)
        return input().lower() == 'y'

    LOG.info("Automatically answering 'y' to '%s'", prompt_text)
    return True
def create_port_forward(hostname, network, local_port, guest_port, guest_ip):
    """
    Add a port-forwarding rule to a VirtualBox NAT network.

    If the rule cannot be added, the existing rule using local_port is looked
    up. When none is found the program exits with status 1; otherwise the
    user chooses between replacing that rule and continuing without one.

    Args:
        hostname (str): Name given to the rule (callers pass the VM name).
        network (str): Name of the NAT network.
        local_port (str): The local port number to forward.
        guest_port (str): The port number on the guest to forward to.
        guest_ip (str): The IP address of the guest to forward to.

    Returns:
        None
    """
    added = vboxmanage.vboxmanage_addportforward(
        hostname, local_port, guest_ip, guest_port, network
    )
    if added:
        return

    existing_rule = vboxmanage.vboxmanage_getrulename(network, local_port)
    if not existing_rule:
        LOG.critical(
            "Could not add a port-forwarding rule using port %s, "
            "and could not find any rule already using it. "
            "Check the Nat Network and/or local port.", local_port)
        LOG.error("Aborting!")
        sys.exit(1)

    LOG.warning(
        "Trying to create a port-forwarding rule with port: %s, "
        "but it is already in use with rule name: %s",
        local_port,
        existing_rule)

    if not yes_no_prompt("Rewrite rule?"):
        LOG.info("Ignoring the creation of the port-forward rule and continuing installation!")
        return

    LOG.info("Rewriting portforwarding rule...")
    vboxmanage.vboxmanage_deleteportforward(existing_rule, network)
    vboxmanage.vboxmanage_addportforward(
        hostname, local_port, guest_ip, guest_port, network
    )
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
def create_lab(m_vboxoptions):
    """
    Creates vms using the arguments in vboxoptions.

    For every controller, worker and storage node requested this creates the
    VM, its SATA controller and disks, sets cpu/memory, the serial port and
    the network adapters and, for NAT labs, the SSH / dashboard port
    forwarding rules of the controllers. Finally the install ISO is attached
    to controller-0 on an IDE controller.

    Args:
        m_vboxoptions: Options object; the attributes read are labname,
            controllers, workers, storages, setup_type, vboxnet_name,
            vboxnet_type, nat_cidr, hostiocache, the *_disks / *_disk_sizes
            options, vbox_home_dir, userid, add_nat_interface, iso_location
            and the NAT port / controller IP options.

    Note:
        The program exits with status 1 if the NAT network cannot be created
        or exists with a different CIDR.
    """
    # Pull in node configuration
    node_config = [getattr(Nodes, attr)
                   for attr in dir(Nodes) if not attr.startswith('__')]
    nic_config = [getattr(NICs, attr)
                  for attr in dir(NICs) if not attr.startswith('__')]
    serial_config = [getattr(Serial, attr)
                     for attr in dir(Serial) if not attr.startswith('__')]

    # Create nodes list: controllers first, then workers, then storages.
    nodes_list = []
    for personality, count in (
            ("controller", m_vboxoptions.controllers),
            ("worker", m_vboxoptions.workers),
            ("storage", m_vboxoptions.storages)):
        if count:
            for node_id in range(0, count):
                nodes_list.append(
                    m_vboxoptions.labname + f"-{personality}-{node_id}")

    if m_vboxoptions.vboxnet_name is not None and m_vboxoptions.vboxnet_type == "nat":
        LOG.info('Creating NatNetwork named "%s"', m_vboxoptions.vboxnet_name)
        try:
            return_nat = vboxmanage.vboxmanage_createnatnet(
                m_vboxoptions.vboxnet_name, m_vboxoptions.nat_cidr)
            if not return_nat:
                LOG.warning(
                    'NatNetwork named "%s" exists, but CIDR is different from OAM subnet',
                    m_vboxoptions.vboxnet_name)
                sys.exit(1)
        except subprocess.CalledProcessError as exc:
            LOG.error("Script was interrupted with error: %s", exc)
            sys.exit(1)

    LOG.info("#### We will create the following nodes: %s", nodes_list)
    # pylint: disable=too-many-nested-blocks
    for node in nodes_list:
        LOG.info("#### Creating node: %s", node)
        vboxmanage.vboxmanage_createvm(node, m_vboxoptions.labname)
        vboxmanage.vboxmanage_storagectl(
            node,
            storectl="sata",
            hostiocache=m_vboxoptions.hostiocache)

        # Explicit disk sizes win; otherwise the disk count selects a preset
        # from the node configuration.
        disk_sizes = None
        no_disks = 0
        if "controller" in node:
            if m_vboxoptions.setup_type in [AIO_DX, AIO_SX]:
                node_type = "controller-AIO"
            else:
                node_type = f"controller-{m_vboxoptions.setup_type}"
            if m_vboxoptions.controller_disk_sizes:
                disk_sizes = get_disk_sizes(m_vboxoptions.controller_disk_sizes)
            else:
                no_disks = m_vboxoptions.controller_disks
        elif "worker" in node:
            node_type = "worker"
            if m_vboxoptions.worker_disk_sizes:
                disk_sizes = get_disk_sizes(m_vboxoptions.worker_disk_sizes)
            else:
                no_disks = m_vboxoptions.worker_disks
        elif "storage" in node:
            node_type = "storage"
            if m_vboxoptions.storage_disk_sizes:
                disk_sizes = get_disk_sizes(m_vboxoptions.storage_disk_sizes)
            else:
                no_disks = m_vboxoptions.storage_disks

        for item in node_config:
            if item['node_type'] == node_type:
                vboxmanage.vboxmanage_modifyvm(
                    node,
                    {
                        "cpus": str(item['cpus']),
                        "memory": str(item['memory']),
                    },
                )
                if not disk_sizes:
                    disk_sizes = item['disks'][no_disks]
                vboxmanage.vboxmanage_createmedium(
                    node, disk_sizes,
                    vbox_home_dir=m_vboxoptions.vbox_home_dir)

        # The shared serial config acts as a running counter: controller-0
        # offsets it by its local SSH port and every node then takes the next
        # value.
        if "controller-0" in node:
            serial_config[0]['uartpath'] = (
                serial_config[0]['uartpath']
                + int(m_vboxoptions.nat_controller0_local_ssh_port))
        vboxmanage.vboxmanage_modifyvm(
            node,
            {
                "uartbase": serial_config[0]['uartbase'],
                "uartport": serial_config[0]['uartport'],
                "uartmode": serial_config[0]['uartmode'],
                "uartpath": serial_config[0]['uartpath'],
            },
        )
        serial_config[0]['uartpath'] += 1

        # NIC configuration is keyed by plain "controller", not the AIO variant.
        if "controller" in node:
            node_type = "controller"

        # NOTE(review): last_adapter is counted but never read; it looks
        # intended for the "nicnum" of the extra NAT interface below - confirm.
        last_adapter = 1
        for item in nic_config:
            if item['node_type'] == node_type:
                for adapter in item.keys():
                    if adapter.isdigit():
                        last_adapter += 1
                        # Work on a copy: the NIC config objects are shared by
                        # all nodes of a type, and rewriting 'nic' in place
                        # made the next node fall into the outer else branch
                        # and lose its NAT network name.
                        data = dict(item[adapter])
                        if m_vboxoptions.vboxnet_name != 'none' and data['nic'] == 'hostonly':
                            if m_vboxoptions.vboxnet_type == 'nat':
                                data['nic'] = 'natnetwork'
                                data['natnetwork'] = m_vboxoptions.vboxnet_name
                                data['hostonlyadapter'] = None
                                data['intnet'] = None
                            else:
                                data['hostonlyadapter'] = m_vboxoptions.vboxnet_name
                                data['natnetwork'] = None
                        else:
                            data['natnetwork'] = None
                        vboxmanage.vboxmanage_modifyvm(
                            node,
                            {
                                "nic": data['nic'],
                                "nictype": data['nictype'],
                                "nicpromisc": data['nicpromisc'],
                                "nicnum": int(adapter),
                                "intnet": data['intnet'],
                                "hostonlyadapter": data['hostonlyadapter'],
                                "natnetwork": data['natnetwork'],
                                "prefix": f"{m_vboxoptions.userid}-{m_vboxoptions.labname}",
                            },
                        )

        if m_vboxoptions.add_nat_interface:
            last_adapter += 1
            vboxmanage.vboxmanage_modifyvm(
                node,
                {
                    # "nicnum": adapter, #TODO where this adapter come from? #pylint: disable=fixme
                    "nictype": 'nat',
                },
            )

        # Add port forwarding rules for controllers nat interfaces
        if m_vboxoptions.vboxnet_type == 'nat' and 'controller' in node:
            if 'controller-0' in node:
                create_port_forward(
                    node,
                    m_vboxoptions.vboxnet_name,
                    local_port=m_vboxoptions.nat_controller0_local_ssh_port,
                    guest_port='22',
                    guest_ip=m_vboxoptions.controller0_ip
                )

                # Add port forward rule for the floating active controller
                if m_vboxoptions.setup_type not in [AIO_SX]:
                    create_port_forward(
                        m_vboxoptions.labname + '-controller',
                        m_vboxoptions.vboxnet_name,
                        local_port=m_vboxoptions.nat_controller_floating_ssh_port,
                        guest_port='22',
                        guest_ip=m_vboxoptions.controller_floating_ip
                    )

                # Add port forward rule for the StarlingX Dashboard
                if m_vboxoptions.setup_type in [AIO_SX]:
                    ip_addr = m_vboxoptions.controller0_ip
                else:
                    ip_addr = m_vboxoptions.controller_floating_ip
                create_port_forward(
                    m_vboxoptions.labname + "-horizon-dashbord",
                    m_vboxoptions.vboxnet_name,
                    local_port=m_vboxoptions.horizon_dashboard_port,
                    guest_port='8080',
                    guest_ip=ip_addr
                )
            elif 'controller-1' in node:
                create_port_forward(
                    node,
                    m_vboxoptions.vboxnet_name,
                    local_port=m_vboxoptions.nat_controller1_local_ssh_port,
                    guest_port='22',
                    guest_ip=m_vboxoptions.controller1_ip
                )

    # Attach the install ISO to controller-0 through an IDE DVD drive.
    ctrlr0 = m_vboxoptions.labname + '-controller-0'
    vboxmanage.vboxmanage_storagectl(
        ctrlr0,
        storectl="ide",
        hostiocache=m_vboxoptions.hostiocache)
    vboxmanage.vboxmanage_storageattach(
        ctrlr0,
        {
            "storectl": "ide",
            "storetype": "dvddrive",
            "disk": m_vboxoptions.iso_location,
            "port_num": "1",
            "device_num": "0",
        },
    )
def override_ansible_become_pass():
    """
    Write a copy of the Ansible localhost.yml carrying the passwords given
    on the command line.

    'admin_password' is set to V_BOX_OPTIONS.password and
    'ansible_become_pass' to V_BOX_OPTIONS.sysadmin_password. The copy is
    written to a freshly created temporary file instead of a fixed,
    predictable path, because it contains credentials. The file is not
    deleted here; the caller uploads it.

    Returns:
        str: Path of the temporary file holding the modified configuration.

    Note:
        The program exits with status 1 if the source file is missing, cannot
        be parsed, or the modified content cannot be dumped.
    """
    config_path = V_BOX_OPTIONS.ansible_controller_config

    yaml = ruamel.yaml.YAML()
    yaml.preserve_quotes = True
    yaml.explicit_start = True

    # Load Ansible config file
    try:
        with open(config_path, encoding="utf-8") as stream:
            loaded = yaml.load(stream)
    except FileNotFoundError:
        print(f'\n Ansible configuration file not found in {config_path} \n')
        sys.exit(1)
    except ruamel.yaml.YAMLError:
        print("\n Error while parsing YAML file \n")
        sys.exit(1)

    # modify the password with the one passed on the python call
    loaded['admin_password'] = V_BOX_OPTIONS.password
    loaded['ansible_become_pass'] = V_BOX_OPTIONS.sysadmin_password

    # Save it again. NamedTemporaryFile creates the file with an
    # unpredictable name, readable by its owner only.
    try:
        with tempfile.NamedTemporaryFile(
                mode='w', encoding="utf-8",
                prefix="localhost-", suffix=".yml",
                delete=False) as stream:
            new_file = stream.name
            yaml.dump(loaded, stream)
    except ruamel.yaml.YAMLError as exc:
        # A half-written file must not be uploaded as if it were valid.
        print(exc)
        sys.exit(1)
    return new_file
def get_hostnames(ignore=None, personalities=('controller', 'storage', 'worker')):
    """
    Based on the number of nodes defined on the command line, construct
    the hostnames of each node.

    Args:
        ignore: Optional collection of VM names to leave out.
        personalities: Node types to include (any container supporting `in`).

    Returns:
        dict: VM name ("<labname>-<type>-<n>") mapped to hostname
        ("<type>-<n>"), controllers first, then workers, then storages.
    """
    hostnames = {}
    # (node type, name of the option holding how many of them were requested)
    for personality, count_option in (
            ('controller', 'controllers'),
            ('worker', 'workers'),
            ('storage', 'storages')):
        count = getattr(V_BOX_OPTIONS, count_option)
        if not count or personality not in personalities:
            continue
        for node_id in range(0, count):
            node_name = V_BOX_OPTIONS.labname + f"-{personality}-{node_id}"
            if ignore and node_name in ignore:
                continue
            hostnames[node_name] = f"{personality}-{node_id}"
    return hostnames
def get_personalities(ignore=None):
    """
    Map every VM name of the lab to its node type.

    Args:
        ignore: Optional collection of VM names to leave out.

    Returns:
        dict: VM name ("<labname>-<type>-<n>") mapped to 'controller',
        'worker' or 'storage', in that order of insertion.
    """
    node_types = {}
    for kind, count_option in (
            ('controller', 'controllers'),
            ('worker', 'workers'),
            ('storage', 'storages')):
        how_many = getattr(V_BOX_OPTIONS, count_option)
        if not how_many:
            continue
        for index in range(0, how_many):
            vm_name = V_BOX_OPTIONS.labname + f"-{kind}-{index}"
            if not (ignore and vm_name in ignore):
                node_types[vm_name] = kind
    return node_types
def create_host_bulk_add():
    """
    Build the XML consumed by 'system host-bulk-add'.

    Every VM of the lab except controller-0 gets one <host> element holding
    its hostname, personality and management MAC (the 'macaddress2' entry of
    the VM info), for example:

        <?xml version="1.0" encoding="UTF-8" ?>
        <hosts>
         <host>
         <hostname>worker-0</hostname>
         <personality>worker</personality>
         <mgmt_mac>08:00:27:47:68:52</mgmt_mac>
         </host>
        </hosts>

    Returns:
        str: The XML document.

    Raises:
        ValueError: If controller-0 is not among the lab VMs.
        KeyError: If a VM has no hostname, personality or MAC recorded.
    """
    LOG.info("Creating content for 'system host-bulk-add'")
    lab_vms = vboxmanage.get_all_vms(V_BOX_OPTIONS.labname, option="vms")
    ctrl0 = V_BOX_OPTIONS.labname + "-controller-0"
    lab_vms.remove(ctrl0)

    # Get management macs
    mgmt_macs = {}
    for vm_name in lab_vms:
        for raw_line in vboxmanage.vboxmanage_showinfo(vm_name).splitlines():
            fields = raw_line.split(b'=')
            if len(fields) != 2:
                # Only plain key=value lines are of interest.
                continue
            key, value = fields
            if key == b'macaddress2':
                bare_mac = value.decode('utf-8').replace("\"", "")
                # Do for e.g.: 080027C95571 -> 08:00:27:C9:55:71
                mgmt_macs[vm_name] = ":".join(re.findall(r"..", bare_mac))

    # Get personalities
    personalities = get_personalities(ignore=[ctrl0])
    hostnames = get_hostnames(ignore=[ctrl0])

    # Create file
    xml_parts = ['<?xml version="1.0" encoding="UTF-8" ?>\n'
                 '<hosts>\n']
    for vm_name in lab_vms:
        xml_parts.append(' <host>\n')
        xml_parts.append(f' <hostname>{hostnames[vm_name]}</hostname>\n')
        xml_parts.append(f' <personality>{personalities[vm_name]}</personality>\n')
        xml_parts.append(f' <mgmt_mac>{mgmt_macs[vm_name]}</mgmt_mac>\n')
        xml_parts.append(' </host>\n')
    xml_parts.append('</hosts>\n')
    return "".join(xml_parts)
def wait_for_hosts(
        ssh_client, hostnames, status,
        timeout=HostTimeout.HOST_INSTALL, interval=60):
    """
    Poll 'system host-list' until every given host reports the expected
    status.

    Args:
        ssh_client: SSH client used to run the command.
        hostnames (list): Hosts to wait for. Hosts are removed from this list
            in place as they reach the status, so it is empty on success.
        status (str): Text that must appear on the host's line of the listing.
        timeout: Seconds to wait overall before giving up.
        interval: Seconds to sleep between two polls.

    Raises:
        TimeoutError: If some hosts have not reached the status in time.
    """
    LOG.info("Waiting for hosts %s to be in status %s", hostnames, status)
    start_time = time.time()
    while hostnames:
        # Honour the caller's timeout rather than the module-wide default.
        if (time.time() - start_time) > timeout:
            LOG.error("VMs not booted in %s, aborting: %s", timeout, hostnames)
            raise TimeoutError(f"VMs failed to go {status}!")

        # Get host list
        host_statuses, _, _ = run_ssh_cmd(
            ssh_client, 'source /etc/platform/openrc; system host-list',
            timeout=HostTimeout.NORMAL_OP
        )
        # Drop the first and last line of the output.
        host_statuses = host_statuses[1:-1]

        # Iterate over a snapshot: removing from the list being iterated
        # silently skips the element following each removal.
        for host in list(hostnames):
            if any(host in host_status and status in host_status
                   for host_status in host_statuses):
                hostnames.remove(host)

        if hostnames:
            LOG.warning("Hosts not %s: %s", status, hostnames)
            LOG.info("Waiting %s sec before re-checking host status.", interval)
            time.sleep(interval)
# Login states of the serial console and the "mode" values accepted by
# run_ssh_cmd() / set_serial_prompt_mode().
CONSOLE_UNKNOWN_MODE = 'disconnected'
CONSOLE_USER_MODE = 'user'
CONSOLE_ROOT_MODE = 'root'
# Current state of the serial console; updated by set_serial_prompt_mode().
SERIAL_CONSOLE_MODE = CONSOLE_UNKNOWN_MODE
def run_ssh_cmd(
        ssh_client, cmd, timeout=5,
        log_output=True, mode=CONSOLE_USER_MODE):
    """
    Execute an arbitrary command on a target.

    Args:
        ssh_client: Connected paramiko SSH client.
        cmd (str): Command line to run.
        timeout: Channel timeout in seconds applied to blocking reads/writes.
        log_output (bool): Log every output line as it is read.
        mode (str): CONSOLE_ROOT_MODE runs the command through sudo and feeds
            V_BOX_OPTIONS.password to its prompt.

    Returns:
        tuple: (stdout lines, stderr lines, exit status). In root mode the
        first two stdout lines (sudo prompt and password echo) are dropped.
    """
    if mode == CONSOLE_ROOT_MODE:
        LOG.info(">>>>>")
        cmd = f"sudo {cmd}"
    LOG.info("#### Executing remote command:\n$ %s\n", cmd)

    # timeout must be passed by keyword: the second positional parameter of
    # exec_command() is bufsize, so the value used to be taken as a buffer
    # size and no timeout was ever set.
    stdin, stdout, stderr = ssh_client.exec_command(cmd, timeout=timeout, get_pty=True)
    if mode == CONSOLE_ROOT_MODE:
        stdin.write(f'{V_BOX_OPTIONS.password}\n')
        stdin.flush()

    stdout_lines = []
    # NOTE(review): reading stops as soon as the exit status is available, so
    # output still buffered at that point is not collected - confirm callers
    # do not need it.
    while True:
        if stdout.channel.exit_status_ready():
            break
        stdout_lines.append(stdout.readline().rstrip('\n'))
        if log_output:
            LOG.info("|%s", stdout_lines[-1])

    stderr_lines = stderr.readlines()
    if log_output and stderr_lines:
        LOG.info("stderr:|\n%s", "".join(stderr_lines))

    return_code = stdout.channel.recv_exit_status()
    LOG.info("Return code: %s\n", return_code)

    if mode == CONSOLE_ROOT_MODE:
        # Cut sudo's password echo and "Password:" string from output
        stdout_lines = stdout_lines[2:]
    return stdout_lines, stderr_lines, return_code
def run_ssh_cmd_list(  # pylint: disable=too-many-arguments
        ssh_client, cmd_list, timeout=5,
        log_output=True, mode=CONSOLE_USER_MODE, scale=1):
    """
    Run each command of a list on the StarlingX VM, one second apart,
    wrapping every execution with the fault_tolerant decorator (see that
    decorator for the retry details).

    Commands containing 'system ' or 'dcmanager ' are prefixed with
    'source /etc/platform/openrc; ' first.

    Args:
        ssh_client: SSH client handed to run_ssh_cmd.
        cmd_list: Iterable of command strings.
        timeout, log_output, mode: Forwarded to run_ssh_cmd.
        scale: Forwarded to fault_tolerant.
    """
    openrc_triggers = ('system ', 'dcmanager ')
    for command in cmd_list:
        if any(trigger in command for trigger in openrc_triggers):
            command = 'source /etc/platform/openrc; ' + command

        @fault_tolerant(scale=scale)
        def run_ssh_cmd_ft(*args, **kwargs):  # pylint: disable=unused-argument
            _, _, exit_status = run_ssh_cmd(
                ssh_client,
                command,  # pylint: disable=cell-var-from-loop
                timeout=timeout,
                log_output=log_output,
                mode=mode
            )
            return exit_status

        run_ssh_cmd_ft(cmd=command)
        # Give 1s before running the next command on the list
        time.sleep(1)
def set_serial_prompt_mode(stream, mode):
    """
    Bring the serial console to the requested login mode.

    To make sure that we are at the correct prompt, we first logout, then
    login back again. Note that logging out also helps fixing some problems
    with passwords not getting accepted in some cases (prompt just hangs
    after inserting password).

    Args:
        stream: Serial console stream.
        mode (str): CONSOLE_USER_MODE or CONSOLE_ROOT_MODE.

    Raises:
        RuntimeError: If no login prompt shows up after logging out twice.
        ValueError: If the login with V_BOX_OPTIONS credentials fails.
    """
    global SERIAL_CONSOLE_MODE  # pylint: disable=global-statement

    if SERIAL_CONSOLE_MODE == mode:
        LOG.info("Serial console prompt already set to '%s' mode.", mode)
        return

    if SERIAL_CONSOLE_MODE != CONSOLE_USER_MODE:
        # Set mode to user first, even if we later go to root
        serial.send_bytes(stream, "exit\n", expect_prompt=False)
        if serial.expect_bytes(stream, "ogin:", fail_ok=True, timeout=4):
            # No login prompt yet: one more level to exit from.
            serial.send_bytes(stream, "exit\n", expect_prompt=False)
            if serial.expect_bytes(stream, "ogin:", fail_ok=True, timeout=4):
                LOG.error("Expected login prompt, connect to console, "
                          "stop any running processes and log out.")
                raise RuntimeError("Failure getting login prompt on serial console!")
        serial.send_bytes(
            stream,
            V_BOX_OPTIONS.username,
            prompt="assword:",
            timeout=30)
        if serial.send_bytes(
                stream, V_BOX_OPTIONS.password, prompt="~$", fail_ok=True, timeout=30
        ):
            raise ValueError("Login failure, invalid password?")
        if mode == CONSOLE_USER_MODE:
            serial.send_bytes(stream, "source /etc/platform/openrc\n",
                              timeout=30, prompt='keystone')
        SERIAL_CONSOLE_MODE = CONSOLE_USER_MODE

    if mode == CONSOLE_ROOT_MODE and SERIAL_CONSOLE_MODE != CONSOLE_ROOT_MODE:
        serial.send_bytes(stream, 'sudo su -', expect_prompt=False)
        host_helper.check_password(stream, password=V_BOX_OPTIONS.password)
        serial.send_bytes(
            stream,
            "cd /home/wrsroot",
            prompt="/home/wrsroot# ",
            timeout=30)
        serial.send_bytes(stream, "source /etc/platform/openrc\n",
                          timeout=30, prompt='keystone')
        SERIAL_CONSOLE_MODE = CONSOLE_ROOT_MODE

    # Keep the shell from logging out on inactivity.
    serial.send_bytes(stream, "export TMOUT=0", timeout=10, prompt='keystone')
    # also reset OAM networking?
def serial_prompt_mode(mode):
    """
    A decorator function that sets the serial console login prompt to the
    specified mode before calling the decorated function.

    The decorated function must be called with a 'stream' keyword argument
    (connect_to_serial provides one). If switching the mode fails it is
    retried once; a second failure propagates.

    Args:
        mode (str): The login prompt mode to set. Valid values are "user"
            and "root".

    Returns:
        function: A decorator that sets the serial console login prompt to
        the specified mode.
    """
    def serial_prompt_decorator(func):
        @functools.wraps(func)
        def serial_prompt_wrapper(*args, **kwargs):
            try:
                set_serial_prompt_mode(kwargs['stream'], mode)
            except Exception:  # pylint: disable=broad-except
                # Exception, not a bare except: Ctrl-C and sys.exit() must
                # not trigger another login attempt.
                LOG.warning("Serial console login as '%s' failed. Retrying once.", mode)
                set_serial_prompt_mode(kwargs['stream'], mode)
            return func(*args, **kwargs)
        return serial_prompt_wrapper
    return serial_prompt_decorator
def _connect_to_serial(virtual_machine=None):
    """
    Open the serial console of a VM.

    Args:
        virtual_machine (str, optional): VM name; "<labname>-controller-0"
            when omitted or empty.

    Returns:
        tuple: (socket, streamexpect stream wrapping that socket).
    """
    vm_name = virtual_machine or (V_BOX_OPTIONS.labname + "-controller-0")
    # The serial port is derived from controller-0's local SSH port.
    serial_port = 10000 + int(V_BOX_OPTIONS.nat_controller0_local_ssh_port)
    sock = serial.connect(vm_name, serial_port, getpass.getuser())
    stream = streamexpect.wrap(sock, echo=True, close_stream=False)
    return sock, stream
def connect_to_serial(func):
    """
    A decorator function that establishes a connection to the serial console
    before calling the decorated function.

    The stream is handed to the decorated function as the 'stream' keyword
    argument.

    Args:
        func (function): The function to be decorated.

    Returns:
        function: A wrapper that connects to the serial console, calls the
        decorated function, and then disconnects from the serial console.
    """
    @functools.wraps(func)
    def connect_to_serial_wrapper(*args, **kwargs):
        sock = None
        try:
            sock, kwargs['stream'] = _connect_to_serial()
            return func(*args, **kwargs)
        finally:
            # Nothing to disconnect if the connection itself failed; calling
            # disconnect(None) could mask the original error.
            if sock is not None:
                serial.disconnect(sock)
    return connect_to_serial_wrapper
def _connect_to_ssh(node='floating'):
    """
    Open an SSH connection to a node and remember it in SSH_CONNECTIONS.

    The forwarded port is probed with nc and any stale known_hosts entry for
    the target is removed first. Authentication failures are retried after
    the delays in HostTimeout.REATTEMPT_DELAY; any other failure is logged
    and re-raised immediately.

    Args:
        node (str): 'floating', 'controller-0' or 'controller-1'.

    Returns:
        paramiko.SSHClient: The connected client.

    Raises:
        paramiko.AuthenticationException: If every attempt is rejected.
        Exception: Any other connection error.
    """
    # Get ip and port for ssh on floating ip
    ip_addr, port = get_ssh_ip_and_port(node)

    LOG.info("Testing VM port")
    cmd = f'nc -vz localhost {port}'
    exec_cmd(cmd)

    # Remove ssh key
    # For hostonly adapter we remove port 22 of controller ip
    # for nat interfaces we remove the specific port on 127.0.0.1 as
    # we have port forwarding enabled.
    # pylint: disable=R0801
    if V_BOX_OPTIONS.vboxnet_type == 'nat':
        keygen_arg = f"[127.0.0.1]:{port}"
    else:
        keygen_arg = ip_addr
    cmd = f'ssh-keygen -f "/home/{getpass.getuser()}/.ssh/known_hosts" -R {keygen_arg} 2>/dev/null'
    exec_cmd(cmd)

    # Connect to ssh
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    attempt = 1
    reattempt_delay = HostTimeout.REATTEMPT_DELAY
    max_attempts = len(reattempt_delay)
    while True:
        # The try sits inside the loop so that a handled authentication
        # failure leads to another connect() instead of falling through and
        # returning a client that never logged in.
        try:
            ssh.connect(
                ip_addr,
                port=port,
                username=V_BOX_OPTIONS.username,
                password=V_BOX_OPTIONS.password,
                look_for_keys=False,
                allow_agent=False,
                timeout=HostTimeout.NORMAL_OP,
                auth_timeout=HostTimeout.NORMAL_OP,
            )
            break
        except paramiko.AuthenticationException as exc:
            if attempt >= max_attempts:
                LOG.error(
                    "#### Failed SSH Authentication [attempt: %s/%s]\nError: %s",
                    attempt, max_attempts, repr(exc)
                )
                raise
            LOG.warning(
                "#### Failed SSH Authentication [attempt: %s/%s]",
                attempt, max_attempts
            )
            LOG.info(
                "Trying again after %s ... ",
                kpi.get_formated_time(reattempt_delay[attempt])
            )
            time.sleep(reattempt_delay[attempt])
            attempt = attempt + 1
        except Exception as exc:
            LOG.error("#### Failed SSH connection\nError: %s", repr(exc))
            raise

    SSH_CONNECTIONS[node] = ssh
    return ssh
def ssh_handler(node='floating'):
    """
    Return a usable SSH client for a node.

    A client already stored in SSH_CONNECTIONS is reused while its transport
    exists and is active; otherwise a new connection is created.

    Args:
        node (str): Key of the connection ('floating', 'controller-0', ...).

    Returns:
        The SSH client for the node.
    """
    try:
        cached_client = SSH_CONNECTIONS[node]
        transport = cached_client.get_transport()
        if transport is not None and transport.is_active():
            return cached_client
        raise InvalidSSHConnection()
    except (InvalidSSHConnection, KeyError):
        # Missing or dead connection: build a fresh one.
        return _connect_to_ssh(node)
def connect_to_ssh(node='floating'):
    """
    Provides the node argument to the Connect-to-SSH decorator.

    Args:
        - node: target IP to connect: controller-0, controller-1, or floating IP

    Returns:
        - Connect-to-SSH decorator
    """
    def connect_to_ssh_decorator(func):
        """
        Decorator to establish a SSH connection

        Args:
            - func: The function to be decorated.

        Returns:
            - Connect-to-SSH wrapper
        """
        # wraps() keeps the decorated function's name and docstring visible.
        @functools.wraps(func)
        def connect_to_ssh_wrapper(*args, **kwargs):
            """
            Establishes a SSH connection before executing the decorated function
            Provides the ssh_client argument to the decorated function

            Returns: return value of the decorated function
            """
            ssh = ssh_handler(node)
            kwargs['ssh_client'] = ssh
            return func(*args, **kwargs)
        return connect_to_ssh_wrapper
    return connect_to_ssh_decorator
def close_ssh_connections(ssh_clients: dict):
    """
    Close every SSH client held in the given mapping.

    Args:
        ssh_clients (dict): Mapping whose values are SSH clients; None values
            are skipped.
    """
    open_clients = (client for client in ssh_clients.values() if client is not None)
    for open_client in open_clients:
        open_client.close()
def stage_test_success():
    """Test stage that always succeeds: it only logs that it is executing."""
    LOG.info("Executing stage_test_success")
def stage_test_fail():
    """
    Test stage that always fails: logs that it is executing, then raises.

    Raises:
        - RuntimeError: Always.
    """
    # Log this stage's own name; it used to log stage_test_success.
    LOG.info("Executing stage_test_fail")
    raise RuntimeError("exception as of stage_test_fail")
def stage_create_lab():
    """
    Wrapper function for deleting an existing virtual lab and creating a new one
    using `vboxoptions`.

    Any VMs already belonging to V_BOX_OPTIONS.labname are removed first
    (delete_lab asks for confirmation), then the lab is created from
    V_BOX_OPTIONS.
    """
    # Remove any previous VMs of this lab before creating the new ones.
    delete_lab(V_BOX_OPTIONS.labname)
    create_lab(V_BOX_OPTIONS)
def stage_install_controller0():
    """
    Starts the `controller-0` VM, establishes a serial connection to it, and
    installs the OS on it using the `install_controller_0` function with the
    parameters specified in `vboxoptions`.

    The serial connection is closed even when the installation fails.

    Args:
        - None

    Raises:
        - AssertionError: If `controller-0` is not in the list of available VMs.
    """
    node_list = vboxmanage.get_all_vms(V_BOX_OPTIONS.labname, option="vms")
    LOG.info("Found nodes: %s", node_list)

    ctrlr0 = V_BOX_OPTIONS.labname + "-controller-0"
    # Raised explicitly: an assert statement is stripped when running with -O.
    if ctrlr0 not in node_list:
        raise AssertionError("controller-0 not in vm list. Stopping installation.")

    vboxmanage.vboxmanage_startvm(ctrlr0, V_BOX_OPTIONS.headless)

    sock, cont0_stream = _connect_to_serial(ctrlr0)
    try:
        install_controller_0(
            cont0_stream,
            menu_select_dict={
                "setup_type": V_BOX_OPTIONS.setup_type,
                "securityprofile": V_BOX_OPTIONS.securityprofile,
                "lowlatency": V_BOX_OPTIONS.lowlatency,
                "install_mode": V_BOX_OPTIONS.install_mode,
            },
            network_dict={
                "ctrlr0_ip": V_BOX_OPTIONS.controller0_ip,
                "gateway_ip": V_BOX_OPTIONS.vboxnet_ip,
                "username": V_BOX_OPTIONS.username,
                "password": V_BOX_OPTIONS.password
            }
        )
    finally:
        # Do not leak the console socket if the installation raises.
        serial.disconnect(sock)
@connect_to_serial
def stage_config_controller(stream):  # pylint: disable=too-many-locals
    """
    Upload the Ansible configuration to controller-0 and run
    config_controller on it.

    The localhost.yml is first rewritten with the passwords from the command
    line, then sent over SFTP to the user's home directory on controller-0.
    After config_controller has run, the stage waits 120 seconds and, for
    AIO-SX setups, updates the platform CPUs of controller-0.

    Args:
        stream (obj): Serial console stream (supplied by connect_to_serial).
    """
    # Floating ip is not yet configured, so talk to controller-0 directly.
    target_ip, target_port = get_ssh_ip_and_port('controller-0')

    # Update localhost.yml with system password
    local_config = override_ansible_become_pass()

    # Send Ansible configuration file to VM
    LOG.info("Copying Ansible configuration file")
    remote_config = f'/home/{V_BOX_OPTIONS.username}/localhost.yml'
    sftp_target = {
        "remote_host": target_ip,
        "remote_port": target_port,
        "username": V_BOX_OPTIONS.username,
        "password": V_BOX_OPTIONS.password
    }
    sftp_send(local_config, remote_config, sftp_target)

    # Run config_controller
    LOG.info("#### Running config_controller")
    install_lab.config_controller(stream, V_BOX_OPTIONS.password)

    # Wait for services to stabilize
    LOG.info("Waiting 120s for services to stabilize.")
    time.sleep(120)

    if V_BOX_OPTIONS.setup_type == AIO_SX:
        # Increase AIO responsiveness by allocating more cores to platform
        install_lab.update_platform_cpus(stream, 'controller-0')
def get_ssh_ip_and_port(node='floating'):
    """
    Return the IP address and port to use for an SSH connection to a node.

    With a NAT network the address is always 127.0.0.1 and the port is the
    forwarded local port of the node; otherwise the node's own address is
    returned together with port 22. On AIO-SX the "floating" node resolves to
    controller-0.

    Args:
        node (str, optional): The node to get the IP address and port for.
            Valid values are "floating" (default), "controller-0", and "controller-1".

    Returns:
        tuple: A tuple containing the IP address and port of the specified node.

    Raises:
        ValueError: If an undefined node is specified.
    """
    over_nat = V_BOX_OPTIONS.vboxnet_type == 'nat'

    if node == 'floating':
        simplex = V_BOX_OPTIONS.setup_type in [AIO_SX]
        target = 'controller-0' if simplex else 'floating'
    elif node in ('controller-0', 'controller-1'):
        target = node
    else:
        raise ValueError(f"Undefined node '{node}'")

    if over_nat:
        port_option = {
            'floating': 'nat_controller_floating_ssh_port',
            'controller-0': 'nat_controller0_local_ssh_port',
            'controller-1': 'nat_controller1_local_ssh_port',
        }[target]
        return '127.0.0.1', getattr(V_BOX_OPTIONS, port_option)

    address_option = {
        'floating': 'controller_floating_ip',
        'controller-0': 'controller0_ip',
        'controller-1': 'controller1_ip',
    }[target]
    return getattr(V_BOX_OPTIONS, address_option), 22
def stage_rsync_config():
    """
    Copy the local configuration directories to the home directory on controller-0.

    `config_files_dir` is sent with symbolic links followed and
    `config_files_dir_dont_follow_links` is sent without following them. When
    neither option is set a warning is logged and nothing is copied.

    Args:
        None.

    Returns:
        None.
    """
    follow_dir = V_BOX_OPTIONS.config_files_dir
    no_follow_dir = V_BOX_OPTIONS.config_files_dir_dont_follow_links
    if not (follow_dir or no_follow_dir):
        LOG.warning("No rsync done! Please set config-files-dir "
                    "and/or config-files-dir-dont-follow-links")
        return

    # SSH endpoint of controller-0
    ip_addr, port = get_ssh_ip_and_port('controller-0')

    # Copy config files to controller, one transfer per configured directory
    for local_path, follow_links in ((follow_dir, True), (no_follow_dir, False)):
        if not local_path:
            continue
        send_dir(
            {
                "source": local_path,
                "remote_host": ip_addr,
                "remote_port": port,
                "destination": '/home/' + V_BOX_OPTIONS.username + '/',
                "username": V_BOX_OPTIONS.username,
                "password": V_BOX_OPTIONS.password,
                "follow_links": follow_links
            }
        )
@connect_to_serial
@serial_prompt_mode(CONSOLE_USER_MODE)
def _run_lab_setup_serial(stream):
    """
    Run lab_setup.sh over the serial console and check that it returned 0.

    Args:
        stream (obj): Serial console stream.
    """
    # One " -f <file>" argument per configured lab setup file
    conf_args = "".join(f" -f {cfg_file}" for cfg_file in V_BOX_OPTIONS.lab_setup_conf)

    serial.send_bytes(
        stream,
        f"sh lab_setup.sh {conf_args}",
        timeout=HostTimeout.LAB_INSTALL,
        prompt='keystone')

    LOG.info("Lab setup execution completed. Checking if return code is 0.")
    serial.send_bytes(
        stream,
        'echo "Return code: [$?]"',
        timeout=3,
        prompt='Return code: [0]')
@connect_to_ssh()
def _run_lab_setup(ssh_client):
    """
    Run lab_setup.sh over SSH with every configured --lab-setup-conf file.

    Args:
        ssh_client (obj): SSH client connection to execute remote commands.

    Raises:
        RuntimeError: If lab_setup.sh exits with a non-zero code.
    """
    # One " -f <file>" argument per configured lab setup file; the serial
    # variant of this stage passes the same arguments.
    conf_str = "".join(f" -f {cfg_file}" for cfg_file in V_BOX_OPTIONS.lab_setup_conf)

    command = (
        'source /etc/platform/openrc; '
        'export PATH="$PATH:/usr/local/bin"; '
        'export PATH="$PATH:/usr/bin"; '
        'export PATH="$PATH:/usr/local/sbin"; '
        'export PATH="$PATH:/usr/sbin"; '
        f'sh lab_setup.sh{conf_str}'
    )

    _, _, exitcode = run_ssh_cmd(ssh_client, command, timeout=HostTimeout.LAB_INSTALL)
    if exitcode != 0:
        msg = f"Lab setup failed, expecting exit code of 0 but got {exitcode}."
        LOG.error(msg)
        raise RuntimeError(msg)
def stage_lab_setup():
    """
    Stage entry point for lab setup.

    Delegates to `_run_lab_setup`; its `ssh_client` argument is supplied by the
    `connect_to_ssh` decorator, hence the call without arguments.
    """
    _run_lab_setup()  # pylint: disable=no-value-for-parameter
@connect_to_ssh('controller-0')
def stage_setup_controller_0(ssh_client):
    """Provision controller-0 networking and OSD storage"""

    def run_batch(batch):
        # Every batch of this stage runs with the same client and timeout
        run_ssh_cmd_list(
            ssh_client,
            batch,
            timeout=HostTimeout.NORMAL_OP
        )

    try:
        # The CLI commands below are executed on controller-0
        LOG.info("#### Display system info")
        run_batch([
            r'source /etc/platform/openrc;',
            r'system show;',
        ])

        LOG.info("#### Configure OAM and MGMT networks")
        oam_if = OAM_CONFIG[0]['device']
        batch = [
            f'system host-if-modify controller-0 {oam_if} -c platform;',
            f'system interface-network-assign controller-0 {oam_if} oam;',
        ]
        if V_BOX_OPTIONS.setup_type not in [AIO_SX]:
            # Outside AIO-SX, detach the networks from "lo" and move
            # mgmt/cluster-host to the management interface.
            mgmt_if = MGMT_CONFIG[0]['device']
            batch.extend([
                r'system host-if-modify controller-0 lo -c none;',
                (
                    r'IFNET_UUIDS=$('
                    r' system interface-network-list controller-0 '
                    r""" | awk '{if ($6=="lo") print $4;}');"""
                    r'for UUID in ${IFNET_UUIDS}; do '
                    r' system interface-network-remove ${UUID};'
                    r'done;'
                ),
                f'system host-if-modify controller-0 {mgmt_if} -c platform;',
                f'system interface-network-assign controller-0 {mgmt_if} mgmt;',
                f'system interface-network-assign controller-0 {mgmt_if} cluster-host;',
            ])
        run_batch(batch)

        LOG.info("#### Provision Ceph OSD")
        if V_BOX_OPTIONS.setup_type in [AIO_SX, AIO_DX]:
            batch = [
                r'system storage-backend-add ceph --confirmed;',
                r'system host-disk-list controller-0;',
                (
                    r'system host-disk-list controller-0 '
                    r" | awk '/\/dev\/sdb/{print $2}' "
                    r' | xargs -i system host-stor-add controller-0 {};'
                ),
                r'system host-stor-list controller-0;',
            ]
        elif V_BOX_OPTIONS.setup_type in [STANDARD, STORAGE]:
            batch = [
                r'system storage-backend-add ceph --confirmed;',
            ]
        run_batch(batch)

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_SETUP_CONTROLLER_0)
        raise
@connect_to_ssh('controller-0')
@connect_to_serial
def stage_unlock_controller0(stream, ssh_client):
    """
    Unlocks the controller-0 node and waits for it to reboot.

    The unlock is issued over SSH; the reboot is then followed on the serial
    console until the login prompt appears. Afterwards the recorded serial
    console mode is reset so that the next serial stage logs in again.

    Args:
    - stream (obj): Serial stream to send and receive data
    - ssh_client (obj): SSH client connection to execute remote commands

    Returns:
    None.
    """
    # Without this declaration the assignment at the end of the function only
    # binds a local variable and the module-level console mode is never reset.
    global SERIAL_CONSOLE_MODE  # pylint: disable=global-statement

    LOG.info("#### Unlocking controller-0")
    run_ssh_cmd(ssh_client,
                'source /etc/platform/openrc; system host-unlock controller-0',
                timeout=HostTimeout.CONTROLLER_UNLOCK)

    LOG.info("#### Waiting for controller-0 to reboot")
    serial.expect_bytes(
        stream,
        'login:',
        timeout=HostTimeout.CONTROLLER_UNLOCK)

    LOG.info("Waiting 120s for services to activate.")
    time.sleep(120)

    # Make sure we login again, after reboot we are not logged in.
    SERIAL_CONSOLE_MODE = CONSOLE_UNKNOWN_MODE
@connect_to_serial
@serial_prompt_mode(CONSOLE_USER_MODE)
def stage_unlock_controller0_serial(stream):
    """
    Unlock controller-0 through the serial console and wait for its reboot.

    When `host_helper.unlock_host` returns a true value the stage logs that
    there is nothing to do and ends. Otherwise it waits for the login prompt,
    sleeps 120 seconds and resets the recorded serial console mode.

    Args:
    - stream (stream object): The serial console stream.

    Returns:
    None.
    """
    global SERIAL_CONSOLE_MODE  # pylint: disable=global-statement

    nothing_to_do = host_helper.unlock_host(stream, 'controller-0')
    if nothing_to_do:
        LOG.info("Host is unlocked, nothing to do. Exiting stage.")
        return

    serial.expect_bytes(stream, 'login:', timeout=HostTimeout.CONTROLLER_UNLOCK)

    LOG.info("Waiting 120s for services to activate.")
    time.sleep(120)

    # After reboot we are not logged in, so force a new login next time
    SERIAL_CONSOLE_MODE = CONSOLE_UNKNOWN_MODE
@connect_to_ssh()
def stage_install_nodes(ssh_client):
    """
    Install nodes in the environment using SSH.

    A host_bulk_add.xml file is generated and uploaded to controller-0, applied
    with `system host-bulk-add`, and every powered-off VM of the lab is started.
    The stage then waits for all hosts except controller-0 to become online.

    Args:
    - ssh_client (paramiko SSH client object): The SSH client to use for connecting
    to the environment.

    Returns:
    None.
    """
    try:
        # Create and transfer host_bulk_add.xml to ctrl-0
        host_xml = create_host_bulk_add()
        LOG.info("host_bulk_add.xml content:\n%s", host_xml)

        # Send file to controller
        destination = "/home/" + V_BOX_OPTIONS.username + "/host_bulk_add.xml"
        # Same NAT / host-only address selection as every other stage
        ip_addr, port = get_ssh_ip_and_port('controller-0')
        with tempfile.NamedTemporaryFile() as file:
            file.write(host_xml.encode('utf-8'))
            file.flush()
            # The upload must happen while the temporary file still exists
            sftp_send(
                file.name,
                destination,
                {
                    "remote_host": ip_addr,
                    "remote_port": port,
                    "username": V_BOX_OPTIONS.username,
                    "password": V_BOX_OPTIONS.password
                }
            )

        LOG.info("Waiting for controller-0 to be available")
        wait_for_hosts(ssh_client, ['controller-0'], 'available')

        commands = [
            r'source /etc/platform/openrc;',
            f'system host-bulk-add {destination};',
        ]
        run_ssh_cmd_list(
            ssh_client,
            commands,
            timeout=HostTimeout.NORMAL_OP
        )

        # Start hosts one by one, wait 5s between each start
        vms = vboxmanage.get_all_vms(V_BOX_OPTIONS.labname, option="vms")
        runningvms = vboxmanage.get_all_vms(
            V_BOX_OPTIONS.labname,
            option="runningvms")
        powered_off = list(set(vms) - set(runningvms))
        LOG.info("#### Powered off VMs: %s", powered_off)
        for virtual_machine in powered_off:
            LOG.info("#### Powering on VM: %s", virtual_machine)
            vboxmanage.vboxmanage_startvm(virtual_machine, V_BOX_OPTIONS.headless, force=True)
            time.sleep(5)

        LOG.info("Give VMs 5min to boot and install host personality")
        time.sleep(5*60)

        ctrl0 = V_BOX_OPTIONS.labname + "-controller-0"
        hostnames = list(get_hostnames(ignore=[ctrl0]).values())
        wait_for_hosts(ssh_client, hostnames, 'online')

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_INSTALL_NODES)
        raise
@connect_to_ssh()
def stage_setup_controller_1(ssh_client):
    """Provision controller-1 networking and OSD storage"""

    def run_batch(batch):
        # Every batch of this stage runs with the same client and timeout
        run_ssh_cmd_list(
            ssh_client,
            batch,
            timeout=HostTimeout.NORMAL_OP
        )

    try:
        # The CLI commands below are executed on the active controller
        LOG.info("#### Display system info")
        run_batch([
            r'source /etc/platform/openrc;',
            r'system show;',
        ])

        LOG.info("#### Configure OAM and MGMT networks")
        oam_if = OAM_CONFIG[0]['device']
        run_batch([
            f'system host-if-modify controller-1 {oam_if} -c platform;',
            f'system interface-network-assign controller-1 {oam_if} oam;',
            r'system interface-network-assign controller-1 mgmt0 cluster-host;',
        ])

        # Only the all-in-one setups get an OSD on the controller in this stage
        if V_BOX_OPTIONS.setup_type in [AIO_SX, AIO_DX]:
            LOG.info("#### Provision Ceph OSD")
            run_batch([
                r'system storage-backend-list;',
                r'system host-disk-list controller-1;',
                (
                    r'system host-disk-list controller-1 '
                    r" | awk '/\/dev\/sdb/{print $2}' "
                    r' | xargs -i system host-stor-add controller-1 {};'
                ),
                r'system host-stor-list controller-1;',
            ])

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_SETUP_CONTROLLER_1)
        raise
@connect_to_ssh()
def stage_unlock_controller1(ssh_client):
    """
    Unlock controller-1 over SSH and wait until it is enabled.

    The stage is skipped when no host named controller-1 is configured.

    Args:
    - ssh_client (paramiko SSH client object): The SSH client to use for
    connecting to the environment.

    Returns:
    None.
    """
    # Fast for standard, wait for storage
    configured_hosts = list(get_hostnames().values())
    if 'controller-1' in configured_hosts:
        LOG.info("#### Unlocking controller-1")
        unlock_cmd = 'source /etc/platform/openrc; system host-unlock controller-1'
        run_ssh_cmd(ssh_client, unlock_cmd, timeout=60)

        LOG.info("#### waiting for controller-1 to be enabled")
        wait_for_hosts(ssh_client, ['controller-1'], 'enabled')
    else:
        LOG.info("Controller-1 not configured, skipping unlock.")
@connect_to_ssh()
def stage_setup_storages(ssh_client):
    """Provision dedicated storage nodes (currently only a placeholder command per node)"""
    try:
        storage_nodes = list(get_hostnames(personalities=['storage']).values())

        # Add storages setup here
        LOG.info("#### Provision dedicated storage nodes")
        for storage in storage_nodes:
            placeholder = [f'echo "TODO {storage}";']
            run_ssh_cmd_list(ssh_client, placeholder, timeout=HostTimeout.NORMAL_OP)

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_SETUP_STORAGES)
        raise
@connect_to_ssh()
def stage_unlock_storages(ssh_client):
    """
    Unlock every storage node, pausing 15 seconds after each unlock, then wait
    for all of them to be enabled.

    Args:
    - ssh_client (paramiko SSH client object): The SSH client to use for
    connecting to the environment.

    Returns:
    None.
    """
    storage_nodes = list(get_hostnames(personalities=['storage']).values())

    for storage in storage_nodes:
        unlock_cmd = f'source /etc/platform/openrc; system host-unlock {storage}'
        run_ssh_cmd(ssh_client, unlock_cmd, timeout=60)
        LOG.info("Waiting 15s before next unlock")
        time.sleep(15)

    LOG.info("#### Waiting for all storage nodes to be enabled")
    wait_for_hosts(ssh_client, storage_nodes, 'enabled')
@connect_to_ssh()
def stage_setup_workers(ssh_client):
    """
    Provision worker nodes.

    Adds a Ceph monitor on worker-0, polls `system ceph-mon-list` every 20
    seconds until that monitor is reported as configured, then assigns the
    cluster-host network to mgmt0 on every worker.

    Raises:
        TimeoutError: If the monitor is not configured within
            HostTimeout.NORMAL_OP seconds.
    """
    try:
        worker_names = list(get_hostnames(personalities=['worker']).values())
        LOG.info("#### Provision worker nodes")

        LOG.info("Adding third Ceph monitor to worker-0 node")
        run_ssh_cmd_list(
            ssh_client,
            ["system ceph-mon-add worker-0"],
            timeout=HostTimeout.NORMAL_OP)

        monitor_host = "worker-0"
        wanted_state = "configured"
        began = time.time()
        while True:
            if (time.time() - began) > HostTimeout.NORMAL_OP:
                LOG.error("Ceph monitor not configured in %s, aborting!", HostTimeout.NORMAL_OP)
                raise TimeoutError("Error while trying to add third Ceph monitor")

            listing, _, _ = run_ssh_cmd(
                ssh_client, 'source /etc/platform/openrc; system ceph-mon-list',
                timeout=HostTimeout.NORMAL_OP
            )
            # First and last output lines are left out of the search
            rows = listing[1:-1]
            if any(monitor_host in row and wanted_state in row for row in rows):
                break

            LOG.warning("Worker-0 Ceph monitor not yet configured.")
            LOG.info("Waiting 20 sec before re-checking node status.")
            time.sleep(20)

        for worker in worker_names:
            assign_cmd = [
                f'system interface-network-assign {worker} mgmt0 cluster-host'
            ]
            run_ssh_cmd_list(ssh_client, assign_cmd, timeout=HostTimeout.NORMAL_OP)

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_SETUP_WORKERS)
        raise
@connect_to_ssh()
def stage_unlock_workers(ssh_client):
    """
    Unlock every worker node, pausing 15 seconds after each unlock, then wait
    for all of them to be enabled. The stage first sleeps 120 seconds.

    Args:
    - ssh_client (paramiko SSH client object): The SSH client to use for
    connecting to the environment.

    Returns:
    None.
    """
    worker_names = list(get_hostnames(personalities=['worker']).values())

    LOG.info("Waiting 120s for services to stabilize.")
    time.sleep(120)

    for worker in worker_names:
        unlock_cmd = f'source /etc/platform/openrc; system host-unlock {worker}'
        run_ssh_cmd(ssh_client, unlock_cmd, timeout=60)
        LOG.info("Waiting 15s before next unlock")
        time.sleep(15)

    LOG.info("#### Waiting for all worker nodes to be enabled")
    wait_for_hosts(ssh_client, worker_names, 'enabled')
@connect_to_ssh()
def stage_setup_controller_storage(ssh_client):
    """
    Provision controller storage.

    For every controller an OSD is added on /dev/sdb in the "storage" tier of
    ceph_cluster, and the remote shell loops until the OSD is no longer listed
    as configuring. The resulting stor list is then displayed.
    """
    try:
        LOG.info("#### Provision controller storage")
        controllers = list(get_hostnames(personalities=['controller']).values())

        osd = "/dev/sdb"
        for controller in controllers:
            # All fragments below are joined into ONE shell command line, so
            # every statement needs its own ';' terminator. "$DISKS" and
            # "$TIERS" are quoted to keep the table output one row per line
            # for grep/awk.
            add_osd = (
                f'DISKS=$(system host-disk-list {controller});'
                'TIERS=$(system storage-tier-list ceph_cluster);'
                f"""UUID=$(echo "$DISKS" | grep "{osd}" | awk '{{print $2}}');"""
                r"""TIER_UUID=$(echo "$TIERS" | grep storage | awk '{print $2}');"""
                f'system host-stor-add {controller} $UUID --tier-uuid $TIER_UUID;'
                'while true; do '
                f' system host-stor-list {controller} | grep {osd} | grep configuring; '
                'if [ $? -ne 0 ]; '
                'then break; '
                'fi; '
                'sleep 1; '
                'done'
            )
            commands = [
                add_osd,
                f"system host-stor-list {controller}"
            ]
            run_ssh_cmd_list(ssh_client,
                             commands,
                             timeout=HostTimeout.NORMAL_OP)

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_SETUP_CTRL_STORAGE)
        raise
@connect_to_ssh()
def stage_enable_kubernetes(ssh_client):
    """
    Installation and configuration of Kubernetes dashboard.

    Uploads the Kubernetes configuration files, adds a port-forward rule to
    guest port 32000, installs the dashboard with helm, creates the admin-user
    service account and copies the resulting token.txt to the local user's
    home directory.

    Args:
    - ssh_client (paramiko SSH client object): The SSH client to use for
    connecting to the environment.
    """
    try:
        ip_addr, port = get_ssh_ip_and_port()

        local_path = V_BOX_OPTIONS.kubernetes_config_files
        send_dir(
            {
                "source": local_path,
                "remote_host": ip_addr,
                "remote_port": port,
                "destination": '/home/' + V_BOX_OPTIONS.username + '/',
                "username": V_BOX_OPTIONS.username,
                "password": V_BOX_OPTIONS.password
            }
        )

        LOG.info("#### Adding port-forward rule for kubernetes dashboard")
        # Add port forward rule for Kubernetes dashboard visualization at 32000
        ip_addr = V_BOX_OPTIONS.controller0_ip
        rule_name = V_BOX_OPTIONS.labname + "-kubernetes-dasboard"
        create_port_forward(rule_name,
                            V_BOX_OPTIONS.vboxnet_name,
                            local_port=V_BOX_OPTIONS.kubernetes_dashboard_port,
                            guest_port='32000',
                            guest_ip=ip_addr)

        LOG.info("#### Installing Kubernetes dashboard")
        commands = [
            'source /etc/platform/openrc'
            ' && source /etc/profile'
            ' && cp /etc/kubernetes/admin.conf ~/.kube/config'
            ' && helm repo update;'
            'helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/'
            ' && helm install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard'
            ' -f dashboard-values.yaml --version 6.0.8',
        ]
        run_ssh_cmd_list(
            ssh_client,
            commands,
            timeout=HostTimeout.NORMAL_OP
        )

        LOG.info("#### Creating an admin-user service account with cluster-admin provileges")
        # The awk program must be single-quoted: inside double quotes the
        # remote shell expands $1 to an empty string and awk prints the whole
        # line instead of the secret name.
        commands = [
            'kubectl apply -f admin-login.yaml'
            ' && kubectl -n kube-system describe secret'
            ' $(kubectl get secret'
            ' | grep admin-user-sa-token'
            " | awk '{print $1}')"
            ' | tee $HOME/token.txt',
        ]
        run_ssh_cmd_list(
            ssh_client,
            commands,
            timeout=HostTimeout.NORMAL_OP
        )

        LOG.info("#### Sending token.txt to /home/%s", getpass.getuser())
        ip_addr, port = get_ssh_ip_and_port()
        username = V_BOX_OPTIONS.username
        password = V_BOX_OPTIONS.password
        source = f'/home/{username}/token.txt'
        destination = f'/home/{getpass.getuser()}'

        # Copy the token file into the local user's home directory using rsync
        # NOTE(review): the password is placed on the sshpass command line and
        # is therefore visible in the local process list.
        LOG.info("#### rsync command")
        cmd = (
            f'rsync -avL --rsh="/usr/bin/sshpass -p {password} '
            f'ssh -p {port} -o StrictHostKeyChecking=no -l {username}" '
            f'{username}@{ip_addr}:{source}* {destination}'
        )
        exec_cmd(cmd)

        LOG.info(
            "#### TOKEN CREATED AND FILE CONTAINING TOKEN SENT TO HOST AT /home/%s",
            getpass.getuser()
        )

    except:
        # Log which stage failed, then let the original error propagate
        LOG.error("Failed stage: %s", STG_ENABLE_KUBERNETES)
        raise
def run_custom_script(script, timeout, console, mode):
    """
    Run a custom script on the environment.

    Over SSH the script's exit code is checked directly. Over the serial
    console the script is started, then the shell's `$?` is echoed and matched
    against the expected "Return code: [0]" output.

    Args:
    - script (str): The name of the script to run.
    - timeout (int): The timeout for the script.
    - console (str): The console to use for running the script.
    - mode (str): The mode to use for running the script.

    Returns:
    None.

    Raises:
    - RuntimeError: If the script run over SSH returns a non-zero code.
    """
    LOG.info("#### Running custom script %s with options:", script)
    LOG.info(" timeout: %s", timeout)
    LOG.info(" console mode: %s", console)
    LOG.info(" user mode: %s", mode)

    if console == 'ssh':
        ssh_client = ssh_handler()
        # pylint: disable=W0703, C0103
        _, __, return_code = run_ssh_cmd(ssh_client, f"./{script}", timeout=timeout, mode=mode)
        if return_code != 0:
            LOG.info("Custom script '%s' return code is not 0. Aborting.", script)
            raise RuntimeError(f"Script execution failed with return code: {return_code}")
    else:
        sock, stream = _connect_to_serial()
        try:
            if mode == 'root':
                set_serial_prompt_mode(stream, CONSOLE_ROOT_MODE)
                # Login as root
                serial.send_bytes(stream, 'sudo su -', expect_prompt=False)
                host_helper.check_password(
                    stream,
                    password=V_BOX_OPTIONS.password)
            else:
                set_serial_prompt_mode(stream, CONSOLE_USER_MODE)

            serial.send_bytes(stream, f"./{script}",
                              timeout=timeout, prompt='keystone')
            LOG.info("Script execution completed. Checking if return code is 0.")
            # Echo the shell's exit status ($?), not the script name; double
            # quotes are required so the remote shell expands it.
            serial.send_bytes(stream,
                              'echo "Return code: [$?]"',
                              timeout=3, prompt='Return code: [0]')
        finally:
            if sock:
                sock.close()
def get_custom_script_options(options_list):
    """
    Parse options for a custom script.

    The option string has the form "script[,timeout[,console[,mode]]]".
    Omitted fields default to timeout 5, console 'serial' and mode 'user'.

    Args:
    - options_list (str): The list of options for the script.

    Returns:
    A tuple containing the script name, timeout, console, and mode.

    Raises:
    - TypeError: If the string contains a newline, space or '*', or if the
      console or mode is not one of the supported values.
    - ValueError: If the timeout field is not an integer.
    """
    LOG.info("Parsing custom script options: %s", options_list)

    # defaults
    timeout = 5
    console = 'serial'
    mode = 'user'

    # supported options
    consoles = ['serial', 'ssh']
    modes = ['user', 'root']

    # No spaces or special chars allowed
    not_allowed = ['\n', ' ', '*']
    for char in not_allowed:
        if char in options_list:
            LOG.error("Char '%s' not allowed in options list: %s.", char, options_list)
            raise TypeError("Char not allowed in options_list")

    # get options; str.split always yields at least one element
    options = options_list.split(',')
    script = options[0]
    if len(options) >= 2:
        timeout = int(options[1])
    if len(options) >= 3:
        console = options[2]
        if console not in consoles:
            # A bare string cannot be raised. TypeError matches the check
            # above and is what the old `raise <str>` surfaced as.
            raise TypeError(f"Console must be one of {consoles}, not {console}.")
    if len(options) >= 4:
        mode = options[3]
        if mode not in modes:
            raise TypeError(f"Mode must be one of {modes}, not {mode}.")

    return script, timeout, console, mode
def _run_custom_script_stage(options_list, default_script):
    """
    Shared body of the custom-script stages.

    Args:
    - options_list (str): The value of the matching script option, or a false
      value when the option was not given.
    - default_script (str): Script name used when no option was given; it is
      then run with timeout 3600 on the serial console in user mode.

    Returns:
    None.
    """
    if options_list:
        script, timeout, console, mode = get_custom_script_options(options_list)
    else:
        script, timeout, console, mode = default_script, 3600, 'serial', 'user'
    run_custom_script(script, timeout, console, mode)


def stage_custom_script1():
    """
    Run the first custom script.

    Returns:
    None.
    """
    _run_custom_script_stage(V_BOX_OPTIONS.script1, "custom_script1.sh")


def stage_custom_script2():
    """
    Run the second custom script.

    Returns:
    None.
    """
    _run_custom_script_stage(V_BOX_OPTIONS.script2, "custom_script2.sh")


def stage_custom_script3():
    """
    Run the third custom script.

    Returns:
    None.
    """
    _run_custom_script_stage(V_BOX_OPTIONS.script3, "custom_script3.sh")


def stage_custom_script4():
    """
    Run the fourth custom script.

    Returns:
    None.
    """
    _run_custom_script_stage(V_BOX_OPTIONS.script4, "custom_script4.sh")


def stage_custom_script5():
    """
    Run the fifth custom script.

    Returns:
    None.
    """
    _run_custom_script_stage(V_BOX_OPTIONS.script5, "custom_script5.sh")
@connect_to_ssh('controller-0')
def stage_openstack_config(ssh_client):
    """
    Configure the pre-requisites necessary for installing stx-openstack.

    Nothing is done when no openstack package location was given. The stage
    creates two vlan data networks, turns the enp0s9 and enp0s10 interfaces of
    controller-0 into data interfaces assigned to them, labels controller-0
    for openstack and adds an "instances" host filesystem.

    Raises:
        Exception: If the setup type is not AIO-SX.
    """
    if V_BOX_OPTIONS.openstack_package_location is None:
        return

    if V_BOX_OPTIONS.setup_type != AIO_SX:
        raise Exception(
            "The installer currently does not support installing "
            "stx-openstack in setup types other than "
            "AIO-SX."
        )

    # Each entry is one line of a shell script that is sent as a single command
    script_lines = [
        "source /etc/platform/openrc",
        "DATA0IF=enp0s9",
        "DATA1IF=enp0s10",
        "export NODE=controller-0",
        "PHYSNET0='physnet0'",
        "PHYSNET1='physnet1'",
        "SPL=/tmp/tmp-system-port-list",
        "SPIL=/tmp/tmp-system-host-if-list",
        "system host-port-list ${NODE} --nowrap > ${SPL}",
        "system host-if-list -a ${NODE} --nowrap > ${SPIL}",
        "DATA0PCIADDR=$(cat $SPL | grep $DATA0IF |awk '{print $8}')",
        "DATA1PCIADDR=$(cat $SPL | grep $DATA1IF |awk '{print $8}')",
        "DATA0PORTUUID=$(cat $SPL | grep ${DATA0PCIADDR} | awk '{print $2}')",
        "DATA1PORTUUID=$(cat $SPL | grep ${DATA1PCIADDR} | awk '{print $2}')",
        "DATA0PORTNAME=$(cat $SPL | grep ${DATA0PCIADDR} | awk '{print $4}')",
        "DATA1PORTNAME=$(cat $SPL | grep ${DATA1PCIADDR} | awk '{print $4}')",
        (
            "DATA0IFUUID=$(cat $SPIL | awk -v DATA0PORTNAME=$DATA0PORTNAME "
            "'($12 ~ DATA0PORTNAME) {print $2}')"
        ),
        (
            "DATA1IFUUID=$(cat $SPIL | awk -v DATA1PORTNAME=$DATA1PORTNAME "
            "'($12 ~ DATA1PORTNAME) {print $2}')"
        ),
        "system datanetwork-add ${PHYSNET0} vlan",
        "system datanetwork-add ${PHYSNET1} vlan",
        "system host-if-modify -m 1500 -n data0 -c data ${NODE} ${DATA0IFUUID}",
        "system host-if-modify -m 1500 -n data1 -c data ${NODE} ${DATA1IFUUID}",
        "system interface-datanetwork-assign ${NODE} ${DATA0IFUUID} ${PHYSNET0}",
        "system interface-datanetwork-assign ${NODE} ${DATA1IFUUID} ${PHYSNET1}",
        "system host-label-assign controller-0 openstack-control-plane=enabled",
        "system host-label-assign controller-0 openstack-compute-node=enabled",
        "system host-label-assign controller-0 openvswitch=enabled",
        "export NODE=controller-0",
        "system host-fs-add ${NODE} instances=34"
    ]
    remote_script = "\n".join(script_lines)
    run_ssh_cmd(ssh_client, remote_script)
def step_check_platform_integ_apps(ssh_client):
    """
    Check that platform-integ-apps reports status "applied" and active "True".

    This is a prerequisite for installing stx-openstack. The step sleeps 60
    seconds once the checks have been run.
    """
    LOG.info("#### Checking if platform_integ_apps is already applied")

    # Table rows that must be present in the application-show output
    expected_rows = (
        r'\|\s*status\s*\|\s*applied\s*\|',
        r'\|\s*active\s*\|\s*True\s*\|',
    )
    checks = [
        f'system application-show platform-integ-apps | grep -E "{row}"'
        for row in expected_rows
    ]
    run_ssh_cmd_list(ssh_client, checks, timeout=HostTimeout.NORMAL_OP, scale=2)

    LOG.info("#### Sleeping for 60sec")
    time.sleep(60)
def step_increase_docker_partition(ssh_client):
    """
    Grow the docker filesystem of controller-0 to 60, as needed for the
    stx-openstack installation.

    Raises:
        Exception: If the host-fs-modify command returns a non-zero code.
    """
    LOG.info("#### Modifying the size of the docker_lv filesystem")
    resize_cmd = (
        'source /etc/platform/openrc;'
        'system host-fs-modify controller-0 docker=60'
    )
    _, _, return_code = run_ssh_cmd(ssh_client, resize_cmd, timeout=HostTimeout.NORMAL_OP)

    if int(return_code) != 0:
        raise Exception("Couldn't allocate appropriate size to docker partition!")
def step_upload_package(ssh_client):
    """
    Copy the stx-openstack package to controller-0 and upload it as an application.

    After the upload command the step checks that stx-openstack reports status
    "uploaded" and then sleeps 60 seconds.
    """
    LOG.info("Copying stx-openstack package .tgz file")
    host, ssh_port = get_ssh_ip_and_port('controller-0')
    remote_package = f'/home/{V_BOX_OPTIONS.username}/stx-openstack-pkg.tgz'
    connection = {
        "remote_host": host,
        "remote_port": ssh_port,
        "username": V_BOX_OPTIONS.username,
        "password": V_BOX_OPTIONS.password
    }
    sftp_send(V_BOX_OPTIONS.openstack_package_location, remote_package, connection)

    LOG.info("#### Uploading the application package")
    uploaded_row = r'\|\s*status\s*\|\s*uploaded\s*\|'
    upload_cmds = [
        "system application-upload stx-openstack-pkg.tgz",
        f'system application-show stx-openstack | grep -E "{uploaded_row}"'
    ]
    run_ssh_cmd_list(ssh_client, upload_cmds, timeout=HostTimeout.NORMAL_OP)

    LOG.info("#### Sleeping for 60sec")
    time.sleep(60)
def step_apply_openstack(ssh_client):
    """
    Apply the stx-openstack application.

    After the apply command the step checks that the application reports
    status "applied" and active "True", sleeps 10 seconds and shows the
    application once more.
    """
    LOG.info("#### Applying openstack application package")

    # Table rows that must be present in the application-show output
    expected_rows = (
        r'\|\s*status\s*\|\s*applied\s*\|',
        r'\|\s*active\s*\|\s*True\s*\|',
    )
    apply_cmds = ["system application-apply stx-openstack"]
    apply_cmds.extend(
        f'system application-show stx-openstack | grep -E "{row}"'
        for row in expected_rows
    )
    run_ssh_cmd_list(ssh_client, apply_cmds, scale=6)

    LOG.info("#### Sleeping for 10sec")
    time.sleep(10)

    show_cmd = (
        'source /etc/platform/openrc;'
        'system application-show stx-openstack'
    )
    run_ssh_cmd(ssh_client, show_cmd, timeout=HostTimeout.NORMAL_OP)
def step_config_openstack_dashboard(ssh_client):
    """
    Create the stx-openstack horizon port-forward in vbox and write
    ~/openrc.os, a copy of /etc/platform/openrc whose OS_AUTH_URL line is
    replaced with the openstack keystone URL.
    """
    LOG.info("#### Creating stx-openstack horizon port forward")
    guest_address = V_BOX_OPTIONS.controller0_ip
    forward_rule = V_BOX_OPTIONS.labname + "-openstack-horizon"
    create_port_forward(forward_rule,
                        V_BOX_OPTIONS.vboxnet_name,
                        local_port=V_BOX_OPTIONS.openstack_horizon_port,
                        guest_port='31000',
                        guest_ip=guest_address)

    LOG.info('#### Setting up admin credentials on active controller')
    # sed "c" command: replace the whole OS_AUTH_URL export line
    make_openrc = (
        "sed "
        "'/export OS_AUTH_URL/c\\export OS_AUTH_URL=http://keystone.openstack.svc.cluster."
        "local/v3' /etc/platform/openrc > ~/openrc.os"
    )
    run_ssh_cmd(ssh_client, make_openrc)
@connect_to_ssh('controller-0')
def stage_install_openstack(ssh_client):
    """
    Move the application package to the VM, upload the package, and then
    install stx-openstack.

    Nothing is done when no openstack package location was given.

    Raises:
        Exception: If the setup type is not AIO-SX.
    """
    if V_BOX_OPTIONS.openstack_package_location is None:
        return

    if V_BOX_OPTIONS.setup_type != AIO_SX:
        raise Exception(
            "The installer currently does not support installing "
            "stx-openstack in setup types other than "
            "AIO-SX."
        )

    # The steps run in this order, each with the same SSH client
    install_steps = (
        step_check_platform_integ_apps,
        step_increase_docker_partition,
        step_upload_package,
        step_apply_openstack,
        step_config_openstack_dashboard,
    )
    for step in install_steps:
        step(ssh_client)

    LOG.info("#### stx-openstack was successfully installed!")
# Names of the installation stages. They are used as the keys of
# STAGE_CALLBACKS and as the entries of the stage lists further down.
STG_CREATE_LAB = "create-lab"
STG_INSTALL_CONTROLLER0 = "install-controller-0"
STG_CONFIG_CONTROLLER = "config-controller"
STG_SETUP_CONTROLLER_0 = "setup-controller-0"
STG_UNLOCK_CONTROLLER0 = "unlock-controller-0"
STG_INSTALL_NODES = "install-nodes"
STG_SETUP_CONTROLLER_1 = "setup-controller-1"
STG_UNLOCK_CONTROLLER1 = "unlock-controller-1"
STG_SETUP_STORAGES = "setup-storages"
STG_UNLOCK_STORAGES = "unlock-storages"
STG_SETUP_WORKERS = "setup-workers"
STG_UNLOCK_WORKERS = "unlock-workers"
STG_SETUP_CTRL_STORAGE = "setup-controller-storage"
STG_ENABLE_KUBERNETES = "enable-kubernetes"
STG_RSYNC_CONFIG = "rsync-config"
STG_LAB_SETUP = "lab-setup"
STG_CUSTOM_SCRIPT1 = "custom-script1"
STG_CUSTOM_SCRIPT2 = "custom-script2"
STG_CUSTOM_SCRIPT3 = "custom-script3"
STG_CUSTOM_SCRIPT4 = "custom-script4"
STG_CUSTOM_SCRIPT5 = "custom-script5"
STG_CONFIG_OPENSTACK = "config-openstack"
STG_INSTALL_OPENSTACK = "install-openstack"
# For internal testing only, one stage is always successful
# the other one always raises an exception.
# NOTE(review): "STC_" looks like a typo of "STG_"; the name is referenced
# elsewhere in this file, so it is left unchanged here.
STC_TEST_SUCCESS = "test-success"
STG_TEST_FAIL = "test-fail"
# Keys of the per-stage dictionaries stored in STAGE_CALLBACKS
CALLBACK = 'callback'
HELP = 'help'
# Help text shared by the five custom-script stages
_CUSTOM_SCRIPT_HELP = (
    "Run a custom script from /home/wrsroot, make sure you "
    "upload it in the rsync-config stage and it is +x. See help."
)

# Maps every stage name to the function that runs it (CALLBACK) and to a
# one-line description (HELP).
STAGE_CALLBACKS = {
    STG_CREATE_LAB: {
        CALLBACK: stage_create_lab,
        HELP: "Create VMs in vbox: controller-0, controller-1...",
    },
    STG_INSTALL_CONTROLLER0: {
        CALLBACK: stage_install_controller0,
        HELP: "Install controller-0 from --iso-location",
    },
    STG_CONFIG_CONTROLLER: {
        CALLBACK: stage_config_controller,
        HELP: "Run config controller using the --ansible-controller-config "
              "updated based on --ini-* options.",
    },
    STG_SETUP_CONTROLLER_0: {
        CALLBACK: stage_setup_controller_0,
        HELP: "Provision controller-0 networking and OSD storage.",
    },
    STG_UNLOCK_CONTROLLER0: {
        CALLBACK: stage_unlock_controller0,
        HELP: "Unlock controller-0 and wait for it to reboot.",
    },
    STG_INSTALL_NODES: {
        CALLBACK: stage_install_nodes,
        HELP: "Generate a host-bulk-add.xml, apply it and install all "
              "other nodes, wait for them to be 'online'.",
    },
    STG_SETUP_CONTROLLER_1: {
        CALLBACK: stage_setup_controller_1,
        HELP: "Provision controller-1 networking and OSD storage.",
    },
    STG_UNLOCK_CONTROLLER1: {
        CALLBACK: stage_unlock_controller1,
        HELP: "Unlock controller-1, wait for it to be enabled",
    },
    STG_SETUP_STORAGES: {
        CALLBACK: stage_setup_storages,
        HELP: "Provision dedicated storage nodes.",
    },
    STG_UNLOCK_STORAGES: {
        CALLBACK: stage_unlock_storages,
        HELP: "Unlock all storage nodes, wait for them to be enabled",
    },
    STG_SETUP_WORKERS: {
        CALLBACK: stage_setup_workers,
        HELP: "Provision worker nodes.",
    },
    STG_UNLOCK_WORKERS: {
        CALLBACK: stage_unlock_workers,
        HELP: "Unlock all workers, wait for them to be enabled",
    },
    STG_SETUP_CTRL_STORAGE: {
        CALLBACK: stage_setup_controller_storage,
        HELP: "Provision controller storage.",
    },
    STG_ENABLE_KUBERNETES: {
        CALLBACK: stage_enable_kubernetes,
        HELP: "Installation and configuration of Kubernetes dashboard",
    },
    STG_RSYNC_CONFIG: {
        CALLBACK: stage_rsync_config,
        HELP: "Rsync all files from --config-files-dir and --config-files-dir* to /home/wrsroot.",
    },
    STG_LAB_SETUP: {
        CALLBACK: stage_lab_setup,
        HELP: "Run lab_setup with one or more --lab-setup-conf files",
    },
    STG_CUSTOM_SCRIPT1: {
        CALLBACK: stage_custom_script1,
        HELP: _CUSTOM_SCRIPT_HELP,
    },
    STG_CUSTOM_SCRIPT2: {
        CALLBACK: stage_custom_script2,
        HELP: _CUSTOM_SCRIPT_HELP,
    },
    STG_CUSTOM_SCRIPT3: {
        CALLBACK: stage_custom_script3,
        HELP: _CUSTOM_SCRIPT_HELP,
    },
    STG_CUSTOM_SCRIPT4: {
        CALLBACK: stage_custom_script4,
        HELP: _CUSTOM_SCRIPT_HELP,
    },
    STG_CUSTOM_SCRIPT5: {
        CALLBACK: stage_custom_script5,
        HELP: _CUSTOM_SCRIPT_HELP,
    },
    # internal testing
    STC_TEST_SUCCESS: {
        CALLBACK: stage_test_success,
        HELP: "Internal only, does not do anything, used for testing.",
    },
    STG_TEST_FAIL: {
        CALLBACK: stage_test_fail,
        HELP: "Internal only, raises exception, used for testing.",
    },
    STG_CONFIG_OPENSTACK: {
        CALLBACK: stage_openstack_config,
        HELP: "Run a custom script to assign labels to prepare stx-openstack installation.",
    },
    STG_INSTALL_OPENSTACK: {
        CALLBACK: stage_install_openstack,
        HELP: "Run a custom script to upload the application package and installs "
              "the stx-openstack.",
    },
}
# Flat list of all the stage names defined above, including the two
# internal test stages.
AVAILABLE_STAGES = [STG_CREATE_LAB,
STG_INSTALL_CONTROLLER0,
STG_CONFIG_CONTROLLER,
STG_SETUP_CONTROLLER_0,
STG_UNLOCK_CONTROLLER0,
STG_INSTALL_NODES,
STG_SETUP_CONTROLLER_1,
STG_UNLOCK_CONTROLLER1,
STG_SETUP_STORAGES,
STG_UNLOCK_STORAGES,
STG_SETUP_WORKERS,
STG_UNLOCK_WORKERS,
STG_SETUP_CTRL_STORAGE,
STG_ENABLE_KUBERNETES,
STG_RSYNC_CONFIG,
STG_LAB_SETUP,
STG_CUSTOM_SCRIPT1,
STG_CUSTOM_SCRIPT2,
STG_CUSTOM_SCRIPT3,
STG_CUSTOM_SCRIPT4,
STG_CUSTOM_SCRIPT5,
STC_TEST_SUCCESS,
STG_TEST_FAIL,
STG_CONFIG_OPENSTACK,
STG_INSTALL_OPENSTACK]
# Stage list for the AIO-SX setup type (single controller); it is the only
# list that contains the two openstack stages.
# NOTE(review): presumably the stages are executed in list order - confirm
# against the code that consumes STAGES_CHAINS.
AIO_SX_STAGES = [
STG_CREATE_LAB,
STG_INSTALL_CONTROLLER0,
STG_CONFIG_CONTROLLER,
STG_SETUP_CONTROLLER_0,
STG_CONFIG_OPENSTACK,
STG_UNLOCK_CONTROLLER0,
STG_ENABLE_KUBERNETES,
STG_INSTALL_OPENSTACK
]
# Stage list for the AIO-DX setup type: adds installing and unlocking controller-1.
AIO_DX_STAGES = [
STG_CREATE_LAB,
STG_INSTALL_CONTROLLER0,
STG_CONFIG_CONTROLLER,
STG_SETUP_CONTROLLER_0,
STG_UNLOCK_CONTROLLER0,
STG_INSTALL_NODES,
STG_SETUP_CONTROLLER_1,
STG_UNLOCK_CONTROLLER1,
STG_ENABLE_KUBERNETES,
]
# Stage list for the STANDARD setup type: adds the worker stages and the
# controller storage stage.
STD_STAGES = [
STG_CREATE_LAB,
STG_INSTALL_CONTROLLER0,
STG_CONFIG_CONTROLLER,
STG_SETUP_CONTROLLER_0,
STG_UNLOCK_CONTROLLER0,
STG_INSTALL_NODES,
STG_SETUP_CONTROLLER_1,
STG_UNLOCK_CONTROLLER1,
STG_SETUP_WORKERS,
STG_UNLOCK_WORKERS,
STG_SETUP_CTRL_STORAGE,
STG_ENABLE_KUBERNETES,
]
# Stage list for the STORAGE setup type: adds the storage node stages and
# the worker stages.
STORAGE_STAGES = [
STG_CREATE_LAB,
STG_INSTALL_CONTROLLER0,
STG_CONFIG_CONTROLLER,
STG_SETUP_CONTROLLER_0,
STG_UNLOCK_CONTROLLER0,
STG_INSTALL_NODES,
STG_SETUP_CONTROLLER_1,
STG_UNLOCK_CONTROLLER1,
STG_SETUP_STORAGES,
STG_UNLOCK_STORAGES,
STG_SETUP_WORKERS,
STG_UNLOCK_WORKERS,
STG_ENABLE_KUBERNETES,
]
# Setup type names, compared against V_BOX_OPTIONS.setup_type throughout this file
AIO_SX = 'AIO-SX'
AIO_DX = 'AIO-DX'
STANDARD = 'STANDARD'
STORAGE = 'STORAGE'
# Maps each setup type to its list of stages
STAGES_CHAINS = {AIO_SX: AIO_SX_STAGES,
AIO_DX: AIO_DX_STAGES,
STANDARD: STD_STAGES,
STORAGE: STORAGE_STAGES}
# The setup types that have a stage chain
AVAILABLE_CHAINS = [AIO_SX, AIO_DX, STANDARD, STORAGE]
def load_config():
    """
    Loads and updates the configuration options specified in the command-line arguments.
    It also sets defaults for some options.

    The parsed options are stored in the module-level V_BOX_OPTIONS. The OAM
    subnet and controller addresses are read from the ansible controller
    configuration file, and the node counts are forced according to the
    selected setup type.

    Exits the process with status 1 when the ansible configuration file is
    missing or is not valid YAML. Raises AssertionError when a required OAM
    address is absent from that file.
    """

    global V_BOX_OPTIONS  # pylint: disable=global-statement

    V_BOX_OPTIONS = handle_args().parse_args()
    V_BOX_OPTIONS.username = env.USERNAME

    # Defaults for options the user did not provide.
    if V_BOX_OPTIONS.sysadmin_password is None:
        V_BOX_OPTIONS.sysadmin_password = V_BOX_OPTIONS.password
    if V_BOX_OPTIONS.vboxnet_ip is None:
        V_BOX_OPTIONS.vboxnet_ip = OAM_CONFIG[0]['ip']
    if V_BOX_OPTIONS.y is None:
        V_BOX_OPTIONS.y = False
    # The parsed flag is truthy/falsy; from here on it is stored as 'on'/'off'.
    V_BOX_OPTIONS.hostiocache = 'on' if V_BOX_OPTIONS.hostiocache else 'off'
    if V_BOX_OPTIONS.lab_setup_conf is None:
        V_BOX_OPTIONS.lab_setup_conf = {"~/lab_setup.conf"}

    try:
        with open(V_BOX_OPTIONS.ansible_controller_config, encoding="utf-8") as stream:
            # An empty YAML document loads as None; fall back to an empty
            # mapping so the "Missing ..." assertions below report the problem.
            loaded = ruamel.yaml.safe_load(stream) or {}
            V_BOX_OPTIONS.nat_cidr = loaded.get('external_oam_subnet')
            if V_BOX_OPTIONS.setup_type != AIO_SX:
                V_BOX_OPTIONS.controller_floating_ip = loaded.get('external_oam_floating_address')
                V_BOX_OPTIONS.controller0_ip = loaded.get('external_oam_node_0_address')
                V_BOX_OPTIONS.controller1_ip = loaded.get('external_oam_node_1_address')
                # NOTE: asserts are skipped when Python runs with -O.
                assert V_BOX_OPTIONS.controller_floating_ip, \
                    "Missing external_oam_floating_address from ansible config file"
                assert V_BOX_OPTIONS.controller0_ip, \
                    "Missing external_oam_node_0_address from ansible config file"
                assert V_BOX_OPTIONS.controller1_ip, \
                    "Missing external_oam_node_1_address from ansible config file"
            else:
                V_BOX_OPTIONS.controller_floating_ip = None
                # In a AIO-SX configuration the ip of controller-0 must be
                # the same as the floating defined in ansible config file.
                V_BOX_OPTIONS.controller0_ip = loaded.get('external_oam_floating_address')
                V_BOX_OPTIONS.controller1_ip = None
                assert V_BOX_OPTIONS.controller0_ip, \
                    "Missing external_oam_floating_address from ansible config file"
    except FileNotFoundError:
        # print() does not apply %-formatting, so interpolate the path here.
        print("\n Error: Ansible configuration file not found in "
              f"{V_BOX_OPTIONS.ansible_controller_config}\n")
        sys.exit(1)
    except ruamel.yaml.YAMLError:
        print("\n Error while parsing YAML file \n")
        # Exit with a failure status; a bare sys.exit() would report success.
        sys.exit(1)

    # Node counts implied by the setup type override the command line.
    if V_BOX_OPTIONS.setup_type == AIO_SX:
        V_BOX_OPTIONS.controllers = 1
        V_BOX_OPTIONS.workers = 0
        V_BOX_OPTIONS.storages = 0
    elif V_BOX_OPTIONS.setup_type == AIO_DX:
        V_BOX_OPTIONS.controllers = 2
        V_BOX_OPTIONS.workers = 0
        V_BOX_OPTIONS.storages = 0
    elif V_BOX_OPTIONS.setup_type == STANDARD:
        V_BOX_OPTIONS.storages = 0
def validate(v_box_opt, m_stages):
    """
    Validates the values of the configuration options based on the stages that are going
    to be executed. Checks that required options have been set and prints an error
    message and exits with an error code if any of them are missing. It also performs
    additional validation depending on the stage that is going to be executed.

    v_box_opt is the parsed options object and m_stages is the list of stage
    names about to run. All problems are reported before exiting, so the user
    sees every missing option in one pass. Exits with status 5 on failure.
    """

    err = False

    # Generic
    if v_box_opt.vboxnet_type == 'nat':
        if not v_box_opt.nat_controller0_local_ssh_port:
            print("Please set --nat-controller0-local-ssh-port")
            err = True
        if v_box_opt.controllers > 1 and not v_box_opt.nat_controller1_local_ssh_port:
            print("Second controller is configured, "
                  "please set --nat-controller1-local-ssh-port")
            err = True
        if v_box_opt.controllers > 1 and not v_box_opt.nat_controller_floating_ssh_port:
            print("Second controller is configured, "
                  "please set --nat-controller-floating-ssh-port")
            err = True
    else:
        if v_box_opt.setup_type != AIO_SX:
            if not v_box_opt.controller_floating_ip:
                print("Please set --controller-floating-ip")
                err = True
        if not v_box_opt.controller0_ip:
            print("Please set --controller0-ip")
            err = True
        if v_box_opt.controllers > 1 and not v_box_opt.controller1_ip:
            print("Second controller is configured, "
                  "please set --controller1-ip")
            err = True

    # Stage-specific requirements
    if STG_CONFIG_CONTROLLER in m_stages:
        if not v_box_opt.ansible_controller_config:
            print("Please set --ansible-controller-config "
                  f"as needed by stage {STG_CONFIG_CONTROLLER}")
            err = True
    if STG_RSYNC_CONFIG in m_stages:
        if not v_box_opt.config_files_dir and not v_box_opt.config_files_dir_dont_follow_links:
            print("Please set --config-files-dir and/or --config-files-dir-dont-follow-links "
                  f"as needed by stage {STG_RSYNC_CONFIG} and {STG_LAB_SETUP}")
            err = True
    if STG_LAB_SETUP in m_stages:
        if not v_box_opt.lab_setup_conf:
            print("Please set at least one --lab-setup-conf file "
                  "as needed by lab-setup stages")
            err = True

    if err:
        print("\nMissing arguments. Please check --help and --list-stages for usage.")
        sys.exit(5)
def wrap_stage_help(m_stage, stage_callbacks, number=None):
    """
    Builds one line of stage help text.

    m_stage is the stage name, stage_callbacks is the stage's description and
    number is an optional stage number that is prefixed to the name when it is
    truthy. The name part is padded with spaces up to 30 characters (longer
    names are left as they are) and is followed by "# " and the description.
    """
    label = f" {number}. {m_stage}" if number else f" {m_stage}"
    return f"{label.ljust(30)}# {stage_callbacks}"
def signal_handler(signum=None, frame=None):  # pylint: disable=unused-argument
    """
    This function is called when the user presses Ctrl+C. It prints a message to the
    console and exits the script. Additionally, it calls the print_kpi_metrics()
    function from the kpi module to print KPI metrics.

    signum and frame are the two arguments Python passes to every handler
    registered with signal.signal(); they are accepted but not used. Both
    default to None so the handler can still be called without arguments.
    """

    print('You pressed Ctrl+C!')
    close_ssh_connections(SSH_CONNECTIONS)
    kpi.print_kpi_metrics()
    sys.exit(1)
def log_heading_msg(msg, pattern='#', panel_size=20):
    """
    This function logs a formatted heading message with the style below
    ####################################################################
    #################### Here goes the heading msg ####################
    ####################################################################

    msg is the heading text, pattern is the string repeated to draw the frame
    and panel_size is how many times pattern is repeated on each side of msg.
    """

    panel = pattern * panel_size
    # The middle of the border spans the message plus the two spaces around it.
    pad = pattern * (len(msg) + 2)
    LOG.info("%s%s%s", panel, pad, panel)
    LOG.info("%s %s %s", panel, msg, panel)
    LOG.info("%s%s%s", panel, pad, panel)
# pylint: disable=invalid-name
# Script entry point: parse the configuration, optionally list the known
# stages, then run the selected stages in order while recording KPIs.
if __name__ == "__main__":
    try:
        kpi.init_kpi_metrics()
        signal.signal(signal.SIGINT, signal_handler)

        load_config()

        if V_BOX_OPTIONS.list_stages:
            print(f"Defined setups: {list(STAGES_CHAINS.keys())}")
            # Restrict the listing to the requested setup type when it is a
            # known one; use a local so the module constant is not rebound.
            chains_to_list = AVAILABLE_CHAINS
            if V_BOX_OPTIONS.setup_type and V_BOX_OPTIONS.setup_type in AVAILABLE_CHAINS:
                chains_to_list = [V_BOX_OPTIONS.setup_type]
            for stg_chain in chains_to_list:
                print(f"Stages for setup on: {stg_chain}")
                for stg_no, stage in enumerate(STAGES_CHAINS[stg_chain], start=1):
                    print(wrap_stage_help(stage, STAGE_CALLBACKS[stage][HELP], stg_no))
            print("Available stages that can be used for --custom-stages:")
            for stage in AVAILABLE_STAGES:
                print(wrap_stage_help(stage, STAGE_CALLBACKS[stage][HELP]))
            sys.exit(0)

        init_logging(V_BOX_OPTIONS.labname, V_BOX_OPTIONS.logpath)
        LOG.info("Logging to directory: %s", (get_log_dir() + "/"))

        LOG.info("Install manages: %s controllers, %s workers, %s storages.",
                 V_BOX_OPTIONS.controllers, V_BOX_OPTIONS.workers, V_BOX_OPTIONS.storages)

        # Setup stages to run based on config
        install_stages = []
        if V_BOX_OPTIONS.custom_stages:
            # Custom stages
            install_stages = V_BOX_OPTIONS.custom_stages.split(',')
            # Collect every unsupported stage, not only the last one checked.
            invalid_stages = [stage for stage in install_stages
                              if stage not in AVAILABLE_STAGES]
            if invalid_stages:
                LOG.warning("Following custom stages are not supported: %s.\n"
                            "Choose from: %s", invalid_stages, AVAILABLE_STAGES)
                sys.exit(1)
        else:
            # List all stages between 'from-stage' to 'to-stage'
            stages = STAGES_CHAINS[V_BOX_OPTIONS.setup_type]
            from_stg_index = 0
            to_stg_index = None  # None slices through the last stage
            if V_BOX_OPTIONS.from_stage and V_BOX_OPTIONS.from_stage != 'start':
                from_stg_index = stages.index(V_BOX_OPTIONS.from_stage)
            # 'end' means "run through the final stage", so the upper bound
            # stays open; any other value is an inclusive stage name.
            if V_BOX_OPTIONS.to_stage and V_BOX_OPTIONS.to_stage != 'end':
                to_stg_index = stages.index(V_BOX_OPTIONS.to_stage) + 1
            install_stages = stages[from_stg_index:to_stg_index]
        LOG.info("Executing %s stage(s): %s.", len(install_stages), install_stages)

        validate(V_BOX_OPTIONS, install_stages)

        stg_no = 0
        prev_stage = None
        for stage in install_stages:
            stg_no += 1
            stg_start_time = time.time()
            try:
                stg_msg = f"({stg_no}/{len(install_stages)}) Entering stage {stage}"
                log_heading_msg(stg_msg)
                STAGE_CALLBACKS[stage][CALLBACK]()

                # Take snapshot if configured
                if V_BOX_OPTIONS.snapshot:
                    vboxmanage.take_snapshot(
                        V_BOX_OPTIONS.labname,
                        f"snapshot-AFTER-{stage}")

                # Compute KPIs
                stg_duration = time.time() - stg_start_time
                kpi.set_kpi_metric(stage, stg_duration)
                kpi.print_kpi(stage)
                kpi.print_kpi('total')
            except Exception as stg_exc:
                # Record how long the failing stage ran, report, and re-raise
                # so the failure is not hidden from the caller.
                stg_duration = time.time() - stg_start_time
                kpi.set_kpi_metric(stage, stg_duration)
                LOG.error("INSTALL FAILED, ABORTING!")
                kpi.print_kpi_metrics()
                LOG.info("Exception details: %s", repr(stg_exc))
                raise
            # Stage completed
            prev_stage = stage
        LOG.info("INSTALL SUCCEEDED!")
        kpi.print_kpi_metrics()
    finally:
        # Always release SSH connections, including on sys.exit() and errors.
        close_ssh_connections(SSH_CONNECTIONS)