From 0389ebc90a805afa4c3b801feb06218565e6d78a Mon Sep 17 00:00:00 2001 From: Roger Ferraz Date: Tue, 25 Jul 2023 13:56:56 -0300 Subject: [PATCH] Review lab-setup files The lab-setup files are refactored, as detailed below. In addition, recovery, administration and logging improvements are implemented. The following lab-setup files are removed: - lab_setup1.sh - lab_setup2.sh The corresponding code, previously run locally in the VM, is now integrated to the main Python code. The files lab_setup.sh and lab_setup.conf are kept, because they are useful to populate the stx-openStack application. These should be reviewed by a new task under the context of stx-openStack. Test Plan - AIO-SX Virtual Deployment (PASS) - AIO-DX Virtual Deployment (PASS) Story: 2005051 Task: 48402 Change-Id: I940e5a16ea98a4325efe1ee0dd45127674d6b192 Signed-off-by: Roger Ferraz --- tox.ini | 2 +- virtualbox/pybox/.gitignore | 6 + virtualbox/pybox/Parser.py | 20 +- virtualbox/pybox/README.md | 21 +- .../config/ansibleFiles/duplex_localhost.yml | 7 +- .../config/ansibleFiles/simplex_localhost.yml | 7 +- .../pybox/config/labSetupFiles/lab_setup.conf | 5 +- .../pybox/config/labSetupFiles/lab_setup.sh | 14 +- .../pybox/config/labSetupFiles/lab_setup1.sh | 107 -- .../pybox/config/labSetupFiles/lab_setup2.sh | 83 -- virtualbox/pybox/consts/networking.py | 12 +- virtualbox/pybox/consts/timeout.py | 3 +- virtualbox/pybox/helper/host_helper.py | 42 +- virtualbox/pybox/helper/install_lab.py | 147 ++- .../pybox/helper/tests/test_install_lab.py | 264 +++- virtualbox/pybox/helper/vboxmanage.py | 247 ++-- virtualbox/pybox/install_vbox.py | 1100 +++++++++++------ virtualbox/pybox/tests/test_install_vbox.py | 21 +- virtualbox/pybox/utils/install_log.py | 5 +- virtualbox/pybox/utils/serial.py | 101 +- virtualbox/pybox/utils/sftp.py | 24 +- virtualbox/pybox/utils/tests/test_serial.py | 7 +- virtualbox/pybox/utils/tests/test_sftp.py | 2 +- virtualbox/pybox/vbox-controlgrp.sh | 6 +- 24 files changed, 1400 
insertions(+), 853 deletions(-) create mode 100644 virtualbox/pybox/.gitignore delete mode 100644 virtualbox/pybox/config/labSetupFiles/lab_setup1.sh delete mode 100644 virtualbox/pybox/config/labSetupFiles/lab_setup2.sh diff --git a/tox.ini b/tox.ini index c6a012d..20b7dbc 100644 --- a/tox.ini +++ b/tox.ini @@ -25,7 +25,7 @@ commands = -not -name \*~ \ -not -name \*.md \ -name \*.sh \ - -print0 | xargs -0 bashate -v -iE006,E040" + -print0 | xargs -0 bashate -v -iE006,E040,E042" [testenv:pylint] basepython = python3 diff --git a/virtualbox/pybox/.gitignore b/virtualbox/pybox/.gitignore new file mode 100644 index 0000000..aa3fc6f --- /dev/null +++ b/virtualbox/pybox/.gitignore @@ -0,0 +1,6 @@ +localhost.yml +__pycache__/ +consts/__pycache__/ +helper/__pycache__/ +utils/__pycache__/ +venv/ diff --git a/virtualbox/pybox/Parser.py b/virtualbox/pybox/Parser.py index 3796e37..8229fdf 100644 --- a/virtualbox/pybox/Parser.py +++ b/virtualbox/pybox/Parser.py @@ -134,11 +134,17 @@ def parse_setup_config(parser: ArgumentParser): default="sysadmin") parser.add_argument("--password", help= """ - Password. + admin password """, type=validate_password, required=True) - + parser.add_argument("--sysadmin-password", help= + """ + sysadmin password + This argument is optional + The default value is the admin password + """, + type=validate_password) def parse_config_location(parser: ArgumentParser): """ @@ -283,6 +289,16 @@ def parse_networking(parser: ArgumentParser): installed. """, type=str) + parser.add_argument("--nat-controller-floating-ssh-port", help= + """ + When oam network is configured as 'nat' a port on + the vbox host is used for connecting to ssh on + active controller. No default value is configued. This + is mandatory if --vboxnet-type is 'nat' for non + AIO-SX deployments or if second controller is + installed. 
+ """, + type=str) parser.add_argument("--horizon-dashboard-port", help= """ Port for the visualization of the StarlingX diff --git a/virtualbox/pybox/README.md b/virtualbox/pybox/README.md index 3545fa6..3fdb083 100644 --- a/virtualbox/pybox/README.md +++ b/virtualbox/pybox/README.md @@ -38,27 +38,24 @@ Example stages: - ansible-controller-config updated based on args options. - rsync-config # Rsync all files from --config-files-dir and --config-files-dir* to /home/sysadmin. -- lab-setup1 # Run lab_setup with one or more --lab-setup-conf +- setup-controller-0 # Run lab_setup with one or more --lab-setup-conf files from controller-0. - unlock-controller-0 # Unlock controller-0 and wait for it to reboot. -- lab-setup2 # Run lab_setup with one or more --lab-setup-conf - files from controller-0. Example chains: [create-lab, install-controller-0, config-controller, -rsync-config, lab-setup1, unlock-controller-0, lab-setup2]. This chain -will install an AIO-SX. +setup-controller-0, unlock-controller-0]. This chain will install an AIO-SX. The autoinstaller has a predefined set of chains. The user can select from these chains and choose from which stage to which stage to do the install. -For example, if the user already executed config_controller, they can choose -to continue from rsync-config to lab-setup2. +For example, if the user already executed config_controller, he or she can +choose to continue from setup-controller-0 to unlock-controller-0. The user can also create a custom set of chains, as he sees fit by specifying them in the desired order. This allows better customization of the install process. For example, the user might want to execute his own script after config_controller. 
In this case, he will have to specify a chain like this: [create-lab, install-controller-0, config-controller, -rsync-config, custom-script1, lab-setup1, unlock-controller-0, lab-setup2] +rsync-config, custom-script1, setup-controller-0, unlock-controller-0] The installer supports creating virtualbox snapshots after each stage so the user does not need to reinstall from scratch. The user can restore the @@ -125,10 +122,10 @@ will be configured and used. sudo apt install virtualbox socat git rsync sshpass openssh-client python3-pip python3-venv ``` -2. Create a NAT Network with the `VBoxManage` CLI that is installed with VirtualBox: +2. Create a NAT Network with the `vboxmanage` CLI that is installed with VirtualBox: ```shell - VBoxManage natnetwork add --netname NatNetwork --network 10.10.10.0/24 --dhcp off --ipv6 on + vboxmanage natnetwork add --netname NatNetwork --network 10.10.10.0/24 --dhcp off --ipv6 on ``` 3. Checkout the repository, and set up Python's Virtual Environment with: @@ -172,6 +169,9 @@ running it): --snapshot ``` +Both the StarlingX admin and sysadmin passwords are set with the --password argument. +Optionally, a distinct sysadmin password may be set with the --sysadmin-password argument. + The script takes a while to do all the things (from creating a VM and installing an OS in it to configuring StarlingX). Several restarts might occur on the VM, and you might see a VirtualBox window with a prompt. @@ -184,4 +184,3 @@ running. ├── consts: contains modules for managing virtual lab environments, including classes for Lab, Subnets, NICs, OAM, Serial, nodes, and HostTimeout. ├── helper: contains modules for interacting with a StarlingX controller-0 server via a serial connection, configuring system settings, and managing virtual machines using VirtualBox. 
└── utils: contains modules for initializing logging, tracking and reporting KPIs, connecting and communicating with remote hosts via local domain socket, and sending files and directories to remote servers using rsync and paramiko libraries. - diff --git a/virtualbox/pybox/config/ansibleFiles/duplex_localhost.yml b/virtualbox/pybox/config/ansibleFiles/duplex_localhost.yml index bd70340..da9ce90 100644 --- a/virtualbox/pybox/config/ansibleFiles/duplex_localhost.yml +++ b/virtualbox/pybox/config/ansibleFiles/duplex_localhost.yml @@ -11,5 +11,8 @@ external_oam_node_0_address: 10.10.10.4 external_oam_node_1_address: 10.10.10.5 admin_username: admin -admin_password: Li69nux* -ansible_become_pass: \ No newline at end of file + +# The following password fields are overridden by the Automated Installer. +# Refer to the README for instructions on how to set the StarlingX passwords. +admin_password: +ansible_become_pass: diff --git a/virtualbox/pybox/config/ansibleFiles/simplex_localhost.yml b/virtualbox/pybox/config/ansibleFiles/simplex_localhost.yml index 3d9aea4..f046eb8 100644 --- a/virtualbox/pybox/config/ansibleFiles/simplex_localhost.yml +++ b/virtualbox/pybox/config/ansibleFiles/simplex_localhost.yml @@ -9,5 +9,8 @@ external_oam_gateway_address: 10.10.10.1 external_oam_floating_address: 10.10.10.3 admin_username: admin -admin_password: Li69nux* -ansible_become_pass: \ No newline at end of file + +# The following password fields are overridden by the Automated Installer. +# Refer to the README for instructions on how to set the StarlingX passwords. 
+admin_password: +ansible_become_pass: diff --git a/virtualbox/pybox/config/labSetupFiles/lab_setup.conf b/virtualbox/pybox/config/labSetupFiles/lab_setup.conf index b9653d0..e1b0722 100644 --- a/virtualbox/pybox/config/labSetupFiles/lab_setup.conf +++ b/virtualbox/pybox/config/labSetupFiles/lab_setup.conf @@ -32,7 +32,10 @@ INTERNALPNET="vlan|data0" DATA_INTERFACES="ethernet|eth1000|${DATAMTU}|data0 \ ethernet|eth1001|${DATAMTU}|data1" -OAM_INTERFACES="ethernet|enp0s3|1500|none" +# Virtual Box +DEFAULT_IF0="enp0s3" + +OAM_INTERFACES="ethernet|${DEFAULT_IF0}|1500|none" ## IP address pools to support VXLAN provider networks. Each compute node will ## get an address allocated from within the specified pools diff --git a/virtualbox/pybox/config/labSetupFiles/lab_setup.sh b/virtualbox/pybox/config/labSetupFiles/lab_setup.sh index c137f04..98ccef5 100755 --- a/virtualbox/pybox/config/labSetupFiles/lab_setup.sh +++ b/virtualbox/pybox/config/labSetupFiles/lab_setup.sh @@ -12,9 +12,15 @@ CLEAR_CHAIN="no" SYSTEM_NAME="" RAM_QUOTA="" -DEFAULT_IF0=eth0 -DEFAULT_IF1=eth1 -DEFAULT_IF2=eth2 +# Bare Metal +# DEFAULT_IF0="eth0" +# DEFAULT_IF1="eth1" +# DEFAULT_IF2="eth2" + +# Virtual Box +DEFAULT_IF0="enp0s3" +DEFAULT_IF1="enp0s8" +DEFAULT_IF2="enp0s9" CLI_NOWRAP=--nowrap DEFAULT_OPENSTACK_PASSWORD="Li69nux*" @@ -101,7 +107,7 @@ SYSTEM_MODE=${system_mode:-none} DISTRIBUTED_CLOUD_ROLE="none" ## vswitch type -VSWITCH_TYPE="avs" +VSWITCH_TYPE="ovs-dpdk" ## Cinder's backends. # LVM, Ceph, both or none. 
If CONFIGURE_STORAGE_LVM is set, then Cinder will be configured by default diff --git a/virtualbox/pybox/config/labSetupFiles/lab_setup1.sh b/virtualbox/pybox/config/labSetupFiles/lab_setup1.sh deleted file mode 100644 index 4848552..0000000 --- a/virtualbox/pybox/config/labSetupFiles/lab_setup1.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash - -## This file makes the necessary configuration for the unlock of the Controller-0 - -DATE_FORMAT="%Y-%m-%d %T" -LOG_FILE=${LOG_FILE:-"${HOME}/lab_setup_1.log"} -VERBOSE_LEVEL=0 -##For now ceph_storage variable will be set to true but can be changed before executing the script -CEPH_STORAGE="true" - -#Identify setup type -SETUP_TYPE=$(system show | grep 'system_mode' | awk '{print $4}') - -OPENRC=/etc/platform/openrc -source ${OPENRC} - - -function info { - local MSG="$1" - - echo ${MSG} - echo $(date +"${DATE_FORMAT}") ${MSG} >> ${LOG_FILE} -} - - -function log_command { - local CMD=$1 - local MSG="[${OS_USERNAME}@${OS_PROJECT_NAME}]> RUNNING: ${CMD}" - - set +e - if [ ${VERBOSE_LEVEL} -gt 0 ]; then - echo ${MSG} - fi - echo $(date +"${DATE_FORMAT}") ${MSG} >> ${LOG_FILE} - - if [ ${VERBOSE_LEVEL} -gt 1 ]; then - eval ${CMD} 2>&1 | tee -a ${LOG_FILE} - RET=${PIPESTATUS[0]} - else - eval ${CMD} &>> ${LOG_FILE} - RET=$? - fi - - if [ ${RET} -ne 0 ]; then - info "COMMAND FAILED (rc=${RET}): ${CMD}" - info "===========================" - info "Check \"${LOG_FILE}\" for more details, fix the issues and" - info "re-run the failed command manually." 
- exit 1 - fi - set -e - - return ${RET} -} - - -## Set OAM interface -function configure_OAM_interface { - - if [ "$SETUP_TYPE" == "simplex" ]; then - #Set OAM_IF variable - log_command "OAM_IF=enp0s3" - #Associate OAM_IF with Controller-0 - log_command "system host-if-modify controller-0 $OAM_IF -c platform" - log_command "system interface-network-assign controller-0 $OAM_IF oam" - - else - #Set Variables - log_command "OAM_IF=enp0s3 && MGMT_IF=enp0s8" - - log_command "system host-if-modify controller-0 lo -c none" - local IFNET_UUIDS=$(system interface-network-list controller-0 | awk '{if ($6=="lo") print $4;}') - for UUID in $IFNET_UUIDS; do - log_command "system interface-network-remove ${UUID}" - done - - #Associate variables with Controller-0 - log_command "system host-if-modify controller-0 $OAM_IF -c platform" - log_command "system interface-network-assign controller-0 $OAM_IF oam" - log_command "system host-if-modify controller-0 $MGMT_IF -c platform" - log_command "system interface-network-assign controller-0 $MGMT_IF mgmt" - log_command "system interface-network-assign controller-0 $MGMT_IF cluster-host" - fi - - return 0 -} - - -## Initialize and set ceph_storage -function initialize_ceph_storage { - echo "Setting host-based Ceph storage backend solution" - - #Adding ceph backend - log_command "system storage-backend-add ceph --confirmed" - - #Adding OSD on controller-0 - log_command "system host-disk-list controller-0" - log_command "system host-disk-list controller-0 | awk '/\/dev\/sdb/{print \$2}' | xargs -i system host-stor-add controller-0 {}" - log_command "system host-stor-list controller-0" -} - - -configure_OAM_interface - -if [ "${CEPH_STORAGE}" == "true" ]; then - initialize_ceph_storage -fi \ No newline at end of file diff --git a/virtualbox/pybox/config/labSetupFiles/lab_setup2.sh b/virtualbox/pybox/config/labSetupFiles/lab_setup2.sh deleted file mode 100644 index 8838444..0000000 --- a/virtualbox/pybox/config/labSetupFiles/lab_setup2.sh 
+++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash - -## This file makes the necessary configuration for the unlock of the Controller-1 - -DATE_FORMAT="%Y-%m-%d %T" -LOG_FILE=${LOG_FILE:-"${HOME}/lab_setup_2.log"} -VERBOSE_LEVEL=0 - -#Identify setup type -SETUP_TYPE=$(system show | grep 'system_mode' | awk '{print $4}') - -OPENRC=/etc/platform/openrc -source ${OPENRC} - - -function info { - local MSG="$1" - - echo ${MSG} - echo $(date +"${DATE_FORMAT}") ${MSG} >> ${LOG_FILE} -} - - -function log_command { - local CMD=$1 - local MSG="[${OS_USERNAME}@${OS_PROJECT_NAME}]> RUNNING: ${CMD}" - - set +e - if [ ${VERBOSE_LEVEL} -gt 0 ]; then - echo ${MSG} - fi - echo $(date +"${DATE_FORMAT}") ${MSG} >> ${LOG_FILE} - - if [ ${VERBOSE_LEVEL} -gt 1 ]; then - eval ${CMD} 2>&1 | tee -a ${LOG_FILE} - RET=${PIPESTATUS[0]} - else - eval ${CMD} &>> ${LOG_FILE} - RET=$? - fi - - if [ ${RET} -ne 0 ]; then - info "COMMAND FAILED (rc=${RET}): ${CMD}" - info "===========================" - info "Check \"${LOG_FILE}\" for more details, fix the issues and" - info "re-run the failed command manually." 
- exit 1 - fi - set -e - - return ${RET} -} - -function configure_OAM_MGMT_interfaces { - #Set OAM_IF variable - log_command "OAM_IF=enp0s3" - #Associate OAM_IF with Controller-0 - log_command "system host-if-modify controller-1 $OAM_IF -c platform" - log_command "system interface-network-assign controller-1 $OAM_IF oam" - log_command "system interface-network-assign controller-1 mgmt0 cluster-host" -} - - -##Configure ceph storage in controller-1 -function configure_ceph_storage { - echo "Setting host-based Ceph storage backend solution" - local CEPH=$(system storage-backend-list | grep 'ceph') - - if [ -z "$CEPH" ]; then - echo "Ceph storage not set in controller-0, skipping process in controller-1" - else - #Adding OSD on controller-1 - log_command "system host-disk-list controller-1" - log_command "system host-disk-list controller-1 | awk '/\/dev\/sdb/{print \$2}' | xargs -i system host-stor-add controller-1 {}" - log_command "system host-stor-list controller-1" - fi -} - -configure_OAM_MGMT_interfaces - -configure_ceph_storage - - diff --git a/virtualbox/pybox/consts/networking.py b/virtualbox/pybox/consts/networking.py index a828c1a..9e4cc16 100644 --- a/virtualbox/pybox/consts/networking.py +++ b/virtualbox/pybox/consts/networking.py @@ -166,15 +166,23 @@ class NICs: class OAM: - """The `OAM` class contains an IP address and netmask for the out-of-band - management (OAM) network.""" + """The `OAM` class defines the out-of-band management (OAM) network.""" OAM = { + "device": "enp0s3", "ip": "10.10.10.254", "netmask": "255.255.255.0", } +class MGMT: + """The `MGMT` class defines the internal management (MGMT) network.""" + + MGMT = { + "device": "enp0s8", + } + + class Serial: """The `Serial` class contains configurations for the serial ports.""" diff --git a/virtualbox/pybox/consts/timeout.py b/virtualbox/pybox/consts/timeout.py index 420ac3c..02e429c 100644 --- a/virtualbox/pybox/consts/timeout.py +++ b/virtualbox/pybox/consts/timeout.py @@ -19,4 +19,5 @@ 
class HostTimeout: #pylint: disable=too-few-public-methods HOST_INSTALL = 3600 LAB_CONFIG = 5400 INSTALL_PATCHES = 900 - NETWORKING_OPERATIONAL = 60 + NORMAL_OP = 90 + REATTEMPT_DELAY = [0, 2, 5, 10, 30, 60, 2*60, 3*60, 5*60, 10*60] diff --git a/virtualbox/pybox/helper/host_helper.py b/virtualbox/pybox/helper/host_helper.py index d7897cb..0ae0f76 100644 --- a/virtualbox/pybox/helper/host_helper.py +++ b/virtualbox/pybox/helper/host_helper.py @@ -10,7 +10,6 @@ locking, rebooting, and installing a host. The module uses streamexpect library facilitate stream parsing. """ -import time import streamexpect from consts.timeout import HostTimeout from utils import serial @@ -28,14 +27,17 @@ def unlock_host(stream, hostname): - Unlock host """ - LOG.info("#### Unlock %s", hostname) - serial.send_bytes(stream, f"system host-list | grep {hostname}", expect_prompt=False) + cmd = f"system host-list | grep {hostname}" + serial.send_bytes(stream, cmd, expect_prompt=False) try: serial.expect_bytes(stream, "locked") except streamexpect.ExpectTimeout: LOG.info("Host %s not locked", hostname) return 1 - serial.send_bytes(stream, f"system host-unlock {hostname}", expect_prompt=False) + + LOG.info("#### Unlock %s", hostname) + cmd = f"system host-unlock {hostname}" + serial.send_bytes(stream, cmd, expect_prompt=False) LOG.info("Unlocking %s", hostname) return None @@ -51,14 +53,17 @@ def lock_host(stream, hostname): - Lock host """ - LOG.info("Lock %s", hostname) - serial.send_bytes(stream, f"system host-list |grep {hostname}", expect_prompt=False) + cmd = f"system host-list |grep {hostname}" + serial.send_bytes(stream, cmd, expect_prompt=False) try: serial.expect_bytes(stream, "unlocked") except streamexpect.ExpectTimeout: LOG.info("Host %s not unlocked", hostname) return 1 - serial.send_bytes(stream, f"system host-lock {hostname}", expect_prompt="keystone") + + LOG.info("Lock %s", hostname) + cmd = f"system host-lock {hostname}" + serial.send_bytes(stream, cmd, 
expect_prompt="keystone") LOG.info("Locking %s", hostname) return None @@ -72,7 +77,8 @@ def reboot_host(stream, hostname): """ LOG.info("Rebooting %s", hostname) - serial.send_bytes(stream, f"system host-reboot {hostname}", expect_prompt=False) + cmd = f"system host-reboot {hostname}" + serial.send_bytes(stream, cmd, expect_prompt=False) serial.expect_bytes(stream, "rebooting", HostTimeout.REBOOT) @@ -86,21 +92,16 @@ def install_host(stream, hostname, host_type, host_id): host_id(int): id to identify host """ - time.sleep(10) LOG.info("Installing %s with id %s", hostname, host_id) if host_type == 'controller': - serial.send_bytes(stream, - f"system host-update {host_id} personality=controller", - expect_prompt=False) + cmd = f"system host-update {host_id} personality=controller" + serial.send_bytes(stream, cmd, expect_prompt=False) elif host_type == 'storage': - serial.send_bytes(stream, - f"system host-update {host_id} personality=storage", - expect_prompt=False) + cmd = f"system host-update {host_id} personality=storage" + serial.send_bytes(stream, cmd, expect_prompt=False) else: - serial.send_bytes(stream, - f"system host-update {host_id} personality=compute hostname={hostname}", - expect_prompt=False) - time.sleep(30) + cmd = f"system host-update {host_id} personality=compute hostname={hostname}" + serial.send_bytes(stream, cmd, expect_prompt=False) def disable_logout(stream): @@ -111,7 +112,8 @@ def disable_logout(stream): """ LOG.info('Disabling automatic logout') - serial.send_bytes(stream, "export TMOUT=0") + cmd = "export TMOUT=0" + serial.send_bytes(stream, cmd) def change_password(stream, username, password): diff --git a/virtualbox/pybox/helper/install_lab.py b/virtualbox/pybox/helper/install_lab.py index ea7a8e1..6f0fabb 100644 --- a/virtualbox/pybox/helper/install_lab.py +++ b/virtualbox/pybox/helper/install_lab.py @@ -7,8 +7,12 @@ Contains helper functions that will configure basic system settings. 
""" +import subprocess +import sys +import time + from consts.timeout import HostTimeout -from utils import serial +from utils import kpi, serial from utils.install_log import LOG from helper import host_helper @@ -19,13 +23,9 @@ def update_platform_cpus(stream, hostname, cpu_num=5): """ LOG.info("Allocating %s CPUs for use by the %s platform.", cpu_num, hostname) - serial.send_bytes( - stream, - "\nsource /etc/platform/openrc; system host-cpu-modify " - f"{hostname} -f platform -p0 {cpu_num}", - prompt="keystone", - timeout=300, - ) + cmd = "\nsource /etc/platform/openrc;" \ + f" system host-cpu-modify {hostname} -f platform -p0 {cpu_num}" + serial.send_bytes(stream, cmd, prompt="keystone", timeout=300) def set_dns(stream, dns_ip): @@ -34,12 +34,8 @@ def set_dns(stream, dns_ip): """ LOG.info("Configuring DNS to %s.", dns_ip) - serial.send_bytes( - stream, - "source /etc/platform/openrc; system dns-modify " - f"nameservers={dns_ip}", - prompt="keystone", - ) + cmd = f"source /etc/platform/openrc; system dns-modify nameservers={dns_ip}" + serial.send_bytes(stream, cmd, prompt="keystone") def config_controller(stream, password): @@ -47,13 +43,122 @@ def config_controller(stream, password): Configure controller-0 using optional arguments """ - serial.send_bytes( - stream, - "ansible-playbook /usr/share/ansible/stx-ansible/playbooks/bootstrap.yml", - expect_prompt=False, - ) + LOG.info("Executing the bootstrap ansible playbook") + cmd = "ansible-playbook /usr/share/ansible/stx-ansible/playbooks/bootstrap.yml" + serial.send_bytes(stream, cmd, expect_prompt=False) host_helper.check_password(stream, password=password) - ret = serial.expect_bytes(stream, "~$", timeout=HostTimeout.LAB_CONFIG) + serial.expect_bytes(stream, "~$", timeout=HostTimeout.LAB_CONFIG) + + cmd = "echo [$?]" + serial.send_bytes(stream, cmd, expect_prompt=False, log=False) + ret = serial.expect_bytes(stream, "[0]", timeout=HostTimeout.NORMAL_OP, log=False) if ret != 0: LOG.info("Configuration failed. 
Exiting installer.") - raise Exception("Configcontroller failed") # pylint: disable=E0012, W0719 + raise SystemExit("Configcontroller failed") + LOG.info("Successful bootstrap ansible playbook execution") + + +def fault_tolerant(scale=1): + """ + Provides the scale argument to the fault-tolerant decorator. + + Args: + - scale: re-attempt delay vector scale factor + + Returns: + - fault-tolerant decorator. + """ + + def fault_tolerant_decorator(func): + """ + Decorator to run a command in a fault-tolerant fashion. + + Args: + - func: The function to be decorated. + + Returns: + - fault-tolerant wrapper + """ + + def fault_tolerant_wrapper(*args, **kwargs): + + """ + Runs a command in a fault-tolerant fashion. + + The function provides a recovery mechanism with progressive re-attempt delays + The first attempt is the normal command execution. If the command fails, the first + re-attempt runs after 2s, and the re-attempt delay goes increasing until 10 min. + The last re-attempts, with longer delays are intended to help the user to + salvage the ongoing installation, if the system does not recover automatically. + + Intentionally, the function does not provide a return code, due to the following + reason. To ensure system integrity, the function stops the program execution, + if it can not achieve a successful result, after a maximum number of retries. + + Hence, any subsequent functions may safely rely on the system integrity. + + Args: + - cmd: The command to be executed. + + Returns: None + """ + + delay = HostTimeout.REATTEMPT_DELAY + reattempt_delay = scale*delay + max_attempts = len(reattempt_delay) + attempt = 1 + while True: + cmd = kwargs['cmd'] + try: + return_code = func(*args, **kwargs) + assert return_code == 0 + break + except AssertionError as exc: + if attempt < max_attempts: + LOG.warning( + "#### Failed command:\n$ %s [attempt: %s/%s]\n", + cmd, attempt, max_attempts + ) + LOG.info( + "Trying again after %s ... 
", + kpi.get_formated_time(reattempt_delay[attempt]) + ) + time.sleep(reattempt_delay[attempt]) + attempt = attempt + 1 + else: + LOG.error( + "#### Failed command:\n$ %s [attempt: %s/%s]\n", + cmd, attempt, max_attempts + ) + raise TimeoutError from exc + except Exception as exc: # pylint: disable=broad-except + LOG.error( + "#### Failed command:\n$ %s\nError: %s", + cmd, repr(exc) + ) + sys.exit(1) + + return fault_tolerant_wrapper + + return fault_tolerant_decorator + + +def exec_cmd(cmd): + + """ + Execute a local command on the host machine in a fault-tolerant fashion. + Refer to the fault_tolerant decorator for more details. + """ + + @fault_tolerant() + def exec_cmd_ft(*args, **kwargs): # pylint: disable=unused-argument + + LOG.info("#### Executing command on the host machine:\n$ %s\n", cmd) + with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as process: + for line in iter(process.stdout.readline, b''): + LOG.info("%s", line.decode("utf-8").strip()) + process.wait() + return process.returncode + + + exec_cmd_ft(**{'cmd': cmd}) diff --git a/virtualbox/pybox/helper/tests/test_install_lab.py b/virtualbox/pybox/helper/tests/test_install_lab.py index 392b3c2..694c17d 100644 --- a/virtualbox/pybox/helper/tests/test_install_lab.py +++ b/virtualbox/pybox/helper/tests/test_install_lab.py @@ -3,9 +3,238 @@ Unit tests related to install_lab """ import unittest -from unittest.mock import MagicMock, patch import install_lab +from consts.timeout import HostTimeout +from helper.install_lab import exec_cmd +from unittest.mock import MagicMock, patch, call +from utils import kpi + + +class ExecCmdTestCase(unittest.TestCase): + """ + Class to test help function exec_cmd + """ + + SUCCESS = 0 + FAILED1 = 1 + + def setUp(self): + """ + Method to set up the parameters used on the tests in this class + """ + self.cmd = 'print ("Hello!")' + self.reattempt_delay = HostTimeout.REATTEMPT_DELAY + self.max_attempts = len(self.reattempt_delay) + self.counter = 0 + 
self.trigger = 0 + self.result = None + + + def dynamicMock(self, *args, **kwargs): + """ + Method to simulate a function with temporary failures + """ + process = MagicMock() + process.__enter__.return_value = process + + self.counter += 1 + + if self.counter == self.trigger: + self.counter = 0 + attrs = { + 'wait.return_value': None, + 'stdout.readline.side_effect': [bytes('Hello!\n', 'utf-8'), b''] + } + process.configure_mock(**attrs) + process.returncode = self.SUCCESS + else: + attrs = { + 'wait.return_value': None, + 'stdout.readline.side_effect': [b''] + } + process.configure_mock(**attrs) + process.returncode = self.FAILED1 + + return process + + + @patch('time.sleep') + @patch('utils.install_log.LOG.error') + @patch('utils.install_log.LOG.warning') + @patch('utils.install_log.LOG.info') + @patch('subprocess.Popen') + def test_exec_cmd_1st_attempt_ok( + self, m_s_Popen, m_LOG_info, m_LOG_warning, m_LOG_error, m_time_sleep + ): + """ + Test successful call in the first attempt (normal case) + This test focus on info messages. No warnings or error messages are expected. 
+ The expect result code is None (refer to the description of fault_tolerant decorator) + """ + # Setup + m_time_sleep.return_value = 0 + process = MagicMock() + attrs = { + 'wait.return_value': None, + 'returncode': self.SUCCESS, + 'stdout.readline.side_effect': [bytes('Hello!\n', 'utf-8'), b''] + } + process.configure_mock(**attrs) + m_s_Popen.return_value.__enter__.return_value = process + + # Run + self.result = exec_cmd(self.cmd) + + # Assert + calls = [ + call("#### Executing command on the host machine:\n$ %s\n", self.cmd), + call("%s", "Hello!"), + ] + m_LOG_info.assert_has_calls(calls, any_order=False) + self.assertEqual(m_LOG_warning.call_count, 0) + self.assertEqual(m_LOG_error.call_count, 0) + self.assertIsNone(self.result) + + + @patch('time.sleep') + @patch('utils.install_log.LOG.error') + @patch('utils.install_log.LOG.warning') + @patch('utils.install_log.LOG.info') + @patch('subprocess.Popen') + def test_exec_cmd_3rd_attempt_ok( + self, m_s_Popen, m_LOG_info, m_LOG_warning, m_LOG_error, m_time_sleep + ): + """ + Test successful call after a few retries (this may occor in system instability scenarios) + This test focus on log info messages, for re-attempt scenarios. + Warning messages are covered in next test. + No error messages are expected. + The expect result code is None (refer to the description of fault_tolerant decorator) + """ + # Setup + m_s_Popen.side_effect = self.dynamicMock + m_time_sleep.return_value = 0 + self.trigger = 3 + + # Run + self.result = exec_cmd(self.cmd) + + # Assert + calls = [ + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[1])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... 
", kpi.get_formated_time(self.reattempt_delay[2])), + + call("#### Executing command on the host machine:\n$ %s\n", self.cmd), + call("%s", "Hello!"), + ] + m_LOG_info.assert_has_calls(calls, any_order=False) + self.assertEqual(m_LOG_error.call_count, 0) + self.assertEqual(m_LOG_warning.call_count, 2) + self.assertIsNone(self.result) + + + @patch('time.sleep') + @patch('utils.install_log.LOG.error') + @patch('utils.install_log.LOG.warning') + @patch('subprocess.Popen') + def test_exec_cmd_5th_attempt_ok( + self, m_s_Popen, m_LOG_warning, m_LOG_error, m_time_sleep + ): + """ + Test successful call after a few retries (thsi may occor in system instability scenarios) + This test focus on warning messages, for re-attempt scenarios. + No error messages are expected. + The expect result code is None (refer to the description of fault_tolerant decorator) + """ + # Setup + m_s_Popen.side_effect = self.dynamicMock + m_time_sleep.return_value = 0 + self.trigger = 5 + + # Run + self.result = exec_cmd(self.cmd) + + # Assert + calls = [ + call('#### Failed command:\n$ %s [attempt: %s/%s]\n', self.cmd, 1, 10), + call('#### Failed command:\n$ %s [attempt: %s/%s]\n', self.cmd, 2, 10), + call('#### Failed command:\n$ %s [attempt: %s/%s]\n', self.cmd, 3, 10), + call('#### Failed command:\n$ %s [attempt: %s/%s]\n', self.cmd, 4, 10), + ] + m_LOG_warning.assert_has_calls(calls, any_order=False) + self.assertEqual(m_LOG_error.call_count, 0) + self.assertIsNone(self.result) + + + @patch("sys.exit") + @patch('time.sleep') + @patch('utils.install_log.LOG.error') + @patch('utils.install_log.LOG.warning') + @patch('utils.install_log.LOG.info') + @patch('subprocess.Popen') + def test_exec_cmd_failed( + self, m_s_Popen, m_LOG_info, m_LOG_warning, m_LOG_error, m_time_sleep, mock_exit + ): + """ + Test unsuccessful call + This may occur after a maximum number of retries, in strong system instability scenarios) + This test focus on info and error messages, for the failure scenarios. 
+ The expected warning messages are the same as for successful calls, covered previously. + The expect result code is None (refer to the description of fault_tolerant decorator) + """ + + # Setup + m_s_Popen.side_effect = self.dynamicMock + m_LOG_warning.return_value = 0 + m_time_sleep.return_value = 0 + mock_exit.side_effect = SystemExit(1) + self.trigger = self.max_attempts+1 + + with self.assertRaises(TimeoutError): + # Run + self.result = exec_cmd(self.cmd) + + # Assert + calls = [ + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[1])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[2])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[3])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[4])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[5])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[6])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[7])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... ", kpi.get_formated_time(self.reattempt_delay[8])), + + call('#### Executing command on the host machine:\n$ %s\n', self.cmd), + call("Trying again after %s ... 
", kpi.get_formated_time(self.reattempt_delay[9])), + ] + m_LOG_info.assert_has_calls(calls, any_order=False) + + m_LOG_error.assert_called_once_with( + "#### Failed command:\n$ %s [attempt: %s/%s]\n", + self.cmd, self.max_attempts, self.max_attempts + ) + self.assertIsNone(self.result) + class UpdatePlatformCpusTestCase(unittest.TestCase): """ @@ -89,12 +318,18 @@ class ConfigControllerTestCase(unittest.TestCase): install_lab.config_controller(self.mock_stream, password=self.mock_password) # Assert - mock_serial.send_bytes.assert_called_once_with( - self.mock_stream, self.command_string, expect_prompt=False - ) + calls = [ + call(self.mock_stream, self.command_string, expect_prompt=False), + call(self.mock_stream, 'echo [$?]', expect_prompt=False, log=False), + ] + mock_serial.send_bytes.assert_has_calls(calls, any_order=False) + calls = [ + call(self.mock_stream, "~$", timeout=HostTimeout.LAB_CONFIG), + call(self.mock_stream, '[0]', timeout=HostTimeout.NORMAL_OP, log=False), + ] + mock_serial.expect_bytes.assert_has_calls(calls, any_order=False) mock_check_password.assert_called_once_with(self.mock_stream, password=self.mock_password) - mock_serial.expect_bytes.assert_called_once_with(self.mock_stream, "~$", - timeout=install_lab.HostTimeout.LAB_CONFIG) + @patch("install_lab.serial") @patch("install_lab.host_helper.check_password") @@ -107,16 +342,21 @@ class ConfigControllerTestCase(unittest.TestCase): mock_serial.expect_bytes.return_value = 1 # Run - with self.assertRaises(Exception): + with self.assertRaises(SystemExit): install_lab.config_controller(self.mock_stream, password=self.mock_password) # Assert - mock_serial.send_bytes.assert_called_once_with( - self.mock_stream, self.command_string, expect_prompt=False - ) + calls = [ + call(self.mock_stream, self.command_string, expect_prompt=False), + call(self.mock_stream, 'echo [$?]', expect_prompt=False, log=False), + ] + mock_serial.send_bytes.assert_has_calls(calls, any_order=False) + calls = [ + 
call(self.mock_stream, "~$", timeout=HostTimeout.LAB_CONFIG), + call(self.mock_stream, '[0]', timeout=HostTimeout.NORMAL_OP, log=False), + ] + mock_serial.expect_bytes.assert_has_calls(calls, any_order=False) mock_check_password.assert_called_once_with(self.mock_stream, password=self.mock_password) - mock_serial.expect_bytes.assert_called_once_with(self.mock_stream, "~$", - timeout=install_lab.HostTimeout.LAB_CONFIG) if __name__ == '__main__': diff --git a/virtualbox/pybox/helper/vboxmanage.py b/virtualbox/pybox/helper/vboxmanage.py index 7ed688a..f773680 100644 --- a/virtualbox/pybox/helper/vboxmanage.py +++ b/virtualbox/pybox/helper/vboxmanage.py @@ -22,10 +22,8 @@ def vboxmanage_version(): Return version of vbox. """ - version = subprocess.check_output( - ["vboxmanage", "--version"], stderr=subprocess.STDOUT - ) - + cmd = ["vboxmanage", "--version"] + version = subprocess.check_output(cmd, stderr=subprocess.STDOUT) return version @@ -40,17 +38,18 @@ def vboxmanage_extpack(): LOG.info("Downloading extension pack") filename = f"Oracle_VM_VirtualBox_Extension_Pack-{version_path}.vbox-extpack" - cmd = f"http://download.virtualbox.org/virtualbox/{version_path}/{filename}" - result = subprocess.check_output( - ["wget", cmd, "-P", "/tmp"], stderr=subprocess.STDOUT - ) + cmd = [ + "wget", + f"http://download.virtualbox.org/virtualbox/{version_path}/{filename}", + "-P", + "/tmp" + ] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOG.info(result) LOG.info("Installing extension pack") - result = subprocess.check_output( - ["vboxmanage", "extpack", "install", "/tmp/" + filename, "--replace"], - stderr=subprocess.STDOUT, - ) + cmd = ["vboxmanage", "extpack", "install", "/tmp/" + filename, "--replace"] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOG.info(result) @@ -60,7 +59,7 @@ def get_all_vms(labname, option="vms"): Args: labname (str): The name of the lab to which the VMs belong. 
- option (str, optional): The VBoxManage command option to use when listing VMs. + option (str, optional): The vboxmanage command option to use when listing VMs. Defaults to "vms". Returns: @@ -120,6 +119,7 @@ def take_snapshot(labname, snapshot_name): _resume_running_vms(runningvms) + LOG.info("Waiting 10s before running VMs") time.sleep(10) if runningvms: @@ -213,18 +213,22 @@ def restore_snapshot(node_list, name): LOG.info("Restore snapshot of %s for hosts %s", name, node_list) if len(node_list) != 0: vboxmanage_controlvms(node_list, "poweroff") + LOG.info("Waiting 5s") time.sleep(5) if len(node_list) != 0: for host in node_list: vboxmanage_restoresnapshot(host, name) + LOG.info("Waiting 5s") time.sleep(5) for host in node_list: if "controller-0" not in host: vboxmanage_startvm(host) + LOG.info("Waiting 10s") time.sleep(10) for host in node_list: if "controller-0" in host: vboxmanage_startvm(host) + LOG.info("Waiting 10s") time.sleep(10) @@ -233,9 +237,8 @@ def vboxmanage_list(option="vms"): This returns a list of vm names. 
""" - result = subprocess.check_output( - ["vboxmanage", "list", option], stderr=subprocess.STDOUT - ) + cmd = ["vboxmanage", "list", option] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) vms_list = [] for item in result.splitlines(): vm_name = re.match(b'"(.*?)"', item) @@ -251,10 +254,8 @@ def vboxmanage_showinfo(host): if not isinstance(host, str): host.decode("utf-8") - result = subprocess.check_output( - ["vboxmanage", "showvminfo", host, "--machinereadable"], - stderr=subprocess.STDOUT, - ) + cmd = ["vboxmanage", "showvminfo", host, "--machinereadable"] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) return result @@ -267,20 +268,18 @@ def vboxmanage_createvm(hostname, labname): assert labname, "Labname is required" group = "/" + labname LOG.info("Creating VM %s", hostname) - subprocess.check_output( - [ - "vboxmanage", - "createvm", - "--name", - hostname, - "--register", - "--ostype", - "Linux_64", - "--groups", - group, - ], - stderr=subprocess.STDOUT, - ) + cmd = [ + "vboxmanage", + "createvm", + "--name", + hostname, + "--register", + "--ostype", + "Linux_64", + "--groups", + group, + ] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) def vboxmanage_deletevms(hosts=None): @@ -293,10 +292,9 @@ def vboxmanage_deletevms(hosts=None): if len(hosts) != 0: for hostname in hosts: LOG.info("Deleting VM %s", hostname) - subprocess.check_output( - ["vboxmanage", "unregistervm", hostname, "--delete"], - stderr=subprocess.STDOUT, - ) + cmd = ["vboxmanage", "unregistervm", hostname, "--delete"] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) + LOG.info("Waiting 10s") time.sleep(10) # in case medium is still present after delete vboxmanage_deletemedium(hostname) @@ -318,25 +316,21 @@ def vboxmanage_hostonlyifcreate(name="vboxnet0", oam_ip=None, netmask=None): assert netmask, "Must provide an OAM Netmask" LOG.info("Creating Host-only Network") - - subprocess.check_output( - ["vboxmanage", "hostonlyif", "create"], 
stderr=subprocess.STDOUT - ) + cmd = ["vboxmanage", "hostonlyif", "create"] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOG.info("Provisioning %s with IP %s and Netmask %s", name, oam_ip, netmask) - subprocess.check_output( - [ - "vboxmanage", - "hostonlyif", - "ipconfig", - name, - "--ip", - oam_ip, - "--netmask", - netmask, - ], - stderr=subprocess.STDOUT, - ) + cmd = [ + "vboxmanage", + "hostonlyif", + "ipconfig", + name, + "--ip", + oam_ip, + "--netmask", + netmask, + ] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) def vboxmanage_hostonlyifdelete(name="vboxnet0"): @@ -346,9 +340,8 @@ def vboxmanage_hostonlyifdelete(name="vboxnet0"): assert name, "Must provide network name" LOG.info("Removing Host-only Network") - subprocess.check_output( - ["vboxmanage", "hostonlyif", "remove", name], stderr=subprocess.STDOUT - ) + cmd = ["vboxmanage", "hostonlyif", "remove", name] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) def vboxmanage_modifyvm(hostname, vm_config=None): @@ -397,9 +390,8 @@ def vboxmanage_modifyvm(hostname, vm_config=None): cmd.extend(["--boot4"]) cmd.extend(["net"]) - LOG.info(cmd) - - LOG.info("Updating VM %s configuration", hostname) + LOG.info("#### Updating VM %s configuration", hostname) + LOG.info("#### Executing command on the host machine:\n$ %s\n", ' '.join(str(i) for i in cmd)) subprocess.check_output(cmd, stderr=subprocess.STDOUT) @@ -525,21 +517,20 @@ def vboxmanage_storagectl(hostname=None, storectl="sata", hostiocache="off"): assert hostname, "Hostname is required" assert storectl, "Type of storage controller is required" + LOG.info("Creating %s storage controller on VM %s", storectl, hostname) - subprocess.check_output( - [ - "vboxmanage", - "storagectl", - hostname, - "--name", - storectl, - "--add", - storectl, - "--hostiocache", - hostiocache, - ], - stderr=subprocess.STDOUT, - ) + cmd = [ + "vboxmanage", + "storagectl", + hostname, + "--name", + storectl, + "--add", + storectl, + "--hostiocache", + 
hostiocache, + ] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) def vboxmanage_storageattach(hostname, storage_config): @@ -552,7 +543,7 @@ def vboxmanage_storageattach(hostname, storage_config): Possible key values: storectl, storetype, disk, port_num, device_num. Returns: - str: The output of the VBoxManage command. + str: The output of the vboxmanage command. """ assert hostname, "Hostname is required" @@ -574,25 +565,22 @@ def vboxmanage_storageattach(hostname, storage_config): storectl, hostname, ) - - return subprocess.check_output( - [ - "vboxmanage", - "storageattach", - hostname, - "--storagectl", - storectl, - "--medium", - disk, - "--type", - storetype, - "--port", - port_num, - "--device", - device_num, - ], - stderr=subprocess.STDOUT, - ) + cmd = [ + "vboxmanage", + "storageattach", + hostname, + "--storagectl", + storectl, + "--medium", + disk, + "--type", + storetype, + "--port", + port_num, + "--device", + device_num, + ] + return subprocess.check_output(cmd, stderr=subprocess.STDOUT) def vboxmanage_deletemedium(hostname, vbox_home_dir="/home"): @@ -625,20 +613,18 @@ def vboxmanage_deletemedium(hostname, vbox_home_dir="/home"): for disk in disk_list: LOG.info("Disconnecting disk %s from vbox.", disk) try: - result = subprocess.check_output( - [ - "vboxmanage", - "closemedium", - "disk", - f"{vbox_home_dir}{disk}", - "--delete", - ], - stderr=subprocess.STDOUT, - ) + cmd = [ + "vboxmanage", + "closemedium", + "disk", + f"{vbox_home_dir}{disk}", + "--delete", + ] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOG.info(result) except subprocess.CalledProcessError as exception: # Continue if failures, disk may not be present - LOG.info( + LOG.warning( "Error disconnecting disk, continuing. 
" "Details: stdout: %s stderr: %s", exception.stdout, @@ -647,8 +633,8 @@ def vboxmanage_deletemedium(hostname, vbox_home_dir="/home"): LOG.info("Removing backing file %s", disk) try: os.remove(f"{vbox_home_dir}{disk}") - except: # pylint: disable=bare-except - pass + except Exception as exc: + LOG.debug("Failure at removing backing file\nError: %s\n", repr(exc)) def vboxmanage_createmedium(hostname=None, disk_list=None, vbox_home_dir="/home"): @@ -691,25 +677,23 @@ def vboxmanage_createmedium(hostname=None, disk_list=None, vbox_home_dir="/home" ) try: - result = subprocess.check_output( - [ - "vboxmanage", - "createmedium", - "disk", - "--size", - str(disk), - "--filename", - file_name, - "--format", - "vdi", - "--variant", - "standard", - ], - stderr=subprocess.STDOUT, - ) + cmd = [ + "vboxmanage", + "createmedium", + "disk", + "--size", + str(disk), + "--filename", + file_name, + "--format", + "vdi", + "--variant", + "standard", + ] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOG.info(result) except subprocess.CalledProcessError as exception: - LOG.info("Error stdout: %s stderr: %s", exception.stdout, exception.stderr) + LOG.error("Error stdout: %s stderr: %s", exception.stdout, exception.stderr) raise vboxmanage_storageattach( hostname, @@ -723,6 +707,8 @@ def vboxmanage_createmedium(hostname=None, disk_list=None, vbox_home_dir="/home" ) disk_count += 1 port_num += 1 + + LOG.info("Waiting 5s") time.sleep(5) @@ -747,9 +733,8 @@ def vboxmanage_startvm(hostname=None, headless=False, force=False): LOG.info("Host %s is already started", hostname) else: LOG.info("Powering on VM %s", hostname) - result = subprocess.check_output( - ["vboxmanage", "startvm", hostname, "--type", interface_type], stderr=subprocess.STDOUT - ) + cmd = ["vboxmanage", "startvm", hostname, "--type", interface_type] + result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOG.info(result) # Wait for VM to start @@ -808,6 +793,8 @@ def 
vboxmanage_restoresnapshot(host=None, name=None): subprocess.call( ["vboxmanage", "snapshot", host, "restore", name], stderr=subprocess.STDOUT ) + + LOG.info("Waiting 10s") time.sleep(10) @@ -822,6 +809,7 @@ def vboxmanage_getrulename(network, local_port): Returns: (str): Name of rule or empty """ + # List information about all nat networks in VirtualBox cmd = ["vboxmanage", "list", "natnets"] result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) @@ -861,7 +849,9 @@ def vboxmanage_addportforward(rule_name, local_port, guest_ip, guest_port, netwo True if the port was added False if an error occurred when trying to add the port-forward rule. """ + rule = f"{rule_name}:tcp:[]:{local_port}:[{guest_ip}]:{guest_port}" + LOG.info("Creating port-forwarding rule to: %s", rule) cmd = [ "vboxmanage", @@ -891,6 +881,7 @@ def vboxmanage_deleteportforward(rule_name, network): Returns: None """ + LOG.info( "Removing previous forwarding rule '%s' from NAT network '%s'", rule_name, diff --git a/virtualbox/pybox/install_vbox.py b/virtualbox/pybox/install_vbox.py index 964854e..15bd9e1 100755 --- a/virtualbox/pybox/install_vbox.py +++ b/virtualbox/pybox/install_vbox.py @@ -27,16 +27,22 @@ from utils.sftp import sftp_send, send_dir from helper import vboxmanage from helper import install_lab from helper import host_helper +from helper.install_lab import exec_cmd, fault_tolerant from consts.node import Nodes -from consts.networking import NICs, OAM, Serial +from consts.networking import NICs, OAM, MGMT, Serial from consts.timeout import HostTimeout from Parser import handle_args + # Global vars V_BOX_OPTIONS = None +# Network +OAM_CONFIG = [getattr(OAM, attr) for attr in dir(OAM) if not attr.startswith('__')] +MGMT_CONFIG = [getattr(MGMT, attr) for attr in dir(MGMT) if not attr.startswith('__')] + def menu_selector(stream, setup_type, securityprofile, lowlatency, install_mode='serial'): @@ -45,36 +51,40 @@ def menu_selector(stream, setup_type, """ # Wait for menu to load (add 
sleep so we can see what is picked) - serial.expect_bytes(stream, "Press") + serial.expect_bytes(stream, "Press", log=False) + time.sleep(2) # Pick install type if setup_type in [AIO_SX, AIO_DX]: LOG.info("Selecting All-in-one Install") - serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False) + serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False, log=False) + time.sleep(2) if lowlatency is True: LOG.info("Selecting All-in-one (lowlatency) Install") - serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False) + serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False, log=False) + time.sleep(2) else: - LOG.info("Selecting Controller Install") - serial.send_bytes(stream, "\n", expect_prompt=False, send=False) - time.sleep(4) + LOG.info("Selecting Standard Install") + serial.send_bytes(stream, "\n", expect_prompt=False, send=False, log=False) + time.sleep(2) # Serial or Graphical menu (picking Serial by default) if install_mode == "graphical": LOG.info("Selecting Graphical menu") - serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False) + serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False, log=False) + time.sleep(2) else: LOG.info("Selecting Serial menu") - serial.send_bytes(stream, "\n", expect_prompt=False, send=False) - time.sleep(6) + serial.send_bytes(stream, "\n", expect_prompt=False, send=False, log=False) + time.sleep(2) # Security profile menu if securityprofile == "extended": LOG.info("Selecting extended security profile") - serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False) + serial.send_bytes(stream, "\033[B", expect_prompt=False, send=False, log=False) + time.sleep(2) + serial.send_bytes(stream, "\n", expect_prompt=False, send=False, log=False) time.sleep(2) - serial.send_bytes(stream, "\n", expect_prompt=False, send=False) - time.sleep(4) def setup_networking(stream, ctrlr0_ip, gateway_ip, password): @@ -83,7 +93,7 @@ def setup_networking(stream, ctrlr0_ip, 
gateway_ip, password): """ ip_addr = ctrlr0_ip - interface = "enp0s3" + interface = OAM_CONFIG[0]['device'] ret = serial.send_bytes( stream, "/sbin/ip address list", @@ -113,7 +123,7 @@ def setup_networking(stream, ctrlr0_ip, gateway_ip, password): if V_BOX_OPTIONS.vboxnet_type == 'hostonly': LOG.info("Pinging controller-0 at: %s...", ip_addr) - tmout = HostTimeout.NETWORKING_OPERATIONAL + tmout = HostTimeout.NORMAL_OP while tmout: # Ping from machine hosting virtual box to virtual machine return_code = subprocess.call(['ping', '-c', '1', ip_addr]) @@ -135,7 +145,7 @@ def fix_networking(stream, release, password): if release == "R2": interface = "eth0" else: - interface = "enp0s3" + interface = OAM_CONFIG[0]['device'] LOG.info("Fixing networking ...") serial.send_bytes(stream, f"sudo /sbin/ip link set {interface} down", @@ -210,13 +220,13 @@ def install_controller_0(cont0_stream, menu_select_dict, network_dict): LOG.info("Completed installation of controller-0.") # Change password on initial login - time.sleep(20) + time.sleep(2) host_helper.change_password( cont0_stream, username=username, password=password) # Disable user logout - time.sleep(10) + time.sleep(2) host_helper.disable_logout(cont0_stream) # Setup basic networking time.sleep(1) @@ -262,8 +272,7 @@ def get_disk_sizes(comma_list): val = int(size) if val < 0: LOG.info("Disk sizes must be a comma separated list of positive integers.") - # pylint: disable=E0012, W0719 - raise Exception("Disk sizes must be a comma separated list of positive integers.") + raise ValueError("Disk sizes must be a comma separated list of positive integers.") return sizes @@ -281,15 +290,20 @@ def create_port_forward(hostname, network, local_port, guest_port, guest_ip): Returns: None """ - if not vboxmanage.vboxmanage_addportforward(hostname, local_port, guest_ip, guest_port, network): + if not vboxmanage.vboxmanage_addportforward( + hostname, local_port, guest_ip, guest_port, network + ): rule_name = 
vboxmanage.vboxmanage_getrulename(network, local_port) if not rule_name: LOG.info( - "Could not add a port-forwarding rule using port %s, and could not find any rule already using it. Check the Nat Network and/or local port.", local_port) + "Could not add a port-forwarding rule using port %s, " + "and could not find any rule already using it. " + "Check the Nat Network and/or local port.", local_port) LOG.info("Aborting!") sys.exit(1) LOG.info( - "Trying to create a port-forwarding rule with port: %s, but it is already in use with rule name: %s", + "Trying to create a port-forwarding rule with port: %s, " + "but it is already in use with rule name: %s", local_port, rule_name) @@ -297,7 +311,9 @@ def create_port_forward(hostname, network, local_port, guest_port, guest_ip): choice = input().lower() if choice == 'y': vboxmanage.vboxmanage_deleteportforward(rule_name, network) - vboxmanage.vboxmanage_addportforward(hostname, local_port, guest_ip, guest_port, network) + vboxmanage.vboxmanage_addportforward( + hostname, local_port, guest_ip, guest_port, network + ) else: LOG.info("Ignoring the creation of the port-forward rule and continuing installation!") @@ -313,8 +329,6 @@ def create_lab(m_vboxoptions): for attr in dir(Nodes) if not attr.startswith('__')] nic_config = [getattr(NICs, attr) for attr in dir(NICs) if not attr.startswith('__')] - # oam_config = [getattr(OAM, attr) - # for attr in dir(OAM) if not attr.startswith('__')][0] serial_config = [getattr(Serial, attr) for attr in dir(Serial) if not attr.startswith('__')] @@ -440,27 +454,44 @@ def create_lab(m_vboxoptions): # Add port forwarding rules for controllers nat interfaces if m_vboxoptions.vboxnet_type == 'nat' and 'controller' in node: if 'controller-0' in node: - local_port = m_vboxoptions.nat_controller0_local_ssh_port - ip_addr = m_vboxoptions.controller0_ip + create_port_forward( + node, + m_vboxoptions.vboxnet_name, + local_port=m_vboxoptions.nat_controller0_local_ssh_port, + guest_port='22', + 
guest_ip=m_vboxoptions.controller0_ip + ) - # Add port forward rule for StarlingX dashboard visualization at 8080 - rule_name = m_vboxoptions.labname + "-horizon-dashbord" - create_port_forward(rule_name, - m_vboxoptions.vboxnet_name, - local_port=m_vboxoptions.horizon_dashboard_port, - guest_port='8080', - guest_ip=ip_addr) + # Add port forward rule for the floating active controller + if V_BOX_OPTIONS.setup_type not in [AIO_SX]: + create_port_forward( + m_vboxoptions.labname + '-controller', + m_vboxoptions.vboxnet_name, + local_port=m_vboxoptions.nat_controller_floating_ssh_port, + guest_port='22', + guest_ip=m_vboxoptions.controller_floating_ip + ) + # Add port forward rule for the StarlingX Dashboard + if V_BOX_OPTIONS.setup_type in [AIO_SX]: + ip_addr = m_vboxoptions.controller0_ip + else: + ip_addr = m_vboxoptions.controller_floating_ip + create_port_forward( + m_vboxoptions.labname + "-horizon-dashbord", + m_vboxoptions.vboxnet_name, + local_port=m_vboxoptions.horizon_dashboard_port, + guest_port='8080', + guest_ip=ip_addr + ) elif 'controller-1' in node: - local_port = m_vboxoptions.nat_controller1_local_ssh_port - ip_addr = m_vboxoptions.controller1_ip - create_port_forward( - node, - m_vboxoptions.vboxnet_name, - local_port=local_port, - guest_port='22', - guest_ip=ip_addr - ) + create_port_forward( + node, + m_vboxoptions.vboxnet_name, + local_port=m_vboxoptions.nat_controller1_local_ssh_port, + guest_port='22', + guest_ip=m_vboxoptions.controller1_ip + ) ctrlr0 = m_vboxoptions.labname + '-controller-0' vboxmanage.vboxmanage_storagectl( @@ -503,7 +534,8 @@ def override_ansible_become_pass(): sys.exit(1) # modify the password with the one passed on the python call - loaded['ansible_become_pass'] = V_BOX_OPTIONS.password + loaded['admin_password'] = V_BOX_OPTIONS.password + loaded['ansible_become_pass'] = V_BOX_OPTIONS.sysadmin_password #Save it again try: @@ -640,31 +672,32 @@ def create_host_bulk_add(): return host_xml -# serial_prompt_configured = 
False - - -def wait_for_hosts(ssh_client, hostnames, status, - timeout=HostTimeout.HOST_INSTALL, interval=30): +def wait_for_hosts( + ssh_client, hostnames, status, + timeout=HostTimeout.HOST_INSTALL, interval=60): """ Wait for a given interval for the host(s) to reach the expected status. """ + LOG.info("Waiting for hosts %s to be in status %s", hostnames, status) start_time = time.time() while hostnames: - LOG.info("Hosts not %s: %s", status, hostnames) if (time.time() - start_time) > HostTimeout.HOST_INSTALL: LOG.info("VMs not booted in %s, aborting: %s", timeout, hostnames) - raise Exception(f"VMs failed to go {status}!") # pylint: disable=E0012, W0719 + raise TimeoutError(f"VMs failed to go {status}!") # Get host list host_statuses, _, _ = run_ssh_cmd( - ssh_client, 'source /etc/platform/openrc; system host-list', timeout=30) + ssh_client, 'source /etc/platform/openrc; system host-list', + timeout=HostTimeout.NORMAL_OP + ) host_statuses = host_statuses[1:-1] for host_status in host_statuses: for host in hostnames: if host in host_status and status in host_status: hostnames.remove(host) if hostnames: + LOG.info("Hosts not %s: %s", status, hostnames) LOG.info("Waiting %s sec before re-checking host status.", interval) time.sleep(interval) @@ -675,8 +708,9 @@ CONSOLE_ROOT_MODE = 'root' SERIAL_CONSOLE_MODE = CONSOLE_UNKNOWN_MODE -def run_ssh_cmd(ssh_client, cmd, timeout=5, - log_output=True, mode=CONSOLE_USER_MODE): +def run_ssh_cmd( + ssh_client, cmd, timeout=5, + log_output=True, mode=CONSOLE_USER_MODE): """ Execute an arbitrary command on a target. 
""" @@ -684,7 +718,7 @@ def run_ssh_cmd(ssh_client, cmd, timeout=5, if mode == CONSOLE_ROOT_MODE: LOG.info(">>>>>") cmd = f"sudo {cmd}" - LOG.info("#### Running command over ssh: '%s'", cmd) + LOG.info("#### Executing remote command:\n$ %s\n", cmd) stdin, stdout, stderr = ssh_client.exec_command(cmd, timeout, get_pty=True) if mode == CONSOLE_ROOT_MODE: stdin.write(f'{V_BOX_OPTIONS.password}\n') @@ -700,13 +734,45 @@ def run_ssh_cmd(ssh_client, cmd, timeout=5, if log_output and stderr_lines: LOG.info("stderr:|\n%s", "".join(stderr_lines)) return_code = stdout.channel.recv_exit_status() - LOG.info("Return code: %s", return_code) + LOG.info("Return code: %s\n", return_code) if mode == CONSOLE_ROOT_MODE: # Cut sudo's password echo and "Password:" string from output stdout_lines = stdout_lines[2:] return stdout_lines, stderr_lines, return_code +def run_ssh_cmd_list( # pylint: disable=too-many-arguments + ssh_client, cmd_list, timeout=5, + log_output=True, mode=CONSOLE_USER_MODE, scale=1): + """ + Execute a list of commands on the StarlingX VM in a fault-tolerant fashion. + Refer to the fault_tolerant decorator for more details. 
+ """ + + for cmd in cmd_list: + + if ('system ' in cmd) or ('dcmanager ' in cmd): + cmd = 'source /etc/platform/openrc; ' + cmd + + @fault_tolerant(scale=scale) + def run_ssh_cmd_ft(*args, **kwargs): # pylint: disable=unused-argument + + _, _, return_code = run_ssh_cmd( + ssh_client, + cmd, # pylint: disable=cell-var-from-loop + timeout=timeout, + log_output=log_output, + mode=mode + ) + return return_code + + + run_ssh_cmd_ft(**{'cmd': cmd}) + + # Give 1s before running the next command on the list + time.sleep(1) + + def set_serial_prompt_mode(stream, mode): """ To make sure that we are at the correct prompt, @@ -729,14 +795,16 @@ def set_serial_prompt_mode(stream, mode): if serial.expect_bytes(stream, "ogin:", fail_ok=True, timeout=4): LOG.info("Expected login prompt, connect to console" \ "stop any running processes and log out.") - raise Exception("Failure getting login prompt on serial console!") # pylint: disable=E0012, W0719 + raise RuntimeError("Failure getting login prompt on serial console!") serial.send_bytes( stream, V_BOX_OPTIONS.username, prompt="assword:", timeout=30) - if serial.send_bytes(stream, V_BOX_OPTIONS.password, prompt="~$", fail_ok=True, timeout=30): - raise Exception("Login failure, invalid password?") # pylint: disable=E0012, W0719 + if serial.send_bytes( + stream, V_BOX_OPTIONS.password, prompt="~$", fail_ok=True, timeout=30 + ): + raise ValueError("Login failure, invalid password?") if mode == CONSOLE_USER_MODE: serial.send_bytes(stream, "source /etc/platform/openrc\n", timeout=30, prompt='keystone') @@ -768,8 +836,8 @@ def serial_prompt_mode(mode): function: A decorator function that sets the serial console login prompt to the specified mode. 
""" - def real_decorator(func): - def func_wrapper(*args, **kwargs): + def serial_prompt_decorator(func): + def serial_prompt_wrapper(*args, **kwargs): try: set_serial_prompt_mode(kwargs['stream'], mode) except: # pylint: disable=bare-except @@ -777,9 +845,9 @@ def serial_prompt_mode(mode): set_serial_prompt_mode(kwargs['stream'], mode) return func(*args, **kwargs) - return func_wrapper + return serial_prompt_wrapper - return real_decorator + return serial_prompt_decorator def _connect_to_serial(virtual_machine=None): @@ -802,7 +870,7 @@ def connect_to_serial(func): calls the decorated function, and then disconnects from the serial console. """ - def func_wrapper(*args, **kwargs): + def connect_to_serial_wrapper(*args, **kwargs): sock = None try: sock, kwargs['stream'] = _connect_to_serial() @@ -810,13 +878,17 @@ def connect_to_serial(func): finally: serial.disconnect(sock) - return func_wrapper + return connect_to_serial_wrapper -def _connect_to_ssh(my_stage=1): +def _connect_to_ssh(node='floating'): # Get ip and port for ssh on floating ip - ip_addr, port = get_ssh_ip_and_port(my_stage=my_stage) + ip_addr, port = get_ssh_ip_and_port(node) + + LOG.info("Testing VM port") + cmd = f'nc -vz localhost {port}' + exec_cmd(cmd) # Remove ssh key # For hostonly adapter we remove port 22 of controller ip @@ -827,48 +899,93 @@ def _connect_to_ssh(my_stage=1): keygen_arg = f"[127.0.0.1]:{port}" else: keygen_arg = ip_addr - cmd = f'ssh-keygen -f "/home/{getpass.getuser()}/.ssh/known_hosts" -R {keygen_arg}' - LOG.info("CMD: %s", cmd) - with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as process: - for line in iter(process.stdout.readline, b''): - LOG.info("%s", line.decode("utf-8").strip()) - process.wait() + cmd = f'ssh-keygen -f "/home/{getpass.getuser()}/.ssh/known_hosts" -R {keygen_arg} 2>/dev/null' + exec_cmd(cmd) # Connect to ssh ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - 
ssh.connect(ip_addr, port=port, username=V_BOX_OPTIONS.username, - password=V_BOX_OPTIONS.password, look_for_keys=False, allow_agent=False) + attempt = 1 + reattempt_delay = HostTimeout.REATTEMPT_DELAY + max_attempts = len(reattempt_delay) + try: + while True: + ssh.connect( + ip_addr, + port=port, + username=V_BOX_OPTIONS.username, + password=V_BOX_OPTIONS.password, + look_for_keys=False, + allow_agent=False, + timeout=HostTimeout.NORMAL_OP, + auth_timeout=HostTimeout.NORMAL_OP, + ) + break + except paramiko.AuthenticationException as exc: + if attempt < max_attempts: + LOG.warning( + "#### Failed SSH Authentication [attempt: %s/%s]", + attempt, max_attempts + ) + LOG.info( + "Trying again after %s ... ", + kpi.get_formated_time(reattempt_delay[attempt]) + ) + time.sleep(reattempt_delay[attempt]) + attempt = attempt + 1 + else: + LOG.error( + "#### Failed SSH Authentication [attempt: %s/%s]\nError: %s", + attempt, max_attempts, repr(exc) + ) + raise + + except Exception as exc: + LOG.error("#### Failed SSH connection\nError: %s", repr(exc)) + raise + return ssh -def connect_to_ssh(func): +def connect_to_ssh(node='floating'): """ - Decorator function to establish a SSH connection before executing the function - and close the connection afterwards. - + Provides the node argument to the Connect-to-SSH decorator. Args: - - func: The function to be decorated. - + - node: target IP to connect: controller-0, controller-1, or floating IP Returns: - - The decorated function that has a SSH connection established before executing the function. 
+ - Connect-to-SSH decorator """ - def func_wrapper(*args, **kwargs): - try: - if 'm_stage' in kwargs and 'm_stage' is not None: - m_stage = kwargs['m_stage'] - ssh = _connect_to_ssh(m_stage) - else: - ssh = _connect_to_ssh() - kwargs['ssh_client'] = ssh - return func(*args, **kwargs) - finally: - ssh.close() + def connect_to_ssh_decorator(func): + """ + Decorator to establish an SSH connection + Args: + - func: The function to be decorated. + Returns: + - Connect-to-SSH wrapper + """ - return func_wrapper + def connect_to_ssh_wrapper(*args, **kwargs): + """ + Establishes an SSH connection before executing the decorated function + Provides the ssh_client argument to the decorated function + + Returns: return code of decorated function + """ + + try: + ssh = _connect_to_ssh(node) + kwargs['ssh_client'] = ssh + return func(*args, **kwargs) + finally: + if ssh: + ssh.close() + + return connect_to_ssh_wrapper + + return connect_to_ssh_decorator def stage_test_success(): @@ -880,13 +997,13 @@ def stage_test_success(): def stage_test_fail(): """ Prints a log message indicating the execution of a test stage and raises an exception. - + Raises: - - Exception: Always raises an exception. + - Exception: Always raises an exception. 
""" LOG.info("Executing stage_test_success") - raise Exception("exception as of stage_test_fail") # pylint: disable=E0012, W0719 + raise RuntimeError("exception as of stage_test_fail") def stage_create_lab(): @@ -897,7 +1014,6 @@ def stage_create_lab(): delete_lab(V_BOX_OPTIONS.labname, V_BOX_OPTIONS.force_delete_lab) create_lab(V_BOX_OPTIONS) - # time.sleep(2) def stage_install_controller0(): @@ -940,7 +1056,6 @@ def stage_install_controller0(): } ) serial.disconnect(sock) - time.sleep(5) @connect_to_serial @@ -985,6 +1100,7 @@ def stage_config_controller(stream): # pylint: disable=too-many-locals install_lab.config_controller(stream, V_BOX_OPTIONS.password) # Wait for services to stabilize + LOG.info("Waiting 120s for services to stabilize.") time.sleep(120) if V_BOX_OPTIONS.setup_type == AIO_SX: @@ -992,7 +1108,7 @@ def stage_config_controller(stream): # pylint: disable=too-many-locals install_lab.update_platform_cpus(stream, 'controller-0') -def get_ssh_ip_and_port(node="", my_stage=1): +def get_ssh_ip_and_port(node='floating'): """ This function returns the IP address and port of the specified node to use for an SSH connection. @@ -1000,8 +1116,6 @@ def get_ssh_ip_and_port(node="", my_stage=1): Args: node (str, optional): The node to get the IP address and port for. Valid values are "floating" (default), "controller-0", and "controller-1". - my_stage (int, optional): The stage of lab_setup. When an installation is - already started, use this value to determine which node to connect. Returns: tuple: A tuple containing the IP address and port of the specified node. @@ -1010,39 +1124,35 @@ def get_ssh_ip_and_port(node="", my_stage=1): Exception: If an undefined node is specified. 
""" - if my_stage in (1, 2): - node = 'controller-0' - else: - node = 'controller-1' - if V_BOX_OPTIONS.vboxnet_type == 'nat': ip_addr = '127.0.0.1' - if node == 'controller-0': + if node == 'floating': + if V_BOX_OPTIONS.setup_type in [AIO_SX]: + port = V_BOX_OPTIONS.nat_controller0_local_ssh_port + else: + port = V_BOX_OPTIONS.nat_controller_floating_ssh_port + elif node == 'controller-0': port = V_BOX_OPTIONS.nat_controller0_local_ssh_port elif node == 'controller-1': port = V_BOX_OPTIONS.nat_controller1_local_ssh_port else: - raise Exception(f"Undefined node '{node}'") # pylint: disable=E0012, W0719 + raise ValueError(f"Undefined node '{node}'") else: if node == 'floating': - if V_BOX_OPTIONS.setup_type != 'AIO-SX': - ip_addr = V_BOX_OPTIONS.controller_floating_ip - else: + if V_BOX_OPTIONS.setup_type in [AIO_SX]: ip_addr = V_BOX_OPTIONS.controller0_ip + else: + ip_addr = V_BOX_OPTIONS.controller_floating_ip elif node == 'controller-0': ip_addr = V_BOX_OPTIONS.controller0_ip elif node == 'controller-1': ip_addr = V_BOX_OPTIONS.controller1_ip else: - raise Exception(f"Undefined node '{node}'") # pylint: disable=E0012, W0719 + raise ValueError(f"Undefined node '{node}'") port = 22 return ip_addr, port -# @connect_to_serial -# @serial_prompt_mode(CONSOLE_USER_MODE) - - def stage_rsync_config(): """ Rsync the local configuration files with the remote host's configuration files. 
@@ -1114,55 +1224,117 @@ def _run_lab_setup_serial(stream): timeout=3, prompt='Return code: [0]') -@connect_to_ssh -def _run_lab_setup(m_stage, ssh_client): +@connect_to_ssh() +def _run_lab_setup(ssh_client): conf_str = "" for cfg_file in V_BOX_OPTIONS.lab_setup_conf: conf_str = conf_str + f" -f {cfg_file}" - command = f'source /etc/platform/openrc; export ' \ - f'PATH="$PATH:/usr/local/bin"; export PATH="$PATH:/usr/bin"; ' \ - f'export PATH="$PATH:/usr/local/sbin"; export ' \ - f'PATH="$PATH:/usr/sbin"; sh lab_setup{m_stage}.sh' + command = 'source /etc/platform/openrc; ' \ + 'export PATH="$PATH:/usr/local/bin"; ' \ + 'export PATH="$PATH:/usr/bin"; ' \ + 'export PATH="$PATH:/usr/local/sbin"; ' \ + 'export PATH="$PATH:/usr/sbin"; ' \ + 'sh lab_setup.sh' _, _, exitcode = run_ssh_cmd(ssh_client, command, timeout=HostTimeout.LAB_INSTALL) if exitcode != 0: msg = f"Lab setup failed, expecting exit code of 0 but got {exitcode}." LOG.info(msg) - raise Exception(msg) # pylint: disable=E0012, W0719 + raise RuntimeError(msg) -def stage_lab_setup1(): - """Calls _run_lab_setup with ssh_client 1""" +def stage_lab_setup(): + """Calls _run_lab_setup with ssh_client""" - _run_lab_setup(m_stage=1) # pylint: disable=no-value-for-parameter - -def stage_lab_setup2(): - """Calls _run_lab_setup with ssh_client 2""" - - _run_lab_setup(m_stage=2) # pylint: disable=no-value-for-parameter + _run_lab_setup() # pylint: disable=no-value-for-parameter -def stage_lab_setup3(): - """Calls _run_lab_setup with ssh_client 3""" +@connect_to_ssh('controller-0') +def stage_setup_controller_0(ssh_client): + """Provision controller-0 networking and OSD storage""" - _run_lab_setup(3) # pylint: disable=no-value-for-parameter + try: + # The CLI commands below are executed on controller-0 + LOG.info("#### Display system info") + commands = [ + r'source /etc/platform/openrc;', + r'system show;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + + LOG.info("#### Configure 
OAM and MGMT networks") + if V_BOX_OPTIONS.setup_type in [AIO_SX]: + oam_if = OAM_CONFIG[0]['device'] + commands = [ + f'system host-if-modify controller-0 {oam_if} -c platform;', + f'system interface-network-assign controller-0 {oam_if} oam;', + ] + else: + oam_if = OAM_CONFIG[0]['device'] + mgmt_if = MGMT_CONFIG[0]['device'] + commands = [ + f'system host-if-modify controller-0 {oam_if} -c platform;', + f'system interface-network-assign controller-0 {oam_if} oam;', + r'system host-if-modify controller-0 lo -c none;', + r'IFNET_UUIDS=$(' + r' system interface-network-list controller-0 ' + r""" | awk '{if ($6=="lo") print $4;}');""" + r'for UUID in ${IFNET_UUIDS}; do ' + r' system interface-network-remove ${UUID};' + r'done;', + f'system host-if-modify controller-0 {mgmt_if} -c platform;', + f'system interface-network-assign controller-0 {mgmt_if} mgmt;', + f'system interface-network-assign controller-0 {mgmt_if} cluster-host;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + + LOG.info("#### Provision Ceph OSD") + if V_BOX_OPTIONS.setup_type in [AIO_SX, AIO_DX]: + commands = [ + r'system storage-backend-add ceph --confirmed;', + r'system host-disk-list controller-0;', + r'system host-disk-list controller-0 ' + r" | awk '/\/dev\/sdb/{print $2}' " + r' | xargs -i system host-stor-add controller-0 {};', + r'system host-stor-list controller-0;', + ] + elif V_BOX_OPTIONS.setup_type in [STANDARD, STORAGE]: + commands = [ + r'system storage-backend-add ceph --confirmed;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + + LOG.info("#### Configure platform memory allocation") + if V_BOX_OPTIONS.setup_type in [AIO_SX, AIO_DX]: + commands = [ + r'system host-memory-show controller-0 0;', + r'system host-memory-modify controller-0 0 -m 11200;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + except: + LOG.error("Failed stage: %s", STG_SETUP_CONTROLLER_0) + 
raise -def stage_lab_setup4(): - """Calls _run_lab_setup with ssh_client 4""" - - _run_lab_setup(4) # pylint: disable=no-value-for-parameter - - -def stage_lab_setup5(): - """Calls _run_lab_setup with ssh_client 5""" - - _run_lab_setup(5) # pylint: disable=no-value-for-parameter - - -@connect_to_ssh +@connect_to_ssh('controller-0') @connect_to_serial def stage_unlock_controller0(stream, ssh_client): """ @@ -1171,7 +1343,7 @@ def stage_unlock_controller0(stream, ssh_client): Args: - stream (obj): Serial stream to send and receive data - ssh_client (obj): SSH client connection to execute remote commands - + Returns: None. """ @@ -1224,7 +1396,7 @@ def stage_unlock_controller0_serial(stream): SERIAL_CONSOLE_MODE = CONSOLE_UNKNOWN_MODE # After reboot we are not logged in. -@connect_to_ssh +@connect_to_ssh() def stage_install_nodes(ssh_client): """ Install nodes in the environment using SSH. @@ -1237,62 +1409,134 @@ def stage_install_nodes(ssh_client): None. """ - # Create and transfer host_bulk_add.xml to ctrl-0 - host_xml = create_host_bulk_add() + try: + # Create and transfer host_bulk_add.xml to ctrl-0 + host_xml = create_host_bulk_add() - LOG.info("host_bulk_add.xml content:\n%s", host_xml) + LOG.info("host_bulk_add.xml content:\n%s", host_xml) - # Send file to controller - destination = "/home/" + V_BOX_OPTIONS.username + "/host_bulk_add.xml" - with tempfile.NamedTemporaryFile() as file: - file.write(host_xml.encode('utf-8')) - file.flush() - # Connection to NAT interfaces is local - if V_BOX_OPTIONS.vboxnet_type == 'nat': - ip_addr = '127.0.0.1' - port = V_BOX_OPTIONS.nat_controller0_local_ssh_port - else: - ip_addr = V_BOX_OPTIONS.controller0_ip - port = 22 - sftp_send( - file.name, - destination, - { - "remote_host": ip_addr, - "remote_port": port, - "username": V_BOX_OPTIONS.username, - "password": V_BOX_OPTIONS.password - } + # Send file to controller + destination = "/home/" + V_BOX_OPTIONS.username + "/host_bulk_add.xml" + with tempfile.NamedTemporaryFile() 
as file: + file.write(host_xml.encode('utf-8')) + file.flush() + # Connection to NAT interfaces is local + if V_BOX_OPTIONS.vboxnet_type == 'nat': + ip_addr = '127.0.0.1' + port = V_BOX_OPTIONS.nat_controller0_local_ssh_port + else: + ip_addr = V_BOX_OPTIONS.controller0_ip + port = 22 + sftp_send( + file.name, + destination, + { + "remote_host": ip_addr, + "remote_port": port, + "username": V_BOX_OPTIONS.username, + "password": V_BOX_OPTIONS.password + } + ) + + LOG.info("Waiting for controller-0 to be available") + wait_for_hosts(ssh_client, ['controller-0'], 'available') + + commands = [ + r'source /etc/platform/openrc;', + f'system host-bulk-add {destination};', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP ) - # Apply host-bulk-add - _, _, exitcode = run_ssh_cmd(ssh_client, - f'source /etc/platform/openrc; system host-bulk-add {destination}', - timeout=60) - if exitcode != 0: - msg = "Host bulk add failed, expecting exit code of 0 but got %s", exitcode - LOG.info(msg) - raise Exception(msg) # pylint: disable=E0012, W0719 - # Start hosts one by one, wait 10s between each start - vms = vboxmanage.get_all_vms(V_BOX_OPTIONS.labname, option="vms") - runningvms = vboxmanage.get_all_vms( - V_BOX_OPTIONS.labname, - option="runningvms") - powered_off = list(set(vms) - set(runningvms)) - LOG.info("#### Powered off VMs: %s", powered_off) - for virtual_machine in powered_off: - LOG.info("#### Powering on VM: %s", virtual_machine) - vboxmanage.vboxmanage_startvm(virtual_machine, V_BOX_OPTIONS.headless, force=True) - LOG.info("Give VM 180s to boot.") - time.sleep(180) + # Start hosts one by one, wait 5s between each start + vms = vboxmanage.get_all_vms(V_BOX_OPTIONS.labname, option="vms") + runningvms = vboxmanage.get_all_vms( + V_BOX_OPTIONS.labname, + option="runningvms") + powered_off = list(set(vms) - set(runningvms)) + LOG.info("#### Powered off VMs: %s", powered_off) + for virtual_machine in powered_off: + LOG.info("#### Powering on 
VM: %s", virtual_machine) + vboxmanage.vboxmanage_startvm(virtual_machine, V_BOX_OPTIONS.headless, force=True) + time.sleep(5) - ctrl0 = V_BOX_OPTIONS.labname + "-controller-0" - hostnames = list(get_hostnames(ignore=[ctrl0]).values()) + LOG.info("Give VMs 5min to boot and install host personality") + time.sleep(5*60) - wait_for_hosts(ssh_client, hostnames, 'online') + ctrl0 = V_BOX_OPTIONS.labname + "-controller-0" + hostnames = list(get_hostnames(ignore=[ctrl0]).values()) + wait_for_hosts(ssh_client, hostnames, 'online') + + except: + LOG.error("Failed stage: %s", STG_INSTALL_NODES) + raise -@connect_to_ssh +@connect_to_ssh() +def stage_setup_controller_1(ssh_client): + """Provision controller-1 networking and OSD storage""" + + try: + # The CLI commands below are executed on the active controller + LOG.info("#### Display system info") + commands = [ + r'source /etc/platform/openrc;', + r'system show;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + + LOG.info("#### Configure OAM and MGMT networks") + oam_if = OAM_CONFIG[0]['device'] + commands = [ + f'system host-if-modify controller-1 {oam_if} -c platform;', + f'system interface-network-assign controller-1 {oam_if} oam;', + r'system interface-network-assign controller-1 mgmt0 cluster-host;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + + if V_BOX_OPTIONS.setup_type in [AIO_SX, AIO_DX]: + LOG.info("#### Provision Ceph OSD") + commands = [ + r'system storage-backend-list;', + r'system host-disk-list controller-1;', + r'system host-disk-list controller-1 ' + r" | awk '/\/dev\/sdb/{print $2}' " + r' | xargs -i system host-stor-add controller-1 {};', + r'system host-stor-list controller-1;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + LOG.info("#### Configure platform memory allocation") + if V_BOX_OPTIONS.setup_type in [AIO_SX, AIO_DX]: + commands = [ + r'system host-memory-show 
controller-1 0;', + r'system host-memory-modify controller-1 0 -m 11200;', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + except: + LOG.error("Failed stage: %s", STG_SETUP_CONTROLLER_1) + raise + + +@connect_to_ssh() def stage_unlock_controller1(ssh_client): """ Unlock controller-1 host via SSH. @@ -1316,15 +1560,37 @@ def stage_unlock_controller1(ssh_client): 'source /etc/platform/openrc; system host-unlock controller-1', timeout=60) - LOG.info("#### waiting for controller-1 to be available.") - time.sleep(120) - wait_for_hosts(ssh_client, ['controller-1'], 'available') + LOG.info("#### waiting for controller-1 to be enabled") + wait_for_hosts(ssh_client, ['controller-1'], 'enabled') -@connect_to_ssh +@connect_to_ssh() +def stage_setup_storages(ssh_client): + """Provision dedicated storage nodes""" + + try: + storages = list(get_hostnames(personalities=['storage']).values()) + + # Add storages setup here + LOG.info("#### Provision dedicated storage nodes") + for storage in storages: + commands = [ + f'echo "TODO {storage}";', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + except: + LOG.error("Failed stage: %s", STG_SETUP_STORAGES) + raise + + +@connect_to_ssh() def stage_unlock_storages(ssh_client): """ - Unlock storage nodes via SSH. + Unlock all storage nodes, wait for them to be enabled Args: - ssh_client (paramiko SSH client object): The SSH client to use for @@ -1334,7 +1600,6 @@ def stage_unlock_storages(ssh_client): None. 
""" - # Unlock storage nodes, wait for them to be 'available' storages = list(get_hostnames(personalities=['storage']).values()) for storage in storages: @@ -1344,14 +1609,37 @@ def stage_unlock_storages(ssh_client): LOG.info("Waiting 15s before next unlock") time.sleep(15) - LOG.info("#### Waiting for all hosts to be available.") - wait_for_hosts(ssh_client, storages, 'available') + LOG.info("#### Waiting for all storage nodes to be enabled") + wait_for_hosts(ssh_client, storages, 'enabled') -@connect_to_ssh +@connect_to_ssh() +def stage_setup_workers(ssh_client): + """Provision worker nodes""" + + try: + workers = list(get_hostnames(personalities=['worker']).values()) + + # Add workers setup here + LOG.info("#### Provision worker nodes") + for worker in workers: + commands = [ + f'echo "TODO {worker}";', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) + except: + LOG.error("Failed stage: %s", STG_SETUP_WORKERS) + raise + + +@connect_to_ssh() def stage_unlock_workers(ssh_client): """ - Unlock worker nodes via SSH. + Unlock all workers, wait for them to be enabled Args: - ssh_client (paramiko SSH client object): The SSH client to use for @@ -1361,9 +1649,7 @@ def stage_unlock_workers(ssh_client): None. 
""" - # Unlock all, wait for all hosts, except ctrl0 to be 'available' workers = list(get_hostnames(personalities=['worker']).values()) - ctrl0 = V_BOX_OPTIONS.labname + '-controller-0' for worker in workers: run_ssh_cmd( @@ -1373,86 +1659,111 @@ def stage_unlock_workers(ssh_client): LOG.info("Waiting 15s before next unlock") time.sleep(15) - # Wait for all hosts, except ctrl0 to be available - # At this stage we expect ctrl1 to also be available - hosts = list(get_hostnames(ignore=[ctrl0]).values()) - wait_for_hosts(ssh_client, hosts, 'available') + LOG.info("#### Waiting for all worker nodes to be enabled") + wait_for_hosts(ssh_client, workers, 'enabled') -@connect_to_ssh +@connect_to_ssh() +def stage_setup_controller_storage(ssh_client): + """Provision controller storage""" + + try: + # Add controller storage setup here + LOG.info("#### Provision controller storage") + commands = [ + 'echo "TODO";', + ] + run_ssh_cmd_list(ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP) + except: + LOG.error("Failed stage: %s", STG_SETUP_CTRL_STORAGE) + raise + + +@connect_to_ssh() def stage_enable_kubernetes(ssh_client): + """Installation and configuration of Kubernetes dashboard""" - ip_addr, port = get_ssh_ip_and_port() + try: + ip_addr, port = get_ssh_ip_and_port() - local_path = V_BOX_OPTIONS.kubernetes_config_files - send_dir( - { - "source": local_path, - "remote_host": ip_addr, - "remote_port": port, - "destination":'/home/' + V_BOX_OPTIONS.username + '/', - "username": V_BOX_OPTIONS.username, "password": V_BOX_OPTIONS.password - } - ) - LOG.info("###### Adding port-forward rule for kubernetes dashboard ######") + local_path = V_BOX_OPTIONS.kubernetes_config_files + send_dir( + { + "source": local_path, + "remote_host": ip_addr, + "remote_port": port, + "destination":'/home/' + V_BOX_OPTIONS.username + '/', + "username": V_BOX_OPTIONS.username, "password": V_BOX_OPTIONS.password + } + ) + LOG.info("#### Adding port-forward rule for kubernetes dashboard") - # Add 
port forward rule for Kubernetes dashboard visualization at 32000 - ip_addr = V_BOX_OPTIONS.controller0_ip - rule_name = V_BOX_OPTIONS.labname + "-kubernetes-dasboard" + # Add port forward rule for Kubernetes dashboard visualization at 32000 + ip_addr = V_BOX_OPTIONS.controller0_ip + rule_name = V_BOX_OPTIONS.labname + "-kubernetes-dasboard" - create_port_forward(rule_name, - V_BOX_OPTIONS.vboxnet_name, - local_port=V_BOX_OPTIONS.kubernetes_dashboard_port, - guest_port='32000', - guest_ip=ip_addr) + create_port_forward(rule_name, + V_BOX_OPTIONS.vboxnet_name, + local_port=V_BOX_OPTIONS.kubernetes_dashboard_port, + guest_port='32000', + guest_ip=ip_addr) - LOG.info("###### Installing Kubernetes dashboard ######") + LOG.info("#### Installing Kubernetes dashboard") + commands = [ + 'source /etc/platform/openrc' + ' && source /etc/profile' + ' && cp /etc/kubernetes/admin.conf ~/.kube/config' + ' && helm repo update;' + 'helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/' + ' && helm install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard' + ' -f dashboard-values.yaml --version 6.0.8', + ] + run_ssh_cmd_list( + ssh_client, + commands, + timeout=HostTimeout.NORMAL_OP + ) - _, _, exitcode = run_ssh_cmd(ssh_client, - 'source /etc/platform/openrc && ' - 'source /etc/profile && ' - 'cp /etc/kubernetes/admin.conf ~/.kube/config && ' - 'helm repo update; helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/ && ' - 'helm install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard ' - '-f dashboard-values.yaml --version 6.0.8', timeout=60) + LOG.info("#### Creating an admin-user service account with cluster-admin provileges") + commands = [ + 'kubectl apply -f admin-login.yaml' + ' && kubectl -n kube-system describe secret' + ' $(kubectl get secret' + ' | grep admin-user-sa-token' + ' | awk "{print $1}")' + ' | tee $HOME/token.txt', + ] + run_ssh_cmd_list( + ssh_client, + commands, + 
timeout=HostTimeout.NORMAL_OP + ) - if exitcode == 0: - LOG.info("###### Creating an admin-user service account with cluster-admin provileges ######") + LOG.info("#### Sending token.txt to /home/%s", getpass.getuser()) + ip_addr, port = get_ssh_ip_and_port() + username = V_BOX_OPTIONS.username + password = V_BOX_OPTIONS.password + source = f'/home/{username}/token.txt' + destination = f'/home/{getpass.getuser()}' - _, _, exitcode2 = run_ssh_cmd(ssh_client, - 'kubectl apply -f admin-login.yaml && kubectl -n kube-system ' - 'describe secret $(kubectl get secret | grep admin-user-sa-token | awk "{print $1}") | tee $HOME/token.txt', timeout=60) - if exitcode2 == 0: - send_token() - LOG.info('##### TOKEN CREATED AND FILE CONTAINING TOKEN SENT TO HOST AT /home/%s #####', getpass.getuser()) + # Send token file to HOME/Desktop using rsync + LOG.info("#### rsync command") + cmd = ( + f'rsync -avL --rsh="/usr/bin/sshpass -p {password} ' + f'ssh -p {port} -o StrictHostKeyChecking=no -l {username}" ' + f'{username}@{ip_addr}:{source}* {destination}' + ) + exec_cmd(cmd) + LOG.info( + "#### TOKEN CREATED AND FILE CONTAINING TOKEN SENT TO HOST AT /home/%s", + getpass.getuser() + ) - if exitcode != 0 or exitcode2 != 0: - msg = f'Installation of Kubernetes dashboard failed, expecting exit code of 0 but got {exitcode}.' 
- LOG.info(msg) - raise Exception(msg) - - -def send_token(): - LOG.info('###### Sending token.txt to /home/%s ######', getpass.getuser()) - ip_addr, port = get_ssh_ip_and_port() - username =V_BOX_OPTIONS.username - password = V_BOX_OPTIONS.password - source = f'/home/{username}/token.txt' - destination = f'/home/{getpass.getuser()}' - - # Send token file to HOME/Desktop using rsync - LOG.info("###### rsync command ######") - cmd = (f'rsync -avL --rsh="/usr/bin/sshpass -p {password} ' - f'ssh -p {port} -o StrictHostKeyChecking=no -l {username}" ' - f'{username}@{ip_addr}:{source}* {destination}') - LOG.info('CMD: %s', cmd) - - with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as process: - for line in iter(process.stdout.readline, b''): - LOG.info("%s", line.decode("utf-8").strip()) - process.wait() - if process.returncode: - raise Exception(f'Error in rsync, return code: {process.returncode}') + except: + LOG.error("Failed stage: %s", STG_ENABLE_KUBERNETES) + raise def run_custom_script(script, timeout, console, mode): @@ -1479,7 +1790,7 @@ def run_custom_script(script, timeout, console, mode): _, __, return_code = run_ssh_cmd(ssh_client, f"./{script}", timeout=timeout, mode=mode) if return_code != 0: LOG.info("Custom script '%s' return code is not 0. 
Aborting.", script) - raise Exception(f"Script execution failed with return code: {return_code}") # pylint: disable=E0012, W0719 + raise RuntimeError(f"Script execution failed with return code: {return_code}") else: sock, stream = _connect_to_serial() try: @@ -1499,7 +1810,8 @@ def run_custom_script(script, timeout, console, mode): f"echo 'Return code: [{script}]'", timeout=3, prompt='Return code: [0]') finally: - sock.close() + if sock: + sock.close() def get_custom_script_options(options_list): @@ -1528,7 +1840,7 @@ def get_custom_script_options(options_list): for char in not_allowed: if char in options_list: LOG.info("Char '%s' not allowed in options list: %s.", char, options_list) - raise Exception("Char not allowed in options_list") # pylint: disable=E0012, W0719 + raise TypeError("Char not allowed in options_list") # get options options = options_list.split(',') @@ -1645,18 +1957,19 @@ def stage_custom_script5(): STG_CREATE_LAB = "create-lab" STG_INSTALL_CONTROLLER0 = "install-controller-0" STG_CONFIG_CONTROLLER = "config-controller" -STG_RSYNC_CONFIG = "rsync-config" -STG_LAB_SETUP1 = "lab-setup1" +STG_SETUP_CONTROLLER_0 = "setup-controller-0" STG_UNLOCK_CONTROLLER0 = "unlock-controller-0" -STG_LAB_SETUP2 = "lab-setup2" STG_INSTALL_NODES = "install-nodes" +STG_SETUP_CONTROLLER_1 = "setup-controller-1" STG_UNLOCK_CONTROLLER1 = "unlock-controller-1" -STG_LAB_SETUP3 = "lab-setup3" +STG_SETUP_STORAGES = "setup-storages" STG_UNLOCK_STORAGES = "unlock-storages" -STG_LAB_SETUP4 = "lab-setup4" +STG_SETUP_WORKERS = "setup-workers" STG_UNLOCK_WORKERS = "unlock-workers" -STG_LAB_SETUP5 = "lab-setup5" +STG_SETUP_CTRL_STORAGE = "setup-controller-storage" STG_ENABLE_KUBERNETES = "enable-kubernetes" +STG_RSYNC_CONFIG = "rsync-config" +STG_LAB_SETUP = "lab-setup" STG_CUSTOM_SCRIPT1 = "custom-script1" STG_CUSTOM_SCRIPT2 = "custom-script2" STG_CUSTOM_SCRIPT3 = "custom-script3" @@ -1682,43 +1995,46 @@ STAGE_CALLBACKS = { {CALLBACK: stage_config_controller, HELP: "Run config 
controller using the --ansible-controller-config" \ "updated based on --ini-* options."}, - STG_RSYNC_CONFIG: - {CALLBACK: stage_rsync_config, - HELP: "Rsync all files from --config-files-dir and --config-files-dir* to /home/wrsroot."}, - STG_LAB_SETUP1: - {CALLBACK: stage_lab_setup1, - HELP: "Run lab_setup with one or more --lab-setup-conf files from controller-0."}, + STG_SETUP_CONTROLLER_0: + {CALLBACK: stage_setup_controller_0, + HELP: "Provision controller-0 networking and OSD storage."}, STG_UNLOCK_CONTROLLER0: {CALLBACK: stage_unlock_controller0, HELP: "Unlock controller-0 and wait for it to reboot."}, - STG_LAB_SETUP2: - {CALLBACK: stage_lab_setup2, - HELP: "Run lab_setup with one or more --lab-setup-conf files from controller-0."}, STG_INSTALL_NODES: {CALLBACK: stage_install_nodes, HELP: "Generate a host-bulk-add.xml, apply it and install all" \ "other nodes, wait for them to be 'online."}, + STG_SETUP_CONTROLLER_1: + {CALLBACK: stage_setup_controller_1, + HELP: "Provision controller-1 networking and OSD storage."}, STG_UNLOCK_CONTROLLER1: {CALLBACK: stage_unlock_controller1, - HELP: "Unlock controller-1, wait for it to be 'available'"}, - STG_LAB_SETUP3: - {CALLBACK: stage_lab_setup3, - HELP: "Run lab_setup with one or more --lab-setup-conf files from controller-0."}, + HELP: "Unlock controller-1, wait for it to be enabled"}, + STG_SETUP_STORAGES: + {CALLBACK: stage_setup_storages, + HELP: "Provision dedicated storage nodes."}, STG_UNLOCK_STORAGES: {CALLBACK: stage_unlock_storages, - HELP: "Unlock all storage nodes, wait for them to be 'available'"}, - STG_LAB_SETUP4: - {CALLBACK: stage_lab_setup4, - HELP: "Run lab_setup with one or more --lab-setup-conf files from controller-0."}, + HELP: "Unlock all storage nodes, wait for them to be enabled"}, + STG_SETUP_WORKERS: + {CALLBACK: stage_setup_workers, + HELP: "Provision worker nodes."}, STG_UNLOCK_WORKERS: {CALLBACK: stage_unlock_workers, - HELP: "Unlock all workers, wait for them to be 'available"}, - 
STG_LAB_SETUP5: - {CALLBACK: stage_lab_setup5, - HELP: "Run lab_setup with one or more --lab-setup-conf files from controller-0."}, + HELP: "Unlock all workers, wait for them to be enabled"}, + STG_SETUP_CTRL_STORAGE: + {CALLBACK: stage_setup_controller_storage, + HELP: "Provision controller storage."}, STG_ENABLE_KUBERNETES: {CALLBACK: stage_enable_kubernetes, HELP: "Installation and configuration of Kubernetes dashboard"}, + STG_RSYNC_CONFIG: + {CALLBACK: stage_rsync_config, + HELP: "Rsync all files from --config-files-dir and --config-files-dir* to /home/wrsroot."}, + STG_LAB_SETUP: + {CALLBACK: stage_lab_setup, + HELP: "Run lab_setup with one or more --lab-setup-conf files"}, STG_CUSTOM_SCRIPT1: {CALLBACK: stage_custom_script1, HELP: "Run a custom script from /home/wrsroot, make sure you" \ @@ -1749,18 +2065,19 @@ STAGE_CALLBACKS = { AVAILABLE_STAGES = [STG_CREATE_LAB, STG_INSTALL_CONTROLLER0, STG_CONFIG_CONTROLLER, - STG_RSYNC_CONFIG, - STG_LAB_SETUP1, + STG_SETUP_CONTROLLER_0, STG_UNLOCK_CONTROLLER0, - STG_LAB_SETUP2, STG_INSTALL_NODES, + STG_SETUP_CONTROLLER_1, STG_UNLOCK_CONTROLLER1, - STG_LAB_SETUP3, + STG_SETUP_STORAGES, STG_UNLOCK_STORAGES, - STG_LAB_SETUP4, + STG_SETUP_WORKERS, STG_UNLOCK_WORKERS, - STG_LAB_SETUP5, + STG_SETUP_CTRL_STORAGE, STG_ENABLE_KUBERNETES, + STG_RSYNC_CONFIG, + STG_LAB_SETUP, STG_CUSTOM_SCRIPT1, STG_CUSTOM_SCRIPT2, STG_CUSTOM_SCRIPT3, @@ -1773,8 +2090,7 @@ AIO_SX_STAGES = [ STG_CREATE_LAB, STG_INSTALL_CONTROLLER0, STG_CONFIG_CONTROLLER, - STG_RSYNC_CONFIG, - STG_LAB_SETUP1, + STG_SETUP_CONTROLLER_0, STG_UNLOCK_CONTROLLER0, STG_ENABLE_KUBERNETES, ] @@ -1783,11 +2099,10 @@ AIO_DX_STAGES = [ STG_CREATE_LAB, STG_INSTALL_CONTROLLER0, STG_CONFIG_CONTROLLER, - STG_RSYNC_CONFIG, - STG_LAB_SETUP1, + STG_SETUP_CONTROLLER_0, STG_UNLOCK_CONTROLLER0, STG_INSTALL_NODES, - STG_LAB_SETUP2, + STG_SETUP_CONTROLLER_1, STG_UNLOCK_CONTROLLER1, STG_ENABLE_KUBERNETES, ] @@ -1796,14 +2111,14 @@ STD_STAGES = [ STG_CREATE_LAB, STG_INSTALL_CONTROLLER0, 
STG_CONFIG_CONTROLLER, - STG_RSYNC_CONFIG, - STG_LAB_SETUP1, + STG_SETUP_CONTROLLER_0, STG_UNLOCK_CONTROLLER0, STG_INSTALL_NODES, - STG_LAB_SETUP2, + STG_SETUP_CONTROLLER_1, STG_UNLOCK_CONTROLLER1, - STG_LAB_SETUP3, + STG_SETUP_WORKERS, STG_UNLOCK_WORKERS, + STG_SETUP_CTRL_STORAGE, STG_ENABLE_KUBERNETES, ] @@ -1811,17 +2126,15 @@ STORAGE_STAGES = [ STG_CREATE_LAB, STG_INSTALL_CONTROLLER0, STG_CONFIG_CONTROLLER, - STG_RSYNC_CONFIG, - STG_LAB_SETUP1, + STG_SETUP_CONTROLLER_0, STG_UNLOCK_CONTROLLER0, STG_INSTALL_NODES, - STG_LAB_SETUP2, + STG_SETUP_CONTROLLER_1, STG_UNLOCK_CONTROLLER1, - STG_LAB_SETUP3, + STG_SETUP_STORAGES, STG_UNLOCK_STORAGES, - STG_LAB_SETUP4, + STG_SETUP_WORKERS, STG_UNLOCK_WORKERS, - STG_LAB_SETUP5, STG_ENABLE_KUBERNETES, ] @@ -1846,11 +2159,11 @@ def load_config(): global V_BOX_OPTIONS # pylint: disable=global-statement V_BOX_OPTIONS = handle_args().parse_args() - oam_config = [getattr(OAM, attr) - for attr in dir(OAM) if not attr.startswith('__')] + if V_BOX_OPTIONS.sysadmin_password is None: + V_BOX_OPTIONS.sysadmin_password = V_BOX_OPTIONS.password if V_BOX_OPTIONS.vboxnet_ip is None: - V_BOX_OPTIONS.vboxnet_ip = oam_config[0]['ip'] + V_BOX_OPTIONS.vboxnet_ip = OAM_CONFIG[0]['ip'] if V_BOX_OPTIONS.hostiocache: V_BOX_OPTIONS.hostiocache = 'on' @@ -1869,24 +2182,29 @@ def load_config(): V_BOX_OPTIONS.controller0_ip = loaded.get('external_oam_node_0_address') V_BOX_OPTIONS.controller1_ip = loaded.get('external_oam_node_1_address') - assert V_BOX_OPTIONS.controller_floating_ip, "Missing external_oam_floating_address from ansible config file" - assert V_BOX_OPTIONS.controller0_ip, "Missing external_oam_node_0_address from ansible config file" - assert V_BOX_OPTIONS.controller1_ip, "Missing external_oam_node_1_address from ansible config file" + assert V_BOX_OPTIONS.controller_floating_ip, \ + "Missing external_oam_floating_address from ansible config file" + assert V_BOX_OPTIONS.controller0_ip, \ + "Missing external_oam_node_0_address from ansible 
config file" + assert V_BOX_OPTIONS.controller1_ip, \ + "Missing external_oam_node_1_address from ansible config file" else: V_BOX_OPTIONS.controller_floating_ip = None - # In a AIO-SX configuration the ip of controller-0 must be the same as the floating defined in ansible config file. + # In a AIO-SX configuration the ip of controller-0 must be + # the same as the floating defined in ansible config file. V_BOX_OPTIONS.controller0_ip = loaded.get('external_oam_floating_address') V_BOX_OPTIONS.controller1_ip = None - assert V_BOX_OPTIONS.controller0_ip, "Missing external_oam_floating_address from ansible config file" + assert V_BOX_OPTIONS.controller0_ip, \ + "Missing external_oam_floating_address from ansible config file" except FileNotFoundError: - print (f' \n Ansible configuration file not found in {V_BOX_OPTIONS.ansible_controller_config} \n') + print("\n Error: Ansible configuration file not found in %s\n", + V_BOX_OPTIONS.ansible_controller_config) sys.exit(1) except ruamel.yaml.YAMLError: print("\n Error while parsing YAML file \n") sys.exit() - if V_BOX_OPTIONS.setup_type == AIO_SX: V_BOX_OPTIONS.controllers = 1 V_BOX_OPTIONS.workers = 0 @@ -1914,7 +2232,12 @@ def validate(v_box_opt, m_stages): print("Please set --nat-controller0-local-ssh-port") err = True if v_box_opt.controllers > 1 and not v_box_opt.nat_controller1_local_ssh_port: - print("Second controller is configured, please set --nat-controller1-local-ssh-port") + print("Second controller is configured, " + "please set --nat-controller1-local-ssh-port") + err = True + if v_box_opt.controllers > 1 and not v_box_opt.nat_controller_floating_ssh_port: + print("Second controller is configured, " + "please set --nat-controller-floating-ssh-port") err = True else: if v_box_opt.setup_type != AIO_SX: @@ -1925,22 +2248,23 @@ def validate(v_box_opt, m_stages): print("Please set --controller0-ip") err = True if v_box_opt.controllers > 1 and not v_box_opt.controller1_ip: - print("Second controller is 
configured, please set --controller1-ip") + print("Second controller is configured, " + "please set --controller1-ip") err = True if STG_CONFIG_CONTROLLER in m_stages: if not v_box_opt.ansible_controller_config: - print(f"Please set --ansible-controller-config as needed by stage {STG_CONFIG_CONTROLLER}") + print("Please set --ansible-controller-config " + f"as needed by stage {STG_CONFIG_CONTROLLER}") err = True if STG_RSYNC_CONFIG in m_stages: if not v_box_opt.config_files_dir and not v_box_opt.config_files_dir_dont_follow_links: print("Please set --config-files-dir and/or --config-files-dir-dont-follow-links " - f"as needed by stage {STG_RSYNC_CONFIG} and {STG_LAB_SETUP1}") + f"as needed by stage {STG_RSYNC_CONFIG} and {STG_LAB_SETUP}") err = True - if (STG_LAB_SETUP1 in m_stages or STG_LAB_SETUP2 in m_stages - or STG_LAB_SETUP3 in m_stages or STG_LAB_SETUP4 in m_stages - or STG_LAB_SETUP5 in m_stages): + if STG_LAB_SETUP in m_stages: if not v_box_opt.lab_setup_conf: - print("Please set at least one --lab-setup-conf file as needed by lab-setup stages") + print("Please set at least one --lab-setup-conf file " + "as needed by lab-setup stages") err = True # file = ["lab_setup.sh"] dirs = [] @@ -1948,8 +2272,6 @@ def validate(v_box_opt, m_stages): dirs.append(v_box_opt.config_files_dir) if v_box_opt.config_files_dir_dont_follow_links: dirs.append(v_box_opt.config_files_dir_dont_follow_links) - # for directory in dirs: - # pass if err: print("\nMissing arguments. Please check --help and --list-stages for usage.") sys.exit(5) @@ -1974,9 +2296,6 @@ def wrap_stage_help(m_stage, stage_callbacks, number=None): return text -# Define signal handler for ctrl+c - - def signal_handler(): """ This function is called when the user presses Ctrl+C. 
It prints a message to the @@ -1989,6 +2308,31 @@ def signal_handler(): sys.exit(1) +def log_heading_msg(msg, pattern='#', panel_size=20): + """ + This function logs a formatted heading message with the style below + + #################################################################### + #################### Here goes the heading msg #################### + #################################################################### + """ + panel = "" + i = 0 + while i < panel_size: + panel = panel + pattern + i += 1 + + pad = "" + i = 0 + while i < len(msg) + 2: + pad = pad + pattern + i += 1 + + LOG.info("%s%s%s", panel, pad, panel) + LOG.info("%s %s %s", panel, msg, panel) + LOG.info("%s%s%s", panel, pad, panel) + + # pylint: disable=invalid-name if __name__ == "__main__": kpi.init_kpi_metrics() @@ -2000,12 +2344,12 @@ if __name__ == "__main__": print(f"Defined setups: {list(STAGES_CHAINS.keys())}") if V_BOX_OPTIONS.setup_type and V_BOX_OPTIONS.setup_type in AVAILABLE_CHAINS: AVAILABLE_CHAINS = [V_BOX_OPTIONS.setup_type] - for setup in AVAILABLE_CHAINS: - i = 1 - print(f"Stages for setup: {setup}") - for stage in STAGES_CHAINS[setup]: - print(wrap_stage_help(stage, STAGE_CALLBACKS[stage][HELP], i)) - i += 1 + for stg_chain in AVAILABLE_CHAINS: + stg_no = 1 + print(f"Stages for setup on: {stg_chain}") + for stage in STAGES_CHAINS[stg_chain]: + print(wrap_stage_help(stage, STAGE_CALLBACKS[stage][HELP], stg_no)) + stg_no += 1 print("Available stages that can be used for --custom-stages:") for stage in AVAILABLE_STAGES: print(wrap_stage_help(stage, STAGE_CALLBACKS[stage][HELP])) @@ -2034,22 +2378,22 @@ if __name__ == "__main__": else: # List all stages between 'from-stage' to 'to-stage' stages = STAGES_CHAINS[V_BOX_OPTIONS.setup_type] - from_index = 0 - to_index = None + from_stg_index = 0 + to_stg_index = None if V_BOX_OPTIONS.from_stage: if V_BOX_OPTIONS.from_stage == 'start': - from_index = 0 + from_stg_index = 0 else: - from_index = 
stages.index(V_BOX_OPTIONS.from_stage) + from_stg_index = stages.index(V_BOX_OPTIONS.from_stage) if V_BOX_OPTIONS.to_stage: if V_BOX_OPTIONS.from_stage == 'end': - to_index = -1 + to_stg_index = -1 else: - to_index = stages.index(V_BOX_OPTIONS.to_stage) + 1 - if to_index is not None: - install_stages = stages[from_index:to_index] + to_stg_index = stages.index(V_BOX_OPTIONS.to_stage) + 1 + if to_stg_index is not None: + install_stages = stages[from_stg_index:to_stg_index] else: - install_stages = stages[from_index:] + install_stages = stages[from_stg_index:] LOG.info("Executing %s stage(s): %s.", len(install_stages), install_stages) validate(V_BOX_OPTIONS, install_stages) @@ -2058,12 +2402,10 @@ if __name__ == "__main__": prev_stage = None for stage in install_stages: stg_no += 1 - start = time.time() + stg_start_time = time.time() try: - LOG.info("######## (%s/%s) Entering stage %s ########", - stg_no, - len(install_stages), - stage) + stg_msg = f"({stg_no}/{len(install_stages)}) Entering stage {stage}" + log_heading_msg(stg_msg) STAGE_CALLBACKS[stage][CALLBACK]() # Take snapshot if configured @@ -2073,16 +2415,16 @@ if __name__ == "__main__": f"snapshot-AFTER-{stage}") # Compute KPIs - duration = time.time() - start - kpi.set_kpi_metric(stage, duration) + stg_duration = time.time() - stg_start_time + kpi.set_kpi_metric(stage, stg_duration) kpi.print_kpi(stage) kpi.print_kpi('total') - except Exception as e: - duration = time.time() - start - kpi.set_kpi_metric(stage, duration) + except Exception as stg_exc: + stg_duration = time.time() - stg_start_time + kpi.set_kpi_metric(stage, stg_duration) LOG.info("INSTALL FAILED, ABORTING!") kpi.print_kpi_metrics() - LOG.info("Exception details: %s", e) + LOG.info("Exception details: %s", repr(stg_exc)) raise # Stage completed prev_stage = stage diff --git a/virtualbox/pybox/tests/test_install_vbox.py b/virtualbox/pybox/tests/test_install_vbox.py index c62374b..876d51a 100644 --- a/virtualbox/pybox/tests/test_install_vbox.py 
+++ b/virtualbox/pybox/tests/test_install_vbox.py @@ -1,7 +1,14 @@ -import unittest -from unittest.mock import MagicMock, patch, call, ANY import install_vbox +import unittest + +from consts.networking import OAM, MGMT from dataclasses import dataclass +from unittest.mock import MagicMock, patch, call, ANY + + +# Network +OAM_CONFIG = [getattr(OAM, attr) for attr in dir(OAM) if not attr.startswith('__')] +MGMT_CONFIG = [getattr(MGMT, attr) for attr in dir(MGMT) if not attr.startswith('__')] @dataclass @@ -148,11 +155,12 @@ class SetupNetworkingTestCase(unittest.TestCase): call(self.mock_stream, password=self.mock_password), call(self.mock_stream, password=self.mock_password) ]) + oam_if = OAM_CONFIG[0]['device'] mock_serial.send_bytes.assert_any_call(self.mock_stream, - f"sudo /sbin/ip addr add {self.mock_ip}/24 dev enp0s3", + f"sudo /sbin/ip addr add {self.mock_ip}/24 dev {oam_if}", expect_prompt=False) mock_serial.send_bytes.assert_any_call(self.mock_stream, - "sudo /sbin/ip link set enp0s3 up", + f"sudo /sbin/ip link set {oam_if} up", expect_prompt=False) mock_serial.send_bytes.assert_any_call(self.mock_stream, f"sudo route add default gw {self.mock_gateway_ip}", @@ -203,13 +211,14 @@ class FixNetworkingTestCase(unittest.TestCase): install_vbox.fix_networking(self.mock_stream, self.mock_release_r3, self.mock_password) # Assert + oam_if = OAM_CONFIG[0]['device'] mock_serial.send_bytes.assert_any_call(self.mock_stream, - "sudo /sbin/ip link set enp0s3 down", + f"sudo /sbin/ip link set {oam_if} down", expect_prompt=False) mock_host_helper.check_password.assert_called_with(self.mock_stream, password=self.mock_password) mock_serial.send_bytes.assert_any_call( self.mock_stream, - "sudo /sbin/ip link set enp0s3 up", + f"sudo /sbin/ip link set {oam_if} up", expect_prompt=False) mock_host_helper.check_password.assert_called_with(self.mock_stream, password=self.mock_password) diff --git a/virtualbox/pybox/utils/install_log.py b/virtualbox/pybox/utils/install_log.py index 
6ec919b..8969166 100644 --- a/virtualbox/pybox/utils/install_log.py +++ b/virtualbox/pybox/utils/install_log.py @@ -53,12 +53,13 @@ def init_logging(lab_name, log_path=None): # Create symbolic link to latest logs of this lab try: os.unlink(lab_log_path + "/latest") - except: # pylint: disable=bare-except + except FileNotFoundError: pass + os.symlink(LOG_DIR, lab_log_path + "/latest") def get_log_dir(): - """This method returns the directory path of the current logging run.""" + """This method returns the log directory""" return LOG_DIR diff --git a/virtualbox/pybox/utils/serial.py b/virtualbox/pybox/utils/serial.py index e660fbd..3366cbb 100644 --- a/virtualbox/pybox/utils/serial.py +++ b/virtualbox/pybox/utils/serial.py @@ -27,24 +27,27 @@ def connect(hostname, port=10000, prefix=""): if prefix: prefix = f"{prefix}_" + socketname = f"/tmp/{prefix}{hostname}" if 'controller-0' in hostname: socketname += '_serial' + LOG.info("Connecting to %s at %s", hostname, socketname) - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) - try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) sock.connect(('localhost', port)) - except: # pylint: disable=bare-except - LOG.info("Connection failed") - pass # pylint: disable=unnecessary-pass - # disconnect(sock) - sock = None - # TODO (WEI): double check this # pylint: disable=fixme - if sock: - sock.setblocking(False) + + # TODO (WEI): double check this # pylint: disable=fixme + if sock: + sock.setblocking(False) + + except Exception as exc: + LOG.info("Failed sock connection") + LOG.debug("Error:\n%s\n", repr(exc)) + if sock: + sock.close() + sock = None return sock @@ -85,12 +88,12 @@ def get_output(stream, cmd, prompts=None, timeout=5, log=True, as_lines=True, fl trash = stream.poll(1) # flush input buffers if trash: try: - LOG.info("Buffer has bytes before cmd execution: %s", + LOG.debug("Buffer has bytes before 
cmd execution: %s", trash.decode('utf-8')) - except Exception: # pylint: disable=W0703 - pass - except streamexpect.ExpectTimeout: - pass + except Exception as exc: + LOG.debug("Failed decoding buffer\nError: %s\n", repr(exc)) + except streamexpect.ExpectTimeout as exc: + LOG.debug("Failed flushing buffer\nError: %s\n", repr(exc)) # Send command stream.sendall(f"{cmd}\n".encode('utf-8')) @@ -110,8 +113,8 @@ def get_output(stream, cmd, prompts=None, timeout=5, log=True, as_lines=True, fl while (end_time - now) >= 0: try: incoming = stream.recv(max_read_buffer) - except socket.timeout: - pass + except socket.timeout as exc: + LOG.debug("Failed reading buffer\nError: %s\n", repr(exc)) if incoming: data += incoming if log: @@ -133,16 +136,19 @@ def get_output(stream, cmd, prompts=None, timeout=5, log=True, as_lines=True, fl stream.settimeout(prev_timeout) -def expect_bytes(stream, text, timeout=180, fail_ok=False, flush=True): +def expect_bytes(stream, text, timeout=180, fail_ok=False, flush=True, log=True): """ Wait for user specified text from stream. 
""" time.sleep(1) - if timeout < 60: - LOG.info("Expecting text within %s seconds: %s\n", timeout, text) - else: - LOG.info("Expecting text within %s minutes: %s\n", timeout / 60, text) + + if log: + if timeout < 60: + LOG.info("Expecting text within %s seconds: %s\n", timeout, text) + else: + LOG.info("Expecting text within %s minutes: %s\n", timeout / 60, text) + try: stream.expect_bytes(f"{text}".encode('utf-8'), timeout=timeout) except streamexpect.ExpectTimeout: @@ -153,12 +159,13 @@ def expect_bytes(stream, text, timeout=180, fail_ok=False, flush=True): LOG.error("Did not find expected text") # disconnect(stream) raise - except Exception as exception: - LOG.info("Connection failed with %s", exception) + except Exception as exc: + LOG.debug("Failed connection\nError: %s\n", repr(exc)) raise stdout.write('\n') - LOG.info("Found expected text: %s", text) + if log: + LOG.debug("Found expected text: %s", text) time.sleep(1) if flush: @@ -167,21 +174,21 @@ def expect_bytes(stream, text, timeout=180, fail_ok=False, flush=True): if incoming: incoming += b'\n' try: - LOG.info(">>> expect_bytes: Buffer has bytes!") + if log: + LOG.debug(">>> expect_bytes: Buffer has bytes!") stdout.write(incoming.decode('utf-8')) # streamexpect hardcodes it - except Exception: # pylint: disable=W0703 - pass - except streamexpect.ExpectTimeout: - pass - + except Exception as exc: + LOG.debug("Failed decoding buffer\nError: %s\n", repr(exc)) + except streamexpect.ExpectTimeout as exc: + LOG.debug("Failed flushing buffer\nError: %s\n", repr(exc)) return 0 # pylint: disable=inconsistent-return-statements -def send_bytes(stream, text, fail_ok=False, expect_prompt=True, - prompt=None, timeout=180, send=True, flush=True): +def send_bytes(stream, command, fail_ok=False, expect_prompt=True, + prompt=None, timeout=180, send=True, flush=True, log=True): """ - Send user specified text to stream. + Send user specified command to stream. 
""" time.sleep(1) @@ -191,19 +198,20 @@ def send_bytes(stream, text, fail_ok=False, expect_prompt=True, if incoming: incoming += b'\n' try: - LOG.info(">>> send_bytes: Buffer has bytes!") + LOG.debug(">>> send_bytes: Buffer has bytes!") stdout.write(incoming.decode('utf-8')) # streamexpect hardcodes it - except Exception: # pylint: disable=W0703 - pass - except streamexpect.ExpectTimeout: - pass + except Exception as exc: + LOG.debug("Failed decoding buffer\nError: %s\n", repr(exc)) + except streamexpect.ExpectTimeout as exc: + LOG.debug("Failed flushing buffer\nError: %s\n", repr(exc)) - LOG.info("Sending text: %s", text) + if log: + LOG.info("Sending command: %s", command) try: if send: - stream.sendall(f"{text}\n".encode('utf-8')) + stream.sendall(f"{command}\n".encode('utf-8')) else: - stream.sendall(f"{text}".encode('utf-8')) + stream.sendall(f"{command}".encode('utf-8')) if expect_prompt: time.sleep(1) if prompt: @@ -218,11 +226,12 @@ def send_bytes(stream, text, fail_ok=False, expect_prompt=True, if fail_ok: return -1 - LOG.error("Failed to send text, logging out.") + LOG.error("Failed to send command, logging out.") stream.sendall("exit".encode('utf-8')) raise - except Exception as exception: - LOG.info("Connection failed with %s.", exception) + except Exception as exc: + LOG.error("Connection failed") + LOG.debug("Failed flushing buffer\nError: %s\n", repr(exc)) raise return 0 diff --git a/virtualbox/pybox/utils/sftp.py b/virtualbox/pybox/utils/sftp.py index efa4c6f..0c41449 100644 --- a/virtualbox/pybox/utils/sftp.py +++ b/virtualbox/pybox/utils/sftp.py @@ -11,8 +11,8 @@ rsync and paramiko libraries. 
import getpass import os import time -import subprocess import paramiko +from helper.install_lab import exec_cmd from utils.install_log import LOG @@ -47,6 +47,7 @@ def sftp_send(source, destination, client_dict): except Exception: # pylint: disable=W0703 LOG.info("******* try again") retry += 1 + LOG.info("Waiting 10s") time.sleep(10) LOG.info("Sending file from %s to %s", source, destination) @@ -102,25 +103,15 @@ def send_dir(params_dict): keygen_arg = f"[127.0.0.1]:{remote_port}" else: keygen_arg = remote_host - cmd = f'ssh-keygen -f "/home/{getpass.getuser()}/.ssh/known_hosts" -R {keygen_arg}' - LOG.info("CMD: %s", cmd) - with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as process: - for line in iter(process.stdout.readline, b''): - LOG.info("%s", line.decode("utf-8").strip()) - process.wait() + cmd = f'ssh-keygen -f \ + "/home/{getpass.getuser()}/.ssh/known_hosts" -R {keygen_arg} 2>/dev/null' + exec_cmd(cmd) - LOG.info('Running rsync of dir: %s -> %s@%s:%s', source, username, remote_host, destination) + LOG.info('#### Running rsync of dir: %s -> %s@%s:%s', source, username, remote_host, destination) cmd = (f'rsync -av{follow_links} --rsh="/usr/bin/sshpass -p {password} ' f'ssh -p {remote_port} -o StrictHostKeyChecking=no -l {username}" ' f'{source}* {username}@{remote_host}:{destination}') - LOG.info("CMD: %s", cmd) - - with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as process: - for line in iter(process.stdout.readline, b''): - LOG.info("%s", line.decode("utf-8").strip()) - process.wait() - if process.returncode: - raise Exception(f"Error in rsync, return code:{process.returncode}") # pylint: disable=E0012, W0719 + exec_cmd(cmd) def send_dir_fallback(source, remote_host, destination, username, password): @@ -165,4 +156,5 @@ def send_dir_fallback(source, remote_host, destination, username, password): sftp_client.close() ssh_client.close() if send_img: + LOG.info("Waiting 10s") time.sleep(10) diff --git 
a/virtualbox/pybox/utils/tests/test_serial.py b/virtualbox/pybox/utils/tests/test_serial.py index e687f71..1425bbe 100644 --- a/virtualbox/pybox/utils/tests/test_serial.py +++ b/virtualbox/pybox/utils/tests/test_serial.py @@ -48,7 +48,7 @@ class ConnectTestCase(unittest.TestCase): # Assert mock_socket.assert_called_once_with(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) mock_socket.connect.assert_called_once_with(('localhost', port)) - mock_log_info.assert_called_with("Connection failed") + mock_log_info.assert_called_with("Failed sock connection") self.assertIsNone(result) @@ -116,9 +116,10 @@ class ExpectBytesTestCase(unittest.TestCase): Class to test expect_bytes method """ + @patch("serial.LOG.debug") @patch("serial.LOG.info") @patch("serial.stdout.write") - def test_expect_bytes(self, mock_stdout_write, mock_log_info): + def test_expect_bytes(self, mock_stdout_write, mock_log_info, mock_log_debug): """ Test expect_bytes method """ @@ -140,7 +141,7 @@ class ExpectBytesTestCase(unittest.TestCase): stream.expect_bytes.assert_called_once_with(f"{text}".encode('utf-8'), timeout=timeout) mock_stdout_write.assert_any_call('\n') mock_log_info.assert_any_call("Expecting text within %s minutes: %s\n", timeout / 60, text) - mock_log_info.assert_any_call("Found expected text: %s", text) + mock_log_debug.assert_any_call("Found expected text: %s", text) class SendBytesTestCase(unittest.TestCase): diff --git a/virtualbox/pybox/utils/tests/test_sftp.py b/virtualbox/pybox/utils/tests/test_sftp.py index 9733ffe..0bdbe57 100644 --- a/virtualbox/pybox/utils/tests/test_sftp.py +++ b/virtualbox/pybox/utils/tests/test_sftp.py @@ -54,7 +54,7 @@ class TestSendDir(unittest.TestCase): """ @patch("serial.LOG.info") - @patch('sftp.subprocess.Popen') + @patch('sftp.exec_cmd') @patch('sftp.getpass.getuser') def test_send_dir(self, mock_getuser, mock_popen, mock_log_info): """ diff --git a/virtualbox/pybox/vbox-controlgrp.sh b/virtualbox/pybox/vbox-controlgrp.sh index 
c57bc06..8422241 100755 --- a/virtualbox/pybox/vbox-controlgrp.sh +++ b/virtualbox/pybox/vbox-controlgrp.sh @@ -41,7 +41,7 @@ SNAP_ACTIONS="take delete restore" get_vms_by_group () { local group=$1 - vms=$(VBoxManage list -l vms | + vms=$(vboxmanage list -l vms | awk -v group="/$group" \ '/^Name:/ { name = $2; } '` '/^Groups:/ { groups = $2; } '` @@ -62,7 +62,7 @@ if [[ "$SNAP_ACTIONS" = *"$ACTION"* ]]; then while read -r vm; do vm=(${vm}) echo "Executing '$ACTION' on ${vm[0]}..." - VBoxManage snapshot ${vm[1]} "${ACTION}" "${SNAP_NAME}" + vboxmanage snapshot ${vm[1]} "${ACTION}" "${SNAP_NAME}" done <<< "$vms" elif [[ "$BASIC_INST_ACTIONS" = *"$ACTION"* ]]; then vms=$(get_vms_by_group "$GROUP") @@ -70,7 +70,7 @@ elif [[ "$BASIC_INST_ACTIONS" = *"$ACTION"* ]]; then while read -r vm; do vm=(${vm}) echo "Executing '$ACTION' on '${vm[0]}'..." - VBoxManage controlvm ${vm[1]} "${ACTION}" + vboxmanage controlvm ${vm[1]} "${ACTION}" done <<< "$vms" wait elif [[ "$ACTION" = "resume" ]]; then