Merge "Implement workers configuration"

This commit is contained in:
Zuul 2024-01-22 13:24:07 +00:00 committed by Gerrit Code Review
commit 51645b5822
3 changed files with 39 additions and 9 deletions

View File

@@ -19,5 +19,5 @@ class HostTimeout:
HOST_INSTALL = 3600
LAB_CONFIG = 5400
INSTALL_PATCHES = 900
NORMAL_OP = 90
NORMAL_OP = 120
REATTEMPT_DELAY = [0, 2, 5, 10, 30, 60, 2*60, 3*60, 5*60, 10*60]

View File

@@ -605,7 +605,7 @@ def get_hostnames(ignore=None, personalities=['controller', 'storage', 'worker']
node_name = V_BOX_OPTIONS.labname + f"-worker-{node_id}"
if ignore and node_name in ignore:
continue
hostnames[node_name] = f"worker-{id}"
hostnames[node_name] = f"worker-{node_id}"
if V_BOX_OPTIONS.storages and 'storage' in personalities:
for node_id in range(0, V_BOX_OPTIONS.storages):
node_name = V_BOX_OPTIONS.labname + f"-storage-{node_id}"
@@ -1658,11 +1658,38 @@ def stage_setup_workers(ssh_client):
try:
workers = list(get_hostnames(personalities=['worker']).values())
# Add workers setup here
LOG.info("#### Provision worker nodes")
LOG.info("Adding third Ceph monitor to worker-0 node")
command = ["system ceph-mon-add worker-0"]
run_ssh_cmd_list(ssh_client, command, timeout=HostTimeout.NORMAL_OP)
node = "worker-0"
status = "configured"
start_time = time.time()
while node:
if (time.time() - start_time) > HostTimeout.NORMAL_OP:
LOG.error("Ceph monitor not configured in %s, aborting!", HostTimeout.NORMAL_OP)
raise TimeoutError("Error while trying to add third Ceph monitor")
nodes_statuses, _, _ = run_ssh_cmd(
ssh_client, 'source /etc/platform/openrc; system ceph-mon-list',
timeout=HostTimeout.NORMAL_OP
)
nodes_statuses = nodes_statuses[1:-1]
for nodes_status in nodes_statuses:
if node in nodes_status and status in nodes_status:
node = ""
if node:
LOG.warning("Worker-0 Ceph monitor not yet configured.")
LOG.info("Waiting 20 sec before re-checking node status.")
time.sleep(20)
for worker in workers:
commands = [
f'echo "TODO {worker}";',
f'system interface-network-assign {worker} mgmt0 cluster-host'
]
run_ssh_cmd_list(
ssh_client,
@@ -1689,6 +1716,9 @@ def stage_unlock_workers(ssh_client):
workers = list(get_hostnames(personalities=['worker']).values())
LOG.info("Waiting 120s for services to stabilize.")
time.sleep(120)
for worker in workers:
run_ssh_cmd(
ssh_client,

View File

@@ -465,8 +465,8 @@ class TestGetHostnames(unittest.TestCase):
expected = {
'test-controller-0': 'controller-0',
'test-controller-1': 'controller-1',
'test-worker-0': f'worker-{id}',
'test-worker-1': f'worker-{id}',
'test-worker-0': f'worker-0',
'test-worker-1': f'worker-1',
'test-storage-0': 'storage-0',
'test-storage-1': 'storage-1',
}
@@ -492,7 +492,7 @@ class TestGetHostnames(unittest.TestCase):
ignore = ['test-controller-0', 'test-worker-1']
expected = {
'test-controller-1': 'controller-1',
'test-worker-0': f'worker-{id}',
'test-worker-0': f'worker-0',
'test-storage-0': 'storage-0',
'test-storage-1': 'storage-1',
}
@@ -519,8 +519,8 @@ class TestGetHostnames(unittest.TestCase):
expected = {
'test-controller-0': 'controller-0',
'test-controller-1': 'controller-1',
'test-worker-0': f'worker-{id}',
'test-worker-1': f'worker-{id}',
'test-worker-0': f'worker-0',
'test-worker-1': f'worker-1',
}
# Run