Update mgmt network during enrollment

This commit adds support for updating the primary stack management
network during subcloud enrollment when both of the following hold:
1. the system is simplex
2. no admin network is configured

As the reconfiguration of the secondary management network was not
properly tested, it is unknown whether updating the two stacks
together could cause issues. Further, the communication between the system
controllers and the subcloud is based on the primary stack. This
commit doesn't include the task to update the secondary stack
management network.

Test plan:
1. Passed - update management network during enrollment on SX subcloud
w/o admin network configured, verify the management network is updated
and the subcloud can be online and in-sync post unlock and managed.
2. Passed - update management floating address only during enrollment
on SX subcloud, verify the floating address is added into the address
pool, verify the subcloud can be online and in-sync post unlock and
managed.
3. Passed - subcloud enrollment against a DX system with management
network update (in bootstrap values), verified the update of the
management network is skipped.
4. Passed - subcloud enrollment against a SX subcloud with both admin
network and management network update, verified the update of the
management network is skipped, the admin network updated. The subcloud
can be online and in-sync post unlock and managed.
5. Passed - rehomed a subcloud to the DC system with & without admin
network configured.

Depends-on: https://review.opendev.org/c/starlingx/config/+/936201
Closes-bug: 2089596

Signed-off-by: Yuxing Jiang <Yuxing.Jiang@windriver.com>
Change-Id: If9977c0c8e69c7de940a436c31e960c3f93d7206
This commit is contained in:
Yuxing Jiang 2024-11-21 19:43:28 -05:00
parent 664abccf71
commit 0ce82bc973
8 changed files with 206 additions and 46 deletions

View File

@ -37,5 +37,6 @@
- role: rehome-enroll-common/update-keystone-data
become: yes
- rehome-enroll-common/persist-configuration
- common/update-sc-admin-endpoints
- role: common/update-sc-admin-endpoints
when: admin_floating_address is defined
- enroll-subcloud/complete-enrollment

View File

@ -16,19 +16,28 @@
command: /usr/local/bin/enroll-init-cleanup
become: yes
- name: Lock controller-0
include_role:
name: common/host-lock
vars:
target_host: 'controller-0'
# Ensure controller-0 is schedulable in non-simplex systems
- name: Ensure controller-0 is schedulable and taints are removed
block:
- name: Uncordon controller-0 node
command: kubectl uncordon controller-0
environment:
KUBECONFIG: "/etc/kubernetes/admin.conf"
failed_when: false
register: uncordon_result
# In non-SX system, the controller-0 node will be tainted disabled during lock,
# remove the taint after locking the only controller.
- name: Remove disabled taint on controller-0 if not simplex
command: kubectl taint nodes controller-0 services=disabled:NoExecute-
environment:
KUBECONFIG: "/etc/kubernetes/admin.conf"
failed_when: false
- name: Remove services=disabled taint from controller-0
command: kubectl taint nodes controller-0 services=disabled:NoExecute-
environment:
KUBECONFIG: "/etc/kubernetes/admin.conf"
failed_when: false
register: taint_removal_result
- name: Debug uncordon and taint removal results
debug:
msg:
- "Uncordon result: {{ uncordon_result.stdout }}"
- "Taint removal result: {{ taint_removal_result.stdout }}"
when: system_mode != 'simplex'
- name: Mark enrollment is completed

View File

@ -62,15 +62,6 @@
import_role:
name: common/validate-registries
# TODO(yuxing) uncomment the following block when the admin network can be configured
# during factory installation. Task: 50089
# - name: Validate admin network configuration
# block:
# - name: Fail if admin network configuration not specified
# fail:
# msg: "Admin_subnet or admin_gateway_address is not defined in bootstrap values."
# when: admin_network is not defined
#
- name: Initialize address pairs secondary
set_fact:
address_pairs_secondary: { }
@ -79,7 +70,7 @@
block:
- name: Build admin address primary pairs for validation
set_fact:
address_pairs:
admin_pairs:
admin:
start:
"{{ network_params.admin_start_address_primary if admin_start_address != 'derived'
@ -90,7 +81,7 @@
subnet: "{{ network_params.admin_subnet_primary }}"
- include_tasks: roles/common/validate-addresses/tasks/validate_address_range.yml
with_dict: "{{ address_pairs }}"
with_dict: "{{ admin_pairs }}"
# admin secondary pairs validation if defined
- block:
@ -116,6 +107,21 @@
when: network_params.admin_subnet_secondary
when: admin_network is defined
- name: Build management address primary pairs for validation
set_fact:
management_pairs:
management:
start:
"{{ network_params.management_start_address_primary if management_start_address != 'derived'
else default_management_start_address_primary }}"
end:
"{{ network_params.management_end_address_primary if management_end_address != 'derived'
else default_management_end_address_primary }}"
subnet: "{{ network_params.management_subnet_primary }}"
- include_tasks: roles/common/validate-addresses/tasks/validate_address_range.yml
with_dict: "{{ management_pairs }}"
- name: Validate oam secondary network configuration
block:
- name: Build oam address secondary pairs for validation

View File

@ -11,6 +11,7 @@ import configparser
import os
import subprocess
import sys
import time
from barbicanclient import client as barbican_client
from cgtsclient import client as cgts_client
@ -19,6 +20,7 @@ from keystoneclient.auth.identity import v3
from keystoneclient import session
from netaddr import IPNetwork
from sysinv.common import constants as sysinv_constants
from tsconfig.tsconfig import MGMT_NETWORK_RECONFIGURATION_ONGOING
# Configuration parser setup
@ -39,6 +41,17 @@ def print_with_timestamp(*args, **kwargs):
print(f"[{current_time}]", *args, **kwargs)
def wait_for_file(file_path, timeout=300, interval=5):
    """Block until ``file_path`` exists, polling at a fixed interval.

    :param file_path: path whose existence is polled with os.path.exists
    :param timeout: maximum number of seconds to keep waiting
    :param interval: number of seconds to sleep between two checks
    :raises ValueError: if the path still does not exist once more than
        ``timeout`` seconds have elapsed
    """
    # Measure the wait with the monotonic clock so that wall-clock
    # adjustments made while waiting can neither shorten nor extend the
    # timeout.
    start_time = time.monotonic()
    while not os.path.exists(file_path):
        elapsed_time = time.monotonic() - start_time
        if elapsed_time > timeout:
            raise ValueError(f"Timeout reached: {file_path} does not exist.")
        print_with_timestamp(f"Waiting for {file_path}...")
        time.sleep(interval)
    print_with_timestamp(f"File found: {file_path}")
# CgtsClient class to handle API interactions
class CgtsClient(object):
SYSINV_API_VERSION = 1
@ -549,6 +562,8 @@ def update_admin_network(client, section_name):
if not has_admin_network(section_name):
return
delete_network_and_addrpool(client, 'admin', section_name)
admin_subnet = IPNetwork(CONF.get(section_name, "ADMIN_SUBNET"))
admin_start_address = CONF.get(section_name, "ADMIN_START_ADDRESS")
admin_end_address = CONF.get(section_name, "ADMIN_END_ADDRESS")
@ -620,6 +635,95 @@ def update_admin_network_secondary(client, section_name):
client.sysinv.network_addrpool.assign(**network_addrpool_data)
def precheck_update_management_network(client, section_name):
    """Tell whether the management network update should be attempted.

    Returns False, after printing the reason, when the SYSTEM_MODE read
    from the given config section is not simplex, or when an admin network
    is found through the client. Returns True otherwise.
    """
    system_mode = CONF.get(section_name, 'SYSTEM_MODE')
    is_simplex = system_mode == sysinv_constants.SYSTEM_MODE_SIMPLEX
    if not is_simplex:
        # Only simplex systems get their management network updated.
        print_with_timestamp(
            f"Ignore management network update in {system_mode}",
        )
        return False

    try:
        admin_network = get_network(client, sysinv_constants.NETWORK_TYPE_ADMIN)
        if not admin_network:
            return True
        # An admin network exists, so the management network is left alone.
        print_with_timestamp(
            f"Admin network: {admin_network.uuid} discovered, ignore management "
            "network update.",
        )
        return False
    except ValueError:
        # A ValueError here is treated as "admin network not configured",
        # which is the expected state when the management network needs to
        # be updated.
        return True
# TODO(yuxing): improve the following method if dual stack reconfiguration on the
# management network is verified
def update_management_network(client, section_name):
    """Update the address pool of the existing management network.

    Does nothing when precheck_update_management_network() returns False or
    when the pool already matches the values read from the config section.
    Exits the process with status 1 when the IP family of the configured
    subnet differs from the primary pool family of the existing network,
    when MANAGEMENT_GATEWAY_ADDRESS is 'undef', or when the update (or the
    wait for the reconfiguration flag file) raises an exception.
    """
    if not precheck_update_management_network(client, section_name):
        return

    management_subnet = IPNetwork(CONF.get(section_name, "MANAGEMENT_SUBNET"))
    ip_family = get_version_text(management_subnet)

    existing_network = get_network(client, sysinv_constants.NETWORK_TYPE_MGMT)
    primary_ip_family = existing_network.primary_pool_family
    family_matches = primary_ip_family.lower() == ip_family
    if not family_matches:
        print_with_timestamp(
            f"Primary IP family of management network: {primary_ip_family}, "
            f"can not be updated to {ip_family}."
        )
        sys.exit(1)

    gateway = CONF.get(section_name, "MANAGEMENT_GATEWAY_ADDRESS")
    if gateway == 'undef':
        print_with_timestamp(
            "Management gateway address required to update management network, "
            "please add it to the bootstrap values and try again."
        )
        sys.exit(1)

    pool_id = existing_network.pool_uuid
    address_range = (
        CONF.get(section_name, "MANAGEMENT_START_ADDRESS"),
        CONF.get(section_name, "MANAGEMENT_END_ADDRESS"),
    )
    pool_values = {
        'network': str(management_subnet.network),
        'prefix': str(management_subnet.prefixlen),
        'ranges': [address_range],
        'gateway_address': gateway,
        'floating_address': CONF.get(section_name, "MANAGEMENT_FLOATING_ADDRESS"),
        'controller0_address': CONF.get(section_name, "MANAGEMENT_CONTROLLER0_ADDRESS"),
        'controller1_address': CONF.get(section_name, "MANAGEMENT_CONTROLLER1_ADDRESS"),
    }

    if is_equal_with_existing_pool(client, pool_values, pool_id):
        print_with_timestamp(
            f"Management network addrpool {pool_id} is up-to-date.")
        return

    # One 'replace' operation per pool field, in the order defined above.
    patch = [
        {'op': 'replace', 'path': '/' + field, 'value': value}
        for field, value in pool_values.items()
    ]

    try:
        client.sysinv.address_pool.update(pool_id, patch)
        # Wait for flag to block the dnsmasq runtime manifest triggered by
        # system controller network update
        wait_for_file(MGMT_NETWORK_RECONFIGURATION_ONGOING)
        print_with_timestamp(
            f"Management network addrpool {pool_id} is updated.")
    except Exception as e:
        print_with_timestamp(f"Failed to update management network: {e}")
        sys.exit(1)
def is_equal_with_existing_pool(client, pool_values, pool_uuid):
address_pool = client.sysinv.address_pool.get(pool_uuid)
return (
@ -752,9 +856,9 @@ def main():
# Primary OAM has been updated by cloud-init; the secondary OAM update has
# been postponed until now.
update_oam_network_secondary(client, section_name)
update_management_network(client, section_name)
populate_service_parameter_config(client, section_name)
update_system_controller_subnets(client, section_name)
delete_network_and_addrpool(client, 'admin', section_name)
update_admin_network(client, section_name)
edit_dc_role_to_subcloud(client)

View File

@ -22,26 +22,42 @@
when: user_dns_host_records
- name: Generate config file for python sysinv db population script
template:
src: system_config.j2
dest: "/tmp/{{ system_config_file }}"
- block:
- name: Lock controller-0 for network update
include_role:
name: common/host-lock
vars:
target_host: 'controller-0'
- name: Set input parameters to populate config script
set_fact:
script_input: "/tmp/{{ system_config_file }}"
- name: Generate config file for python sysinv db population script
template:
src: system_config.j2
dest: "/tmp/{{ system_config_file }}"
- name: Update system configurations
script: update_system_config.py {{ script_input }}
register: update_result
failed_when: false
- name: Set input parameters to populate config script
set_fact:
script_input: "/tmp/{{ system_config_file }}"
- debug: var=update_result
- name: Update system configurations
script: update_system_config.py {{ script_input }}
register: update_result
failed_when: false
- name: Fail if update config script throws an exception
fail:
msg: "Failed to update system configuration."
when: update_result.rc != 0
- debug: var=update_result
- name: Fail if update config script throws an exception
fail:
msg: |
Failed to update system configuration.
Waiting for the system to unlock before retry.
when: update_result.rc != 0
rescue:
- name: Unlock controller-0
include_role:
name: common/host-unlock
vars:
target_host: 'controller-0'
- name: Copy central registy cert
import_tasks: copy_central_registry_cert.yml

View File

@ -57,9 +57,20 @@ GHCR_REGISTRY_PASSWORD={{ ghcr_registry.password | default('none') }}
REGISTRYK8S_REGISTRY_PASSWORD={{ registryk8s_registry.password | default('none') }}
ICR_REGISTRY_PASSWORD={{ icr_registry.password | default('none') }}
ADMIN_SUBNET={{ address_pairs['admin']['subnet'] | default('undef') }}
ADMIN_START_ADDRESS={{ address_pairs['admin']['start'] | default('undef') }}
ADMIN_END_ADDRESS={{ address_pairs['admin']['end'] | default('undef') }}
MANAGEMENT_SUBNET={{ management_pairs['management']['subnet'] | default('undef') }}
MANAGEMENT_START_ADDRESS={{ management_pairs['management']['start'] | default('undef') }}
MANAGEMENT_END_ADDRESS={{ management_pairs['management']['end'] | default('undef') }}
MANAGEMENT_FLOATING_ADDRESS={{ controller_floating_address| default('undef') }}
MANAGEMENT_CONTROLLER0_ADDRESS={{ derived_network_params.controller_0_address | default('undef')}}
MANAGEMENT_CONTROLLER1_ADDRESS={{ derived_network_params.controller_1_address | default('undef')}}
MANAGEMENT_GATEWAY_ADDRESS={{ network_params.management_gateway_address_primary
if (network_params.management_gateway_address_primary is defined
and network_params.management_gateway_address_primary)
else 'undef' }}
ADMIN_SUBNET={{ admin_pairs['admin']['subnet'] | default('undef') }}
ADMIN_START_ADDRESS={{ admin_pairs['admin']['start'] | default('undef') }}
ADMIN_END_ADDRESS={{ admin_pairs['admin']['end'] | default('undef') }}
ADMIN_GATEWAY_ADDRESS={{ network_params.admin_gateway_address_primary
if (network_params.admin_gateway_address_primary is defined
and network_params.admin_gateway_address_primary)

View File

@ -68,6 +68,7 @@
external_oam_node_1_address: "{{ external_oam_node_1_address }}"
external_oam_floating_address: "{{ external_oam_floating_address }}"
management_start_address: "{{ management_start_address }}"
management_end_address: "{{ management_end_address }}"
admin_start_address: "{{ admin_start_address }}"
admin_end_address: "{{ admin_end_address }}"
cluster_service_start_address: "{{ cluster_service_start_address }}"
@ -117,6 +118,11 @@
subnet:
name: management_subnet
value: "{{ management_subnet }}"
management_end_address:
address: "{{ management_end_address }}"
subnet:
name: management_subnet
value: "{{ management_subnet }}"
cluster_service_start_address:
address: "{{ cluster_service_start_address }}"
subnet:
@ -166,13 +172,18 @@
# The provided subnets have passed validation, set the primary default addresses
# based on the primary subnet values
- name: Set default start primary addresses based on provided primary subnets
- name: Set default start and end primary addresses based on provided primary subnets
set_fact:
default_external_oam_start_address_primary: "{{ (network_params.external_oam_subnet_primary | ipaddr(1))
.split('/')[0] }}"
default_external_oam_end_address_primary: "{{ (network_params.external_oam_subnet_primary | ipaddr(-2))
.split('/')[0] }}"
default_management_start_address_primary: "{{ (network_params.management_subnet_primary | ipaddr(1)).split('/')[0] }}"
default_management_end_address_primary: "{{ (network_params.management_subnet_primary | ipaddr(-2)).split('/')[0] }}"
default_cluster_service_start_address_primary: "{{ (network_params.cluster_service_subnet_primary |
ipaddr(1)).split('/')[0] }}"
default_cluster_service_end_address_primary: "{{ (network_params.cluster_service_subnet_primary | ipaddr(-2))
.split('/')[0] }}"
# The provided subnets have passed validation, set the secondary default addresses
# based on the secondary subnet values, if present.

View File

@ -48,7 +48,9 @@
no_log: true
- name: Flush memcached
shell: echo flush_all > /dev/tcp/{{ derived_network_params.controller_0_address }}/11211
shell: |
controller_0_address=$(awk '/controller-0$/ {print $1}' /etc/hosts)
echo flush_all > /dev/tcp/$controller_0_address/11211
- name: Restart keystone service
import_role: