diff --git a/playbookconfig/src/playbooks/enroll_subcloud.yml b/playbookconfig/src/playbooks/enroll_subcloud.yml
index 3f8c4e311..c73aa7e18 100644
--- a/playbookconfig/src/playbooks/enroll_subcloud.yml
+++ b/playbookconfig/src/playbooks/enroll_subcloud.yml
@@ -37,5 +37,6 @@
     - role: rehome-enroll-common/update-keystone-data
       become: yes
     - rehome-enroll-common/persist-configuration
-    - common/update-sc-admin-endpoints
+    - role: common/update-sc-admin-endpoints
+      when: admin_floating_address is defined
     - enroll-subcloud/complete-enrollment
diff --git a/playbookconfig/src/playbooks/roles/enroll-subcloud/complete-enrollment/tasks/main.yml b/playbookconfig/src/playbooks/roles/enroll-subcloud/complete-enrollment/tasks/main.yml
index 18ef6c5b4..fd5c2654e 100644
--- a/playbookconfig/src/playbooks/roles/enroll-subcloud/complete-enrollment/tasks/main.yml
+++ b/playbookconfig/src/playbooks/roles/enroll-subcloud/complete-enrollment/tasks/main.yml
@@ -16,19 +16,28 @@
   command: /usr/local/bin/enroll-init-cleanup
   become: yes
 
-- name: Lock controller-0
-  include_role:
-    name: common/host-lock
-  vars:
-    target_host: 'controller-0'
+# Ensure controller-0 is schedulable in non-simplex systems
+- name: Ensure controller-0 is schedulable and taints are removed
+  block:
+    - name: Uncordon controller-0 node
+      command: kubectl uncordon controller-0
+      environment:
+        KUBECONFIG: "/etc/kubernetes/admin.conf"
+      failed_when: false
+      register: uncordon_result
 
-# In non-SX system, the controller-0 node will be tainted disabled during lock,
-# remove the taint after locking the only controller.
-- name: Remove disabled taint on controller-0 if not simplex
-  command: kubectl taint nodes controller-0 services=disabled:NoExecute-
-  environment:
-    KUBECONFIG: "/etc/kubernetes/admin.conf"
-  failed_when: false
+    - name: Remove services=disabled taint from controller-0
+      command: kubectl taint nodes controller-0 services=disabled:NoExecute-
+      environment:
+        KUBECONFIG: "/etc/kubernetes/admin.conf"
+      failed_when: false
+      register: taint_removal_result
+
+    - name: Debug uncordon and taint removal results
+      debug:
+        msg:
+          - "Uncordon result: {{ uncordon_result.stdout }}"
+          - "Taint removal result: {{ taint_removal_result.stdout }}"
   when: system_mode != 'simplex'
 
 - name: Mark enrollment is completed
diff --git a/playbookconfig/src/playbooks/roles/enroll-subcloud/validate-before-enroll/tasks/main.yml b/playbookconfig/src/playbooks/roles/enroll-subcloud/validate-before-enroll/tasks/main.yml
index d1f82487a..841dabe8b 100644
--- a/playbookconfig/src/playbooks/roles/enroll-subcloud/validate-before-enroll/tasks/main.yml
+++ b/playbookconfig/src/playbooks/roles/enroll-subcloud/validate-before-enroll/tasks/main.yml
@@ -62,15 +62,6 @@
   import_role:
     name: common/validate-registries
 
-# TODO(yuxing) uncomment the following block when the admin network can be configured
-# during factory installation. Task: 50089
-# - name: Validate admin network configuration
-#   block:
-#     - name: Fail if admin network configuration not specified
-#       fail:
-#         msg: "Admin_subnet or admin_gateway_address is not defined in bootstrap values."
-#       when: admin_network is not defined
-#
 - name: Initialize address pairs secondary
   set_fact:
     address_pairs_secondary: { }
@@ -79,7 +70,7 @@
   block:
     - name: Build admin address primary pairs for validation
       set_fact:
-        address_pairs:
+        admin_pairs:
           admin:
             start:
               "{{ network_params.admin_start_address_primary if admin_start_address != 'derived'
@@ -90,7 +81,7 @@
             subnet: "{{ network_params.admin_subnet_primary }}"
 
     - include_tasks: roles/common/validate-addresses/tasks/validate_address_range.yml
-      with_dict: "{{ address_pairs }}"
+      with_dict: "{{ admin_pairs }}"
 
     # admin secondary pairs validation if defined
     - block:
@@ -116,6 +107,21 @@
       when: network_params.admin_subnet_secondary
   when: admin_network is defined
 
+- name: Build management address primary pairs for validation
+  set_fact:
+    management_pairs:
+      management:
+        start:
+          "{{ network_params.management_start_address_primary if management_start_address != 'derived'
+          else default_management_start_address_primary }}"
+        end:
+          "{{ network_params.management_end_address_primary if management_end_address != 'derived'
+          else default_management_end_address_primary }}"
+        subnet: "{{ network_params.management_subnet_primary }}"
+
+- include_tasks: roles/common/validate-addresses/tasks/validate_address_range.yml
+  with_dict: "{{ management_pairs }}"
+
 - name: Validate oam secondary network configuration
   block:
     - name: Build oam address secondary pairs for validation
diff --git a/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/files/update_system_config.py b/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/files/update_system_config.py
index 5ce807f66..28f31dc02 100644
--- a/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/files/update_system_config.py
+++ b/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/files/update_system_config.py
@@ -11,6 +11,7 @@ import configparser
 import os
 import subprocess
 import sys
+import time
 
 from barbicanclient import client as barbican_client
 from cgtsclient import client as cgts_client
@@ -19,6 +20,7 @@ from keystoneclient.auth.identity import v3
 from keystoneclient import session
 from netaddr import IPNetwork
 from sysinv.common import constants as sysinv_constants
+from tsconfig.tsconfig import MGMT_NETWORK_RECONFIGURATION_ONGOING
 
 
 # Configuration parser setup
@@ -39,6 +41,22 @@ def print_with_timestamp(*args, **kwargs):
     print(f"[{current_time}]", *args, **kwargs)
 
 
+def wait_for_file(file_path, timeout=300, interval=5):
+    """Block until file_path exists.
+
+    Polls every interval seconds; raises TimeoutError once more than
+    timeout seconds have elapsed without the file appearing.
+    """
+    start_time = time.time()
+    while not os.path.exists(file_path):
+        elapsed_time = time.time() - start_time
+        if elapsed_time > timeout:
+            raise TimeoutError(f"Timeout reached: {file_path} does not exist.")
+        print_with_timestamp(f"Waiting for {file_path}...")
+        time.sleep(interval)
+    print_with_timestamp(f"File found: {file_path}")
+
+
 # CgtsClient class to handle API interactions
 class CgtsClient(object):
     SYSINV_API_VERSION = 1
@@ -549,6 +567,8 @@ def update_admin_network(client, section_name):
     if not has_admin_network(section_name):
         return
 
+    delete_network_and_addrpool(client, 'admin', section_name)
+
     admin_subnet = IPNetwork(CONF.get(section_name, "ADMIN_SUBNET"))
     admin_start_address = CONF.get(section_name, "ADMIN_START_ADDRESS")
     admin_end_address = CONF.get(section_name, "ADMIN_END_ADDRESS")
@@ -620,6 +640,103 @@ def update_admin_network_secondary(client, section_name):
     client.sysinv.network_addrpool.assign(**network_addrpool_data)
 
 
+def precheck_update_management_network(client, section_name):
+    """Return True only if the management network should be updated.
+
+    The update is skipped when the system is not simplex or when an
+    admin network is already configured.
+    """
+    # skip update management network if not simplex
+    system_mode = CONF.get(section_name, 'SYSTEM_MODE')
+    if system_mode != sysinv_constants.SYSTEM_MODE_SIMPLEX:
+        print_with_timestamp(
+            f"Ignore management network update in {system_mode}",
+        )
+        return False
+
+    # skip update management network if admin network configured
+    try:
+        admin_network = get_network(client, sysinv_constants.NETWORK_TYPE_ADMIN)
+        if admin_network:
+            print_with_timestamp(
+                f"Admin network: {admin_network.uuid} discovered, ignore management "
+                "network update.",
+            )
+            return False
+    except ValueError:
+        # admin network is expected to be not configured if need to update
+        # management network
+        pass
+
+    return True
+
+
+# TODO(yuxing): improve the following method if dual stack reconfiguration on the
+# management network is verified
+def update_management_network(client, section_name):
+    """Update the management address pool from the MANAGEMENT_* settings.
+
+    Returns without changes when the precheck fails or the pool already
+    matches. Exits with status 1 on an IP family mismatch, a missing
+    gateway address or a failed update.
+    """
+    if not precheck_update_management_network(client, section_name):
+        return
+
+    management_subnet = IPNetwork(CONF.get(section_name, "MANAGEMENT_SUBNET"))
+    ip_family = get_version_text(management_subnet)
+
+    existing_network = get_network(client, sysinv_constants.NETWORK_TYPE_MGMT)
+    primary_ip_family = existing_network.primary_pool_family
+    if primary_ip_family.lower() != ip_family:
+        print_with_timestamp(
+            f"Primary IP family of management network: {primary_ip_family}, "
+            f"can not be updated to {ip_family}."
+        )
+        sys.exit(1)
+
+    subcloud_gateway = CONF.get(section_name, "MANAGEMENT_GATEWAY_ADDRESS")
+    if subcloud_gateway == 'undef':
+        print_with_timestamp(
+            "Management gateway address required to update management network, "
+            "please add it to the bootstrap values and try again."
+        )
+        sys.exit(1)
+
+    pool_id = existing_network.pool_uuid
+
+    values = {
+        'network': str(management_subnet.network),
+        'prefix': str(management_subnet.prefixlen),
+        'ranges': [(
+            CONF.get(section_name, "MANAGEMENT_START_ADDRESS"),
+            CONF.get(section_name, "MANAGEMENT_END_ADDRESS"),
+        )],
+        'gateway_address': subcloud_gateway,
+        'floating_address': CONF.get(section_name, "MANAGEMENT_FLOATING_ADDRESS"),
+        'controller0_address': CONF.get(section_name, "MANAGEMENT_CONTROLLER0_ADDRESS"),
+        'controller1_address': CONF.get(section_name, "MANAGEMENT_CONTROLLER1_ADDRESS"),
+    }
+    if is_equal_with_existing_pool(client, values, pool_id):
+        print_with_timestamp(
+            f"Management network addrpool {pool_id} is up-to-date.")
+        return
+
+    patch = []
+    for (k, v) in values.items():
+        patch.append({'op': 'replace', 'path': '/' + k, 'value': v})
+    try:
+        client.sysinv.address_pool.update(pool_id, patch)
+        # Wait for flag to block the dnsmasq runtime manifest triggered by
+        # system controller network update
+        wait_for_file(MGMT_NETWORK_RECONFIGURATION_ONGOING)
+        print_with_timestamp(
+            f"Management network addrpool {pool_id} is updated.")
+    except Exception as e:
+        print_with_timestamp(f"Failed to update management network: {e}")
+        sys.exit(1)
+
+
 def is_equal_with_existing_pool(client, pool_values, pool_uuid):
     address_pool = client.sysinv.address_pool.get(pool_uuid)
     return (
@@ -752,9 +869,9 @@ def main():
     # Primary OAM has been updated by cloud-init, secondary oam has been
     # procastinated until now.
     update_oam_network_secondary(client, section_name)
+    update_management_network(client, section_name)
     populate_service_parameter_config(client, section_name)
     update_system_controller_subnets(client, section_name)
-    delete_network_and_addrpool(client, 'admin', section_name)
     update_admin_network(client, section_name)
     edit_dc_role_to_subcloud(client)
 
diff --git a/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/tasks/main.yml
index 6d2db93a4..e1d6adc0e 100644
--- a/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/tasks/main.yml
+++ b/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/tasks/main.yml
@@ -22,26 +22,48 @@
   when: user_dns_host_records
 
 
-- name: Generate config file for python sysinv db population script
-  template:
-    src: system_config.j2
-    dest: "/tmp/{{ system_config_file }}"
+- block:
+    - name: Lock controller-0 for network update
+      include_role:
+        name: common/host-lock
+      vars:
+        target_host: 'controller-0'
 
-- name: Set input parameters to populate config script
-  set_fact:
-    script_input: "/tmp/{{ system_config_file }}"
+    - name: Generate config file for python sysinv db population script
+      template:
+        src: system_config.j2
+        dest: "/tmp/{{ system_config_file }}"
 
-- name: Update system configurations
-  script: update_system_config.py {{ script_input }}
-  register: update_result
-  failed_when: false
+    - name: Set input parameters to populate config script
+      set_fact:
+        script_input: "/tmp/{{ system_config_file }}"
 
-- debug: var=update_result
+    - name: Update system configurations
+      script: update_system_config.py {{ script_input }}
+      register: update_result
+      failed_when: false
 
-- name: Fail if update config script throws an exception
-  fail:
-    msg: "Failed to update system configuration."
-  when: update_result.rc != 0
+    - debug: var=update_result
+
+    - name: Fail if update config script throws an exception
+      fail:
+        msg: |
+          Failed to update system configuration.
+          Waiting for the system to unlock before retry.
+      when: update_result.rc != 0
+
+  rescue:
+    - name: Unlock controller-0
+      include_role:
+        name: common/host-unlock
+      vars:
+        target_host: 'controller-0'
+
+    # A successful rescue clears the failed state, so fail explicitly to
+    # keep the play from continuing with an unapplied configuration.
+    - name: Fail after unlocking controller-0
+      fail:
+        msg: "Failed to update system configuration; controller-0 was unlocked for retry."
 
 - name: Copy central registy cert
   import_tasks: copy_central_registry_cert.yml
diff --git a/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/templates/system_config.j2 b/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/templates/system_config.j2
index c65d93e06..09705b465 100644
--- a/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/templates/system_config.j2
+++ b/playbookconfig/src/playbooks/roles/rehome-enroll-common/persist-configuration/templates/system_config.j2
@@ -57,9 +57,20 @@ GHCR_REGISTRY_PASSWORD={{ ghcr_registry.password | default('none') }}
 REGISTRYK8S_REGISTRY_PASSWORD={{ registryk8s_registry.password | default('none') }}
 ICR_REGISTRY_PASSWORD={{ icr_registry.password | default('none') }}
 
-ADMIN_SUBNET={{ address_pairs['admin']['subnet'] | default('undef') }}
-ADMIN_START_ADDRESS={{ address_pairs['admin']['start'] | default('undef') }}
-ADMIN_END_ADDRESS={{ address_pairs['admin']['end'] | default('undef') }}
+MANAGEMENT_SUBNET={{ management_pairs['management']['subnet'] | default('undef') }}
+MANAGEMENT_START_ADDRESS={{ management_pairs['management']['start'] | default('undef') }}
+MANAGEMENT_END_ADDRESS={{ management_pairs['management']['end'] | default('undef') }}
+MANAGEMENT_FLOATING_ADDRESS={{ controller_floating_address | default('undef') }}
+MANAGEMENT_CONTROLLER0_ADDRESS={{ derived_network_params.controller_0_address | default('undef') }}
+MANAGEMENT_CONTROLLER1_ADDRESS={{ derived_network_params.controller_1_address | default('undef') }}
+MANAGEMENT_GATEWAY_ADDRESS={{ network_params.management_gateway_address_primary
+                              if (network_params.management_gateway_address_primary is defined
+                              and network_params.management_gateway_address_primary)
+                              else 'undef' }}
+
+ADMIN_SUBNET={{ admin_pairs['admin']['subnet'] | default('undef') }}
+ADMIN_START_ADDRESS={{ admin_pairs['admin']['start'] | default('undef') }}
+ADMIN_END_ADDRESS={{ admin_pairs['admin']['end'] | default('undef') }}
 ADMIN_GATEWAY_ADDRESS={{ network_params.admin_gateway_address_primary
                          if (network_params.admin_gateway_address_primary is defined
                          and network_params.admin_gateway_address_primary)
diff --git a/playbookconfig/src/playbooks/roles/rehome-enroll-common/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-enroll-common/prepare-env/tasks/main.yml
index 02b0864c0..33087e989 100644
--- a/playbookconfig/src/playbooks/roles/rehome-enroll-common/prepare-env/tasks/main.yml
+++ b/playbookconfig/src/playbooks/roles/rehome-enroll-common/prepare-env/tasks/main.yml
@@ -68,6 +68,7 @@
         external_oam_node_1_address: "{{ external_oam_node_1_address }}"
         external_oam_floating_address: "{{ external_oam_floating_address }}"
         management_start_address: "{{ management_start_address }}"
+        management_end_address: "{{ management_end_address }}"
         admin_start_address: "{{ admin_start_address }}"
         admin_end_address: "{{ admin_end_address }}"
         cluster_service_start_address: "{{ cluster_service_start_address }}"
@@ -117,6 +118,11 @@
           subnet:
             name: management_subnet
             value: "{{ management_subnet }}"
+        management_end_address:
+          address: "{{ management_end_address }}"
+          subnet:
+            name: management_subnet
+            value: "{{ management_subnet }}"
         cluster_service_start_address:
           address: "{{ cluster_service_start_address }}"
           subnet:
@@ -166,13 +172,18 @@
 
     # The provided subnets have passed validation, set the primary default addresses
     # based on the primary subnet values
-    - name: Set default start primary addresses based on provided primary subnets
+    - name: Set default start and end primary addresses based on provided primary subnets
       set_fact:
         default_external_oam_start_address_primary: "{{ (network_params.external_oam_subnet_primary | ipaddr(1))
                                                      .split('/')[0] }}"
+        default_external_oam_end_address_primary: "{{ (network_params.external_oam_subnet_primary | ipaddr(-2))
+                                                   .split('/')[0] }}"
         default_management_start_address_primary: "{{ (network_params.management_subnet_primary | ipaddr(1)).split('/')[0] }}"
+        default_management_end_address_primary: "{{ (network_params.management_subnet_primary | ipaddr(-2)).split('/')[0] }}"
         default_cluster_service_start_address_primary:
           "{{ (network_params.cluster_service_subnet_primary | ipaddr(1)).split('/')[0] }}"
+        default_cluster_service_end_address_primary: "{{ (network_params.cluster_service_subnet_primary | ipaddr(-2))
+                                                      .split('/')[0] }}"
 
     # The provided subnets have passed validation, set the secondary default addresses
     # based on the secondary subnet values, if present.
diff --git a/playbookconfig/src/playbooks/roles/rehome-enroll-common/update-keystone-data/tasks/migrate_keystone_ids.yml b/playbookconfig/src/playbooks/roles/rehome-enroll-common/update-keystone-data/tasks/migrate_keystone_ids.yml
index 2567aa4b5..451760f31 100644
--- a/playbookconfig/src/playbooks/roles/rehome-enroll-common/update-keystone-data/tasks/migrate_keystone_ids.yml
+++ b/playbookconfig/src/playbooks/roles/rehome-enroll-common/update-keystone-data/tasks/migrate_keystone_ids.yml
@@ -48,7 +48,13 @@
   no_log: true
 
 - name: Flush memcached
-  shell: echo flush_all > /dev/tcp/{{ derived_network_params.controller_0_address }}/11211
+  # /dev/tcp redirection is a bash feature, so run the script with bash.
+  # awk exits after the first match so only one address is captured.
+  shell: |
+    controller_0_address=$(awk '/controller-0$/ {print $1; exit}' /etc/hosts)
+    echo flush_all > /dev/tcp/$controller_0_address/11211
+  args:
+    executable: /bin/bash
 
 - name: Restart keystone service
   import_role: