diff --git a/examples/migrate/migrate-subcloud1-overrides-EXAMPLE.yml b/examples/migrate/migrate-subcloud1-overrides-EXAMPLE.yml new file mode 100644 index 000000000..fdec192a5 --- /dev/null +++ b/examples/migrate/migrate-subcloud1-overrides-EXAMPLE.yml @@ -0,0 +1,6 @@ +--- +{ + "ansible_ssh_pass": "St8rlingX*", + "external_oam_node_0_address": "10.10.10.13", + "external_oam_node_1_address": "10.10.10.14", +} diff --git a/playbookconfig/centos/playbookconfig.spec b/playbookconfig/centos/playbookconfig.spec index a0b3fbaad..bc754c7e1 100644 --- a/playbookconfig/centos/playbookconfig.spec +++ b/playbookconfig/centos/playbookconfig.spec @@ -33,6 +33,7 @@ This package contains playbooks used for configuring StarlingX. %install make install DESTDIR=%{buildroot}%{local_stx_ansible_dir} +chmod 755 %{buildroot}%{local_stx_ansible_dir}/playbooks/roles/rehome-subcloud/update-keystone-data/files/validate_keystone_passwords.sh %post cp %{local_stx_ansible_dir}/playbooks/ansible.cfg %{local_etc_ansible} diff --git a/playbookconfig/opensuse/playbookconfig.spec b/playbookconfig/opensuse/playbookconfig.spec index 3e9d9b921..d093c671a 100644 --- a/playbookconfig/opensuse/playbookconfig.spec +++ b/playbookconfig/opensuse/playbookconfig.spec @@ -37,6 +37,8 @@ make install DESTDIR=%{buildroot}%{local_stx_ansible_dir} chmod 755 %{buildroot}%{local_stx_ansible_dir}/playbooks/roles/bootstrap/persist-config/files/populate_initial_config.py chmod 755 %{buildroot}%{local_stx_ansible_dir}/playbooks/roles/bootstrap/prepare-env/files/check_root_disk_size.py chmod 755 %{buildroot}%{local_stx_ansible_dir}/playbooks/roles/backup/backup-system/files/fm_alarm.py +chmod 755 %{buildroot}%{local_stx_ansible_dir}/playbooks/roles/rehome-subcloud/update-keystone-data/files/migrate_keystone_ids.py +chmod 755 %{buildroot}%{local_stx_ansible_dir}/playbooks/roles/rehome-subcloud/update-keystone-data/files/validate_keystone_passwords.sh %post diff --git a/playbookconfig/src/playbooks/ansible.cfg b/playbookconfig/src/playbooks/ansible.cfg index 20973c87f..37606a8a3 100644 --- a/playbookconfig/src/playbooks/ansible.cfg +++ b/playbookconfig/src/playbooks/ansible.cfg @@ -100,7 +100,7 @@ stdout_callback = skippy #sudo_flags = -H -S -n # SSH timeout -#timeout = 10 +timeout = 60 # default user to use for playbooks if user is not specified # (/usr/bin/ansible will use current user as default) @@ -426,7 +426,7 @@ pipelining = False # Number of times to retry an SSH connection to a host, in case of UNREACHABLE. # For each retry attempt, there is an exponential backoff, # so after the first attempt there is 1s wait, then 2s, 4s etc. up to 30s (max). 
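# For a rough sense of the worst case with retries = 10 (illustrative arithmetic only,
# based on the backoff description above): the waits grow 1s, 2s, 4s, 8s, 16s and then
# cap at 30s, roughly three minutes of backoff in total, and each attempt can
# additionally take up to the 60s SSH timeout configured above, so an unreachable host
# is only declared failed after several minutes rather than within seconds.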
-#retries = 3 +retries = 10 [persistent_connection] diff --git a/playbookconfig/src/playbooks/bootstrap.yml b/playbookconfig/src/playbooks/bootstrap.yml index 5f258ed83..dceeb58f9 100644 --- a/playbookconfig/src/playbooks/bootstrap.yml +++ b/playbookconfig/src/playbooks/bootstrap.yml @@ -20,7 +20,7 @@ - bootstrap/prepare-env - { role: bootstrap/validate-config, become: yes } - { role: bootstrap/store-passwd, when: save_password, become: yes } - - { role: bootstrap/apply-bootstrap-manifest, when: not replayed, become: yes } + - { role: bootstrap/apply-manifest, become: yes } - { role: bootstrap/persist-config, become: yes } - { role: bootstrap/bringup-essential-services, become: yes } - { role: bootstrap/bringup-bootstrap-applications, become: yes } diff --git a/playbookconfig/src/playbooks/enable_secured_etcd.yml b/playbookconfig/src/playbooks/enable_secured_etcd.yml new file mode 100644 index 000000000..e0d443fdb --- /dev/null +++ b/playbookconfig/src/playbooks/enable_secured_etcd.yml @@ -0,0 +1,98 @@ +--- +# +# Copyright (c) 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# Enable secured etcd. +# This file can be removed in the release after STX5.0 + +- hosts: all + become: yes + become_user: root + tasks: + - name: Create cert for etcd server and client + import_role: + name: common/create-etcd-certs + + - name: Create etcd cert permdir + file: + path: "{{ config_permdir + '/etcd' }}" + state: directory + mode: 0700 + + - name: Copy etcd certificates to config_permdir + copy: + src: "/etc/etcd/{{ item }}" + dest: "{{ config_permdir + '/etcd' }}/{{ item }}" + remote_src: yes + force: yes + with_items: + - "etcd-server.crt" + - "etcd-server.key" + - "etcd-client.crt" + - "etcd-client.key" + - "apiserver-etcd-client.crt" + - "apiserver-etcd-client.key" + - "ca.crt" + - "ca.key" + + - name: Copy apiserver-etcd-client cert + copy: + src: "/etc/etcd/{{ item }}" + dest: "/etc/kubernetes/pki/{{ item }}" + remote_src: yes + force: yes + with_items: + - "apiserver-etcd-client.crt" + - "apiserver-etcd-client.key" + + - name: Write security settings to hieradata + lineinfile: + path: "{{ puppet_permdir }}/hieradata/static.yaml" + line: "{{ item }}" + with_items: + - "platform::etcd::params::security_enabled: true" + - "platform::etcd::params::bind_address: {{ cluster_floating_address }}" + - "platform::etcd::params::bind_address_version: {{ etcd_listen_address_version }}" + + - name: Create list of etcd classes to pass to puppet + copy: + dest: "/tmp/etcd.yml" + content: | + classes: + - platform::etcd::upgrade::runtime + + - name: Applying puppet for enabling etcd security + command: > + /usr/local/bin/puppet-manifest-apply.sh + {{ puppet_permdir }}/hieradata/ + {{ ipaddress }} + controller runtime /tmp/etcd.yml + register: etcd_apply_result + failed_when: false + environment: + LC_ALL: "en_US.UTF-8" + + - block: + - name: Remove bind address and address version + lineinfile: + dest: "{{ puppet_permdir }}/hieradata/static.yaml" + regexp: "{{ item }}" + state: absent + with_items: + - "^platform::etcd::params::bind_address" + - "^platform::etcd::params::bind_address_version" + + - name: Revert security_enable flag + lineinfile: + dest: "{{ puppet_permdir }}/hieradata/static.yaml" + regexp: "^platform::etcd::params::security_enabled" + line: "platform::etcd::params::security_enabled: false" + + - name: Fail if puppet manifest apply script returns an error + fail: + msg: >- + Failed to apply etcd manifest! 
+ when: etcd_apply_result.rc != 0 diff --git a/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml b/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml index c79fc0109..109c4e369 100644 --- a/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml +++ b/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml @@ -75,6 +75,12 @@ openstack_backup_filename_prefix: "{{ inventory_hostname }}_openstack_backup" # # This variable is used for StarlingX OpenStack application restore only # + +# The dc_vault backup tarball will be named in this format: +# _.tgz +# +dc_vault_backup_filename_prefix: "{{ inventory_hostname }}_dc_vault_backup" + restore_cinder_glance_data: false # Default directory where the system backup tarballs fetched from the @@ -94,3 +100,7 @@ restore_openstack_continue: false # When set to false, disk partitions that were previously used for Ceph data are # not wiped. Otherwise, all disks are wiped as part of the bootstrap. wipe_ceph_osds: false + +# The following parameter indicates where the backup data file(s) reside, +# on the host itself (true) or off box (false). +on_box_data: true diff --git a/playbookconfig/src/playbooks/host_vars/rehome-subcloud/default.yml b/playbookconfig/src/playbooks/host_vars/rehome-subcloud/default.yml new file mode 100644 index 000000000..14b7f0d0f --- /dev/null +++ b/playbookconfig/src/playbooks/host_vars/rehome-subcloud/default.yml @@ -0,0 +1,6 @@ +--- +password_change: false + +cluster_service_subnet: 10.96.0.0/12 +# cluster_service_start_address: +# cluster_service_end_address: diff --git a/playbookconfig/src/playbooks/install.yml b/playbookconfig/src/playbooks/install.yml index ab4e519c6..7e2163b2b 100644 --- a/playbookconfig/src/playbooks/install.yml +++ b/playbookconfig/src/playbooks/install.yml @@ -1,6 +1,6 @@ --- # -# Copyright (c) 2020 Wind River Systems, Inc. +# Copyright (c) 2020-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -9,6 +9,7 @@ hosts: all gather_facts: false become: no + tasks: - set_fact: ansible_port: "{{ ansible_port | default(22) }}" @@ -164,22 +165,10 @@ timeout={{ wait_for_timeout }} state=started - - set_fact: - override_files_dir: "{{ lookup('env', 'HOME') }}" - password_change_responses: - yes/no: 'yes' - sysadmin*: 'sysadmin' - \(current\) UNIX password: 'sysadmin' - (?i)New password: "{{ ansible_ssh_pass }}" - (?i)Retype new password: "{{ ansible_ssh_pass }}" - - - import_role: - name: common/prepare-env - - - name: Check if the system is ready - command: "systemctl is-active multi-user.target" - register: check_active - retries: 30 - delay: 40 - until: check_active.rc == 0 - no_log: true +- name: Run validate host playbook post install + import_playbook: validate_host.yml + vars: + check_system: true + check_load: false + check_bootstrap_address: false + check_patches: false diff --git a/playbookconfig/src/playbooks/migrate_sx_to_dx.yml b/playbookconfig/src/playbooks/migrate_sx_to_dx.yml new file mode 100644 index 000000000..6530ecee4 --- /dev/null +++ b/playbookconfig/src/playbooks/migrate_sx_to_dx.yml @@ -0,0 +1,116 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This playbook provides the capability to migrate a subcloud from +# AIO-SX to AIO-DX. It will lock the subcloud, perform the necessary +# configuration updates, then unlock the subcloud. NOTE: This is for a +# non-ceph subcloud. 
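+#
+# A possible pre-check (suggested as an assumption here, not enforced by this
+# playbook): confirm the subcloud really has no ceph storage backend before
+# migrating, e.g.
+#   source /etc/platform/openrc
+#   system storage-backend-list
+# should show no ceph backend on a subcloud this playbook is intended for.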
+# +# To run the playbook, the user would define an overrides file that +# provides the required variable settings, passing this on the ansible +# command-line. +# (see migrate-subcloud1-overrides-EXAMPLE.yml) +# +# Example command: +# ansible-playbook /usr/share/ansible/stx-ansible/playbooks/migrate_sx_to_dx.yml \ +# -e @migrate-subcloud1-overrides.yml -i subcloud1, -v +# + +- hosts: all + gather_facts: no + + tasks: + - name: Check required parameters + debug: + msg: + - "Validating required migration parameters:" + - "ansible_ssh_pass: {{ ansible_ssh_pass | regex_replace('.', '*') }}" + - "external_oam_node_0_address: {{ external_oam_node_0_address }}" + - "external_oam_node_1_address: {{ external_oam_node_1_address }}" + failed_when: (ansible_ssh_pass | length == 0) or + (external_oam_node_0_address | ipaddr == false) or + (external_oam_node_1_address | ipaddr == false) + + - name: Query management interface configuration + shell: | + source /etc/platform/openrc + system interface-network-list controller-0 --nowrap | awk '$8 == "mgmt" { print $6 }' + register: mgmt_if + + - name: Query cluster-host interface configuration + shell: | + source /etc/platform/openrc + system interface-network-list controller-0 --nowrap | awk '$8 == "cluster-host" { print $6 }' + register: cluster_host_if + + - name: Validate network interface configuration + fail: + msg: | + The cluster-host and management networks cannot be on the + loopback interface for simplex to duplex migration + when: mgmt_if.stdout == 'lo' or + cluster_host_if.stdout == 'lo' + + - set_fact: + duplex_mode: "{{ duplex_mode | default('duplex') }}" + kubernetes_duplex_migration_flag: '/var/run/.kubernetes_duplex_migration_complete' + + - name: Query system_mode + shell: source /etc/platform/openrc; system show | awk '$2 == "system_mode" { print $4 }' + register: current_system_mode + + - name: Query oam_c0_ip + shell: source /etc/platform/openrc; system oam-show | awk '$2 == "oam_c0_ip" { print $4 }' + register: current_oam_c0_ip + + - name: Query oam_c1_ip + shell: source /etc/platform/openrc; system oam-show | awk '$2 == "oam_c1_ip" { print $4 }' + register: current_oam_c1_ip + + - block: + - name: Lock host + include_role: + name: common/host-lock + vars: + target_host: 'controller-0' + + - name: Update system mode + expect: + echo: yes + command: bash -c 'source /etc/platform/openrc; system modify -m {{ duplex_mode }} ' + responses: + (.*)Are you sure you want to continue(.*): "yes" + failed_when: false + when: current_system_mode.stdout == 'simplex' + + # Wait up to 20 minutes for the runtime manifest triggered by the previous + # "system modify -m duplex" command, as there may be other runtime manifests + # being applied + - name: Wait for kubernetes duplex migration manifest completion, up to 20 minutes + wait_for: + path: "{{ kubernetes_duplex_migration_flag }}" + state: present + timeout: 1200 + msg: Timeout waiting for kubernetes duplex migration manifest completion + + - name: Update OAM configuration + shell: >- + source /etc/platform/openrc; + system oam-modify oam_c0_ip={{ external_oam_node_0_address }} oam_c1_ip={{ external_oam_node_1_address }} + args: + warn: false + when: current_oam_c0_ip.stdout != external_oam_node_0_address or + current_oam_c1_ip.stdout != external_oam_node_1_address + + - name: Unlock host + include_role: + name: common/host-unlock + vars: + target_host: 'controller-0' + + when: current_system_mode.stdout == 'simplex' or + current_oam_c0_ip.stdout != external_oam_node_0_address or + 
current_oam_c1_ip.stdout != external_oam_node_1_address diff --git a/playbookconfig/src/playbooks/provision_edgeworker.yml b/playbookconfig/src/playbooks/provision_edgeworker.yml new file mode 100644 index 000000000..321e6613d --- /dev/null +++ b/playbookconfig/src/playbooks/provision_edgeworker.yml @@ -0,0 +1,30 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- hosts: localhost + gather_facts: no + become: true + roles: + - role: provision-edgeworker/prepare-controller/platform + when: groups['edgeworker'] | length > 0 + + - role: provision-edgeworker/prepare-controller/kubernetes + when: groups['edgeworker'] | length > 0 + +- hosts: edgeworker + become: true + strategy: free + roles: + - role: provision-edgeworker/prepare-edgeworker/os + + - role: provision-edgeworker/prepare-edgeworker/kubernetes + +- hosts: localhost + gather_facts: no + become: true + roles: + - role: provision-edgeworker/prepare-controller/cleanup + when: groups['edgeworker'] | length > 0 diff --git a/playbookconfig/src/playbooks/rehome_subcloud.yml b/playbookconfig/src/playbooks/rehome_subcloud.yml new file mode 100644 index 000000000..bb1b09480 --- /dev/null +++ b/playbookconfig/src/playbooks/rehome_subcloud.yml @@ -0,0 +1,22 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +- hosts: all + gather_facts: no + + vars_files: + - vars/common/main.yml + - host_vars/rehome-subcloud/default.yml + + roles: + - common/prepare-env + - common/validate-target + - { role: rehome-subcloud/prepare-env, become: yes } + - { role: rehome-subcloud/update-network-config, become: yes } + - { role: rehome-subcloud/update-keystone-data, become: yes } + - { role: rehome-subcloud/update-sc-cert, become: yes } + - { role: rehome-subcloud/check-services-status, become: yes } diff --git a/playbookconfig/src/playbooks/restore_dc_vault.yml b/playbookconfig/src/playbooks/restore_dc_vault.yml new file mode 100644 index 000000000..922eba712 --- /dev/null +++ b/playbookconfig/src/playbooks/restore_dc_vault.yml @@ -0,0 +1,16 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +- hosts: all + gather_facts: no + + vars_files: + - host_vars/backup-restore/default.yml + + roles: + - { role: common/prepare-env } + - { role: restore-dc-vault/prepare-env } + - { role: restore-dc-vault/restore-dc-vault-directory, become: yes } diff --git a/playbookconfig/src/playbooks/restore_openstack.yml b/playbookconfig/src/playbooks/restore_openstack.yml index c5edf5b86..4659cab60 100644 --- a/playbookconfig/src/playbooks/restore_openstack.yml +++ b/playbookconfig/src/playbooks/restore_openstack.yml @@ -15,7 +15,5 @@ # Main play roles: - { role: common/prepare-env } - - { role: backup-restore/prepare-env } - { role: restore-openstack/prepare-env, when: not restore_openstack_continue|bool } - - { role: backup-restore/transfer-file, when: not restore_openstack_continue|bool } - { role: restore-openstack/restore, become: yes } diff --git a/playbookconfig/src/playbooks/restore_platform.yml b/playbookconfig/src/playbooks/restore_platform.yml index bcecad851..474129ebf 100644 --- a/playbookconfig/src/playbooks/restore_platform.yml +++ b/playbookconfig/src/playbooks/restore_platform.yml @@ -15,7 +15,6 @@ - common/prepare-env - common/validate-target - restore-platform/prepare-env - - backup-restore/transfer-file - restore-platform/restore-sw-patches - name: Run bootstrap playbook with restore mode diff --git a/playbookconfig/src/playbooks/restore_user_images.yml b/playbookconfig/src/playbooks/restore_user_images.yml index 92a2fcce9..cdeb9f2a3 100644 --- a/playbookconfig/src/playbooks/restore_user_images.yml +++ b/playbookconfig/src/playbooks/restore_user_images.yml @@ -1,6 +1,6 @@ --- # -# Copyright (c) 2020 Wind River Systems, Inc. +# Copyright (c) 2020-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -13,7 +13,5 @@ roles: - { role: common/prepare-env } - { role: restore-user-images/prepare-env, become: yes } - - { role: backup-restore/prepare-env } - - { role: backup-restore/transfer-file } - { role: restore-user-images/restore-local-registry-images, become: yes, docker_images_backup: "{{ target_backup_dir }}/{{ backup_filename }}" } diff --git a/playbookconfig/src/playbooks/roles/backup-restore/validate-input/tasks/main.yml b/playbookconfig/src/playbooks/roles/backup-restore/validate-input/tasks/main.yml new file mode 100644 index 000000000..52eb14f23 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/backup-restore/validate-input/tasks/main.yml @@ -0,0 +1,56 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role validates user input common to all B&R playbooks +# +- name: Set default target where backup tarball inspection takes place + set_fact: + inspection_target: "{{ inventory_hostname }}" + +# Set inspection target to Ansible control machine if the backup tarball +# is off-box. +- name: Update target if backup data are off-box + set_fact: + inspection_target: localhost + when: on_box_data|bool == false + +- block: + - name: Fail if backup_filename is not defined or set + fail: + msg: "Mandatory configuration parameter backup_filename is not defined or set." + when: backup_filename is not defined or backup_filename is none + + - name: Fail if the backup file is off-box and initial_backup_dir is not specified + fail: + msg: "Parameter initial_backup_dir must be specified if the backup tar file is off box." 
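+    # Illustrative invocation only (the exact flags are an assumption, not defined by
+    # this role): an off-box restore is typically driven from the Ansible controller,
+    # for example
+    #   ansible-playbook restore_platform.yml -i <inventory> \
+    #     -e "on_box_data=false initial_backup_dir=/home/user/backups backup_filename=<backup>.tgz"
+    # in which case the existence checks in this file run on localhost via the
+    # inspection_target fact set above.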
+ when: (initial_backup_dir is not defined or initial_backup_dir is none) and + (on_box_data|bool == false) + + - name: Set the initial_backup_dir to /opt/platform-backup if not specified and backup file is on the host + set_fact: + initial_backup_dir: /opt/platform-backup + when: (initial_backup_dir is not defined or initial_backup_dir is none) and + (on_box_data|bool == true) + + - name: Check if backup file exists + stat: + path: "{{ initial_backup_dir }}/{{ backup_filename }}" + register: backup_stat_result + + - block: + - name: Fail if backup file does not exist on the target + fail: + msg: "Backup file {{ initial_backup_dir }}/{{ backup_filename }} does not exist on the target." + when: (on_box_data|bool == true) + + - name: Fail if the backup file does not exist locally + fail: + msg: "Backup file {{ initial_backup_dir }}/{{ backup_filename }} does not exist on this machine." + when: (on_box_data|bool == false) + when: not backup_stat_result.stat.exists + + delegate_to: "{{ inspection_target }}" diff --git a/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml b/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml index 23519840f..fc77a51de 100644 --- a/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml @@ -11,6 +11,35 @@ # - name: Do StarlingX backup block: + - name: Send application lifecycle notifications for pre-backup semantic check + script: /usr/bin/sysinv-utils notify backup-semantic-check + register: backup_semantic_check_notification_result + failed_when: false + + - name: Fail if some application won't allow backup to proceed because semantic check failed. + fail: + msg: > + Semantic check failed for backup action from application + {{ backup_semantic_check_notification_result.stderr }}. + when: backup_semantic_check_notification_result.rc == 1 + + - name: Fail if there is some other/internal error when sending lifecycle hook. + fail: + msg: "Failed to run backup-semantic-check action." + when: backup_semantic_check_notification_result.rc == 2 + + - name: Send application lifecycle notifications for pre-backup action + script: /usr/bin/sysinv-utils notify pre-backup-action + register: pre_backup_notification_result + failed_when: false + + - name: Fail if some application cannot handle the pre-backup action + fail: + msg: > + Pre-backup action for application + {{ pre_backup_notification_result.stderr }}. 
+ when: pre_backup_notification_result.rc != 0 + - name: Generate backup_in_progress alarm script: fm_alarm.py "--set" "--backup" register: alarm_result @@ -40,7 +69,7 @@ args: warn: false - - name: Backup postgres, template1, sysinv, barbican db data + - name: Backup postgres, template1, sysinv, barbican, helmv2 db data shell: >- sudo -u postgres pg_dump --format=plain --inserts --disable-triggers --data-only {{ item }} > {{ postgres_dir.path }}/{{ item }}.postgreSql.data @@ -51,6 +80,7 @@ - template1 - sysinv - barbican + - helmv2 - name: Backup fm db data shell: >- @@ -192,7 +222,6 @@ - "{{ patching_permdir }}" - "{{ patching_repo_permdir }}" - "{{ extension_permdir }}" - - "{{ dc_vault_permdir }}" - "{{ deploy_permdir }}" - "{{ postgres_dir.path }}" - "{{ armada_permdir }}" @@ -210,6 +239,18 @@ loop_control: label: "{{ item.item }}" + # For SystemController the dc-vault is part of platform but restored after controller-0 unlock + # Create a separate archive for it + - block: + - name: Check the size (in KiB) of directories that will be backed up for dc-vault + shell: "du -sh -k {{ dc_vault_permdir }} | awk '{print $1}'" + register: size_output_dc_vault + + - name: Estimate the total required disk size for platform backup archive + set_fact: + total_platform_size_estimation: "{{ total_platform_size_estimation|int + size_output_dc_vault.stdout|int }}" + when: check_dc_controller.rc == 0 + - name: Check the free space in the archive dir shell: "df -k {{ backup_dir }} --output=avail | tail -1" register: df_output @@ -291,6 +332,18 @@ when: ceph_backend.stat.exists + - name: Send application lifecycle notifications for pre-etcd-backup action + script: /usr/bin/sysinv-utils notify pre-etcd-backup-action + register: pre_etcd_backup_notification_result + failed_when: false + + - name: Fail if some application cannot handle the pre-etcd-backup action + fail: + msg: > + Pre-etcd-backup action failed for application + {{ pre_etcd_backup_notification_result.stderr }}. + when: pre_etcd_backup_notification_result.rc != 0 + - name: Create etcd snapshot temp dir file: path: "{{ tempdir.path }}/etcd-snapshot" @@ -301,11 +354,32 @@ set_fact: etcd_snapshot_file: "{{ etcd_snapshot_dir.path }}/etcd-snapshot.db" + - name: Get etcd endpoints + shell: | + source /etc/platform/openrc + system addrpool-list | awk '/cluster-host-subnet/{print$14}' + register: etcd_endpoint + + - name: Wrap etcd_endpoint in [] brackets if it's an ipv6 address + set_fact: + etcd_endpoint_parsed: "{{ etcd_endpoint.stdout | ipwrap }}" + - name: Create etcd snapshot - command: "etcdctl snapshot save {{ etcd_snapshot_file }}" + command: "etcdctl --endpoints https://{{ etcd_endpoint_parsed }}:2379 --cert=/etc/etcd/etcd-client.crt + --key=/etc/etcd/etcd-client.key --cacert=/etc/etcd/ca.crt snapshot save {{ etcd_snapshot_file }}" environment: ETCDCTL_API: 3 + - name: Notify applications that etcd-backup succeeded + script: /usr/bin/sysinv-utils notify post-etcd-backup-action success + register: post_etcd_backup_notification_result + failed_when: false + + - name: Fail if there is some other/internal error when sending lifecycle hook. 
+ fail: + msg: "Failed to run post-etcd-backup action [{{ post_etcd_backup_notification_result.rc }}]" + when: post_etcd_backup_notification_result.rc != 0 + - name: Create temp dir for override backup file file: path: "{{ tempdir.path }}/override" @@ -339,19 +413,32 @@ platform_backup_file: "{{ platform_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" docker_local_registry_backup_file: "{{ docker_local_registry_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" openstack_backup_file: "{{ openstack_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" + dc_vault_backup_file: "{{ dc_vault_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" - name: Set backup files absolute path set_fact: platform_backup_file_path: "{{ backup_dir }}/{{ platform_backup_file }}" docker_local_registry_backup_file_path: "{{ backup_dir }}/{{ docker_local_registry_backup_file }}" openstack_backup_file_path: "{{ backup_dir }}/{{ openstack_backup_file }}" + dc_vault_backup_file_path: "{{ backup_dir }}/{{ dc_vault_backup_file }}" - name: Save user uploaded images from local registry to an archive import_tasks: export-user-local-registry-images.yml vars: export_file_path: "{{ docker_local_registry_backup_file_path }}" kilo_free_size: "{{ remaining_disk_size_estimation }}" - when: backup_user_local_registry is defined and backup_user_local_registry + when: backup_user_local_registry is defined and backup_user_local_registry|bool == true + + - name: Notify applications that backup succeeded + script: /usr/bin/sysinv-utils notify post-backup-action success + register: post_backup_notification_result + failed_when: false + + - name: Fail if there is some other/internal error when sending lifecycle hook. + fail: + msg: "Failed to run post-backup action [{{ post_backup_notification_result.rc }}]" + when: post_backup_notification_result.rc != 0 + # Archive module has a known bug that doesn't handle empty symbolic links # well. Restore to tar command. Can add -P option to keep the leading @@ -370,7 +457,6 @@ {{ patching_permdir }} \ {{ patching_repo_permdir }} \ {{ extension_permdir }} \ - {{ dc_vault_permdir }} \ {{ deploy_permdir }} \ {{ crushmap_file | default(\"\") }} \ {{ etcd_snapshot_file }} \ @@ -382,6 +468,13 @@ args: warn: false + - name: Create a tgz archive for dc-vault backup + shell: "tar -czf {{ dc_vault_backup_file_path }} $(ls -d \ + {{ dc_vault_permdir }} 2>/dev/null)" + args: + warn: false + when: check_dc_controller.rc == 0 + - name: Create a tgz archive for OpenStack backup shell: "tar -czf {{ openstack_backup_file_path }} $(ls -d \ {{ armada_permdir }}/stx-openstack \ @@ -427,7 +520,7 @@ file: path: "{{ tempdir.path }}" state: absent - when: tempdir is defined + when: tempdir is defined and tempdir.path is defined - name: Remove the backup in progress flag file file: @@ -443,3 +536,7 @@ fail: msg: "Failed to clear backup-in-progress alarm." when: alarm_result.rc != 0 + + rescue: + - name: Notify applications that backup failed. 
+ script: /usr/bin/sysinv-utils notify post-backup-action failure diff --git a/playbookconfig/src/playbooks/roles/bootstrap/apply-bootstrap-manifest/defaults/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/apply-bootstrap-manifest/defaults/main.yml deleted file mode 100644 index 3e51218f3..000000000 --- a/playbookconfig/src/playbooks/roles/bootstrap/apply-bootstrap-manifest/defaults/main.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- -hieradata_workdir: /tmp/hieradata -manifest_apply_log: /tmp/apply_manifest.log -loopback_ifname: lo diff --git a/playbookconfig/src/playbooks/roles/bootstrap/apply-bootstrap-manifest/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/apply-bootstrap-manifest/tasks/main.yml deleted file mode 100644 index ced567e55..000000000 --- a/playbookconfig/src/playbooks/roles/bootstrap/apply-bootstrap-manifest/tasks/main.yml +++ /dev/null @@ -1,126 +0,0 @@ ---- -# -# Copyright (c) 2019 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# -# ROLE DESCRIPTION: -# This role is to create static configuration and apply the puppet bootstrap -# manifest. - -- name: Create config workdir - file: - path: "{{ hieradata_workdir }}" - state: directory - owner: root - group: root - mode: 0755 - -- block: - - name: Generating static config data - command: "/usr/bin/sysinv-puppet create-static-config {{ hieradata_workdir }}" - failed_when: false - register: static_config_result - - - name: Fail if static hieradata cannot be generated - fail: - msg: "Failed to create puppet hiera static config." - when: static_config_result.rc != 0 - - - name: Write required system controller keystone user and project Ids to static hieradata if it's subcloud - lineinfile: - path: "{{ hieradata_workdir }}/static.yaml" - line: "{{ item }}" - with_items: - - "keystone::dc_admin_user_id: {{ system_controller_keystone_admin_user_id }}" - - "keystone::dc_admin_project_id: {{ system_controller_keystone_admin_project_id }}" - - "openstack::keystone::bootstrap::dc_services_project_id: {{ system_controller_keystone_services_project_id }}" - - "platform::sysinv::bootstrap::dc_sysinv_user_id: {{ system_controller_keystone_sysinv_user_id }}" - - "platform::dcmanager::bootstrap::dc_dcmanager_user_id: {{system_controller_keystone_dcmanager_user_id}}" - - when: distributed_cloud_role == 'subcloud' - - when: mode == 'bootstrap' - -- block: - - name: Set ssh_config tmp dir fact - set_fact: - temp_ssh_config_dir: /tmp/config/ssh_config - - - name: Prefetch static hieradata - command: >- - tar -C {{ hieradata_workdir }} -xpf {{ restore_data_file }} --transform='s,.*/,,' - '{{ archive_puppet_permdir }}/*static.yaml' - args: - warn: false - - - name: Migrate static hieradata to new version - command: > - sed -i 's/{{ upgrade_metadata.from_release }}/{{ upgrade_metadata.to_release }}/g' - {{ hieradata_workdir }}/static.yaml - args: - warn: false - when: migrate_platform_data is defined and migrate_platform_data - - - name: Look for ssh_config dir in the backup tarball - shell: "tar -tf {{ restore_data_file }} | grep 'opt/platform/config/.*/ssh_config'" - args: - warn: false - failed_when: false - register: search_ssh_config - - - block: - - name: Create temp ssh_config dir - file: - path: "{{ temp_ssh_config_dir }}" - state: directory - owner: root - group: root - mode: 0755 - - - name: Prefetch ssh_config - command: >- - tar -C {{ temp_ssh_config_dir }} -xpf {{ restore_data_file }} --transform='s,.*/,,' - {{ archive_ssh_config_permdir }} - args: - warn: false - - - name: Remove the unwanted 
directory - file: - path: "{{ temp_ssh_config_dir }}/ssh_config" - state: absent - - when: search_ssh_config.rc == 0 - when: mode == 'restore' - -- name: Applying puppet bootstrap manifest - command: > - /usr/local/bin/puppet-manifest-apply.sh - {{ hieradata_workdir }} - {{ derived_network_params.controller_0_address }} - controller ansible_bootstrap > {{ manifest_apply_log }} - register: bootstrap_manifest - environment: - INITIAL_CONFIG_PRIMARY: "true" - LC_ALL: "en_US.UTF-8" - failed_when: false - -- name: Fail if puppet manifest apply script returns an error - fail: - msg: >- - Failed to apply bootstrap manifest. Please re-install host {{ansible_host}} - before attempting to bootstrap it again. See /var/log/puppet/latest/puppet.log - for details. - when: bootstrap_manifest.rc != 0 - -- name: Ensure Puppet directory exists - file: - path: "{{ puppet_permdir }}" - state: directory - recurse: yes - owner: root - group: root - mode: 0755 - -- name: Persist puppet working files - command: "mv {{ hieradata_workdir }} {{ puppet_permdir }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/defaults/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/defaults/main.yml new file mode 100644 index 000000000..99c111a07 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/defaults/main.yml @@ -0,0 +1,8 @@ +--- +hieradata_workdir: /tmp/hieradata +manifest_apply_log: /tmp/apply_manifest.log +loopback_ifname: lo +# The default disk size thresholds must align with the ones in +# config/.../sysinv/common/constants.py +small_root_disk_size: 240 +minimum_small_root_disk_size: 196 diff --git a/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/apply_bootstrap_manifest.yml b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/apply_bootstrap_manifest.yml new file mode 100644 index 000000000..d2a748b30 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/apply_bootstrap_manifest.yml @@ -0,0 +1,321 @@ +--- +# +# Copyright (c) 2019 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# SUB-TASK DESCRIPTION: +# These tasks create static configuration and apply the puppet bootstrap +# manifest. + +- name: Create config workdir + file: + path: "{{ hieradata_workdir }}" + state: directory + owner: root + group: root + mode: 0755 + +- block: + - name: Generating static config data + command: "/usr/bin/sysinv-puppet create-static-config {{ hieradata_workdir }}" + failed_when: false + register: static_config_result + + - name: Fail if static hieradata cannot be generated + fail: + msg: "Failed to create puppet hiera static config." 
+ when: static_config_result.rc != 0 + + - name: Write required system controller keystone user and project Ids to static hieradata if it's subcloud + lineinfile: + path: "{{ hieradata_workdir }}/static.yaml" + line: "{{ item }}" + with_items: + - "keystone::dc_admin_user_id: {{ system_controller_keystone_admin_user_id }}" + - "keystone::dc_admin_project_id: {{ system_controller_keystone_admin_project_id }}" + - "openstack::keystone::bootstrap::dc_services_project_id: {{ system_controller_keystone_services_project_id }}" + - "platform::sysinv::bootstrap::dc_sysinv_user_id: {{ system_controller_keystone_sysinv_user_id }}" + - "platform::dcmanager::bootstrap::dc_dcmanager_user_id: {{system_controller_keystone_dcmanager_user_id}}" + + when: distributed_cloud_role == 'subcloud' + + - name: Create cert for etcd server and client + import_role: + name: common/create-etcd-certs + when: mode == 'bootstrap' + +- block: + - name: Set ssh_config tmp dir fact + set_fact: + temp_ssh_config_dir: /tmp/config/ssh_config + + - name: Prefetch static and system hieradata + command: >- + tar -C {{ hieradata_workdir }} -xpf {{ restore_data_file }} --transform='s,.*/,,' + '{{ archive_puppet_permdir }}/*static.yaml' + '{{ archive_puppet_permdir }}/system.yaml' + args: + warn: false + + - name: Migrate static hieradata to new version + command: > + sed -i 's/{{ upgrade_metadata.from_release }}/{{ upgrade_metadata.to_release }}/g' + {{ hieradata_workdir }}/static.yaml + args: + warn: false + when: migrate_platform_data is defined and migrate_platform_data + + # The helmv2 database is new in the release stx5.0. The AIO-SX + # upgrade from stx4.0 to stx5.0 requires a password to be generated + # and written into hieradata to access the DB. This can be removed + # in the release that follows stx5.0 + - block: + - name: Generate helmv2 database password and store in keyring + vars: + script_content: | + import keyring + import os + from sysinv.common import utils + + helmv2_db_pw = keyring.get_password("helmv2", "database") + if helmv2_db_pw: + print helmv2_db_pw + exit() + + os.environ['XDG_DATA_HOME'] = '/tmp' + try: + helmv2_db_pw = utils.generate_random_password(length=16) + keyring.set_password("helmv2", "database", helmv2_db_pw) + print helmv2_db_pw + except Exception as e: + raise Exception("Failed to generate password for helmv2:%s" % e) + finally: + del os.environ['XDG_DATA_HOME'] + shell: "{{ script_content }}" + args: + executable: /usr/bin/python + register: helmv2_db_pw_output + no_log: true + + - name: Write helmv2 database username to hieradata + lineinfile: + path: "{{ hieradata_workdir }}/static.yaml" + line: "platform::helm::v2::db::postgresql::user: admin-helmv2" + + - name: Write helmv2 database password to hieradata + lineinfile: + path: "{{ hieradata_workdir }}/secure_static.yaml" + line: "platform::helm::v2::db::postgresql::password: {{ helmv2_db_pw_output.stdout }}" + + when: (migrate_platform_data is defined and + migrate_platform_data and + upgrade_metadata.from_release == "20.06") + + - name: Restore etcd certificates. + shell: tar -C / --overwrite -xpf {{ restore_data_file }} {{ item }} + args: + warn: false + with_items: + - "{{ '/etc/etcd' | regex_replace('^\\/', '') }}" + become_user: root + + - name: Check if etcd certs are exist. 
+ find: + paths: "/etc/etcd" + patterns: + - '*.crt' + - '*.key' + register: etcd_certs_find_output + + # This is for simplex upgrade from STX 4.0 to 5.0 + - block: + - name: set kubeadm_pki_dir + set_fact: + kubeadm_pki_dir: /etc/kubernetes/pki + + - name: Create pki directory for kubernetes certificates + file: + path: "{{ kubeadm_pki_dir }}" + state: directory + mode: 0700 + + - name: Restore CA + shell: tar -C / --overwrite -xpf {{ restore_data_file }} {{ item }} + args: + warn: false + with_items: + - "{{ kubeadm_pki_dir | regex_replace('^\\/', '') }}" + become_user: root + + - name: Create certs for etcd server and client for simplex upgrade + import_role: + name: common/create-etcd-certs + when: etcd_certs_find_output.matched == 0 + + - name: Check if apiserver-keys are present in the backup tarball + shell: "tar -tf {{ restore_data_file }} | grep 'etc/kubernetes/pki/apiserver-etcd-client.*'" + args: + warn: false + failed_when: false + register: apiserver_etcd_certs_find_output + + - name: Extract apiserver-keys from /etc/kubernetes/pki + shell: tar -C /etc/etcd/ --overwrite -xpf {{ restore_data_file }} {{ item }} --strip-components 3 + args: + warn: false + with_items: + - "{{ kubeadm_pki_dir | regex_replace('^\\/', '') }}/apiserver-etcd-client.crt" + - "{{ kubeadm_pki_dir | regex_replace('^\\/', '') }}/apiserver-etcd-client.key" + become_user: root + when: apiserver_etcd_certs_find_output.rc == 0 + + - name: Look for ssh_config dir in the backup tarball + shell: "tar -tf {{ restore_data_file }} | grep 'opt/platform/config/.*/ssh_config'" + args: + warn: false + failed_when: false + register: search_ssh_config + + - block: + - name: Create temp ssh_config dir + file: + path: "{{ temp_ssh_config_dir }}" + state: directory + owner: root + group: root + mode: 0755 + + - name: Prefetch ssh_config + command: >- + tar -C {{ temp_ssh_config_dir }} -xpf {{ restore_data_file }} --transform='s,.*/,,' + {{ archive_ssh_config_permdir }} + args: + warn: false + + - name: Remove the unwanted directory + file: + path: "{{ temp_ssh_config_dir }}/ssh_config" + state: absent + + when: search_ssh_config.rc == 0 + when: mode == 'restore' + +- name: Set the ip version of etcd listen address to its default value + set_fact: + etcd_listen_address_version: 4 + +- name: Update the ip version of etcd listen address to ipv6 + set_fact: + etcd_listen_address_version: 6 + when: ipv6_addressing != False + + # Add etcd security info to static hieradata so that etcd is configured with security + # when etc puppet manifest is applied before Kubernetes master is initialized in the later role. 
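+# A hedged spot-check once the manifest has been applied (not part of this task flow;
+# it reuses the same etcdctl v3 client flags this repo uses for etcd snapshots):
+#   ETCDCTL_API=3 etcdctl --endpoints https://<cluster_floating_address>:2379 \
+#     --cert /etc/etcd/etcd-client.crt --key /etc/etcd/etcd-client.key \
+#     --cacert /etc/etcd/ca.crt endpoint health
+# Plain http against port 2379 should no longer be accepted once security is enabled.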
+- name: Write security settings to static hieradata + lineinfile: + path: "{{ hieradata_workdir }}/static.yaml" + line: "{{ item }}" + with_items: + - "platform::etcd::params::security_enabled: true" + - "platform::etcd::params::bind_address: {{ cluster_floating_address }}" + - "platform::etcd::params::bind_address_version: {{ etcd_listen_address_version }}" + +- name: Create runtime hieradata + file: + path: "{{ hieradata_workdir }}/runtime.yaml" + state: touch + owner: root + group: root + mode: 0600 + +- block: + - name: Specify filesystem sizes + set_fact: + pgsql_size: 10 + platform_size: 10 + etcd_size: 5 + dockerdistribution_size: 16 + backup_size: 20 + docker_size: 30 + kubelet_size: 10 + scratch_size: 16 + when: root_disk_size|int >= minimum_small_root_disk_size + + - name: Expand filesystem sizes + set_fact: + pgsql_size: 20 + backup_size: 25 + when: root_disk_size|int > small_root_disk_size + + - name: Write filesystem settings to runtime hieradata + lineinfile: + path: "{{ hieradata_workdir }}/runtime.yaml" + line: "{{ item }}" + with_items: + - "platform::filesystem::backup::params::lv_size: {{ backup_size }}" + - "platform::filesystem::docker::params::bootstrap::lv_size: {{ docker_size }}" + - "platform::filesystem::kubelet::params::lv_size: {{ kubelet_size }}" + - "platform::filesystem::scratch::params::lv_size: {{ scratch_size }}" + - "platform::drbd::dockerdistribution::params::lv_size: {{ dockerdistribution_size }}" + - "platform::drbd::etcd::params::lv_size: {{ etcd_size }}" + - "platform::drbd::pgsql::params::lv_size: {{ pgsql_size }}" + - "platform::drbd::platform::params::lv_size: {{ platform_size }}" + when: root_disk_size|int >= minimum_small_root_disk_size + when: mode == 'bootstrap' + +- block: + - name: Read drbd settings from system hieradata + command: "grep 'platform::drbd::.*::params::lv_size.*' {{ hieradata_workdir }}/system.yaml" + register: system_file + + - name: Write filesystem settings to runtime hieradata + lineinfile: + path: "{{ hieradata_workdir }}/runtime.yaml" + line: "{{ item }}" + with_items: "{{system_file.stdout_lines}}" + + - name: Remove system hieradata + file: + path: "{{ hieradata_workdir }}/system.yaml" + state: absent + + when: mode == 'restore' + +- name: Applying puppet bootstrap manifest + command: > + /usr/local/bin/puppet-manifest-apply.sh + {{ hieradata_workdir }} + {{ derived_network_params.controller_0_address }} + controller ansible_bootstrap + {{ hieradata_workdir }}/runtime.yaml > {{ manifest_apply_log }} + register: bootstrap_manifest + environment: + INITIAL_CONFIG_PRIMARY: "true" + LC_ALL: "en_US.UTF-8" + failed_when: false + +- name: Fail if puppet manifest apply script returns an error + fail: + msg: >- + Failed to apply bootstrap manifest. Please re-install host {{ansible_host}} + before attempting to bootstrap it again. See /var/log/puppet/latest/puppet.log + for details. 
+ when: bootstrap_manifest.rc != 0 + +- name: Remove runtime hieradata + file: + path: "{{ hieradata_workdir }}/runtime.yaml" + state: absent + +- name: Ensure Puppet directory exists + file: + path: "{{ puppet_permdir }}" + state: directory + recurse: yes + owner: root + group: root + mode: 0755 + +- name: Persist puppet working files + command: "mv {{ hieradata_workdir }} {{ puppet_permdir }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/apply_etcd_manifest.yml b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/apply_etcd_manifest.yml new file mode 100644 index 000000000..4d14ef80d --- /dev/null +++ b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/apply_etcd_manifest.yml @@ -0,0 +1,56 @@ +--- +# +# Copyright (c) 2021 Intel, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# SUB-TASK DESCRIPTION: +# These tasks reconfigure etcd when bootstrap is replayed with networking change(s). + +- name: Copy back puppet working files + command: "cp -r {{ puppet_permdir }}/hieradata /tmp/" + +- name: Create cert for etcd server and client + import_role: + name: common/create-etcd-certs + +- name: Reconfigure cluster float address in static.yaml + lineinfile: + path: "{{ hieradata_workdir }}/static.yaml" + regexp: "^platform::etcd::params::bind_address:" + line: "platform::etcd::params::bind_address: {{ cluster_floating_address }}" + +- name: Create list of etcd classes to pass to puppet + copy: + dest: "/tmp/etcd.yml" + content: | + classes: + - platform::etcd::init + +- name: Set facts derived from previous network configurations + set_fact: + prev_controller_floating_address: + "{{ (prev_management_subnet | ipaddr(1)).split('/')[0] + if prev_management_start_address == 'derived' else prev_management_start_address }}" + +- name: Get previous controller_0 address + set_fact: + prev_controller_0_address: "{{ prev_controller_floating_address|ipmath(1) }}" + +- name: Applying puppet for enabling etcd security + command: > + /usr/local/bin/puppet-manifest-apply.sh + {{ hieradata_workdir }} + {{ prev_controller_0_address }} + controller runtime /tmp/etcd.yml + register: etcd_manifest + environment: + LC_ALL: "en_US.UTF-8" + +- name: Fail if etcd manifest apply script returns an error + fail: + msg: "Failed to apply etcd manifest!" + when: etcd_manifest.rc != 0 + +- name: Copy static files + command: "cp -r {{ hieradata_workdir }} {{ puppet_permdir }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/main.yml new file mode 100644 index 000000000..ac75934c6 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/bootstrap/apply-manifest/tasks/main.yml @@ -0,0 +1,16 @@ +--- +# +# Copyright (c) 2021 Intel, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role is to apply the puppet manifest. 
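+# On the initial play the full bootstrap manifest is applied. On a bootstrap replay
+# with a networking change only the etcd runtime manifest is re-applied, so etcd can
+# be rebound to the new cluster floating address (see the two include_tasks below).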
+ +- name: Apply bootstrap manifest (only in the initial play) + include_tasks: apply_bootstrap_manifest.yml + when: not replayed + +- name: Apply manifest to reconfigure etcd + include_tasks: apply_etcd_manifest.yml + when: replayed and network_config_update diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/main.yml index 8ed0f2489..d8917f93d 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/main.yml @@ -1,6 +1,6 @@ --- # -# Copyright (c) 2020 Wind River Systems, Inc. +# Copyright (c) 2020-21 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -32,6 +32,27 @@ import_tasks: test_cert_manager_creation.yml when: mode != 'restore' +# Create a generic namespace to be shared by deployed platform resources as needed +# and copy default-registry-key +- name: Create 'deployment' namespace + shell: kubectl --kubeconfig=/etc/kubernetes/admin.conf create namespace deployment --dry-run -o yaml | + kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f - + +- name: Get deployment namespace default registry key + command: >- + kubectl --kubeconfig=/etc/kubernetes/admin.conf get secret default-registry-key --namespace=deployment + failed_when: false + register: get_deployment_default_registry_key + +- name: Copy default-registry-key to deployment namespace + shell: >- + kubectl get secret default-registry-key --namespace=kube-system -o yaml + | sed 's/namespace: kube-system/namespace: deployment/' + | kubectl apply --namespace=deployment -f - + environment: + KUBECONFIG: "/etc/kubernetes/admin.conf" + when: get_deployment_default_registry_key.stdout == "" + # Create DC CA and set up subcloud admin endpoint certificates for bootstrap mode. - block: - name: Create distributed cloud CA @@ -39,7 +60,10 @@ when: distributed_cloud_role == 'systemcontroller' - name: Set up subcloud admin endpoints certificates if host is a subcloud - include_tasks: setup_sc_adminep_certs.yml + include_role: + name: common/setup-subcloud-adminep-certs + vars: + ansible_become: yes when: distributed_cloud_role == 'subcloud' when: mode == 'bootstrap' diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/files/psp-policies.yaml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/files/psp-policies.yaml index ef61c6903..2ae49f925 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/files/psp-policies.yaml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/files/psp-policies.yaml @@ -98,3 +98,60 @@ subjects: - kind: Group name: system:serviceaccounts:kube-system apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kubelet-kube-system-privileged-psp-user + namespace: kube-system + annotations: + kubernetes.io/description: 'Allow nodes to create privileged pods. Should + be used in combination with the NodeRestriction admission plugin to limit + nodes to mirror pods bound to themselves.' 
+ labels: + addonmanager.kubernetes.io/mode: Reconcile + kubernetes.io/cluster-service: 'true' +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: privileged-psp-user +subjects: +- kind: Group + apiGroup: rbac.authorization.k8s.io + name: system:nodes +- kind: User + apiGroup: rbac.authorization.k8s.io + # Legacy node ID + name: kubelet +--- +# ClusterRoleBinding of system:serviceaccounts:kubesystem to +# restricted-psp-user Role. Applies to all namespaces. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-system-SAs-restricted-psp-users +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: restricted-psp-user +subjects: +- kind: Group + name: system:serviceaccounts:kube-system + apiGroup: rbac.authorization.k8s.io +--- +# ClusterRoleBinding of system:authenticated (all +# authenticated users) to restricted-psp-user Role. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: authenticated-users-restricted-psp-users +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: restricted-psp-user +subjects: +- kind: Group + name: system:authenticated + apiGroup: rbac.authorization.k8s.io + + diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_helm.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_helm.yml index 037ed58da..1fd30f89e 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_helm.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_helm.yml @@ -163,107 +163,6 @@ name: lighttpd state: restarted -- block: - # NOTE: helm --debug option displays vital information, no harm enabling. - # These only show in ansible.log on failure. - - name: Add Helm repos - command: /sbin/helm repo add "{{ item }}" "http://127.0.0.1:{{ helm_repo_port }}/helm_charts/{{ item }}" --debug - with_items: - - "{{ helm_repo_name_apps }}" - - "{{ helm_repo_name_platform }}" - - # NOTE: helm --debug option displays vital information, no harm enabling. - # These only show in ansible.log on failure. 
- - name: Update Helm repos - command: /sbin/helm repo update --debug - - become_user: sysadmin - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - HOME: /home/sysadmin - -- block: - - name: Set Armada overrides - set_fact: - helm_charts_url: "http://{{ controller_floating_address | ipwrap }}:{{ helm_repo_port }}/helm_charts" - armada_default_password: "{{ lookup('password', '/dev/null chars=ascii_letters length=16') | b64encode }}" - armada_password: "{{ lookup('password', '/dev/null chars=ascii_letters length=16') | b64encode }}" - - - name: Create keystone credentials for armada domain (local host client only) - shell: "source /etc/platform/openrc; \ - openstack domain create {{ armada_domain }}; \ - openstack project create --domain {{ armada_domain }} 'service'; \ - openstack user create --domain {{ armada_domain }} \ - --project service --project-domain {{ armada_domain }} \ - --password {{ armada_password }} {{ armada_user }}; \ - openstack role add --project-domain {{ armada_domain }} \ - --user-domain {{ armada_domain }} --user {{ armada_user }} \ - --project service admin" - args: - executable: /bin/bash - no_log: true - - - name: Create Armada overrides - template: - src: "armada-overrides.yaml.j2" - dest: "/tmp/armada-overrides.yaml" - - - name: Create namespace for Armada - command: > - kubectl create namespace {{ armada_namespace }} - failed_when: false - register: create_ns_output - - - name: Fail if creating namespace fails - fail: - msg: "Failed to create {{ armada_namespace }} namespace. Error: {{ create_ns_output.stderr }}" - when: create_ns_output.rc is defined and create_ns_output.rc !=0 and - create_ns_output.stderr is not search('AlreadyExists') - - - name: Check if secret exists - command: kubectl -n {{ armada_namespace }} get secret {{ armada_secret_name }} - failed_when: false - register: armada_get_secret_response - - # Set no_log to true so that we don't expose the local registry credentials - - name: Create secret if it doesn't exist - command: >- - kubectl -n {{ armada_namespace }} create secret docker-registry {{ armada_secret_name }} - --docker-server={{ local_registry }} - --docker-username={{ local_registry_credentials['username'] }} - --docker-password={{ local_registry_credentials['password'] }} - when: armada_get_secret_response.rc != 0 - no_log: true - - # Workaround for helm v3 issue with null overrides. - # Configure sane node label values that work with armada node selector - - name: Create Armada node label - command: > - kubectl label node controller-0 armada=enabled --overwrite=true - - # To prevent helm-upload requiring sudo and a tty for password, - # become the intended www user. - - name: Upload Armada charts - become_user: www - command: > - /usr/local/sbin/helm-upload stx-platform /opt/extracharts/armada-0.1.0.tgz - - # NOTE: helm --debug option displays vital information, no harm enabling. - # These only show in ansible.log on failure. - - name: Update info of available charts from chart repos - command: > - /sbin/helm repo update --debug - - # NOTE: helm --debug option displays vital information, no harm enabling. - # These only show in ansible.log on failure. 
- - name: Launch Armada with Helm v3 - command: >- - /sbin/helm upgrade --install armada stx-platform/armada - --namespace {{ armada_namespace }} - --values /tmp/armada-overrides.yaml - --debug - - become_user: sysadmin - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - HOME: /home/sysadmin +- name: Launch containerized Armada using Helm v3 + import_role: + name: common/armada-helm diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_kubemaster.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_kubemaster.yml index 5b9b0d883..9622ade36 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_kubemaster.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_kubemaster.yml @@ -20,7 +20,6 @@ # - Prepare and apply coredns config # - Restrict coredns to master node and set anti-affnity (duplex system) # - Restrict coredns to 1 pod (simplex system) -# - Remove taint from master node # - Add kubelet service override # - Register kubelet with pmond # - Reload systemd @@ -58,20 +57,42 @@ k8s_pki_files: { ca.crt: "{{k8s_root_ca_cert}}", ca.key: "{{k8s_root_ca_key}}" } when: (k8s_root_ca_cert) -- block: - - name: Create pki directory for kubernetes certificates - file: - path: "{{ kubeadm_pki_dir }}" - state: directory - mode: 0700 +- name: Create pki directory for kubernetes certificates + file: + path: "{{ kubeadm_pki_dir }}" + state: directory + mode: 0700 +- block: - name: Copy kubernetes certificates copy: src: "{{ item.value }}" dest: "{{ kubeadm_pki_dir }}/{{item.key}}" with_dict: "{{ k8s_pki_files }}" - when: k8s_pki_files is defined and mode == 'bootstrap' + - name: Copy apiserver-etcd-client cert and key + copy: + src: "/etc/etcd/{{ item }}" + dest: "{{ kubeadm_pki_dir }}/{{ item }}" + remote_src: yes + force: yes + with_items: + - "apiserver-etcd-client.crt" + - "apiserver-etcd-client.key" + when: k8s_pki_files is defined + +- name: Copy ca, cert and key generated by etcd to kubeadm_pki_dir + copy: + src: "/etc/etcd/{{ item }}" + dest: "{{ kubeadm_pki_dir }}/{{ item }}" + remote_src: yes + force: yes + with_items: + - "ca.crt" + - "ca.key" + - "apiserver-etcd-client.crt" + - "apiserver-etcd-client.key" + when: k8s_pki_files is undefined - name: Set kubelet node configuration set_fact: @@ -126,7 +147,7 @@ environment: APISERVER_ADVERTISE_ADDRESS: "{{ controller_0_cluster_host }}" CONTROLPLANE_ENDPOINT: "{{ cluster_floating_address }}" - ETCD_ENDPOINT: "http://{{ cluster_floating_address | ipwrap }}:2379" + ETCD_ENDPOINT: "https://{{ cluster_floating_address | ipwrap }}:2379" POD_NETWORK_CIDR: "{{ cluster_pod_subnet }}" SERVICE_NETWORK_CIDR: "{{ cluster_service_subnet }}" VOLUME_PLUGIN_DIR: "{{ kubelet_vol_plugin_dir }}" @@ -311,9 +332,6 @@ when: mode == 'bootstrap' -- name: Remove taint from master node - shell: "kubectl --kubeconfig=/etc/kubernetes/admin.conf taint node controller-0 node-role.kubernetes.io/master- || true" - - block: - name: Applying kubernetes plugins include_role: diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_local_registry.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_local_registry.yml index 4dab95fad..d4cd1f8d1 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_local_registry.yml +++ 
b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/bringup_local_registry.yml @@ -54,11 +54,35 @@ state: directory mode: 0700 +- name: Determine the stream_server_address for containerd + set_fact: + stream_server_address: "{{ '127.0.0.1' if ipv6_addressing == False else '::1' }}" + +- name: Get guest local registry credentials + vars: + script_content: | + import keyring + password = keyring.get_password("mtce", "services") + if not password: + raise Exception("Local registry password not found.") + print dict(username='mtce', password=str(password)) + shell: "{{ script_content }}" + args: + executable: /usr/bin/python + register: guest_local_registry_credentials_output + +- set_fact: + guest_local_registry_credentials: "{{ guest_local_registry_credentials_output.stdout }}" + +- name: Determine the registry_auth for containerd + set_fact: + registry_auth: "{{ (guest_local_registry_credentials['username'] + ':' + + guest_local_registry_credentials['password']) | b64encode }}" + - name: Create config.toml file for containerd configuration - copy: - src: "{{ containerd_template }}" + template: + src: "config.toml.j2" dest: /etc/containerd/config.toml - remote_src: yes mode: 0600 - name: Remove puppet template for insecure registries @@ -81,13 +105,6 @@ when: (insecure_registries is defined and insecure_registries | length > 0) -- name: Update config.toml with cni bin dir - command: "sed -i -e 's|<%= @k8s_cni_bin_dir %>|$CNI_BIN_DIR|g' /etc/containerd/config.toml" - args: - warn: false - environment: - CNI_BIN_DIR: "{{ kubelet_cni_bin_dir }}" - - name: Get local registry credentials vars: script_content: | @@ -104,63 +121,6 @@ - set_fact: local_registry_credentials: "{{ local_registry_credentials_output.stdout }}" -- name: Get guest local registry credentials - vars: - script_content: | - import keyring - password = keyring.get_password("mtce", "services") - if not password: - raise Exception("Local registry password not found.") - print dict(username='mtce', password=str(password)) - shell: "{{ script_content }}" - args: - executable: /usr/bin/python - register: guest_local_registry_credentials_output - -- set_fact: - guest_local_registry_credentials: "{{ guest_local_registry_credentials_output.stdout }}" - -- name: Update config.toml with registry auth - command: "sed -i -e 's|<%= @registry_auth %>|$REG_AUTH|g' /etc/containerd/config.toml" - args: - warn: false - environment: - REG_AUTH: "{{ (guest_local_registry_credentials['username'] + ':' - + guest_local_registry_credentials['password']) | b64encode }}" - -- name: Determine the stream_server_address for containerd - set_fact: - stream_server_address: "{{ '127.0.0.1' if ipv6_addressing == False else '::1' }}" - -- name: Set the stream_server_address in config.toml - command: "sed -i -e 's|<%= @stream_server_address %>|'$STREAM_SERVER_ADDRESS'|g' /etc/containerd/config.toml" - args: - warn: false - environment: - STREAM_SERVER_ADDRESS: "{{ stream_server_address }}" - -- name: Remove central cloud registry config on non-subclouds - replace: - path: /etc/containerd/config.toml - after: '# Central cloud registry config for distributed cloud subclouds' - regexp: '^(<%- if @distributed_cloud_role.+)\n(.+)\n(.+)\n(.+end -%>)' - replace: '' - when: distributed_cloud_role != 'subcloud' - -- name: Remove erb if tag start for central cloud registry config on subclouds - replace: - path: /etc/containerd/config.toml - regexp: "^(# Central cloud registry.+)\n(.+role == 'subcloud' -%>)" - replace: '# Central cloud registry 
config for distributed cloud subclouds' - when: distributed_cloud_role == 'subcloud' - -- name: Remove erb if tag end for central cloud registry config on subclouds - replace: - path: /etc/containerd/config.toml - regexp: "^(<%- end -%>)\n(.+config for distributed cloud subclouds)" - replace: '# End central cloud registry config for distributed cloud subclouds' - when: distributed_cloud_role == 'subcloud' - - name: Restart containerd systemd: name: containerd @@ -297,6 +257,7 @@ src: "{{ registry_cert_crt }}" dest: "{{ docker_cert_dir }}/registry.local:9001" remote_src: yes + mode: preserve - name: Start registry token server systemd: diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/main.yml index 0b74bdd46..96e1d8c89 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/main.yml @@ -58,6 +58,31 @@ import_role: name: common/push-docker-images + - name: Bring up etcd + systemd: + name: etcd + state: started + + - name: Check if etcd-client crt was created. + find: + paths: "/etc/etcd" + patterns: "etcd-client.*" + register: etcd_client_find_output + + - name: Create etcd client account for root, apiserver and enable etcd auth + command: "etcdctl --cert-file=$ETCD_CERT --key-file=$ETCD_KEY --ca-file=$ETCD_CA + --endpoint=$ETCD_ENDPOINT {{ item }}" + with_items: + - "user add root:sysadmin" + - "user add apiserver-etcd-client:sysadmin" + - "auth enable" + environment: + ETCD_ENDPOINT: "https://{{ cluster_floating_address | ipwrap }}:2379" + ETCD_CERT: "/etc/etcd/etcd-client.crt" + ETCD_KEY: "/etc/etcd/etcd-client.key" + ETCD_CA: "/etc/etcd/ca.crt" + when: etcd_client_find_output.matched != 0 + - name: Bring up Kubernetes master import_tasks: bringup_kubemaster.yml @@ -147,36 +172,23 @@ kubectl --kubeconfig=/etc/kubernetes/admin.conf scale deployment -n kube-system coredns --replicas={{ coredns_get_replicas.stdout }} - - name: Get the number of Kubernetes nodes to calculate async_retries value - # Use tail to skip the header of kubectl command, then count the line output - shell: kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes | tail -n +2 | wc -l - register: k8s_node_count - - name: Override async parameters - # async_retries needs to scale with the number of nodes because the async - # kubectl wait tasks can use their full 30 second timeout for each pod they - # are checking. When checking a daemonset like app=multus, this means that - # the wait command could take up to 30s * number of nodes - # When this value is too short, the async task does not complete and the - # play fails on the "Fail if any of the Kubernetes component, Networking - # or Tiller pods are not ready by this time" task. - # The async_retries value results in a total wait time of - # async_retries * delay (from "Get wait task results" task below). 
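# (For concreteness: with the scaling removed above, async_retries was
# 6 * <node count> and the "Get wait task results" task polls with a 6 second
# delay, so a 10-node system budgeted roughly 10 * 6 * 6 = 360 seconds per
# wait. The replacement tasks below pin each kubectl wait to controller-0 with
# a field selector and use the fixed async_timeout of 120 seconds instead.)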
set_fact: async_timeout: 120 - async_retries: "{{ (k8s_node_count.stdout|int * 6)|int }}" - when: k8s_node_count.stdout|int > 1 - - name: Wait for {{ pods_wait_time }} seconds to ensure deployments have time to scale back up + - name: Wait for 30 seconds to ensure deployments have time to scale back up wait_for: - timeout: "{{ pods_wait_time }}" + timeout: 30 when: mode == 'restore' - name: Start parallel tasks to wait for Kubernetes component and Networking pods to reach ready state + # Only check for pods on the current host to avoid waiting for pods on downed nodes + # This speeds up "Get wait tasks results" on multi-node systems command: >- kubectl --kubeconfig=/etc/kubernetes/admin.conf wait --namespace=kube-system - --for=condition=Ready pods --selector {{ item }} --timeout=30s + --for=condition=Ready pods --selector {{ item }} --field-selector spec.nodeName=controller-0 + --timeout={{ async_timeout }}s async: "{{ async_timeout }}" poll: 0 with_items: "{{ kube_component_list }}" @@ -186,7 +198,7 @@ # Check the deployment status rather than the pod status in case some pods are down on other nodes command: >- kubectl --kubeconfig=/etc/kubernetes/admin.conf wait --namespace={{ item.namespace }} - --for=condition=Available deployment {{ item.deployment }} --timeout=30s + --for=condition=Available deployment {{ item.deployment }} --timeout={{ async_timeout }}s async: "{{ async_timeout }}" poll: 0 with_items: @@ -204,13 +216,6 @@ # complete (success or failure) within 30 seconds retries: "{{ async_retries }}" delay: 6 - # At B&R, after the restore phase, this will fail on duplex or standard systems because - # some of the resources that we are waiting for are replicasets and daemonsets - # and some pods will be launched on a different host than controller-0. - # Since only the controller-0 is online at this step, the rest of the pods that - # are on a different host will fail to start, so we only need to check that - # at least 1 pod from every deployment is up and running. If there are none active - # from a particular deployment it will be caught in the next task. failed_when: false with_items: - "{{ wait_for_kube_system_pods.results }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/setup_default_route.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/setup_default_route.yml index 329df6a61..7d66870bf 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/setup_default_route.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/tasks/setup_default_route.yml @@ -1,6 +1,6 @@ --- # -# copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2021 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -14,7 +14,7 @@ - name: Check if the default route exists shell: "{{ ip_command }} route show | grep 'default via {{ external_oam_gateway_address }}'" - ignore_errors: yes + failed_when: false register: route_check - block: diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/config.toml.j2 b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/config.toml.j2 new file mode 100644 index 000000000..e1ed56273 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/config.toml.j2 @@ -0,0 +1,117 @@ +root = "/var/lib/docker" +state = "/var/run/containerd" +oom_score = 0 + +[grpc] + address = "/var/run/containerd/containerd.sock" + uid = 0 + gid = 0 + max_recv_message_size = 16777216 + max_send_message_size = 16777216 + +[debug] + address = "" + uid = 0 + gid = 0 + level = "" + +[metrics] + address = "" + grpc_histogram = false + +[cgroup] + path = "" + +[plugins] + [plugins.cgroups] + no_prometheus = false + [plugins.cri] + stream_server_address = "{{ stream_server_address }}" + stream_server_port = "0" + enable_selinux = false + sandbox_image = "registry.local:9001/k8s.gcr.io/pause:3.2" + stats_collect_period = 10 + systemd_cgroup = false + enable_tls_streaming = false + max_container_log_line_size = 16384 + [plugins.cri.containerd] + snapshotter = "overlayfs" + no_pivot = false + default_runtime_name = "runc" + [plugins.cri.containerd.runtimes] + [plugins.cri.containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v1" + [plugins.cri.containerd.runtimes.runc.options] + NoPivotRoot = false + NoNewKeyring = false + ShimCgroup = "" + IoUid = 0 + IoGid = 0 + BinaryName = "runc" + Root = "" + CriuPath = "" + SystemdCgroup = false + [plugins.cri.containerd.runtimes.kata] + runtime_type = "io.containerd.kata.v2" + [plugins.cri.containerd.runtimes.katacli] + runtime_type = "io.containerd.runc.v1" + [plugins.cri.containerd.runtimes.katacli.options] + NoPivotRoot = false + NoNewKeyring = false + ShimCgroup = "" + IoUid = 0 + IoGid = 0 + BinaryName = "/usr/bin/kata-runtime" + Root = "" + CriuPath = "" + SystemdCgroup = false + [plugins.cri.containerd.runtimes.untrusted] + runtime_type = "io.containerd.kata.v2" + runtime_engine = "" + runtime_root = "" + + [plugins.cri.cni] + # conf_dir is the directory in which the admin places a CNI conf. 
+ conf_dir = "/etc/cni/net.d" + bin_dir = "{{ kubelet_cni_bin_dir }}" + max_conf_num = 1 + conf_template = "" + [plugins.cri.registry] + [plugins.cri.registry.mirrors] + # Begin of insecure registries +<%- @insecure_registries.each do |insecure_registry| -%> + [plugins.cri.registry.mirrors."<%= insecure_registry %>"] + endpoint = ["http://<%= insecure_registry %>"] +<%- end -%> + # End of insecure registries + [plugins.cri.registry.configs."registry.local:9001".tls] + ca_file = "/etc/docker/certs.d/registry.local:9001/registry-cert.crt" + [plugins.cri.registry.configs."registry.local:9001".auth] + auth = "{{ registry_auth }}" +# Central cloud registry config for distributed cloud subclouds +{% if "{{ distributed_cloud_role }}" == 'subcloud' %} + [plugins.cri.registry.configs."registry.central:9001".tls] + ca_file = "/etc/docker/certs.d/registry.central:9001/registry-cert.crt" +{% endif -%} +# End central cloud registry config for distributed cloud subclouds + [plugins.cri.x509_key_pair_streaming] + tls_cert_file = "" + tls_key_file = "" + [plugins.diff-service] + default = ["walking"] + [plugins.linux] + shim = "containerd-shim" + runtime = "runc" + runtime_root = "" + no_shim = false + shim_debug = false + [plugins.opt] + path = "/opt/containerd" + [plugins.restart] + interval = "10s" + [plugins.scheduler] + pause_threshold = 0.02 + deletion_threshold = 0 + mutation_threshold = 100 + schedule_delay = "0s" + startup_delay = "100ms" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/coredns.yaml.j2 b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/coredns.yaml.j2 index 5aa812757..c7340b737 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/coredns.yaml.j2 +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/coredns.yaml.j2 @@ -26,7 +26,9 @@ data: forward . 
/etc/resolv.conf { policy sequential } - cache 30 + cache 30 { + denial 9984 5 + } loop reload loadbalance diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/calico-cni.yaml.j2 b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/calico-cni.yaml.j2 index a5e88706d..5d0fd4eb4 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/calico-cni.yaml.j2 +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/calico-cni.yaml.j2 @@ -730,7 +730,6 @@ spec: privileged: true resources: requests: - cpu: 150m livenessProbe: exec: command: @@ -747,13 +746,6 @@ spec: - -felix-ready - -bird-ready periodSeconds: 10 - {% if cluster_network_ipv4 -%} - # Disable reverse path filter for tunl0 interface - lifecycle: - postStart: - exec: - command: ["/bin/sh", "-c", "sysctl -w net.ipv4.conf.tunl0.rp_filter=0"] - {% endif -%} volumeMounts: - mountPath: /lib/modules name: lib-modules diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/multus-cni.yaml.j2 b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/multus-cni.yaml.j2 index d23d32344..8281aaec8 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/multus-cni.yaml.j2 +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/multus-cni.yaml.j2 @@ -208,10 +208,8 @@ spec: /entrypoint.sh --multus-conf-file=/usr/src/multus-cni/images/05-multus.conf resources: requests: - cpu: "50m" memory: "50Mi" limits: - cpu: "100m" memory: "50Mi" securityContext: privileged: true diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/sriov-cni.yaml.j2 b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/sriov-cni.yaml.j2 index 23c5b8b91..01a1826a0 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/sriov-cni.yaml.j2 +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/k8s-v1.18.1/sriov-cni.yaml.j2 @@ -49,10 +49,8 @@ spec: privileged: true resources: requests: - cpu: "50m" memory: "50Mi" limits: - cpu: "100m" memory: "50Mi" volumeMounts: - name: cnibin diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/kubelet.conf.j2 b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/kubelet.conf.j2 index 80e9a4450..ab17d42e8 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/kubelet.conf.j2 +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/kubelet.conf.j2 @@ -1,2 +1,12 @@ # Overrides config file for kubelet -KUBELET_EXTRA_ARGS=--cni-bin-dir={{ kubelet_cni_bin_dir }} --node-ip={{ node_ip }} --volume-plugin-dir={{ kubelet_vol_plugin_dir }} +# Temporary set pod-max-pids until unlock. During the restore procedure, +# between restore_platform playbook and unlock, there is a window in +# which user app pods can misbehave. Chose a value over the 20, which is +# the number of processes used by platform pods. Chose a value over 100, +# to allow room for platform pods. The user apps are in fact +# intended to run after the unlock anyway. 
During the unlock the correct +# value is used, the one generated by sysinv as hieradata. +KUBELET_EXTRA_ARGS=--cni-bin-dir={{ kubelet_cni_bin_dir }} \ +--node-ip={{ node_ip }} \ +--volume-plugin-dir={{ kubelet_vol_plugin_dir }} \ +--pod-max-pids 500 diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/vars/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/vars/main.yml index 9667a270e..00ad4f0d8 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/vars/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/vars/main.yml @@ -30,10 +30,5 @@ kubeadm_pki_dir: /etc/kubernetes/pki etcd_tmp_dir: /opt/backups/etcd_tmp_dir psp_file: /usr/share/ansible/stx-ansible/playbooks/roles/bootstrap/bringup-essential-services/files/psp-policies.yaml -armada_domain: armada-domain -armada_user: armada -armada_namespace: armada -armada_secret_name: default-registry-key - # Kubernetes api server encryption provider configuration file encryption_provider_config: /etc/kubernetes/encryption-provider.yaml diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/files/populate_initial_config.py b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/files/populate_initial_config.py index 431631143..ddeb84a5e 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/files/populate_initial_config.py +++ b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/files/populate_initial_config.py @@ -157,8 +157,7 @@ def populate_system_config(client): dc_role = None if is_subcloud(): - capabilities.update({'shared_services': "['identity', ]", - 'region_config': True}) + capabilities.update({'region_config': True}) values = { 'system_mode': CONF.get('BOOTSTRAP_CONFIG', 'SYSTEM_MODE'), @@ -732,8 +731,9 @@ def populate_docker_config(client): # previous run. 
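    # (Both the apiserver SAN list parameter and the pod-max-pids parameter
    # created further down are deleted here first, presumably so that a
    # replayed bootstrap does not try to create duplicate service parameters.)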
parameters = client.sysinv.service_parameter.list() for parameter in parameters: - if (parameter.name == - sysinv_constants.SERVICE_PARAM_NAME_KUBERNETES_API_SAN_LIST): + if (parameter.name in [ + sysinv_constants.SERVICE_PARAM_NAME_KUBERNETES_API_SAN_LIST, + sysinv_constants.SERVICE_PARAM_NAME_KUBERNETES_POD_MAX_PIDS]): client.sysinv.service_parameter.delete(parameter.uuid) apiserver_san_list = CONF.get('BOOTSTRAP_CONFIG', 'APISERVER_SANS') @@ -753,9 +753,27 @@ def populate_docker_config(client): 'parameters': parameters } - print("Populating/Updating kubernetes config...") + print("Populating/Updating kubernetes san list...") client.sysinv.service_parameter.create(**values) - print("Kubernetes config completed.") + + parameters = { + sysinv_constants.SERVICE_PARAM_NAME_KUBERNETES_POD_MAX_PIDS: + str(sysinv_constants.SERVICE_PARAM_KUBERNETES_POD_MAX_PIDS_DEFAULT) + } + + values = { + 'service': sysinv_constants.SERVICE_TYPE_KUBERNETES, + 'section': + sysinv_constants.SERVICE_PARAM_SECTION_KUBERNETES_CONFIG, + 'personality': None, + 'resource': None, + 'parameters': parameters + } + + print("Populating/Updating kubernetes config...") + client.sysinv.service_parameter.create(**values) + + print("Kubernetes config completed.") parameters = client.sysinv.service_parameter.list() diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/main.yml index 857eaf7f2..8840d702e 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/main.yml @@ -68,6 +68,29 @@ - include: one_time_config_tasks.yml when: not initial_db_populated +- name: Find etcd certs files + find: + paths: "/etc/etcd" + patterns: + - '*.crt' + - '*.key' + register: etcd_certs_find_output + +- name: Copy etcd certificates to etcd certs directory + copy: + src: "/etc/etcd/{{ item }}" + dest: "{{ etcd_certs_dir }}/{{ item }}" + remote_src: yes + force: yes + with_items: + - "etcd-server.crt" + - "etcd-server.key" + - "ca.crt" + - "ca.key" + - "etcd-client.crt" + - "etcd-client.key" + when: etcd_certs_find_output.matched != 0 + # Banner customization is not part of one_time_config_task.yml as the user may # choose to change their banner look and feel and replay. 
- name: Check if custom banner exists @@ -395,3 +418,20 @@ when: search_result.rc == 0 when: mode == 'restore' + +- name: Create source and target device_images bind directories + file: + path: "{{ item }}" + state: directory + owner: www + group: root + mode: 0755 + with_items: + - "{{ source_device_image_bind_dir }}" + - "{{ target_device_image_bind_dir }}" + +- name: Bind mount on {{ target_device_image_bind_dir }} + # Due to deficiency of mount module, resort to command for now + command: mount -o bind -t ext4 {{ source_device_image_bind_dir }} {{ target_device_image_bind_dir }} + args: + warn: false diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/one_time_config_tasks.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/one_time_config_tasks.yml index f9170c792..bf799e821 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/one_time_config_tasks.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/one_time_config_tasks.yml @@ -38,13 +38,16 @@ - "{{ postgres_config_dir }}" - "{{ pxe_config_dir }}" -- name: Ensure SSL CA certificates directory exists +- name: Ensure SSL CA and etcd certs directories exist file: - path: "{{ ssl_ca_certs_dir }}" + path: "{{ item }}" state: directory owner: root group: root mode: 0700 + with_items: + - "{{ ssl_ca_certs_dir }}" + - "{{ etcd_certs_dir }}" - name: Get list of Postgres conf files find: @@ -109,54 +112,9 @@ - "grubby --update-kernel={{ grub_kernel_output.stdout_lines[0] }} --args='{{ default_security_feature }}'" - "grubby --efi --update-kernel={{ grub_kernel_output.stdout_lines[0] }} --args='{{ default_security_feature }}'" -- block: - - name: Resize logical volumes - include: resize_logical_volume.yml - with_items: - - { size: "10G", lv: "/dev/cgts-vg/pgsql-lv" } - - { size: "10G", lv: "/dev/cgts-vg/platform-lv" } - - { size: "5G", lv: "/dev/cgts-vg/etcd-lv" } - - { size: "16G", lv: "/dev/cgts-vg/dockerdistribution-lv" } - - { size: "20G", lv: "/dev/cgts-vg/backup-lv" } - - { size: "30G", lv: "/dev/cgts-vg/docker-lv" } - - { size: "10G", lv: "/dev/cgts-vg/kubelet-lv" } - - { size: "16G", lv: "/dev/cgts-vg/scratch-lv" } +- name: Gather drbd status + command: drbd-overview + register: drbd_result - - name: Resize filesystems (default) - command: "{{ item }}" - failed_when: false - with_items: - - resize2fs /dev/cgts-vg/backup-lv - - xfs_growfs /dev/cgts-vg/docker-lv - - resize2fs /dev/cgts-vg/kubelet-lv - - resize2fs /dev/cgts-vg/scratch-lv - - - name: Resize drbd filesystems (default) - include: resize_drbd.yml - with_items: - - { resource: "drbd-pgsql/0", device: "drbd0" } - - { resource: "drbd-platform/0", device: "drbd2" } - - { resource: "drbd-etcd/0", device: "drbd7" } - - { resource: "drbd-dockerdistribution/0", device: "drbd8" } - - when: root_disk_size|int >= minimum_small_root_disk_size - -- block: - - name: Further resize if root disk size is larger than 240G - include: resize_logical_volume.yml - with_items: - - { size: "20G", lv: "/dev/cgts-vg/pgsql-lv" } - - { size: "25G", lv: "/dev/cgts-vg/backup-lv" } - - - name: Resize filesystems - command: "{{ item }}" - failed_when: false - with_items: - - resize2fs /dev/cgts-vg/backup-lv - - - name: Further resize drbd filesystems - include: resize_drbd.yml - with_items: - - { resource: "drbd-pgsql/0", device: "drbd0" } - - when: root_disk_size|int > small_root_disk_size +- name: Record drbd status + debug: var=drbd_result.stdout_lines diff --git 
a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/resize_drbd.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/resize_drbd.yml deleted file mode 100644 index 99e125703..000000000 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/resize_drbd.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# -# Copyright (c) 2020 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# -# SUB-TASKS DESCRIPTION: -# - Resize drbd filesytems -# - After issuing the drbdadm resize, a pause is also required prior to -# performing the resize2fs operation. -# -# There does not appear to be much observability into drbdadm resize -# at /proc/drbd or drbd-overview, so a pause is introduced. The pause needed -# to be at least 1 second as per observations in virtual and hardware labs, -# AIO and Standard controllers. -# - -- name: Resize drbd resource {{ item.resource }} - command: "drbdadm -- --assume-peer-has-space resize {{ item.resource }}" - -# Pause for 10 seconds to mimic this workaround. When moving to drbd9 this can be removed -# https://github.com/LINBIT/drbd-utils/commit/b12e02eb8ac83aeb0a2165810d91dc3f5d20c83f -- name: Pause 10 seconds for drbd resize - pause: - seconds: 10 - -- name: Resize filesystem {{ item.device }} - command: "resize2fs /dev/{{ item.device }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/resize_logical_volume.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/resize_logical_volume.yml deleted file mode 100644 index ad6651018..000000000 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/resize_logical_volume.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- -# -# Copyright (c) 2020 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# -# SUB-TASKS DESCRIPTION: -# - Resize some filesytems -# - Remove stale data at the end of LV block device -# - -- name: Resize logical volume {{ item.lv }} - command: "lvextend -L{{ item.size }} {{ item.lv }}" - register: res - failed_when: false - -- name: Fail if file system resizing failed for a reason other than it has been done already - fail: - msg: "{{ item.lv }} resize failed for the following reason: {{ res.stderr }}." 
- when: res.rc !=0 and res.stderr is not search('matches existing size') and - res.stderr is not search('not larger than existing size') - -- name: Remove stale data at the end of LV block device - shell: "seek_end=$(($(blockdev --getsz {{ item.lv }})/2048 - 10)); - dd if=/dev/zero of={{ item.lv }} bs=1M count=10 seek=${seek_end} " - when: res.rc == 0 diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/shutdown_services.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/shutdown_services.yml index bd1db8275..c4d9a8458 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/shutdown_services.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/shutdown_services.yml @@ -54,16 +54,16 @@ warn: false - block: - - name: Restart etcd + - name: Stop etcd systemd: name: etcd - state: restarted + state: stopped rescue: - - name: Etcd failed to restart, try one more time + - name: Etcd failed to stop, try one more time systemd: name: etcd - state: restarted + state: stopped - block: # Revert configuration to loopback interface diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/update_sysinv_database.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/update_sysinv_database.yml index 3284e9395..0654e793f 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/update_sysinv_database.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/tasks/update_sysinv_database.yml @@ -98,22 +98,29 @@ msg: Timeout waiting for system controller database configuration to complete when: distributed_cloud_role == 'systemcontroller' -- block: - - name: Update sysinv with new region name - replace: - path: /etc/sysinv/sysinv.conf - regexp: "region_name=.*$" - replace: "region_name={{ region_name }}" - - name: Restart sysinv-agent and sysinv-api to pick up sysinv.conf update - command: "{{ item }}" - with_items: - - /etc/init.d/sysinv-agent restart - - /usr/lib/ocf/resource.d/platform/sysinv-api reload - environment: - OCF_ROOT: "/usr/lib/ocf" +- name: Update sysinv with new region name + replace: + path: /etc/sysinv/sysinv.conf + regexp: "region_name=.*$" + replace: "region_name={{ region_name }}" when: distributed_cloud_role == 'subcloud' +- name: Restart sysinv-agent and sysinv-api to pick up sysinv.conf update + command: "{{ item }}" + with_items: + - /etc/init.d/sysinv-agent restart + - /usr/lib/ocf/resource.d/platform/sysinv-api reload + environment: + OCF_ROOT: "/usr/lib/ocf" + +- name: Wait for sysinv inventory + wait_for: + path: /var/run/sysinv/.sysinv_reported + state: present + timeout: 600 + msg: Timeout waiting for system inventory to complete + - name: Set flag to mark the initial db population completed milestone file: path: "{{ initial_db_populated_flag }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/vars/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/vars/main.yml index 5a50b594e..74a379940 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/persist-config/vars/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/persist-config/vars/main.yml @@ -6,7 +6,6 @@ default_security_feature: "nopti nospectre_v2 nospectre_v1" temp_ssl_ca: "/tmp/ca-cert.pem" ssl_ca_complete_flag: /etc/platform/.ssl_ca_complete region_config: no -# The default disk size thresholds must align with the ones in -# config/.../sysinv/common/constants.py -small_root_disk_size: 240 
-minimum_small_root_disk_size: 181 +source_device_image_bind_dir: /opt/platform/device_images +target_device_image_bind_dir: /www/pages/device_images +etcd_certs_dir: "{{ config_permdir + '/etcd' }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/main.yml index ff18577ca..84cead085 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/main.yml @@ -108,6 +108,10 @@ docker.elastic.co: url: docker.elastic.co +- name: Save the list of user defined registry keys + set_fact: + user_defined_registry_keys: "{{ docker_registries.keys() | list }}" + - name: Merge user and default registries dictionaries set_fact: docker_registries: "{{ default_docker_registries | combine(docker_registries) }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/restore_prep_tasks.yml b/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/restore_prep_tasks.yml index ef03bf1c0..f726c9f14 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/restore_prep_tasks.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/prepare-env/tasks/restore_prep_tasks.yml @@ -62,7 +62,7 @@ */{{ temp_ssl_ca_file }}* args: warn: false - ignore_errors: yes + failed_when: false - name: Check that ssl_ca certificate exists stat: @@ -80,7 +80,7 @@ --overwrite --transform='s,.*/,,' '{{ archive_config_permdir }}/enabled_kube_plugins' args: warn: false - ignore_errors: yes + failed_when: false - name: Check that enabled_kube_plugins exists stat: diff --git a/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/main.yml index b3828c4eb..40b7862f8 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/main.yml @@ -8,6 +8,12 @@ # This role is to validate and save host (non secure) config. # +- name: Fail if any of the configured registry keys is unknown + fail: + msg: "Unknown registry key: '{{ item }}'. 
Acceptable keys are {{ known_registry_keys|join(', ') }} " + when: not item in known_registry_keys + with_items: "{{ user_defined_registry_keys }}" + # error check the password section of docker registries # check password parameters before trying to hide the password # we need to do that here as opposed to with the other docker registry diff --git a/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/purge_application.yml b/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/purge_application.yml index b85281cb1..2294c61c7 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/purge_application.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/validate-config/tasks/purge_application.yml @@ -20,7 +20,7 @@ - block: - name: Remove application - shell: "source /etc/platform/openrc; system application-remove {{ application }}" + shell: "source /etc/platform/openrc; system application-remove --force {{ application }}" - name: Wait until application is in the uploaded state shell: "source /etc/platform/openrc; system application-show {{ application }} @@ -34,4 +34,4 @@ # expect applications to be in the uploaded state now # because of above check leaving 2 possible states and above code eliminating "applied" - name: Delete application - shell: "source /etc/platform/openrc; system application-delete {{ application }}" + shell: "source /etc/platform/openrc; system application-delete --force {{ application }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/validate-config/vars/main.yml b/playbookconfig/src/playbooks/roles/bootstrap/validate-config/vars/main.yml index 06d23e0c8..9bd63d8c7 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/validate-config/vars/main.yml +++ b/playbookconfig/src/playbooks/roles/bootstrap/validate-config/vars/main.yml @@ -15,3 +15,11 @@ system_controller_floating_address: none system_controller_subnet: none system_controller_oam_floating_address: none system_controller_oam_subnet: none + +known_registry_keys: + - quay.io + - gcr.io + - k8s.gcr.io + - docker.io + - docker.elastic.co + - defaults diff --git a/playbookconfig/src/playbooks/roles/common/armada-helm/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/armada-helm/tasks/main.yml new file mode 100644 index 000000000..8cf37d8fe --- /dev/null +++ b/playbookconfig/src/playbooks/roles/common/armada-helm/tasks/main.yml @@ -0,0 +1,164 @@ +--- +# +# Copyright (c) 2020 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role is to perform tasks that configure and launch containerized Armada. +# + +# For the IPv6 system, the CIDR address should be changed to IPv6 to allow +# users from IPv6 address to access DB (ie. 
tiller running in container) +- block: + - name: Update postgresql pg_hba.conf with IPv6 address if system is IPv6 + command: "{{ item }}" + args: + warn: false + with_items: + - "sed -i -e 's|0.0.0.0/0|::0/0|g' /etc/postgresql/pg_hba.conf" + - "sed -i -e 's|0.0.0.0/32|::0/128|g' /etc/postgresql/pg_hba.conf" + + - name: Restart postgresql + systemd: + name: postgresql + state: restarted + when: (mode != 'upgrade_k8s_armada_helm' and + ipv6_addressing is defined and ipv6_addressing != False) + +- name: Get Helm SQL database password + vars: + script_content: | + import keyring + password = keyring.get_password("helmv2", "database") + if not password: + raise Exception("Helm database password not found.") + print password + shell: "{{ script_content }}" + args: + executable: /usr/bin/python + register: helm_sql_database_password + +- name: Set Armada overrides + set_fact: + helm_charts_url: "http://{{ controller_floating_address | ipwrap }}:{{ helm_repo_port }}/helm_charts" + helm_sql_connection_address: "postgresql://admin-helmv2:{{ helm_sql_database_password.stdout }}@{{ + controller_floating_address | ipwrap }}:5432/helmv2?sslmode=disable" + helm_sql_endpoint_ip: "{{ controller_floating_address | ipwrap }}" + +- name: Configure and launch containerized Armada + block: + - name: Add Helm repos + command: /sbin/helm repo add "{{ item }}" "http://127.0.0.1:{{ helm_repo_port }}/helm_charts/{{ item }}" --debug + with_items: + - "{{ helm_repo_name_apps }}" + - "{{ helm_repo_name_platform }}" + + - name: Update Helm repos + command: /sbin/helm repo update --debug + + - name: Create Armada overrides + template: + src: "roles/common/armada-helm/templates/armada-overrides.yaml.j2" + dest: "/tmp/armada-overrides.yaml" + become_user: sysadmin + + - name: Create namespace for Armada + command: > + kubectl create namespace {{ armada_namespace }} + failed_when: false + register: create_ns + + - name: Fail if creating namespace fails + fail: + msg: "Failed to create {{ armada_namespace }} namespace. 
Error: {{ create_ns.stderr }}" + when: create_ns.rc is defined and create_ns.rc !=0 and + create_ns.stderr is not search('AlreadyExists') + + # Retrieve local registry credentials if it's unknown + - block: + - name: Get local registry credentials + vars: + script_content: | + import keyring + password = keyring.get_password("sysinv", "services") + if not password: + raise Exception("Local registry password not found.") + print dict(username='sysinv', password=str(password)) + shell: "{{ script_content }}" + args: + executable: /usr/bin/python + register: local_registry_credentials_output + + - set_fact: + local_registry_credentials: "{{ local_registry_credentials_output.stdout }}" + local_registry: "registry.local:9001" + when: local_registry_credentials is not defined + + - name: Check if secret exists + command: kubectl -n {{ armada_namespace }} get secret {{ armada_secret_name }} + failed_when: false + register: armada_get_secret + + - name: Create secret if it doesn't exist + command: >- + kubectl -n {{ armada_namespace }} create secret docker-registry {{ armada_secret_name }} + --docker-server={{ local_registry }} + --docker-username={{ local_registry_credentials['username'] }} + --docker-password={{ local_registry_credentials['password'] }} + when: armada_get_secret.rc is defined and armada_get_secret.rc !=0 and + armada_get_secret.stderr is search('NotFound') + + # Configure sane node label values that work with armada node selector + - name: Create Armada node label + command: > + kubectl label node controller-0 armada=enabled --overwrite=true + + # To prevent helm-upload requiring sudo and a tty for password, + # become the intended www user. + - name: Upload Armada charts + become_user: www + command: > + /usr/local/sbin/helm-upload stx-platform /opt/extracharts/armada-0.1.0.tgz + + - name: Update info of available charts from chart repos + command: > + /sbin/helm repo update --debug + + - name: Check if Armada revisions exists + command: >- + /sbin/helm status armada + --namespace {{ armada_namespace }} + failed_when: false + register: armada_check + + - name: Uninstall Armada revisions + command: >- + /sbin/helm uninstall + --namespace {{ armada_namespace }} armada + when: armada_check.rc == 0 + + - name: Launch Armada with Helm v3 + command: >- + /sbin/helm upgrade --install armada stx-platform/armada + --namespace {{ armada_namespace }} + --values /tmp/armada-overrides.yaml + --debug + + # For the armada upgrade during system upgrade, wait until + # armada pod is in a ready state before marking it as successful. 
+ # This is needed as helm releases migration should be done + # after tiller is running with SQL backend + - name: Launch Armada with Helm v3 (Upgrade armada) + command: >- + /sbin/helm upgrade --install armada stx-platform/armada + --namespace {{ armada_namespace }} + --values /tmp/armada-overrides.yaml + --wait + --debug + when: mode == "upgrade_k8s_armada_helm" + + become_user: sysadmin + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + HOME: /home/sysadmin diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/armada-overrides.yaml.j2 b/playbookconfig/src/playbooks/roles/common/armada-helm/templates/armada-overrides.yaml.j2 similarity index 73% rename from playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/armada-overrides.yaml.j2 rename to playbookconfig/src/playbooks/roles/common/armada-helm/templates/armada-overrides.yaml.j2 index ce9d863c2..f6b54fbd4 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-essential-services/templates/armada-overrides.yaml.j2 +++ b/playbookconfig/src/playbooks/roles/common/armada-helm/templates/armada-overrides.yaml.j2 @@ -20,6 +20,13 @@ pod: type: none replicas: api: 1 + # When updating armada, the old pod is terminated after the new pod is ready. + # This allows "helm install/upgrade" with --wait to work when replicas is 1. + lifecycle: + upgrades: + deployments: + rolling_update: + max_unavailable: 0 probes: armada: api: @@ -37,9 +44,9 @@ pod: liveness: enabled: true params: - failureThreshold: 3 + failureThreshold: 2 initialDelaySeconds: 1 - periodSeconds: 10 + periodSeconds: 4 successThreshold: 1 timeoutSeconds: 1 readiness: @@ -50,25 +57,6 @@ pod: periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 -endpoints: - identity: - auth: - armada: - password: "{{ armada_default_password }}" - project_domain_name: default - project_name: default - region_name: RegionOne - user_domain_name: default - username: admin - armada: - password: "{{ armada_password }}" - project_domain_name: {{ armada_domain }} - project_name: service - region_name: RegionOne - user_domain_name: {{ armada_domain }} - username: "{{ armada_user }}" - host_fqdn_override: - default: "{{ controller_floating_address }}" dependencies: static: api: @@ -85,9 +73,14 @@ manifests: job_ks_service: false job_ks_endpoints: false job_ks_user: false + secret_keystone: false conf: tiller: charts_url: {{ helm_charts_url }} repo_names: - {{ helm_repo_name_apps }} - {{ helm_repo_name_platform }} + storage: sql + sql_dialect: postgres + sql_connection: {{ helm_sql_connection_address }} + sql_endpoint_ip: {{ helm_sql_endpoint_ip }} diff --git a/playbookconfig/src/playbooks/roles/common/armada-helm/vars/main.yml b/playbookconfig/src/playbooks/roles/common/armada-helm/vars/main.yml new file mode 100644 index 000000000..7d558a79c --- /dev/null +++ b/playbookconfig/src/playbooks/roles/common/armada-helm/vars/main.yml @@ -0,0 +1,3 @@ +--- +armada_namespace: armada +armada_secret_name: default-registry-key diff --git a/playbookconfig/src/playbooks/roles/common/create-etcd-certs/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/create-etcd-certs/tasks/main.yml new file mode 100644 index 000000000..23a3c7d16 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/common/create-etcd-certs/tasks/main.yml @@ -0,0 +1,129 @@ +--- +# +# Copyright (c) 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# ROLE DESCRIPTION: +# Create etcd server and client certs and key. + +- name: Generate private key for etcd server and client + openssl_privatekey: + path: "/etc/etcd/{{ item }}.key" + type: RSA + size: 4096 + state: present + force: true + with_items: + - "etcd-server" + - "apiserver-etcd-client" + - "etcd-client" + +- name: Generate CSRs for etcd server and client + openssl_csr: + path: "/etc/etcd/{{ item }}.csr" + privatekey_path: "/etc/etcd/{{ item }}.key" + common_name: "{{ item }}" + key_usage: + - digitalSignature + extended_key_usage: + - serverAuth + - clientAuth + subject_alt_name: + - IP:{{ cluster_floating_address }} + - IP:127.0.0.1 + force: true + with_items: + - "etcd-server" + - "apiserver-etcd-client" + +- name: Generate CSRs for etcd root client + openssl_csr: + path: "/etc/etcd/{{ item }}.csr" + privatekey_path: "/etc/etcd/{{ item }}.key" + common_name: "root" + key_usage: + - digitalSignature + extended_key_usage: + - serverAuth + - clientAuth + force: true + with_items: + - "etcd-client" + +- name: Check if CA exists + stat: + path: /etc/kubernetes/pki/ca.crt + register: ca_file + +- name: Copy existed CA + copy: + src: "/etc/kubernetes/pki/{{ item }}" + dest: "/etc/etcd/{{ item }}" + remote_src: yes + force: yes + with_items: + - "ca.crt" + - "ca.key" + when: ca_file.stat.exists + +- name: copy user specified CA + copy: + src: "{{ item }}" + dest: "/etc/etcd/{{ item }}" + remote_src: yes + force: yes + with_items: + - "{{k8s_root_ca_cert}}" + - "{{k8s_root_ca_key}}" + when: (k8s_root_ca_cert) + +- block: + - name: Generate private key for kubernetes-ca + # Reuse this kubernetes-ca for the etcd-ca, + # will copy to /etc/kubernetes/pki later + openssl_privatekey: + path: "/etc/etcd/ca.key" + type: RSA + size: 4096 + state: present + force: true + + - name: Generate CSR for kubernetes-ca + openssl_csr: + path: "/etc/etcd/ca.csr" + privatekey_path: "/etc/etcd/ca.key" + common_name: kubernetes + basic_constraints: + - CA:true + - pathlen:1 + basic_constraints_critical: True + key_usage: + - keyCertSign + - digitalSignature + force: true + + - name: Generate self-signed CA certificate + openssl_certificate: + path: "/etc/etcd/ca.crt" + privatekey_path: "/etc/etcd/ca.key" + csr_path: "/etc/etcd/ca.csr" + provider: selfsigned + force: true + + when: not ca_file.stat.exists and k8s_root_ca_cert == '' + +- name: Generate certs signed with kubernetes CA certificate" + openssl_certificate: + path: "/etc/etcd/{{ item }}.crt" + csr_path: "/etc/etcd/{{ item }}.csr" + ownca_path: "/etc/etcd/ca.crt" + ownca_privatekey_path: "/etc/etcd/ca.key" + provider: ownca + force: true + with_items: + - "etcd-server" + - "apiserver-etcd-client" + - "etcd-client" diff --git a/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.erb b/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.erb index ae26dcb55..fb82dab8e 100644 --- a/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.erb +++ b/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.erb @@ -4,6 +4,10 @@ localAPIEndpoint: advertiseAddress: <%= @apiserver_advertise_address %> nodeRegistration: criSocket: "/var/run/containerd/containerd.sock" + # configure kubeadm to not apply the default + # taint "node-role.kubernetes.io/master:NoSchedule" + # to control plane nodes + taints: [] --- apiVersion: kubeadm.k8s.io/v1beta2 kind: ClusterConfiguration @@ -40,6 +44,9 @@ etcd: external: endpoints: - <%= @etcd_endpoint %> + caFile: /etc/kubernetes/pki/ca.crt + certFile: 
/etc/kubernetes/pki/apiserver-etcd-client.crt + keyFile: /etc/kubernetes/pki/apiserver-etcd-client.key imageRepository: "registry.local:9001/k8s.gcr.io" kubernetesVersion: v1.18.1 networking: diff --git a/playbookconfig/src/playbooks/roles/common/host-lock/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/host-lock/tasks/main.yml new file mode 100644 index 000000000..2fd8f8a31 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/common/host-lock/tasks/main.yml @@ -0,0 +1,28 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +- name: Query administrative state + shell: source /etc/platform/openrc; system host-show {{ target_host }} --column administrative --format value + register: administrative_state + +- block: + - name: Lock host + shell: source /etc/platform/openrc; system host-lock {{ target_host }} + retries: 10 + delay: 30 + register: result + until: result.rc == 0 + + - name: Wait for lock + shell: source /etc/platform/openrc; system host-show {{ target_host }} --column administrative --format value + register: check_lock + failed_when: false + retries: 30 + delay: 10 + until: check_lock.stdout == "locked" + + when: administrative_state.stdout != "locked" diff --git a/playbookconfig/src/playbooks/roles/common/host-unlock/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/host-unlock/tasks/main.yml new file mode 100644 index 000000000..842dd1ca2 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/common/host-unlock/tasks/main.yml @@ -0,0 +1,13 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +- name: Unlock host + shell: source /etc/platform/openrc; system host-unlock {{ target_host }} + retries: 10 + delay: 30 + register: result + until: result.rc == 0 diff --git a/playbookconfig/src/playbooks/roles/common/load-images-information/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/load-images-information/tasks/main.yml index 946efaa6d..a3d204103 100644 --- a/playbookconfig/src/playbooks/roles/common/load-images-information/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/common/load-images-information/tasks/main.yml @@ -1,6 +1,6 @@ --- # -# Copyright (c) 2020 Wind River Systems, Inc. +# Copyright (c) 2020-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -24,11 +24,70 @@ - set_fact: kubernetes_images: "{{ kubernetes_images_output.stdout_lines }}" - - include_vars: + - name: Read in system images list + include_vars: dir: "vars/k8s-{{ kubernetes_version }}" files_matching: "system-images.yml" - - name: Get the list of platform images + # Optional system images can be addded in {{ additional_system_images_conf_file }} + # under {{ additional_system_images_conf_path }}. + # Additional static images can be introduced in this file, e.g. + # additional_static_images_list: + # image1_name: /image1:v1.0 + # image2_name: /image2:v2.0 + # Additional static images required or a DC controller only can also be introduced + # in this file, e.g. 
+ # additional_system_controller_static_images_list: + # image3_name: /image3:v3.0 + # image4_name: /image4:v4.0 + - name: Check if additional image config file exists + stat: + path: "{{ additional_system_images_conf_path + '/' + additional_system_images_conf_file }}" + register: additional_config_file + + - name: Read in additional system images list(s) in localhost + include_vars: + file: "{{ additional_config_file.stat.path }}" + when: additional_config_file.stat.exists and + inventory_hostname == "localhost" + + # When invoking the playbook remotely, the additional config file locates in the remote + # host may not also exist in the control host, fetch it to the control host to prevent + # include_vars failure. + - block: + - name: Create a temporary file on remote + tempfile: + state: file + prefix: "additional_images_" + register: additional_system_images_temp_file + + - name: Fetch the additional images config in case the playbook is executed remotely + fetch: + src: "{{ additional_config_file.stat.path }}" + dest: "{{ additional_system_images_temp_file.path }}" + flat: yes + + - name: Read in additional system images list(s) fetched from remote + include_vars: + file: "{{ additional_system_images_temp_file.path }}" + + - name: Remove the temporary file on remote + file: + path: "{{ additional_system_images_temp_file.path }}" + state: absent + delegate_to: "{{ inventory_hostname }}" + + - name: Remove override temp file on Ansible control host + file: + path: "{{ additional_system_images_temp_file.path }}" + state: absent + delegate_to: localhost + become: no + + when: additional_config_file.stat.exists and + inventory_hostname != "localhost" + + - name: Categorize system images set_fact: networking_images: - "{{ calico_cni_img }}" @@ -45,4 +104,27 @@ - "{{ kubernetes_entrypoint_img }}" storage_images: - "{{ snapshot_controller_img }}" + + - name: Append additional static images if provisioned + set_fact: + static_images: "{{ static_images }} + [ '{{item.value}}' ]" + with_items: "{{ additional_static_images_list | default({})| dict2items }}" + when: additional_static_images_list is defined + + - block: + - name: Append RVMC image for a DC system controller + set_fact: + static_images: "{{ static_images }} + [ '{{ rvmc_img }}' ]" + + - name: Append additional static images for a DC system controller if provisioned + set_fact: + static_images: "{{ static_images }} + [ '{{item.value}}' ]" + with_items: "{{ additional_system_controller_static_images_list | default({})| dict2items }}" + when: additional_system_controller_static_images_list is defined + + when: (distributed_cloud_role is defined and + distributed_cloud_role == 'systemcontroller') or + (check_dc_controller is defined and + check_dc_controller.rc == 0) + when: kubernetes_version is defined diff --git a/playbookconfig/src/playbooks/roles/common/load-images-information/vars/k8s-v1.18.1/system-images.yml b/playbookconfig/src/playbooks/roles/common/load-images-information/vars/k8s-v1.18.1/system-images.yml index 66cf5a9d2..ab5f6c7da 100644 --- a/playbookconfig/src/playbooks/roles/common/load-images-information/vars/k8s-v1.18.1/system-images.yml +++ b/playbookconfig/src/playbooks/roles/common/load-images-information/vars/k8s-v1.18.1/system-images.yml @@ -9,9 +9,10 @@ calico_node_img: quay.io/calico/node:v3.12.0 calico_kube_controllers_img: quay.io/calico/kube-controllers:v3.12.0 calico_flexvol_img: quay.io/calico/pod2daemon-flexvol:v3.12.0 multus_img: docker.io/nfvpe/multus:v3.4 -sriov_cni_img: 
docker.io/starlingx/k8s-cni-sriov:stx.3.0-v2.2 +sriov_cni_img: docker.io/starlingx/k8s-cni-sriov:stx.5.0-v2.6-7-gb18123d8 sriov_network_device_img: docker.io/starlingx/k8s-plugins-sriov-network-device:stx.4.0-v3.2-16-g4e0302ae # Keep the snapshot-controller image in sync with the one provided at: # cluster/addons/volumesnapshots/volume-snapshot-controller/volume-snapshot-controller-deployment.yaml # in the kubernetes github repo snapshot_controller_img: quay.io/k8scsi/snapshot-controller:v2.0.0-rc2 +rvmc_img: docker.io/starlingx/rvmc:stx.5.0-v1.0.0 diff --git a/playbookconfig/src/playbooks/roles/common/load-images-information/vars/main.yml b/playbookconfig/src/playbooks/roles/common/load-images-information/vars/main.yml new file mode 100644 index 000000000..20ecea7e0 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/common/load-images-information/vars/main.yml @@ -0,0 +1,8 @@ +--- +# +# Copyright (c) 2020 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +additional_system_images_conf_path: /usr/share/additional-config +additional_system_images_conf_file: additional-system-images.yml diff --git a/playbookconfig/src/playbooks/roles/common/push-docker-images/files/download_images.py b/playbookconfig/src/playbooks/roles/common/push-docker-images/files/download_images.py index 99d307fcd..ac18589a9 100644 --- a/playbookconfig/src/playbooks/roles/common/push-docker-images/files/download_images.py +++ b/playbookconfig/src/playbooks/roles/common/push-docker-images/files/download_images.py @@ -87,8 +87,17 @@ def download_an_image(img): # as opae container runs via docker. # TODO: run opae with containerd. if not ('n3000-opae' in target_img): - client.remove_image(target_img) - client.remove_image(local_img) + delete_warn = "WARNING: Image %s was not deleted because it was not " \ + "present into the local docker filesystem" + if client.images(target_img): + client.remove_image(target_img) + else: + print(delete_warn % target_img) + if client.images(local_img): + client.remove_image(local_img) + else: + print(delete_warn % local_img) + return target_img, True except docker.errors.NotFound as e: print(err_msg + str(e)) diff --git a/playbookconfig/src/playbooks/roles/common/push-docker-images/files/push_pull_local_registry.py b/playbookconfig/src/playbooks/roles/common/push-docker-images/files/push_pull_local_registry.py index 96a300c70..41b374e26 100644 --- a/playbookconfig/src/playbooks/roles/common/push-docker-images/files/push_pull_local_registry.py +++ b/playbookconfig/src/playbooks/roles/common/push-docker-images/files/push_pull_local_registry.py @@ -54,7 +54,15 @@ def push_from_filesystem(image): subprocess.check_call(["crictl", "pull", "--creds", auth_str, image]) print("Image %s download succeeded by containerd" % image) - client.remove_image(image) + # Clean up docker images except for n3000-opae + # as opae container runs via docker. + # TODO: run opae with containerd. 
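        # (docker-py's APIClient.images(name) returns an empty list when the
        # image is not present locally, so guarding remove_image() like this
        # avoids the ImageNotFound/APIError that removing a missing image
        # would raise.)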
+ if not ('n3000-opae' in image): + if client.images(image): + client.remove_image(image) + else: + print("WARNING: Image %s was not deleted because it " + "was not present into the local docker filesystem" % image) return image, True except docker.errors.APIError as e: print(err_msg + str(e)) diff --git a/playbookconfig/src/playbooks/roles/common/push-docker-images/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/push-docker-images/tasks/main.yml index ad1d9cf92..5d259cbb3 100644 --- a/playbookconfig/src/playbooks/roles/common/push-docker-images/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/common/push-docker-images/tasks/main.yml @@ -9,6 +9,11 @@ # from public/private registries and push to local registry. # +- name: Set default values for docker_http_proxy and docker_https_proxy if they are undefined + set_fact: + docker_http_proxy: "{{ docker_http_proxy|default('') }}" + docker_https_proxy: "{{ docker_https_proxy|default('') }}" + - name: Get docker registries if not in bootstap or restore mode include: get_docker_registry.yml registry={{ item }} with_items: @@ -93,6 +98,10 @@ registry: "{{ item['url'] }}" username: "{{ item['username'] }}" password: "{{ item['password'] }}" + register: login_result + retries: 10 + delay: 5 + until: login_result is succeeded with_items: - "{{ k8s_registry }}" - "{{ gcr_registry }}" @@ -123,6 +132,9 @@ - name: Download images and push to local registry script: download_images.py {{ download_images }} register: download_images_output + retries: 10 + delay: 5 + until: download_images_output.rc == 0 environment: REGISTRIES: "{{ registries | to_json }}" diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/setup_sc_adminep_certs.yml b/playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/tasks/main.yml similarity index 76% rename from playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/setup_sc_adminep_certs.yml rename to playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/tasks/main.yml index b41e3a03e..6685ce4cf 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/tasks/setup_sc_adminep_certs.yml +++ b/playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/tasks/main.yml @@ -1,6 +1,6 @@ --- # -# copyright (c) 2020 Wind River Systems, Inc. +# copyright (c) 2020-21 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -85,31 +85,6 @@ kubectl --kubeconfig=/etc/kubernetes/admin.conf -n "{{ sc_adminep_ca_cert_ns }}" wait --for=condition=ready certificate "{{ sc_adminep_cert_secret }}" --timeout=30s -- name: Extract subcloud admin endpoint certificate - shell: >- - kubectl --kubeconfig=/etc/kubernetes/admin.conf get secret "{{ sc_adminep_cert_secret }}" - -n "{{ sc_adminep_ca_cert_ns }}" -o=jsonpath='{.data.tls\.crt}' - | base64 --decode > "{{ sc_adminep_temp_dir }}/{{ sc_adminep_cert }}" - -- name: Extract subcloud admin endpoint key - shell: >- - kubectl --kubeconfig=/etc/kubernetes/admin.conf get secret "{{ sc_adminep_cert_secret }}" - -n "{{ sc_adminep_ca_cert_ns }}" -o=jsonpath='{.data.tls\.key}' - | base64 --decode > "{{ sc_adminep_temp_dir }}/{{ sc_adminep_key }}" - -- name: Create haproxy tls certifcate - shell: >- - cat "{{ sc_adminep_temp_dir }}/{{ sc_adminep_cert }}" - "{{ sc_adminep_temp_dir }}/{{ sc_adminep_key }}" - > "{{ sc_adminep_temp_dir }}/{{ haproxy_adminep_cert }}" - -- name: Install haproxy tls certficate - copy: - src: "{{ sc_adminep_temp_dir }}/{{ haproxy_adminep_cert }}" - dest: /etc/ssl/private - remote_src: yes - mode: 0400 - - name: Copy admin endpoint certficates to the shared filesystem directory copy: src: "{{ item }}" @@ -117,7 +92,6 @@ remote_src: yes mode: 0400 with_items: - - "/etc/ssl/private/{{ haproxy_adminep_cert }}" - "/etc/pki/ca-trust/source/anchors/{{ dc_adminep_root_ca_cert }}" - name: Cleanup temporary working directory diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/templates/setup-sc-adminep-certs.yaml.j2 b/playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/templates/setup-sc-adminep-certs.yaml.j2 similarity index 100% rename from playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/templates/setup-sc-adminep-certs.yaml.j2 rename to playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/templates/setup-sc-adminep-certs.yaml.j2 diff --git a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/vars/main.yml b/playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/vars/main.yml similarity index 91% rename from playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/vars/main.yml rename to playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/vars/main.yml index d9ea4739c..4acc884ef 100644 --- a/playbookconfig/src/playbooks/roles/bootstrap/bringup-bootstrap-applications/vars/main.yml +++ b/playbookconfig/src/playbooks/roles/common/setup-subcloud-adminep-certs/vars/main.yml @@ -10,4 +10,3 @@ sc_adminep_ca_cert_ns: sc-cert sc_adminep_cert_duration: 4320h sc_adminep_cert_renew_before: 720h setup_sc_adminep_certs_yaml: setup-sc-adminep-certs.yaml -haproxy_adminep_cert: admin-ep-cert.pem diff --git a/playbookconfig/src/playbooks/roles/common/validate-target/tasks/main.yml b/playbookconfig/src/playbooks/roles/common/validate-target/tasks/main.yml index 8cef624ad..08f56be65 100644 --- a/playbookconfig/src/playbooks/roles/common/validate-target/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/common/validate-target/tasks/main.yml @@ -1,12 +1,23 @@ --- # -# Copyright (c) 2020 Wind River Systems, Inc. +# Copyright (c) 2020-2021 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # # ROLE DESCRIPTION: -# This role performs checks to ensure the target is combatiable for these playbooks +# This role performs checks to ensure the target is compatible with the playbook +# and is ready for the next step. # +- set_fact: + check_system: "{{ check_system | default(false) }}" + check_load: "{{ check_load | default(false) }}" + check_bootstrap_address: "{{ check_bootstrap_address | default(false) }}" + check_patches: "{{ check_patches | default(false) }}" + +- name: Gather facts if check_bootstrap_address is turned on + setup: + when: check_bootstrap_address and bootstrap_address is defined + - name: Retrieve software version number # lookup module does not work with /etc/build.info as it does not have ini # format. Resort to shell source. @@ -27,12 +38,83 @@ msg: "system_type is missing in /etc/platform/platform.conf" when: system_type_result.stdout_lines|length == 0 -- name: Set software version, system type config path facts +- name: Set host software version, system type set_fact: - software_version: "{{ sw_version_result.stdout_lines[0] }}" - system_type: "{{ system_type_result.stdout_lines[0] }}" + host_software_version: "{{ sw_version_result.stdout }}" + host_system_type: "{{ system_type_result.stdout }}" - name: Fail if host software version is not supported by this playbook fail: - msg: "This playbook is not compatible with StarlingX software version {{ software_version }}." - when: software_version not in supported_release_versions + msg: "This playbook is not compatible with StarlingX software version {{ host_software_version }}." + when: host_software_version not in supported_release_versions + +# check_system validation, e.g. right after remote fresh install +- name: Check if the system is ready + command: "systemctl is-active multi-user.target" + register: check_active + retries: 30 + delay: 40 + until: check_active.rc == 0 + no_log: true + when: check_system + +# check_load validation, e.g. prior to subcloud upgrade/restore of non-Redfish subcloud +- block: + - name: Fail if the host was not installed with the right software version + fail: + msg: > + This host is running {{ host_software_version }}. The expected software + version is {{ software_version }}. + when: host_software_version != software_version + when: check_load and software_version is defined + +# check_bootstrap_address - e.g. prior to subcloud restore of non-Redfish subcloud +- block: + - set_fact: + ipv4_addressing: "{{ bootstrap_address | ipv4 }}" + ipv6_addressing: "{{ bootstrap_address | ipv6 }}" + + - name: Fail if the boot address does not exist in this host (IPv4) + fail: + msg: >- + The specified bootstrap address {{ bootstrap_address }} does + not exist on this host. All IPv4 addresses existing on this + host are {{ ansible_all_ipv4_addresses }}. + when: (ipv4_addressing) and + (bootstrap_address not in ansible_all_ipv4_addresses) + + - name: Fail if the boot address does not exist in this host (IPv6) + fail: + msg: >- + The specified bootstrap address {{ bootstrap_address }} does + not exist on this host. All IPv6 addresses existing on this + host are {{ ansible_all_ipv6_addresses }}. + when: (ipv6_addressing) and + (bootstrap_address not in ansible_all_ipv6_addresses) + + when: check_bootstrap_address + +# check_patches validation - e.g. 
prior to subcloud restore of non-Redfish subcloud +- block: + - name: Query list of applied patches on master + shell: sw-patch query|awk '($4 == "Applied" || $4 == "Partial-Applied") {print $1}' + register: master_patch_list + delegate_to: localhost + + - name: Query list of applied patches on the target + shell: sw-patch query|awk '($4 == "Applied" || $4 == "Partial-Applied") {print $1}' + register: target_patch_list + + - name: Fail if the applied patch list on target differs from that on the master + fail: + msg: >- + The applied patches ({{ target_patch_list.stdout_lines }}) are not the same on the + master ({{ master_patch_list.stdout_lines }}) + when: target_patch_list.stdout_lines != master_patch_list.stdout_lines + when: check_patches + +# Now that the host has been validated, set the official software version, system type facts +- name: Set software version, system type config path facts + set_fact: + software_version: "{{ host_software_version }}" + system_type: "{{ host_system_type }}" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/cleanup/tasks/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/cleanup/tasks/main.yml new file mode 100644 index 000000000..a1e708c82 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/cleanup/tasks/main.yml @@ -0,0 +1,15 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Cleanup temporary files + file: + path: /tmp/edgeworker + state: absent + +- name: Remove the provision in progress flag + file: + path: "{{ edgeworker_provision_flag_file }}" + state: absent diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/kubernetes/tasks/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/kubernetes/tasks/main.yml new file mode 100644 index 000000000..87e0e5c74 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/kubernetes/tasks/main.yml @@ -0,0 +1,66 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Prepare containerd config + command: "cp /etc/containerd/config.toml /tmp/edgeworker/containerd-config.toml" + +- name: Change containerd config owner + file: + path: /tmp/edgeworker/containerd-config.toml + owner: "{{ ansible_user }}" + +- name: Prepare registry cert + command: "cp /etc/ssl/private/registry-cert.crt /tmp/edgeworker/registry-cert.crt" + +- name: Change registry cert owner + file: + path: /tmp/edgeworker/registry-cert.crt + owner: "{{ ansible_user }}" + +- name: Prepare docker config and registry cert + command: "cp -r /etc/docker /tmp/edgeworker/docker" + +- name: Change docker config owner + file: + path: /tmp/edgeworker/docker + state: directory + recurse: true + owner: "{{ ansible_user }}" + +- name: Prepare cgroup setup bash + command: "cp /usr/bin/kubelet-cgroup-setup.sh /tmp/edgeworker/kubelet-cgroup-setup.sh" + +- name: Change cgroup setup bash owner + file: + path: /tmp/edgeworker/kubelet-cgroup-setup.sh + owner: "{{ ansible_user }}" + +- name: Check kubeadm master + shell: "awk -F '//' '/server/{print$2}' /etc/kubernetes/admin.conf" + register: kubeadm_master_out + +- name: Set kubeadm masterip + set_fact: + kubeadm_master: "{{ kubeadm_master_out.stdout }}" + +- name: Check kubeadm token + shell: "kubeadm token create" + register: kubeadm_token_create_out + +- name: Set kubeadm token + set_fact: + 
kubeadm_token: "{{ kubeadm_token_create_out.stdout }}" + +- name: Get kubeadm ca cert hash + shell: >- + openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | + openssl rsa -pubin -outform der 2>/dev/null | + openssl dgst -sha256 -hex | sed 's/^.* /sha256:/' + register: ca_cert_hash_out + +- name: Set kubeadm_ca_cert_hash fact + set_fact: + kubeadm_ca_cert_hash: "{{ ca_cert_hash_out.stdout }}" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/platform/tasks/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/platform/tasks/main.yml new file mode 100644 index 000000000..59969c4c0 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-controller/platform/tasks/main.yml @@ -0,0 +1,99 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Define a flag file to inidicate edgeworker is being provisioned + set_fact: + edgeworker_provision_flag_file: /var/run/edgeworker_provisioning_in_progress + +- name: Check if edgeworker playbook is running by other users + stat: + path: "{{ edgeworker_provision_flag_file }}" + register: edgeworker_provision_flag + +- name: Fail if another user is provisioning edgeworker nodes + fail: + msg: > + An edgeworker provisioning session is already in progress, if this is not the case, + please remove "{{ edgeworker_provision_flag_file }}" and try again. + when: edgeworker_provision_flag.stat.exists + +- name: Create a flag to indicate the provision is in progress + file: + path: "{{ edgeworker_provision_flag_file }}" + state: touch + +- name: Cleanup temporary files if previous run failed + file: + path: /tmp/edgeworker + state: absent + +- name: Get host list with hostname/ip/uuid/personality + shell: | + source /etc/platform/openrc + system host-list --column hostname --column mgmt_ip --column uuid --column personality --format yaml + register: host_list_result + +- name: Set a list for node hostname and ip address + set_fact: + hostname_ip_list: "{{ hostname_ip_list | default({}) | combine({item.hostname: item.mgmt_ip}) }}" + with_items: "{{ host_list_result.stdout | from_yaml }}" + +- name: Set ansible_host of edgeworker + add_host: + hostname: '{{ item }}' + ansible_host: '{{ item }}' + ansible_ssh_host: '{{ hostname_ip_list[item] }}' + with_inventory_hostnames: + - edgeworker + +- name: Write edgeworker to /etc/hosts + lineinfile: + dest: /etc/hosts + state: present + regexp: "^{{ hostname_ip_list[item] }}" + line: "{{ hostname_ip_list[item] }} {{ item }}" + with_inventory_hostnames: + - edgeworker + +- name: Check the edgeworker connectivity + wait_for: + host: "{{ hostvars[item]['ansible_ssh_host'] }}" + port: 22 + state: started + delay: 0 + delay: 30 + retries: 5 + failed_when: false + with_inventory_hostnames: + - edgeworker + become: no + +- name: Create edgeworker temp directory for configuration files + file: + path: /tmp/edgeworker + state: directory + +- name: Create the host overrides file + command: "/usr/bin/sysinv-utils create-host-overrides /tmp/edgeworker/host-overrides.yml" + +- name: Read the overrides variables + include_vars: + file: "/tmp/edgeworker/host-overrides.yml" + +- name: Set default docker proxy values if not configured + set_fact: + http_proxy: "{{ docker_http_proxy|default('') }}" + https_proxy: "{{ docker_https_proxy|default('') }}" + no_proxy: "{{ docker_no_proxy|default([]) }}" + +- name: Get docker registry config + shell: | + source /etc/platform/openrc + system 
service-parameter-list | awk '/docker-registry/&&/url/{print$10}' + register: docker_registry_result + +- set_fact: + docker_registry: "{{ docker_registry_result.stdout }}" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/defaults/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/defaults/main.yml new file mode 100644 index 000000000..672fa4b05 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/defaults/main.yml @@ -0,0 +1,8 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +containerd_cfg_dir: /etc/containerd +containerd_bin_dir: /usr/bin diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/handlers/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/handlers/main.yml new file mode 100644 index 000000000..3b76bbebb --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/handlers/main.yml @@ -0,0 +1,61 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# The dockerd and containerd will be restarted after the config files +# from the controllers are copied to the edgeworker nodes. Then the +# configuration of the daemon will be updated. +# +- name: Restart containerd + command: /bin/true + notify: + - Containerd | restart containerd + - Containerd | wait for containerd + +- name: Containerd | restart containerd + systemd: + name: containerd + state: restarted + enabled: yes + daemon-reload: yes + +- name: Containerd | wait for containerd + command: "{{ containerd_bin_dir }}/ctr images ls -q" + register: containerd_ready + retries: 8 + delay: 4 + until: containerd_ready.rc == 0 + +- name: Restart dockerd + command: /bin/true + notify: + - Docker | reload systemd + - Docker | reload docker + - Docker | wait for docker + +- name: Docker | reload systemd + systemd: + daemon_reload: true + +- name: Docker | reload docker + service: + name: docker + state: restarted + +- name: Docker | wait for docker + command: "{{ docker_bin_dir }}/docker images" + register: docker_ready + retries: 20 + delay: 1 + until: docker_ready.rc == 0 + +# This will be called after kubeadm join to update specific node-ip +# and cri socket config. 
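These handlers are chained: tasks elsewhere in the role notify the top-level "Restart containerd", "Restart dockerd" or "Restart kubelet" handlers, and the containerd/docker ones fan out to the restart-and-wait steps above through their own notify lists. A minimal hedged sketch of a task that would trigger the containerd chain (illustrative only; in this change the actual trigger is the registry certificate copy in configure-registry-cgroup.yml):

- name: Copy containerd config to an edgeworker (illustrative task)
  copy:
    src: /tmp/edgeworker/containerd-config.toml
    dest: /etc/containerd/config.toml
  notify:
    - Restart containerd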
+- name: Restart kubelet + systemd: + name: kubelet + state: restarted + enabled: yes + daemon-reload: yes diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-networking.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-networking.yml new file mode 100644 index 000000000..ca7be9360 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-networking.yml @@ -0,0 +1,23 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Stat sysctl file configuration + stat: + path: /etc/sysctl.d/99-sysctl.conf + +- name: Enable net.ipv4.ip_forward in sysctl + sysctl: + name: net.ipv4.ip_forward + value: 1 + sysctl_file: /etc/sysctl.d/99-sysctl.conf + state: present + +- name: Add net.ipv4.conf.tunl0.rp_filter in sysctl for calico + lineinfile: + path: /etc/sysctl.conf + line: net.ipv4.conf.tunl0.rp_filter=0 + state: present + failed_when: false diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-registry-cgroup.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-registry-cgroup.yml new file mode 100644 index 000000000..b5790de4a --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-registry-cgroup.yml @@ -0,0 +1,69 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Ensure containerd config directory exists + file: + dest: "{{ containerd_cfg_dir }}" + state: directory + mode: 0755 + owner: root + group: root + +- name: Copy containerd config file to {{ inventory_hostname }} + copy: + src: /tmp/edgeworker/containerd-config.toml + dest: "{{ containerd_cfg_dir }}/config.toml" + force: true + owner: root + group: root + mode: 0600 + +- name: Ensure registry cert directory exists + file: + dest: /etc/ssl/private + state: directory + mode: 0755 + owner: root + group: root + +- name: Copy controller docker certificates to /etc/docker on {{ inventory_hostname }} + copy: + src: /tmp/edgeworker/docker/certs.d + dest: /etc/docker/ + force: true + owner: root + group: root + +- name: Copy controller registry cert file to /etc/ssl/private/ on {{ inventory_hostname }} + copy: + src: /tmp/edgeworker/registry-cert.crt + dest: /etc/ssl/private/registry-cert.crt + force: true + owner: root + group: root + mode: 0400 + notify: + - Restart containerd + +- name: Copy controller cgroup setup script to {{ inventory_hostname }} + copy: + src: /tmp/edgeworker/kubelet-cgroup-setup.sh + dest: /usr/bin/kubelet-cgroup-setup.sh + force: true + owner: root + group: root + mode: 0700 + +- name: Add k8s-infra cgroup + command: /usr/bin/kubelet-cgroup-setup.sh + +- name: Add k8s-infra cgroup creation in kubelet service + lineinfile: + path: "/etc/systemd/system/kubelet.service.d/10-kubeadm.conf" + state: present + regexp: '^ExecStartPre=' + line: 'ExecStartPre=/usr/bin/kubelet-cgroup-setup.sh' + when: ansible_facts['distribution'] == "Ubuntu" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-swap.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-swap.yml new file mode 100644 index 000000000..2a74b211c --- /dev/null +++ 
b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/configure-swap.yml @@ -0,0 +1,24 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# Disable swap since kubernetes does not support swap on kubelet nodes +- name: Remove swapfile from /etc/fstab + mount: + name: "{{ item }}" + fstype: swap + state: absent + with_items: + - swap + - none + +- name: Check swap + command: /sbin/swapon -s + register: swapon + changed_when: no + +- name: Disable swap + command: /sbin/swapoff -a + when: swapon.stdout diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/install-ubuntu-packages.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/install-ubuntu-packages.yml new file mode 100644 index 000000000..0ec0c8c11 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/install-ubuntu-packages.yml @@ -0,0 +1,65 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Gather variables + include_vars: "vars/ubuntu.yml" + +- name: Install prerequisite packages for adding repos and installing kubelet + apt: + pkg: + - apt-transport-https + - ca-certificates + - curl + - gnupg-agent + - software-properties-common + +- name: Check if containerd executable exists + stat: + path: /usr/local/bin/containerd + register: containerd_installed + +- name: Get containerd version if exists + command: "/usr/local/bin/containerd --version" + register: containerd_installed_ver + when: containerd_installed.stat.exists + +- name: Install containerd + unarchive: + src: "https://storage.googleapis.com/cri-containerd-release/cri-containerd-{{ containerd_version }}.linux-amd64.tar.gz" + dest: / + remote_src: yes + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ https_proxy }}" + no_proxy: "{{ no_proxy }}" + when: (not containerd_installed.stat.exists or containerd_installed_ver.stdout != containerd_version) + +- name: Add kubernetes repo key + apt_key: + url: https://packages.cloud.google.com/apt/doc/apt-key.gpg + state: present + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ https_proxy }}" + no_proxy: "{{ no_proxy }}" + +- name: Add kubernetes repo to apt repo + apt_repository: + repo: deb https://apt.kubernetes.io/ kubernetes-xenial main + state: present + +- name: Ensure k8s packages are installed + action: "{{ k8s_package_info.pkg_mgr }}" + args: + pkg: "{{ item.name }}" + state: present + force: true + update_cache: true + register: install_task_result + until: install_task_result is succeeded + retries: 5 + delay: "{{ retry_stagger | d(5) }}" + with_items: "{{ k8s_package_info.pkgs }}" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/join-k8s-cluster.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/join-k8s-cluster.yml new file mode 100644 index 000000000..b27e46e7d --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/join-k8s-cluster.yml @@ -0,0 +1,68 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# These tasks enable edgeworkers joining to STX Kubernetes cluster +# +# 1. Prepare kubelet and containerd configuration +# 2. Join to STX Kubernetes +# 3. 
Pull Kubernetes images to the edgeworker nodes +# +- name: Add node ip and containerd runtime to kubelet config + lineinfile: + path: "{{ kubelet_extra_config_file }}" + state: present + regexp: '^KUBELET_EXTRA_ARGS=' + line: >- + "KUBELET_EXTRA_ARGS=--node-ip={{ ansible_ssh_host }} --container-runtime=remote + --container-runtime-endpoint=unix:///run/containerd/containerd.sock" + create: yes + notify: Restart kubelet + +- name: Check if kubelet config exists + stat: + path: /etc/kubernetes/kubelet.conf + register: kubelet_conf_st + +- name: Join the {{ inventory_hostname }} with k8s cluster + shell: >- + kubeadm join --token {{ hostvars['localhost']['kubeadm_token'] }} + --discovery-token-ca-cert-hash {{ hostvars['localhost']['kubeadm_ca_cert_hash'] }} + --skip-phases preflight {{ hostvars['localhost']['kubeadm_master'] }} + environment: + no_proxy: "{{ no_proxy }}" + when: not kubelet_conf_st.stat.exists + +- name: Modify the default cni bin directory if the Linux distribution is Ubuntu + lineinfile: + path: "{{ kubelet_extra_config_file }}" + regexp: "^(KUBELET_EXTRA_ARGS=.*)$" + line: '\1 --cni-bin-dir /usr/libexec/cni' + backrefs: yes + when: ansible_facts['distribution'] == "Ubuntu" + +- name: Get the controller registry credentials + vars: + script_content: | + import keyring + password = str(keyring.get_password("CGCS", "admin")) + if not password: + raise Exception("Local registry password not found.") + print dict(username='admin', password=password) + ansible_connection: local + shell: "{{ script_content }}" + args: + executable: /usr/bin/python + register: local_registry_credentials_output + +- set_fact: + local_registry: "registry.local:9001" + local_registry_credentials: "{{ local_registry_credentials_output.stdout }}" + +- name: Pull k8s gcr images from controller registry to {{ inventory_hostname }} + shell: >- + kubeadm config images list --kubernetes-version {{ k8s_version }} --image-repository {{ local_registry }}/k8s.gcr.io | + xargs -i sudo crictl pull --creds + {{ local_registry_credentials['username'] }}:{{ local_registry_credentials['password'] }} {} diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/main.yml new file mode 100644 index 000000000..40213b47d --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/tasks/main.yml @@ -0,0 +1,31 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Get Kubernetes version on the controller + shell: | + kubectl version --client=true --short=true | awk '{print $3}' + register: k8s_ver_result + delegate_to: localhost + +- set_fact: + kubernetes_version: "{{ k8s_ver_result.stdout }}" + +- include_vars: + dir: "{{ playbook_dir }}/roles/common/load-images-information/vars/k8s-{{ kubernetes_version }}" + files_matching: "system-images.yml" + +- import_tasks: configure-swap.yml + +- import_tasks: configure-networking.yml + +- include_tasks: install-ubuntu-packages.yml + when: ansible_facts['distribution'] == "Ubuntu" + +- import_tasks: configure-registry-cgroup.yml + +- meta: flush_handlers + +- import_tasks: join-k8s-cluster.yml diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/templates/http-proxy.j2 b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/templates/http-proxy.j2 new file mode 
100644 index 000000000..aca609701 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/templates/http-proxy.j2 @@ -0,0 +1,4 @@ +[Service] +Environment="HTTP_PROXY={{ http_proxy }}" +Environment="HTTPS_PROXY={{ https_proxy }}" +Environment="NO_PROXY={{ no_proxy }}" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/templates/kubernetes.repo.j2 b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/templates/kubernetes.repo.j2 new file mode 100644 index 000000000..65eda50b5 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/templates/kubernetes.repo.j2 @@ -0,0 +1,7 @@ +[kubernetes] +name=Kubernetes +baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64 +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/vars/ubuntu.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/vars/ubuntu.yml new file mode 100644 index 000000000..25855ce58 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/kubernetes/vars/ubuntu.yml @@ -0,0 +1,34 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +containerd_bin_dir: /usr/local/bin +containerd_version: '1.3.0' +containerd_versioned_pkg: + '1.3.3': containerd-1.3.3-0ubuntu1 + +docker_bin_dir: /usr/bin +docker_version: '18.09.6' +docker_versioned_pkg: + '18.09.6': 5:18.09.6~3-0~ubuntu-bionic + +k8s_version: '1.18.1' + +kubeadm_versioned_pkg: + '1.16.2': kubeadm=1.16.2-00 + '1.18.1': kubeadm=1.18.1-00 + +kubelet_versioned_pkg: + '1.16.2': kubelet=1.16.2-00 + '1.18.1': kubelet=1.18.1-00 + +k8s_package_info: + pkg_mgr: apt + pkgs: + - name: "{{ kubelet_versioned_pkg[k8s_version | string] }}" + - name: "{{ kubeadm_versioned_pkg[k8s_version | string] }}" + +kubelet_extra_config_file: /etc/default/kubelet diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/main.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/main.yml new file mode 100644 index 000000000..eadf4d6a5 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/main.yml @@ -0,0 +1,26 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# Prepare edgeworker OS configuration +# +# Some limitations for edgeworker nodes: +# - The edgeworker nodes do not support IPv6 +# - The edgeworker nodes only support Ubuntu as OS +# +- name: Fail if the network address type of the edgeworker node is IPv6 + fail: + msg: "IPv6 network addressing is currently not supported for edgeworker nodes." + when: ansible_default_ipv6.keys() | length != 0 + +- name: Fail if the edgeworker node OS is not Ubuntu + fail: + msg: "The edgeworker node OS must be Ubuntu." 
+ when: ansible_distribution != "Ubuntu" + +- import_tasks: prepare-host.yml + +- include_tasks: prepare-ubuntu.yml + when: ansible_distribution == "Ubuntu" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/prepare-host.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/prepare-host.yml new file mode 100644 index 000000000..00aea6c77 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/prepare-host.yml @@ -0,0 +1,37 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Set http_proxy value if configured + set_fact: + http_proxy: "{{ hostvars['localhost']['http_proxy'] }}" + when: hostvars['localhost']['http_proxy'] is defined + +- name: Set https_proxy value if configured + set_fact: + https_proxy: "{{ hostvars['localhost']['https_proxy'] }}" + when: hostvars['localhost']['https_proxy'] is defined + +- name: Set no_proxy value if configured + set_fact: + no_proxy: "{{ hostvars['localhost']['no_proxy'] }}" + when: hostvars['localhost']['no_proxy'] is defined + +- name: Set docker_registry value if configured + set_fact: + docker_registry: "{{ hostvars['localhost']['docker_registry'] }}" + when: hostvars['localhost']['docker_registry'] is defined + +- name: Ensure ssh directory exists + file: + dest: "/home/{{ ansible_user }}/.ssh" + state: directory + mode: 0700 + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + +- name: Set hostname + hostname: + name: "{{ ansible_host }}" diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/prepare-ubuntu.yml b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/prepare-ubuntu.yml new file mode 100644 index 000000000..e1cdc0c1d --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/tasks/prepare-ubuntu.yml @@ -0,0 +1,137 @@ +--- +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +- name: Set management ip address + set_fact: + mgmt_ip_addr: "{{ hostvars['localhost']['hostname_ip_list'][ansible_host] }}" + +# Find the management interface name by ip +# Search for specific ip in ipv4.address and ipv4_secondaries list +# Sample output: mgmt_if_name: eno1 +- name: Get management network interface name + set_fact: + mgmt_if_name: "{{ item }}" + when: > + (hostvars[ansible_host]['ansible_%s' % item]|default({})) + .get('ipv4', {}).get('address') == mgmt_ip_addr + or + mgmt_ip_addr in ((hostvars[ansible_host]['ansible_%s' % item]|default({})) + .get('ipv4_secondaries'))|map(attribute='address')|list + with_items: + - "{{ ansible_interfaces }}" + +- name: Set systemd-networkd interface config + template: + src: 1-interface.network.j2 + dest: /etc/systemd/network/1-interface.network + mode: '0644' + +# It's a workaround for stx-openstack neutron that +# it mandatorily requires docker0 bridge on the host +- name: Set systemd-networkd bridge config + template: + src: 5-bridge.netdev.j2 + dest: /etc/systemd/network/5-bridge.netdev + mode: '0644' + +- name: Set systemd-networkd bridge address + template: + src: 5-bridge.network.j2 + dest: /etc/systemd/network/5-bridge.network + mode: '0644' + +- name: Start systemd-networkd service + systemd: + name: systemd-networkd + daemon_reload: yes + enabled: yes + state: restarted + +- name: Read in service facts + service_facts: + +- block: + - name: Disable NetworkManager + 
systemd: + name: NetworkManager + enabled: no + state: stopped + + - name: Disable NetworkManager-wait-online + systemd: + name: NetworkManager-wait-online + enabled: no + state: stopped + + - name: Disable NetworkManager-dispatcher + systemd: + name: NetworkManager-dispatcher + enabled: no + state: stopped + + - name: Disable network-manager + systemd: + name: network-manager + enabled: no + state: stopped + + - name: Reboot edgeworker + reboot: + when: >- + ("NetworkManager.service" in ansible_facts.services and + ansible_facts.services["NetworkManager.service"].state != "stopped") + +- name: Set dns server for systemd-resolved + lineinfile: + dest: /etc/systemd/resolved.conf + state: present + insertbefore: EOF + regexp: "DNS={{ hostvars['localhost']['management_floating_address'] }}" + line: "DNS={{ hostvars['localhost']['management_floating_address'] }}" + +- name: Restart systemd-resolved service + systemd: + name: systemd-resolved + daemon_reload: yes + enabled: yes + state: restarted + +- name: Add registry.local to hosts + lineinfile: + dest: /etc/hosts + state: present + regexp: "registry.local" + line: "{{ hostvars['localhost']['management_floating_address'] }} registry.local" + +- name: Add proxy to apt.conf.d if http_proxy is defined + lineinfile: + path: "/etc/apt/apt.conf.d/10proxy" + regexp: '^Acquire::http::Proxy' + line: 'Acquire::http::Proxy "{{ http_proxy }}";' + create: true + state: present + when: + - http_proxy is defined + +- name: Install ntp packages + apt: + pkg: + - ntp + update_cache: yes + +- name: Config ntp server + lineinfile: + dest: /etc/ntp.conf + state: present + regexp: "^server {{ hostvars['localhost']['management_floating_address'] }}" + line: "server {{ hostvars['localhost']['management_floating_address'] }}" + +- name: Start ntp service + systemd: + name: ntp + enabled: yes + daemon_reload: yes + state: restarted diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/1-interface.network.j2 b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/1-interface.network.j2 new file mode 100644 index 000000000..2e03812c5 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/1-interface.network.j2 @@ -0,0 +1,6 @@ +[Match] +Name={{ mgmt_if_name }} + +[Network] +#Address= +DHCP=yes diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/5-bridge.netdev.j2 b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/5-bridge.netdev.j2 new file mode 100644 index 000000000..9dd2024ea --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/5-bridge.netdev.j2 @@ -0,0 +1,3 @@ +[NetDev] +Name=docker0 +Kind=bridge diff --git a/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/5-bridge.network.j2 b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/5-bridge.network.j2 new file mode 100644 index 000000000..6ef82603f --- /dev/null +++ b/playbookconfig/src/playbooks/roles/provision-edgeworker/prepare-edgeworker/os/templates/5-bridge.network.j2 @@ -0,0 +1,6 @@ +[Match] +Name=docker0 + +[Network] +Address=172.17.0.1/16 +ConfigureWithoutCarrier=yes diff --git a/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml b/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml index 6dc75b9ea..3b274e524 100644 --- 
a/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml @@ -163,7 +163,7 @@ shell: cp -ar /tmp/mon-store/store.db /var/lib/ceph/mon/ceph-{{ mon_name }} - name: Bring up ceph Monitor and OSDs - command: /etc/init.d/ceph start + command: /etc/init.d/ceph start mon osd - name: Wait for ceph monitor to be up shell: ceph -s diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/check-services-status/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/check-services-status/tasks/main.yml new file mode 100644 index 000000000..852c46ea0 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/check-services-status/tasks/main.yml @@ -0,0 +1,23 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role checks related services are all enabled-active before +# finishing the rehome playbook +# + +- name: Wait for 90 secs before check if services come up + wait_for: timeout=90 + +- name: Check all services are enabled-active + shell: >- + source /etc/platform/openrc; system service-list + | awk 'FNR >= 4 { print $8 }' + | grep -v "enabled-active" | wc -l + register: service_status_result + until: service_status_result.stdout == "1" + retries: 5 + delay: 30 diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/prepare-env/tasks/main.yml new file mode 100644 index 000000000..ccc4d1219 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/prepare-env/tasks/main.yml @@ -0,0 +1,68 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This role is to check the target host environment before proceeding to +# the next step. 
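The prepare-env tasks below derive the per-controller and OAM addresses as fixed offsets from the configured start addresses via the ipmath filter. A worked example with hypothetical values:

# management_start_address:        192.168.204.2  -> controller_floating_address
# derived controller_0_address:    192.168.204.3  (ipmath(1))
# derived controller_1_address:    192.168.204.4  (ipmath(2))
# external_oam_floating_address:   10.10.10.12
# derived oam_start_address:       10.10.10.13    (ipmath(1), used only when
#                                                  external_oam_node_0_address is not supplied)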
+# + +- name: Set config path facts + set_fact: + config_permdir: "{{ platform_path + '/config/' + software_version }}" + puppet_permdir: "{{ platform_path + '/puppet/' + software_version }}" + +- name: Set network facts + set_fact: + controller_floating_address: "{{ management_start_address }}" + external_oam_node_0_address: "{{ external_oam_node_0_address | default('derived') }}" + +- name: Set derived facts for subsequent tasks/roles + set_fact: + derived_network_params: + 'controller_0_address': "{{ controller_floating_address|ipmath(1) }}" + 'controller_1_address': "{{ controller_floating_address|ipmath(2) }}" + 'oam_start_address': "{{ external_oam_node_0_address if external_oam_node_0_address != 'derived' + else external_oam_floating_address | ipmath(1) }}" + +- name: Get existing docker no_proxy + shell: >- + source /etc/platform/openrc; system service-parameter-list | + awk '($4 == "docker" && $8 == "no_proxy") {print $2}' + register: existing_docker_no_proxy_result + +- block: + - name: Set subcloud docker no_proxy facts + set_fact: + subcloud_no_proxy: + - localhost + - 127.0.0.1 + - registry.local + - "{{ (cluster_service_subnet | ipaddr(1)).split('/')[0] }}" + - "{{ controller_floating_address }}" + - "{{ derived_network_params.controller_0_address }}" + - "{{ external_oam_floating_address }}" + - "{{ derived_network_params.oam_start_address }}" + - registry.central + - "{{ system_controller_oam_floating_address }}" + docker_no_proxy_combined: [] + + - name: Add user defined no-proxy address list to subcloud no proxy list + set_fact: + docker_no_proxy_combined: "{{ subcloud_no_proxy | union(docker_no_proxy) | ipwrap | unique }}" + + when: existing_docker_no_proxy_result.stdout | length > 0 + +- name: Get management interface of controller-0 + shell: >- + source /etc/platform/openrc; system interface-network-list controller-0 | + awk '$8 == "mgmt" { print $6 }' + register: controller_0_management_interface_result + +- name: Get management interface of controller-1 if the subcloud is not simplex + shell: >- + source /etc/platform/openrc; system interface-network-list controller-1 | + awk '$8 == "mgmt" { print $6 }' + register: controller_1_management_interface_result + when: system_mode != 'simplex' diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/prepare-env/vars/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/prepare-env/vars/main.yml new file mode 100644 index 000000000..2fad38992 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/prepare-env/vars/main.yml @@ -0,0 +1,9 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +platform_path: /opt/platform +puppet_path: /opt/platform/puppet diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/files/migrate_keystone_ids.py b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/files/migrate_keystone_ids.py new file mode 100644 index 000000000..1a0ed9075 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/files/migrate_keystone_ids.py @@ -0,0 +1,138 @@ +#!/usr/bin/python + +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +# Migrate keystone IDs during rehoming a subcloud +# + +import psycopg2 +import sys + +from psycopg2.extras import RealDictCursor + + +def get_keystone_local_user_id(user_name, cur): + """ Get a keystone local user id by name""" + + cur.execute("SELECT user_id FROM local_user WHERE name='%s'" % + user_name) + user_id = cur.fetchone() + if user_id is not None: + return user_id['user_id'] + else: + return user_id + + +def get_keystone_local_user_record(user_name, cur): + """ Get a keystone local user record by name""" + + cur.execute("SELECT public.user.* FROM public.user INNER JOIN public.local_user \ + ON public.user.id=public.local_user.user_id \ + WHERE public.local_user.name='%s'" % user_name) + user_record = cur.fetchone() + return user_record + + +def get_keystone_project_id(project_name, cur): + """ Get a keystone project id by name""" + + cur.execute("SELECT id FROM public.project WHERE name='%s'" % + project_name) + project_id = cur.fetchone() + if project_id is not None: + return project_id['id'] + else: + return project_id + + +def clean_keystone_non_local_user(user_id, cur): + """ Clean an existing keystone non local user by user id""" + + try: + cur.execute("DELETE FROM nonlocal_user WHERE user_id='%s'" % user_id) + cur.execute("DELETE FROM federated_user WHERE user_id='%s'" % user_id) + cur.execute("DELETE FROM public.user WHERE id='%s'" % user_id) + except Exception as ex: + print("Failed to clean the user id: %s" % user_id) + raise ex + + +def update_keystone_user_id(user_name, user_id): + """ Update the keystone user id""" + + conn = psycopg2.connect("dbname='keystone' user='postgres'") + with conn: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + current_user_id = get_keystone_local_user_id(user_name, cur) + if current_user_id != user_id: + try: + clean_keystone_non_local_user(user_id, cur) + local_user_record = get_keystone_local_user_record(user_name, cur) + cur.execute("INSERT INTO public.user (id, extra, enabled, created_at, domain_id) \ + VALUES ('%s', '%s', '%s', '%s', '%s')" % + (user_id, local_user_record['extra'], local_user_record['enabled'], + local_user_record['created_at'], local_user_record['domain_id'])) + cur.execute("UPDATE public.user_option SET user_id='%s' WHERE user_id='%s'" + % (user_id, local_user_record['id'])) + cur.execute("UPDATE public.assignment SET actor_id='%s' from public.local_user \ + WHERE public.assignment.actor_id=public.local_user.user_id AND \ + public.local_user.name='%s'" % (user_id, user_name)) + cur.execute("UPDATE public.local_user SET user_id='%s' \ + WHERE public.local_user.name='%s'" % (user_id, user_name)) + cur.execute("DELETE FROM public.user WHERE id='%s'" % local_user_record['id']) + except Exception as ex: + print("Failed to update keystone id for user: %s" % user_name) + raise ex + + +def update_barbican_project_external_id(old_id, new_id): + """ update the project external id in barbican db """ + + conn = psycopg2.connect("dbname='barbican' user='postgres'") + with conn: + with conn.cursor() as cur: + try: + cur.execute("UPDATE public.projects SET external_id='%s' WHERE \ + external_id='%s'" % (new_id, old_id)) + except Exception as ex: + raise ex + + +def update_keystone_project_id(project_name, project_id): + """ Update a keystone project id by name""" + + conn = psycopg2.connect("dbname='keystone' user='postgres'") + with conn: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + current_project_id = get_keystone_project_id(project_name, cur) + if 
current_project_id != project_id: + try: + cur.execute("UPDATE public.assignment SET target_id='%s' FROM public.project \ + WHERE public.assignment.target_id=public.project.id AND \ + public.project.name='%s'" % (project_id, project_name)) + cur.execute("UPDATE public.project SET id='%s' WHERE \ + name='%s'" % (project_id, project_name)) + except Exception as ex: + print("Failed to update keystone id for project: %s" % project_name) + raise ex + + try: + update_barbican_project_external_id(current_project_id, project_id) + except Exception as ex: + print("Failed to update external_id in barbican db for project: %s" % project_name) + raise ex + + +if __name__ == "__main__": + + keystone_name = sys.argv[1] + keystone_id = sys.argv[2] + keystone_type = sys.argv[3] + + if keystone_type == 'user': + update_keystone_user_id(keystone_name, keystone_id) + elif keystone_type == 'project': + update_keystone_project_id(keystone_name, keystone_id) diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/files/validate_keystone_passwords.sh b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/files/validate_keystone_passwords.sh new file mode 100644 index 000000000..36c4301ff --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/files/validate_keystone_passwords.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# As the 'openstack user set' command may fail to +# update the passwords, this script validates the password +# from db, to ensure the password is updated in database. +# + +USER_NAME=$1 +START_TIME=$2 + +# Search the password creation timestamp in microsecond +create_time_in_db=$(sudo -u postgres psql -c "select password.created_at_int \ + from local_user inner join password \ + on local_user.id=password.local_user_id \ + where local_user.name='"${USER_NAME}"' \ + and password.expires_at is null" keystone \ + |sed -n 3p) + +if [[ $((create_time_in_db/1000000)) -lt $START_TIME ]]; then + echo "Failed to update keystone password." + exit 1 +fi + +echo "Updated keystone password." +exit 0 diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/main.yml new file mode 100644 index 000000000..d923672d8 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/main.yml @@ -0,0 +1,29 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role updates the openstack keystone data(Ids, passwords) +# in database, reloads the configurations of related services and +# restarts these services +# + +- name: Temporarily disable lockout in keystone + lineinfile: + path: "/etc/keystone/keystone.conf" + regexp: "^{{ item }}" + state: absent + with_items: + - "lockout_failure_attempts" + - "lockout_duration" + +- name: Restart keystone service + command: "sm-restart service keystone" + +- name: Migrate keystone passwords + import_tasks: migrate_keystone_passwords.yml + +- name: Migrate keystone IDs + import_tasks: migrate_keystone_ids.yml diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/migrate_keystone_ids.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/migrate_keystone_ids.yml new file mode 100644 index 000000000..9a07b4634 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/migrate_keystone_ids.yml @@ -0,0 +1,68 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# SUB-TASKS DESCRIPTION: +# These tasks are to migrate the keystone IDs in keystone DB and hieradata. +# + +- name: Migrate keystone user IDs + script: migrate_keystone_ids.py {{ item.name }} {{ item.id }} 'user' + with_items: + - { name: 'admin', id: "{{ system_controller_keystone_admin_user_id }}" } + - { name: 'sysinv', id: "{{ system_controller_keystone_sysinv_user_id }}" } + - { name: 'dcmanager', id: "{{ system_controller_keystone_dcmanager_user_id }}" } + become_user: postgres + no_log: true + +- name: Migrate keystone project IDs + script: migrate_keystone_ids.py {{ item.name }} {{ item.id }} 'project' + with_items: + - { name: 'admin', id: "{{ system_controller_keystone_admin_project_id }}" } + - { name: 'services', id: "{{ system_controller_keystone_services_project_id }}" } + become_user: postgres + no_log: true + + # The values updated will be applied as puppet manifest after unlock +- name: Write required system controller keystone user and project Ids to static hieradata + lineinfile: + path: "{{ puppet_path }}/{{ software_version }}/hieradata/static.yaml" + regexp: "{{ item.From }}" + line: "{{ item.To }}" + with_items: + - { From: "^keystone::dc_admin_user_id", + To: "keystone::dc_admin_user_id: {{ system_controller_keystone_admin_user_id }}" } + - { From: "^keystone::dc_admin_project_id", + To: "keystone::dc_admin_project_id: {{ system_controller_keystone_admin_project_id }}" } + - { From: "^openstack::keystone::bootstrap::dc_services_project_id", + To: "openstack::keystone::bootstrap::dc_services_project_id: + {{ system_controller_keystone_services_project_id }}" } + - { From: "^platform::sysinv::bootstrap::dc_sysinv_user_id", + To: "platform::sysinv::bootstrap::dc_sysinv_user_id: + {{ system_controller_keystone_sysinv_user_id }}" } + - { From: "^platform::dcmanager::bootstrap::dc_dcmanager_user_id", + To: "platform::dcmanager::bootstrap::dc_dcmanager_user_id: + {{ system_controller_keystone_dcmanager_user_id }}" } + no_log: true + +- name: Reload related services + systemd: + name: "{{ item }}" + state: restarted + with_items: + - sm-api + - fminit + - fm-api + - sysinv-agent + - sw-patch-controller + +- name: Restart sm managed services + command: "sm-restart service {{ item }}" + with_items: + - lighttpd + - keystone + - sysinv-conductor + - fm-mgr + - barbican-api diff --git 
a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/migrate_keystone_passwords.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/migrate_keystone_passwords.yml new file mode 100644 index 000000000..81b07d4eb --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/tasks/migrate_keystone_passwords.yml @@ -0,0 +1,148 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# SUB-TASKS DESCRIPTION: +# These tasks update keystone passwords in keystone database, secure hieradata, +# relevant service config files as well as service passwords in keyring. +# + + +- name: Get current time before update password + # TODO(yuxing) The 'openstack user set' may fail to update password in + # keystone database. Further, if we move it in a shell script and invoke the + # script remotely, the ansible will fail to access the remote keystone + # endpoint for authentication. Need to remove this workaround if we can + # address either of these two problems. + shell: START_TIME=$(date +%s); echo $START_TIME + register: current_time_result + +- name: Update keystone passwords + # There's special characters in password, wrap the passwords with single quotes + shell: >- + source /etc/platform/openrc; + openstack user set {{ item.name }} --password $'{{ item.password }}'; + {{ validate_keystone_passwords_script }} {{ item.name }} {{ current_time_result.stdout }} + with_items: + - { name: 'sysinv', password: "{{ users['sysinv'] }}" } + - { name: 'patching', password: "{{ users['patching'] }}" } + - { name: 'smapi', password: "{{ users['smapi'] }}" } + - { name: 'mtce', password: "{{ users['mtce'] }}" } + - { name: 'dcmanager', password: "{{ users['dcmanager'] }}" } + - { name: 'barbican', password: "{{ users['barbican'] }}" } + register: migrate_keystone_password_result + until: migrate_keystone_password_result.rc == 0 + retries: 3 + delay: 20 + no_log: true + +- name: Update services' passwords in hieradata + lineinfile: + path: "/opt/platform/puppet/{{ software_version }}/hieradata/secure_static.yaml" + regexp: "{{ item.From }}" + line: "{{ item.To }}" + with_items: + - { From: "^dcmanager::api::keystone_password", + To: "dcmanager::api::keystone_password: !!python/unicode '{{ users['dcmanager'] }}'" } + - { From: "^dcmanager::keystone::auth::password", + To: "dcmanager::keystone::auth::password: !!python/unicode '{{ users['dcmanager'] }}'" } + - { From: "^dcorch::api_proxy::dcmanager_keystone_password", + To: "dcorch::api_proxy::dcmanager_keystone_password: !!python/unicode '{{ users['dcmanager'] }}'" } + - { From: "^patching::api::keystone_password", + To: "patching::api::keystone_password: !!python/unicode '{{ users['patching'] }}'" } + - { From: "^patching::keystone::auth::password", + To: "patching::keystone::auth::password: !!python/unicode '{{ users['patching'] }}'" } + - { From: "^patching::keystone::authtoken::password", + To: "patching::keystone::authtoken::password: !!python/unicode '{{ users['patching'] }}'" } + - { From: "^platform::mtce::params::auth_pw", + To: "platform::mtce::params::auth_pw: !!python/unicode '{{ users['mtce'] }}'" } + - { From: "^platform::smapi::params::keystone_password", + To: "platform::smapi::params::keystone_password: !!python/unicode '{{ users['smapi'] }}'" } + - { From: "^smapi::auth::auth_password", + To: "smapi::auth::auth_password: !!python/unicode '{{ users['smapi'] }}'" } + - { From: "^smapi::keystone::auth::password", + To: 
"smapi::keystone::auth::password: !!python/unicode '{{ users['smapi'] }}'" } + - { From: "^smapi::keystone::authtoken::password", + To: "smapi::keystone::authtoken::password: !!python/unicode '{{ users['smapi'] }}'" } + - { From: "^sysinv::api::keystone_password", + To: "sysinv::api::keystone_password: !!python/unicode '{{ users['sysinv'] }}'" } + - { From: "^sysinv::certmon::local_keystone_password", + To: "sysinv::certmon::local_keystone_password: !!python/unicode '{{ users['sysinv'] }}'" } + - { From: "^sysinv::keystone::auth::password", + To: "sysinv::keystone::auth::password: !!python/unicode '{{ users['sysinv'] }}'" } + - { From: "^barbican::keystone::auth::password", + To: "barbican::keystone::auth::password: !!python/unicode '{{ users['barbican'] }}'" } + - { From: "^barbican::keystone::authtoken::password", + To: "barbican::keystone::authtoken::password: !!python/unicode '{{ users['barbican'] }}'" } + no_log: true + +- name: Update sysinv config + lineinfile: + path: "{{ item }}" + regexp: "^password=" + line: "password={{ users['sysinv'] }}" + with_items: + - /etc/sysinv/sysinv.conf + - /etc/sysinv/cert-mon.conf + - /etc/sysinv/api-paste.ini + no_log: true + +- name: Update patching config + lineinfile: + path: "/etc/patching/patching.conf" + regexp: "^password=" + line: "password={{ users['patching'] }}" + no_log: true + +- name: Update barbican config + lineinfile: + path: "/etc/barbican/barbican.conf" + regexp: "^password=" + line: "password={{ users['barbican'] }}" + no_log: true + +- name: Temporarily allow write permission for sm-api config + file: + path: "/etc/sm-api/sm-api.conf" + mode: 0640 + +- name: Update smapi config + lineinfile: + path: "/etc/sm-api/sm-api.conf" + regexp: "^password=" + line: "password={{ users['smapi'] }}" + no_log: true + +- name: Restore the original permission of sm-api config + file: + path: "/etc/sm-api/sm-api.conf" + mode: 0400 + +- name: Update mtc config + lineinfile: + path: "/etc/mtc.ini" + regexp: "^keystone_auth_pw" + line: "keystone_auth_pw = {{ users['mtce'] }} ; mtce auth password" + no_log: true + +- name: Store service passwords in keyring + vars: + script_content: | + import keyring + import os + os.environ['XDG_DATA_HOME'] = "/opt/platform/.keyring/{{ software_version }}" + keyring.set_password("{{ item.username }}", "services", "{{ item.password }}") + del os.environ['XDG_DATA_HOME'] + shell: "{{ script_content }}" + with_items: + - { username: 'sysinv', password: "{{ users['sysinv'] }}" } + - { username: 'patching', password: "{{ users['patching'] }}" } + - { username: 'mtce', password: "{{ users['mtce'] }}" } + - { username: 'smapi', password: "{{ users['smapi'] }}" } + - { username: 'dcmanager', password: "{{ users['dcmanager'] }}" } + - { username: 'barbican', password: "{{ users['barbican'] }}" } + args: + executable: /usr/bin/python + no_log: true diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/vars/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/vars/main.yml new file mode 100644 index 000000000..bc4998a68 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-keystone-data/vars/main.yml @@ -0,0 +1,11 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +platform_path: /opt/platform +puppet_path: /opt/platform/puppet +validate_keystone_passwords_script: + "{{ role_path }}/files/validate_keystone_passwords.sh" diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/copy_central_registry_cert.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/copy_central_registry_cert.yml new file mode 100644 index 000000000..2fc9ee414 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/copy_central_registry_cert.yml @@ -0,0 +1,22 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# SUB-TASKS DESCRIPTION: +# These tasks are to copy the central registry's cert to the subcloud +# + +- name: Copy the central cloud registry certificate from the central cloud to the subcloud + copy: + src: "{{ docker_cert_dir }}/registry.local:9001/registry-cert.crt" + dest: "{{ docker_cert_dir }}/registry.central:9001/registry-cert.crt" + mode: preserve + +- name: Store the central certificate to the shared filesystem + copy: + src: "{{ docker_cert_dir }}/registry.central:9001/registry-cert.crt" + dest: "{{ config_permdir }}/registry.central/registry-cert.crt" + remote_src: yes + mode: preserve diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/main.yml new file mode 100644 index 000000000..6e28e9930 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/main.yml @@ -0,0 +1,58 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role updates the network configuration for rehoming on a subcloud +# + +- name: Check existing host route in controller-0 + # Check if the static route to the new system controller subnet + # is already configured. If it is, e.g. when recovering from a disaster + # in which the system controllers were reinstalled, the static routes + # should not be added again. 
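+  # Descriptive note (added by the editor): the shell command below greps the controller-0 host-route list for the network address of system_controller_subnet (ipaddr(0) yields the subnet's zeroth address in CIDR form and split('/')[0] drops the prefix length); an empty stdout means no matching route exists yet and the following task adds it.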
+ shell: >- + source /etc/platform/openrc; system host-route-list controller-0 | + awk '/{{ (system_controller_subnet | ipaddr(0)).split('/')[0] }}/' + register: existing_route_controller_0 + +- name: Add static route from controller-0 to system controller + shell: >- + source /etc/platform/openrc; system host-route-add controller-0 + {{ controller_0_management_interface_result.stdout_lines[0] }} + {{ (system_controller_subnet | ipaddr(0)).split('/')[0] }} + {{ system_controller_subnet | ipaddr('prefix') }} + {{ management_gateway_address }} + when: existing_route_controller_0.stdout | length == 0 + +- block: + - name: Check existing host route in controller-1 + shell: >- + source /etc/platform/openrc; system host-route-list controller-1 | + awk '/{{ (system_controller_subnet | ipaddr(0)).split('/')[0] }}/' + register: existing_route_controller_1 + + - name: Add static route from controller-1 to system controller + shell: >- + source /etc/platform/openrc; system host-route-add controller-1 + {{ controller_1_management_interface_result.stdout_lines[0] }} + {{ (system_controller_subnet | ipaddr(0)).split('/')[0] }} + {{ system_controller_subnet | ipaddr('prefix') }} + {{ management_gateway_address }} + when: existing_route_controller_1.stdout | length == 0 + + when: system_mode != 'simplex' + +- name: Update system controller's subnets + import_tasks: update_system_controller_subnet.yml + +- name: Modify docker no_proxy if it exists + shell: >- + source /etc/platform/openrc; system service-parameter-modify + docker proxy no_proxy={{ docker_no_proxy_combined | join(',') }} + when: existing_docker_no_proxy_result.stdout | length > 0 + +- name: Copy central registry cert + import_tasks: copy_central_registry_cert.yml diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/update_system_controller_subnet.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/update_system_controller_subnet.yml new file mode 100644 index 000000000..9c91f96c7 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/tasks/update_system_controller_subnet.yml @@ -0,0 +1,62 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +# SUB-TASKS DESCRIPTION: +# These tasks are to update system controller's subnets +# + +- name: Get previous system controller OAM network uuid + shell: >- + source /etc/platform/openrc; + system addrpool-list | awk '/system-controller-oam-subnet/{ print$2 }' + register: system_controller_oam_subnet_uuid + +- name: Delete previous system controller OAM subnet + shell: >- + source /etc/platform/openrc; system addrpool-delete + {{ system_controller_oam_subnet_uuid.stdout }} + when: system_controller_oam_subnet_uuid.stdout | length > 0 + +- name: Add system controller OAM subnet + shell: >- + source /etc/platform/openrc; system addrpool-add system-controller-oam-subnet + {{ (system_controller_oam_subnet | ipaddr(0)).split('/')[0] }} + {{ system_controller_oam_subnet | ipaddr('prefix') }} + --floating-address {{ system_controller_oam_floating_address }} | awk '/uuid/{ print$4 }' + register: add_system_controller_oam_subnet_uuid + +- name: Add new system-controller OAM network + shell: >- + source /etc/platform/openrc; + system network-add system-controller-oam system-controller-oam false + {{ add_system_controller_oam_subnet_uuid.stdout }} + +- name: Get previous system controller network uuid + shell: >- + source /etc/platform/openrc; + system addrpool-list | awk '/system-controller-subnet/{ print$2 }' + register: system_controller_subnet_uuid + +- name: Delete previous system controller subnet + shell: >- + source /etc/platform/openrc; system addrpool-delete + {{ system_controller_subnet_uuid.stdout }} + when: system_controller_subnet_uuid.stdout | length > 0 + +- name: Add system controller subnet + shell: >- + source /etc/platform/openrc; + system addrpool-add system-controller-subnet + {{ (system_controller_subnet | ipaddr(0)).split('/')[0] }} + {{ system_controller_subnet | ipaddr('prefix') }} + --floating-address {{ system_controller_floating_address }} | awk '/uuid/{ print$4 }' + register: add_system_controller_subnet_uuid + +- name: Add new system-controller network + shell: >- + source /etc/platform/openrc; + system network-add system-controller system-controller false + {{ add_system_controller_subnet_uuid.stdout }} diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/vars/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/vars/main.yml new file mode 100644 index 000000000..c8a9e6813 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-network-config/vars/main.yml @@ -0,0 +1,8 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +docker_cert_dir: /etc/docker/certs.d diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-sc-cert/tasks/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-sc-cert/tasks/main.yml new file mode 100644 index 000000000..e1bae1770 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-sc-cert/tasks/main.yml @@ -0,0 +1,45 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role updates the certificates for https-enabled admin endpoints on a subcloud +# + +- name: Get sc_adminep_ca_cert namespaces + shell: >- + kubectl --kubeconfig=/etc/kubernetes/admin.conf get namespaces | + awk '/{{ sc_adminep_ca_cert_ns }}/{print$1}' + register: get_sc_adminep_ca_cert_ns + +- name: Remove sc-cert namespace if it exists + command: >- + kubectl --kubeconfig=/etc/kubernetes/admin.conf delete ns + "{{ sc_adminep_ca_cert_ns }}" + when: get_sc_adminep_ca_cert_ns.stdout | length > 0 + +- name: Set up subcloud admin endpoints certificates + import_role: + name: common/setup-subcloud-adminep-certs + vars: + ansible_become: yes + +- name: Create dc_root_ca runtime class to pass to puppet + copy: + dest: "/tmp/dc_root_ca.yml" + content: | + classes: + - platform::config::dc_root_ca::runtime + - platform::haproxy::restart::runtime + +- name: Applying puppet runtime manifest + command: > + /usr/local/bin/puppet-manifest-apply.sh + {{ puppet_permdir }}/hieradata + {{ derived_network_params.controller_0_address }} + controller runtime /tmp/dc_root_ca.yml + register: dc_root_ca_apply_result + environment: + LC_ALL: "en_US.UTF-8" diff --git a/playbookconfig/src/playbooks/roles/rehome-subcloud/update-sc-cert/vars/main.yml b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-sc-cert/vars/main.yml new file mode 100644 index 000000000..c85257b18 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/rehome-subcloud/update-sc-cert/vars/main.yml @@ -0,0 +1,8 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +sc_adminep_ca_cert_ns: sc-cert diff --git a/playbookconfig/src/playbooks/roles/restore-dc-vault/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-dc-vault/prepare-env/tasks/main.yml new file mode 100644 index 000000000..6fee7a998 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/restore-dc-vault/prepare-env/tasks/main.yml @@ -0,0 +1,67 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role verifies if dc-vault restore should proceed +# +- name: Perform generic user input validation for restore + import_role: + name: backup-restore/validate-input + +- name: Perform generic B&R environment validation + import_role: + name: backup-restore/prepare-env + +- name: Check if restoring dc vault is already in progress + stat: + path: "{{ restore_dc_vault_in_progress_flag }}" + register: restore_dc_vault_in_progress_flag_file + +- name: Fail if restoring dc vault is already in progress + fail: + msg: "Restoring dc-vault is already in progress!" 
+ when: restore_dc_vault_in_progress_flag_file.stat.exists + +- name: Check if the system is a DC controller + command: >- + grep -i "distributed_cloud_role\s*=\s*systemcontroller" + {{ platform_conf_path }}/platform.conf + register: check_dc_controller + failed_when: false + +- name: Fail if controller is not DC controller + fail: + msg: "Controller must be a DC systemcontroller in order to restore the dc-vault" + when: check_dc_controller.rc != 0 + +- name: Check if controller is unlocked + shell: source /etc/platform/openrc ; system host-show $(cat /etc/hostname) --format value --column administrative + register: check_unlocked + +- name: Fail if controller is not unlocked + fail: + msg: "Controller must be in an unlocked state before restoring dc-vault" + when: '"unlocked" not in (check_unlocked.stdout | lower)' + +# Set the restore staging directory to scratch so it's a consistent +# behavior regardless of where the restore playbook is executed (locally vs remotely) +- name: Set restore staging directory to /scratch + set_fact: + target_backup_dir: /scratch + +- name: Transfer backup tarball to target if the file is off-box + include_role: + name: backup-restore/transfer-file + when: on_box_data|bool == false + +- name: Copy the backup tarball to {{ target_backup_dir }} if the file is already on-box + copy: + src: "{{ initial_backup_dir }}/{{ backup_filename }}" + dest: "{{ target_backup_dir }}" + remote_src: yes + when: on_box_data|bool == true + become: yes + become_user: root diff --git a/playbookconfig/src/playbooks/roles/restore-dc-vault/restore-dc-vault-directory/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-dc-vault/restore-dc-vault-directory/tasks/main.yml new file mode 100644 index 000000000..0c4f069a0 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/restore-dc-vault/restore-dc-vault-directory/tasks/main.yml @@ -0,0 +1,54 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +# ROLE DESCRIPTION: +# This role exists to restore the dc-vault directory +# It will run only if the node is an unlocked system controller +# +- block: + - name: Set dc-vault backup file path + set_fact: + dc_vault_backup_file: "{{ target_backup_dir }}/{{ backup_filename }}" + + - name: Check if dc-vault backup file exists + stat: + path: "{{ dc_vault_backup_file }}" + register: dc_vault_backup_file_result + + - name: Fail if dc-vault backup is missing + fail: + msg: "dc-vault backup file is missing" + when: not dc_vault_backup_file_result.stat.exists + + - name: 'Create {{ restore_dc_vault_in_progress_flag }} flag file' + file: + path: "{{ restore_dc_vault_in_progress_flag }}" + state: touch + + - name: Set dc-vault path in the archive + set_fact: + archive_dc_vault_permdir: "{{ dc_vault_permdir | regex_replace('^\\/', '') }}" + + - name: Look for dc-vault filesystem + shell: "tar -tf {{ dc_vault_backup_file }} | grep '{{ dc_vault_permdir|basename }}'" + args: + warn: false + failed_when: false + register: search_result + + - name: Restore dc-vault filesystem + command: >- + tar -C / --overwrite -xpf {{ dc_vault_backup_file }} + {{ archive_dc_vault_permdir }} + args: + warn: false + when: search_result.rc == 0 + + always: + - name: 'Remove the {{ restore_dc_vault_in_progress_flag }} file' + file: + path: "{{ restore_dc_vault_in_progress_flag }}" + state: absent diff --git a/playbookconfig/src/playbooks/roles/restore-dc-vault/restore-dc-vault-directory/vars/main.yml b/playbookconfig/src/playbooks/roles/restore-dc-vault/restore-dc-vault-directory/vars/main.yml new file mode 100644 index 000000000..652aa06d0 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/restore-dc-vault/restore-dc-vault-directory/vars/main.yml @@ -0,0 +1,9 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +platform_conf_path: /etc/platform +restore_dc_vault_in_progress_flag: "{{ platform_conf_path }}/.restore_dc_vault_in_progress_flag" +dc_vault_permdir: /opt/dc-vault diff --git a/playbookconfig/src/playbooks/roles/restore-openstack/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-openstack/prepare-env/tasks/main.yml index 4bc8ada83..ca3fdb6fc 100644 --- a/playbookconfig/src/playbooks/roles/restore-openstack/prepare-env/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/restore-openstack/prepare-env/tasks/main.yml @@ -1,21 +1,19 @@ --- # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # # ROLE DESCRIPTION: # This role verifies if platform restore should proceed # -- name: Fail if backup_filename is not defined or set - fail: - msg: "Mandatory configuration parameter backup_filename is not defined or set." - when: backup_filename is not defined or backup_filename is none +- name: Perform generic user input validation for restore + import_role: + name: backup-restore/validate-input -- name: Fail if initial_backup_dir is not defined or set - fail: - msg: "Mandatory configuration parameter initial_backup_dir is not defined or set." 
- when: initial_backup_dir is not defined or initial_backup_dir is none +- name: Perform generic B&R environment validation + import_role: + name: backup-restore/prepare-env - name: Check if restoring StarlingX Openstack is already in progress stat: @@ -70,12 +68,22 @@ msg: Application platform-integ-apps was not applied successfully during controller-0 unlock when: check_platform_integ.stdout not in ['applied', 'completed'] -- name: For remote play set target_backup_dir to {{ backup_dir }} +# Set the restore staging directory to scratch so it's a consistent +# behavior regardless of where the restore playbook is executed (locally vs remotely) +- name: Set restore staging directory to /scratch set_fact: - target_backup_dir: "{{ backup_dir }}" - when: inventory_hostname != "localhost" + target_backup_dir: /scratch -- name: For local play set target_backup_dir to initial_backup_dir - set_fact: - target_backup_dir: "{{ initial_backup_dir }}" - when: inventory_hostname == "localhost" +- name: Transfer backup tarball to target if the file is off-box + include_role: + name: backup-restore/transfer-file + when: on_box_data|bool == false + +- name: Copy the backup tarball to {{ target_backup_dir }} if the file is already on-box + copy: + src: "{{ initial_backup_dir }}/{{ backup_filename }}" + dest: "{{ target_backup_dir }}" + remote_src: yes + when: on_box_data|bool == true + become: yes + become_user: root diff --git a/playbookconfig/src/playbooks/roles/restore-platform/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-platform/prepare-env/tasks/main.yml index ae5483c1b..a26e50990 100644 --- a/playbookconfig/src/playbooks/roles/restore-platform/prepare-env/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/restore-platform/prepare-env/tasks/main.yml @@ -1,26 +1,34 @@ --- # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # # ROLE DESCRIPTION: -# This role performs following tasks: -# 1. Retrieve the override file from the backup tarball -# required for the controller bootstrap. -# 2. Verify if platform restore should proceed +# This role performs the following tasks: +# 1. Validate user input. +# 2. Verify that the target is in the right state for platform restore. +# 3. Transfer the backup tarball to the target if it is off-box, otherwise +# copy it to the designated staging directory (/scratch). +# 4. Extract the host override file from the tarball and load bootstrap +# parameters in memory. +# 5. Create restore_in_progress flag. # +# Note that due to Ansible mishandling of boolean values via extra-vars we are +# adding supplementary validation here +# See: https://github.com/ansible/ansible/issues/17193 +- name: Check for Ceph data wipe flag + fail: + msg: "wipe_ceph_osds is misconfigured. Valid value is either 'true' or 'false'" + when: (not wipe_ceph_osds | type_debug == 'bool') and + (wipe_ceph_osds != 'true') and + (wipe_ceph_osds != 'false') + +- name: Perform generic user input validation for restore + import_role: + name: backup-restore/validate-input + - block: - - name: Fail if backup_filename is not defined or set - fail: - msg: "Mandatory configuration parameter backup_filename is not defined or set." - when: backup_filename is not defined or backup_filename is none - - - name: Fail if initial_backup_dir is not defined or set - fail: - msg: "Mandatory configuration parameter initial_backup_dir is not defined or set." 
- when: initial_backup_dir is not defined or initial_backup_dir is none - - name: Look for override backup file in the backup tarball shell: "tar -tf {{ initial_backup_dir }}/{{ backup_filename }} | grep '_override_backup.yml'" args: @@ -28,57 +36,13 @@ failed_when: false register: search_result - # Note that due to Ansible mishandling of boolean values via extra-vars we are - # adding supplementary validation here. - # See: https://github.com/ansible/ansible/issues/17193 - - name: Check for Ceph data wipe flag - fail: - msg: "wipe_ceph_osds is misconfigured. Valid value is either 'true' or 'false'" - when: (not wipe_ceph_osds | type_debug == 'bool') and - (wipe_ceph_osds != 'true') and - (wipe_ceph_osds != 'false') - - - block: - - name: Extract override file from backup tarball - shell: >- - tar -C {{ override_files_dir }} -xf {{ initial_backup_dir }}/{{ backup_filename }} --transform='s,.*/,,' - {{ search_result.stdout_lines[0] }} - args: - warn: false - - - name: Prepare to rename override file - set_fact: - override_filename: restore_platform_overrides.yml - - - name: Rename override file for bootstrap - command: >- - mv -f {{ override_files_dir }}/{{ (search_result.stdout_lines[0] | basename) }} - {{ override_files_dir }}/{{ override_filename }} - - - name: Include override data - include_vars: "{{ override_files_dir }}/{{ override_filename }}" - - when: search_result.rc == 0 - - name: Fail if override file is missing fail: - msg: >- + msg: > Cannot find {{ initial_backup_dir }}/{{ backup_filename }} or the override file is missing in the backup tarball! when: search_result.rc != 0 - - delegate_to: localhost - -- name: Set archive parameters for bootstrap - set_fact: - archive_puppet_permdir: "opt/platform/puppet/{{ software_version }}/hieradata" - archive_ssh_config_permdir: "opt/platform/config/{{ software_version }}/ssh_config" - archive_config_permdir: "opt/platform/config/{{ software_version }}" - archive_keyring_permdir: "opt/platform/.keyring/{{ software_version }}/python_keyring" - archive_branding_permdir: "opt/platform/config/{{ software_version }}/branding" - archive_banner_permdir: "opt/platform/config/{{ software_version }}/banner/etc" - archive_deploy_permdir: "opt/platform/deploy/{{ software_version }}" - archive_helm_permdir: "opt/platform/helm/{{ software_version }}" + delegate_to: "{{ inspection_target }}" - block: # Bail if the host has been unlocked @@ -102,24 +66,104 @@ msg: " Restore is already in progress!" 
when: restore_in_progress.stat.exists - - name: Create {{ restore_in_progress_flag }} flag file - file: - path: "{{ restore_in_progress_flag }}" - state: touch - - - name: For remote play set target_backup_dir to /scratch + # Set the restore staging directory to scratch so it's a consistent + # behavior regardless of where the restore playbook is executed (locally vs remotely) + - name: Set restore staging directory to /scratch set_fact: target_backup_dir: /scratch - when: inventory_hostname != "localhost" - - - name: For local play set target_backup_dir to initial_backup_dir - set_fact: - target_backup_dir: "{{ initial_backup_dir }}" - when: inventory_hostname == "localhost" - name: Set restore file parameter set_fact: restore_data_file: "{{ target_backup_dir }}/{{ backup_filename }}" + - name: Transfer backup tarball to target if the file is off-box + include_role: + name: backup-restore/transfer-file + when: on_box_data|bool == false + + - name: Copy the backup tarball to {{ target_backup_dir }} if the file is already on-box + copy: + src: "{{ initial_backup_dir }}/{{ backup_filename }}" + dest: "{{ target_backup_dir }}" + remote_src: yes + when: on_box_data|bool == true + + - name: Extract override file from backup tarball + command: > + tar -C {{ target_backup_dir }} -xf {{ target_backup_dir }}/{{ backup_filename }} --transform='s,.*/,,' + {{ search_result.stdout_lines[0] }} + register: extract_result + failed_when: false + args: + warn: false + + - name: Fail if host override file cannot be extracted from the backup tar file + fail: + msg: > + Failed to extract the host override file {{ search_result.stdout_lines[0] }}. + If the backup tar file is not on the host that is being restored, please either + transfer the tar file to the target or set on_box_data parameter to false and + try again. 
+ when: extract_result.rc != 0 + + - name: Prepare to rename override file + set_fact: + override_filename: restore_platform_overrides.yml + + - name: Rename override file for bootstrap + command: > + mv -f {{ target_backup_dir }}/{{ (search_result.stdout_lines[0] | basename) }} + {{ target_backup_dir }}/{{ override_filename }} + + - name: Load override data (local execution) + include_vars: "{{ target_backup_dir }}/{{ override_filename }}" + when: inventory_hostname == "localhost" + + - block: + - name: Create a temporary file + tempfile: + state: file + register: host_override_tmp_file + + - name: Fetch override file to a temp file on the Ansible control host + fetch: + src: "{{ target_backup_dir }}/{{ override_filename }}" + dest: "{{ host_override_tmp_file.path }}" + flat: yes + + - name: Load override data (remote execution) + include_vars: "{{ host_override_tmp_file.path }}" + + - name: Remove override temp file on target + file: + path: "{{ host_override_tmp_file.path }}" + state: absent + delegate_to: "{{ inventory_hostname }}" + + - name: Remove override temp file on Ansible control host + file: + path: "{{ host_override_tmp_file.path }}" + state: absent + delegate_to: localhost + # In case the ansible user does not have sudo privilege on the Ansible control machine + become: no + when: inventory_hostname != "localhost" + + - name: Create {{ restore_in_progress_flag }} flag file + file: + path: "{{ restore_in_progress_flag }}" + state: touch + become: yes become_user: root + +- name: Set archive parameters for bootstrap + set_fact: + archive_puppet_permdir: "opt/platform/puppet/{{ software_version }}/hieradata" + archive_ssh_config_permdir: "opt/platform/config/{{ software_version }}/ssh_config" + archive_config_permdir: "opt/platform/config/{{ software_version }}" + archive_keyring_permdir: "opt/platform/.keyring/{{ software_version }}/python_keyring" + archive_branding_permdir: "opt/platform/config/{{ software_version }}/branding" + archive_banner_permdir: "opt/platform/config/{{ software_version }}/banner/etc" + archive_deploy_permdir: "opt/platform/deploy/{{ software_version }}" + archive_helm_permdir: "opt/platform/helm/{{ software_version }}" diff --git a/playbookconfig/src/playbooks/roles/restore-platform/restore-more-data/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-platform/restore-more-data/tasks/main.yml index 63b940979..442de550e 100644 --- a/playbookconfig/src/playbooks/roles/restore-platform/restore-more-data/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/restore-platform/restore-more-data/tasks/main.yml @@ -11,7 +11,6 @@ - name: Set parameters for archive paths set_fact: - archive_dc_vault_permdir: "{{ dc_vault_permdir | regex_replace('^\\/', '') }}" archive_platform_conf_path: "{{ platform_conf_path | regex_replace('^\\/', '') }}" archive_ceph_backend_flag: "{{ ceph_backend_flag | regex_replace('^\\/', '') }}" @@ -82,28 +81,10 @@ when: migrate_platform_data is not defined or not migrate_platform_data -# For subcloud, the DC root CA certificate and admin endpoint certificate need to be -# restored from backup into /opt/platform/config directory so that -# /etc/init.d/controller_configure will install them when controllers unlock. +# For a subcloud, the DC root CA certificate needs to be restored from backup +# into the /opt/platform/config directory; it will be installed on the controllers +# when they are unlocked. 
- block: - # Restore admin endpoint certificate for DC if it exists in backup - - name: Check if admin endpoint certficate exists in backup config permdir (opt/platform/config) - command: >- - tar -tf {{ restore_data_file }} '{{ archive_config_permdir }}/admin-ep-cert.pem' - register: check_admin_cert - failed_when: false - args: - warn: false - - - name: Restore admin endpoint certificate into config permdir (/opt/platform/config/...) - command: >- - tar -C {{ config_permdir }} -xpf {{ restore_data_file}} - --overwrite --transform='s,.*/,,' '{{ archive_config_permdir }}/admin-ep-cert.pem' - args: - warn: false - when: check_admin_cert.rc is defined and - check_admin_cert.rc == 0 - # Restore admin endpoint root CA certificate for DC if it exists in backup - name: Check if admin endpoint root CA certficate exists in backup config permdir (opt/platform/config) command: >- @@ -294,21 +275,6 @@ when: ceph_backend.rc == 0 -- name: Look for dc-vault filesystem - shell: "tar -tf {{ restore_data_file }} | grep '{{ dc_vault_permdir|basename }}'" - args: - warn: false - failed_when: false - register: search_result - -- name: Restore dc-vault filesystem - command: >- - tar -C / --overwrite -xpf {{ restore_data_file }} - {{ archive_dc_vault_permdir }} - args: - warn: false - when: search_result.rc == 0 - - name: Look for deploy files shell: "tar -tf {{ restore_data_file }} | grep {{ archive_deploy_permdir }}" @@ -388,6 +354,7 @@ - "{{ staging_dir }}/postgres/keystone.postgreSql.data" - "{{ staging_dir }}/postgres/fm.postgreSql.data" - "{{ staging_dir }}/postgres/barbican.postgreSql.data" + - "{{ staging_dir }}/postgres/helmv2.postgreSql.data" - name: Restore postgres db for DC systemcontroller shell: "psql -f {{ item }} {{ (item|basename).split('.')[0] }}" @@ -493,74 +460,6 @@ guide for next step. when: check_online.stdout != "online" - # Resize drbd-backed partitions - - block: - - name: Get DRBD-synced partitions - shell: source /etc/platform/openrc; system controllerfs-list --format yaml - register: controllerfs_partitions_output - - # First extend the logical volumes - - name: Resize logical volumes (except database, dc-vault) - command: "lvextend -L{{ item.size }}G /dev/cgts-vg/{{ item.logical_volume }}" - failed_when: false - with_items: "{{ controllerfs_partitions_output.stdout | from_yaml }}" - when: item.name != "database" and item.name != "{{ dc_vault_permdir|basename }}" - register: lvextend_output - - # In cases where we try to resize an LV to the size it already is, lvextend - # will throw an error, but it's not an issue so we just ignore that error. - - name: Fail if resize of logical volumes fail - fail: - msg: "{{ item.item }} failed for the following reason: {{ item.stderr }}" - when: item.rc is defined and item.rc != 0 and - item.stderr is not search('matches existing size') - with_items: "{{ lvextend_output.results }}" - - # The database LV is twice the size that is stored in sysinv (in order to support - # upgrades), so we resize it seperately. 
- - name: Resize database logical volume - command: "lvextend -L{{ (item.size*2) }}G /dev/cgts-vg/{{ item.logical_volume }}" - failed_when: false - with_items: "{{ controllerfs_partitions_output.stdout | from_yaml }}" - when: item.name == "database" - register: lvextend_database_output - - - name: Fail if resize of database logical volume fails - fail: - msg: "{{ item.item }} failed for the following reason: {{ item.stderr }}" - when: item.rc is defined and item.rc != 0 and - item.stderr is not search('matches existing size') - with_items: "{{ lvextend_database_output.results }}" - - - name: Resize DRBD resources - command: "drbdadm -- --assume-peer-has-space resize all" - - # Resize the filesystem on top of DRBD resources. - # The information about which /dev/drbd corresponds to each LV is hard-coded - # in puppet and is not available in sysinv, so we provide a static list of devices - # here as well. - # Keep this list in sync with the device names specified in the stx-puppet repo at: - # puppet-manifests/src/modules/platform/manifests/drbd.pp - # NOTE: Only devices present in the "system controllerfs-list" command output - # need to be kept in sync. Filesystem that we don't allow resizing for - # (for example rabbitmq) or those that don't use the controllerfs - # command (for example cephmon) don't need to be kept in sync. - - name: Resize DRBD filesystems - command: "resize2fs {{ item }}" - register: resize2fs_output - failed_when: false - with_items: - - /dev/drbd0 # postgres - - /dev/drbd2 # platform - - /dev/drbd5 # extension - - /dev/drbd7 # etcd - - /dev/drbd8 # docker-distribution - - - name: Fail if resize of DRBD filesystems fail - fail: - msg: "{{ item.item }} failed for the following reason: {{ item.stderr }}" - when: item.rc != 0 and item.stderr is not search('Nothing to do!') - with_items: "{{ resize2fs_output.results }}" # Restore ceph-mon data if ceph backend is configured - block: - block: @@ -622,7 +521,7 @@ when: check_online.stdout == "online" # Remove temporary staging area used by the copy module - - name: Remove {{ ansible_remote_tmp }} directory + - name: Remove temporary directory used to stage restore data file: path: "{{ ansible_remote_tmp }}" state: absent diff --git a/playbookconfig/src/playbooks/roles/restore-platform/restore-sw-patches/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-platform/restore-sw-patches/tasks/main.yml index fb6b8f08c..62f30f305 100644 --- a/playbookconfig/src/playbooks/roles/restore-platform/restore-sw-patches/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/restore-platform/restore-sw-patches/tasks/main.yml @@ -1,6 +1,6 @@ --- # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -10,6 +10,10 @@ # - install patches if the system was patched # - reboot the controller if it is required by the patching # +- name: Restore patches unless specified + set_fact: + skip_patches_restore: "{{ skip_patches_restore | default(false) }}" + - block: - name: Set fact for patching staging dir set_fact: @@ -230,5 +234,6 @@ re-run the playbook to restore the platform after reboot is completed. 
+ when: required_reboot + when: skip_patches_restore|bool == false become: yes become_user: root diff --git a/playbookconfig/src/playbooks/roles/restore-user-images/prepare-env/tasks/main.yml b/playbookconfig/src/playbooks/roles/restore-user-images/prepare-env/tasks/main.yml index 2bd2f4fb7..26c699bd1 100644 --- a/playbookconfig/src/playbooks/roles/restore-user-images/prepare-env/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/restore-user-images/prepare-env/tasks/main.yml @@ -1,21 +1,21 @@ --- # -# Copyright (c) 2020 Wind River Systems, Inc. +# Copyright (c) 2020-2021 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # # ROLE DESCRIPTION: -# This role verifies if user images restore should proceed -# -- name: Fail if backup_filename is not defined or set - fail: - msg: "Mandatory configuration parameter backup_filename is not defined or set." - when: backup_filename is not defined or backup_filename is none +# This role performs the following tasks: +# 1. Validate user input. +# 2. Verify that the target is in the right state for images restore. +# 3. Transfer the backup tarball to the target if it is off-box, otherwise +# copy it to the designated staging directory (/scratch). +# 4. Create restore_in_progress flag. -- name: Fail if initial_backup_dir is not defined or set - fail: - msg: "Mandatory configuration parameter initial_backup_dir is not defined or set." - when: initial_backup_dir is not defined or initial_backup_dir is none +- name: Perform generic user input validation for restore + import_role: + name: backup-restore/validate-input + become: no - name: Check if restoring user images is already in progress stat: @@ -27,17 +27,32 @@ msg: "Restoring user images is already in progress!" when: restore_user_images_in_progress.stat.exists -- name: Create {{ restore_user_images_in_progress_flag }} flag file - file: - path: "{{ restore_user_images_in_progress_flag }}" - state: touch - -- name: For remote play set target_backup_dir to {{ backup_dir }} +# Set the restore staging directory to scratch so it's a consistent +# behavior regardless of where the restore playbook is executed (locally vs remotely) +- name: Set restore staging directory to /scratch set_fact: - target_backup_dir: "{{ backup_dir }}" - when: inventory_hostname != "localhost" + target_backup_dir: /scratch -- name: For local play set target_backup_dir to initial_backup_dir - set_fact: - target_backup_dir: "{{ initial_backup_dir }}" - when: inventory_hostname == "localhost" +- name: Perform generic B&R environment validation + import_role: + name: backup-restore/prepare-env + +- name: Transfer backup tarball to target if the file is off-box + include_role: + name: backup-restore/transfer-file + when: on_box_data|bool == false + +- block: + - name: Copy the backup tarball to {{ target_backup_dir }} if the file is already on-box + copy: + src: "{{ initial_backup_dir }}/{{ backup_filename }}" + dest: "{{ target_backup_dir }}" + remote_src: yes + when: on_box_data|bool == true + + - name: Create {{ restore_user_images_in_progress_flag }} flag file + file: + path: "{{ restore_user_images_in_progress_flag }}" + state: touch + become: yes + become_user: root diff --git a/playbookconfig/src/playbooks/upgrade-k8s-armada-helm.yml b/playbookconfig/src/playbooks/upgrade-k8s-armada-helm.yml new file mode 100644 index 000000000..2f00a0994 --- /dev/null +++ b/playbookconfig/src/playbooks/upgrade-k8s-armada-helm.yml @@ -0,0 +1,40 @@ +--- +# +# Copyright (c) 2020 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +- hosts: all + + vars: + mode: "upgrade_k8s_armada_helm" + overrides_file: "/tmp/upgrade-armada-overrides.yaml" + playbooks_root: "/usr/share/ansible/stx-ansible/playbooks" + + tasks: + - name: Create the upgrade overrides file + command: > + /usr/bin/sysinv-utils --debug --config-file /etc/sysinv/sysinv.conf create-host-overrides "{{ overrides_file }}" + become: yes + become_user: sysinv + environment: + HOME: /home/sysadmin + + - name: Read the overrides variables + include_vars: + file: "{{ overrides_file }}" + + - name: Set controller floating address + set_fact: + controller_floating_address: "{{ management_floating_address }}" + + - name: Set helm repo facts + set_fact: + helm_repo_name_apps: "starlingx" + helm_repo_name_platform: "stx-platform" + helm_repo_port: 8080 + + - name: Upgrade to containerized Armada using Helm v3 + import_role: + name: common/armada-helm diff --git a/playbookconfig/src/playbooks/upgrade-k8s-networking.yml b/playbookconfig/src/playbooks/upgrade-k8s-networking.yml index c966f04e7..0d6ec3e13 100644 --- a/playbookconfig/src/playbooks/upgrade-k8s-networking.yml +++ b/playbookconfig/src/playbooks/upgrade-k8s-networking.yml @@ -67,3 +67,11 @@ - name: Update SRIOV device plugin command: "kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f /etc/kubernetes/update_sriovdp-daemonset.yaml" + + - name: Create Coredns config file + template: + src: "roles/bootstrap/bringup-essential-services/templates/coredns.yaml.j2" + dest: /etc/kubernetes/update_coredns.yaml + + - name: Update Coredns config map + command: "kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f /etc/kubernetes/update_coredns.yaml" diff --git a/playbookconfig/src/playbooks/validate_host.yml b/playbookconfig/src/playbooks/validate_host.yml new file mode 100644 index 000000000..ed26e058e --- /dev/null +++ b/playbookconfig/src/playbooks/validate_host.yml @@ -0,0 +1,31 @@ +--- +# +# Copyright (c) 2021 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +- hosts: all + # If gathering facts is really necessary, run setup task AFTER host connectivity + # check block in prepare-env role. + gather_facts: no + + vars_files: + - vars/common/main.yml + + pre_tasks: + - set_fact: + override_files_dir: "{{ lookup('env', 'HOME') }}" + check_load: "{{ check_load | default(true) }}" + check_bootstrap_address: "{{ check_bootstrap_address | default(true) }}" + check_patches: "{{ check_patches | default(true) }}" + password_change: "{{ password_change | default(true) }}" + password_change_responses: + yes/no: 'yes' + sysadmin*: 'sysadmin' + \(current\) UNIX password: 'sysadmin' + (?i)New password: "{{ ansible_ssh_pass }}" + (?i)Retype new password: "{{ ansible_ssh_pass }}" + + roles: + - common/prepare-env + - common/validate-target diff --git a/playbookconfig/src/playbooks/vars/common/main.yml b/playbookconfig/src/playbooks/vars/common/main.yml index fb9adb813..8c8e8bf47 100644 --- a/playbookconfig/src/playbooks/vars/common/main.yml +++ b/playbookconfig/src/playbooks/vars/common/main.yml @@ -1,3 +1,3 @@ --- supported_release_versions: - - "20.12" + - "21.05"
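Editor's note: the "Store service passwords in keyring" task in migrate_keystone_passwords.yml embeds a small Python snippet per service. The standalone sketch below shows the same call pattern outside Ansible, for reference only. The XDG_DATA_HOME location and the keyring.set_password() argument order mirror the playbook; the version string and password values are illustrative placeholders, not real values.

#!/usr/bin/python
# Minimal sketch of the keyring update performed by migrate_keystone_passwords.yml.
# Assumption: the keyring backend stores its data under XDG_DATA_HOME, which is
# what the playbook relies on when it points that variable at /opt/platform/.keyring.
import os
import keyring

SOFTWARE_VERSION = "21.05"              # placeholder for {{ software_version }}
SERVICE_PASSWORDS = {                   # placeholders for {{ users[...] }}
    "sysinv": "example-sysinv-password",
    "patching": "example-patching-password",
    "mtce": "example-mtce-password",
    "smapi": "example-smapi-password",
    "dcmanager": "example-dcmanager-password",
    "barbican": "example-barbican-password",
}

os.environ["XDG_DATA_HOME"] = "/opt/platform/.keyring/" + SOFTWARE_VERSION
try:
    for service, password in SERVICE_PASSWORDS.items():
        # Same argument order as the playbook: the service user is the keyring
        # "service name" and the literal account name "services" is the username.
        keyring.set_password(service, "services", password)
finally:
    # The playbook removes the variable after use; mirror that here.
    del os.environ["XDG_DATA_HOME"]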