Merge "Add container engine migration scenario"

This commit is contained in:
Zuul
2025-05-08 22:25:04 +00:00
committed by Gerrit Code Review
21 changed files with 574 additions and 1 deletions

View File

@ -0,0 +1,21 @@
---
# Entry playbook for "kolla-ansible migrate-container-engine".
# Migrates a deployed OpenStack between Docker and Podman.

- import_playbook: gather-facts.yml
  vars:
    kolla_action: migrate-container-engine

# Run the container-engine-migration role on all hosts: sanity checks,
# install the target engine, move volumes, remove the old engine.
- name: Prepare the Openstack migration
  hosts: baremetal
  gather_facts: false
  roles:
    - role: container-engine-migration

# Redeploy every service under the new engine. migration_flag is a
# deliberately empty sentinel: consumers only test "migration_flag is
# defined" (e.g. the mariadb role) to detect a migration-driven deploy.
- name: Migrate the Openstack
  vars:
    kolla_action: deploy
    migration_flag:
  import_playbook: site.yml
  tags: redeploy

- name: Verify the migration
  import_playbook: post-deploy.yml
  tags: post

View File

@ -0,0 +1,19 @@
---
# Per-engine metadata used by the container-engine-migration role:
# volume directory, its permissions, and the OS packages to (un)install.
engine_data:
  docker:
    name: docker
    volumes_dir: "{{ docker_volumes_path }}"
    volumes_mode: "0701"
    packages:
      - docker-ce
  podman:
    name: podman
    volumes_dir: "{{ podman_volumes_path }}"
    volumes_mode: "0700"
    packages:
      - podman

# NOTE(mhiner): default is used to pass ansible syntax check,
# which otherwise complains that kolla_container_engine is undefined.
target_engine: "{{ kolla_container_engine | default('podman') }}"
# The currently deployed engine is assumed to be the opposite of the target.
current_engine: "{{ 'docker' if kolla_container_engine == 'podman' else 'podman' }}"

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash

# Remove every Kolla-managed container, image, volume and systemd unit file
# for the given container engine (docker or podman). Used during container
# engine migration to wipe the engine being migrated away from.
#
# $1 - container engine to clean up (defaults to docker)

set -o xtrace
set -o errexit
set -o pipefail

# default to docker if not specified
engine="${1:-docker}"
shift 1

if ! [[ "$engine" =~ ^(docker|podman)$ ]]; then
    echo "Invalid container engine: ${engine}"
    exit 1
fi

echo "Using container engine: $engine"

echo "Looking for containers, images and volumes to remove..."
containers_to_kill=$(sudo $engine ps --filter "label=kolla_version" --format "{{.Names}}" -a)
images_to_remove=$(sudo $engine images --filter "label=kolla_version" -q -a)
# Initialize so the "Removing volumes" check below is well-defined even when
# no containers were found (the assignment inside the if would be skipped).
volumes_to_remove=""

if [ -n "${containers_to_kill}" ]; then
    volumes_to_remove=$(sudo $engine inspect -f '{{range .Mounts}} {{printf "%s\n" .Name }}{{end}}' ${containers_to_kill} | \
        grep -E -v '(^\s*$)' | sort | uniq)

    echo "Stopping containers..."
    for container in ${containers_to_kill}; do
        sudo systemctl disable kolla-${container}-container.service
        sudo systemctl stop kolla-${container}-container.service
        # is-failed exits non-zero for healthy units; the && keeps that
        # from tripping errexit while still gating reset-failed.
        sudo systemctl is-failed kolla-${container}-container.service && \
            sudo systemctl reset-failed kolla-${container}-container.service
    done

    echo "Removing containers..."
    (sudo $engine rm -f ${containers_to_kill} 2>&1) > /dev/null
fi

echo "Removing any remaining unit files..."
sudo rm -f /etc/systemd/system/kolla-*-container.service
sudo systemctl daemon-reload

echo "Removing images..."
if [ -n "${images_to_remove}" ]; then
    (sudo $engine rmi -f ${images_to_remove} 2>&1) > /dev/null
fi

echo "Removing volumes..."
if [ -n "${volumes_to_remove}" ]; then
    (sudo $engine volume rm -f ${volumes_to_remove} 2>&1) > /dev/null
fi

echo "Performing final cleanup of any remaining unused resources..."
sudo $engine system prune -a -f

echo "All cleaned up!"

View File

@ -0,0 +1,15 @@
---
# Abort early when the system already runs the engine we would migrate to.

- name: Gather package facts
  package_facts:
  when:
    - "'packages' not in ansible_facts"

- name: Fail when attempting to migrate to current container engine
  fail:
    # NOTE: block-scalar header fixed to ">-" — the original "-|" is not a
    # scalar indicator, so the message would have been parsed as a plain
    # scalar starting with the literal characters "-|".
    msg: >-
      Cannot migrate to the same container engine that is currently used.
      Packages from target_engine were found on the system.
      If the migration failed in the stage of deploying the target engine,
      you can resume it by running "kolla-ansible deploy"
  when: "engine_data[target_engine].packages is subset(ansible_facts.packages)"
  any_errors_fatal: true

View File

@ -0,0 +1,8 @@
---
# Install the engine being migrated to (and its Python SDK) using the
# corresponding openstack.kolla roles.

# NOTE: templated role names quoted for consistency with
# uninstall-current-engine.yml and general Ansible quoting convention.
- name: Install target container engine
  include_role:
    name: "openstack.kolla.{{ target_engine }}"

- name: Install target container engine SDK
  include_role:
    name: "openstack.kolla.{{ target_engine }}_sdk"

View File

@ -0,0 +1,8 @@
---
# Ordered stages of the container engine migration; each stage must
# complete before the next is safe to run.
- import_tasks: check-migration.yml
- import_tasks: install-target-engine.yml
- import_tasks: migrate-volumes.yml
- import_tasks: uninstall-current-engine.yml

View File

@ -0,0 +1,69 @@
---
# Stop everything running under the current engine and move its named
# volumes into the target engine's volume directory.

- name: Get list of all containers
  become: true
  kolla_container_facts:
    action: get_containers_names
    container_engine: "{{ current_engine }}"
  register: containers

- import_tasks: ovs-cleanup.yml
  when:
    - "'openvswitch_vswitchd' in containers.container_names"
    - neutron_plugin_agent == 'openvswitch'

- name: Stop all containers
  become: true
  kolla_container:
    action: "stop_container"
    name: "{{ item }}"
    container_engine: "{{ current_engine }}"
  with_items: "{{ containers.container_names }}"

- name: Create directory for target container engine volumes
  become: true
  file:
    path: "{{ engine_data[target_engine].volumes_dir }}"
    state: directory
    mode: "{{ engine_data[target_engine].volumes_mode }}"

- name: Find current container engine volumes
  become: true
  find:
    paths: "{{ engine_data[current_engine].volumes_dir }}"
    file_type: directory
  register: container_volumes

# NOTE(r-krcek): The following block is workaround for problem where podman
# changes directory permissions after starting a container from any UID:GID
# to root after migration.
- block:
    - name: Pre-create volumes in target engine
      become: true
      kolla_container:
        action: "create_volume"
        name: "{{ item.path | basename }}"
        common_options: "{{ docker_common_options }}"
      with_items: "{{ container_volumes.files }}"

    - name: Move volumes to target_engine volume directory
      vars:
        target_path: "{{ engine_data[target_engine].volumes_dir }}/{{ item.path | basename }}"
        source_path: "{{ item.path }}/_data"
      become: true
      command: "mv -f {{ source_path }} {{ target_path }}"
      with_items: "{{ container_volumes.files }}"

    # NOTE: become added — these directories live under the engine's
    # volume path, which required privilege escalation even to list
    # (see the find task above), so removal needs it too.
    - name: Cleanup volumes
      become: true
      file:
        path: "{{ item.path }}"
        state: absent
      with_items: "{{ container_volumes.files }}"
  when: container_volumes.matched > 0

# NOTE(mhiner): this prevents RabbitMQ failing to establish connection
# to other instances in multinode deployment
- name: Remove old Mnesia tables
  become: true
  file:
    path: "{{ engine_data[target_engine].volumes_dir }}/rabbitmq/_data/mnesia"
    state: absent

View File

@ -0,0 +1,14 @@
---
# Tear down Open vSwitch state inside the still-running containers before
# they are stopped, so stale bridges do not survive the engine migration.
# Only imported when openvswitch_vswitchd is present (see migrate-volumes.yml).

- name: Remove OVS bridges from neutron
  become: true
  command: >
    {{ current_engine }} exec -u root neutron_openvswitch_agent neutron-ovs-cleanup
    --config-file /etc/neutron/neutron.conf
    --config-file /etc/neutron/plugins/ml2/openvswitch_agent.ini
    --ovs_all_ports

- name: Remove OVS bridges
  become: true
  command: >
    {{ current_engine }} exec openvswitch_vswitchd
    bash -c 'for br in `ovs-vsctl list-br`;do ovs-vsctl --if-exists del-br $br;done'

View File

@ -0,0 +1,20 @@
---
# TODO(r-krcek): This could be together with some other tasks
# replaced with the new destroy role when
# https://review.opendev.org/c/openstack/kolla-ansible/+/920714
# is merged one day

# Wipe all containers, images, volumes and unit files of the old engine
# (see files/ce-cleanup.sh), then remove its packages.
- name: Remove all containers and images
  become: true
  script: ce-cleanup.sh "{{ current_engine }}"

# NOTE(review): package_action=uninstall presumably switches the
# openstack.kolla engine/SDK roles into removal mode — confirm against
# those roles' defaults.
- name: Uninstall current container engine SDK
  include_role:
    name: "openstack.kolla.{{ current_engine }}_sdk"
  vars:
    package_action: "uninstall"

- name: Uninstall current container engine
  include_role:
    name: "openstack.kolla.{{ current_engine }}"
  vars:
    package_action: "uninstall"

View File

@ -3,4 +3,9 @@
- import_tasks: check-containers.yml
- import_tasks: recover_cluster.yml
vars:
mariadb_cluster_exists: true
when: migration_flag is defined
- import_tasks: bootstrap.yml

View File

@ -6,6 +6,7 @@
common_options: "{{ docker_common_options }}"
name: "mariadb"
register: mariadb_volume
when: migration_flag is not defined
- name: Divide hosts by their MariaDB volume availability
group_by:

View File

@ -314,3 +314,41 @@ To specify additional volumes for a single container, set
nova_libvirt_extra_volumes:
- "/etc/foo:/etc/foo"
Migrate container engine
~~~~~~~~~~~~~~~~~~~~~~~~
Kolla-Ansible supports two container engines - Docker and Podman.
It is possible to migrate deployed OpenStack between these two engines.
Migration is supported in both directions, meaning it is possible to
migrate from Docker to Podman as well as from Podman to Docker.
Before starting the migration, you have to change the value of
``kolla_container_engine`` in your ``/etc/kolla/globals.yml`` file to the new
container engine:
.. code-block:: yaml
# previous value was docker
kolla_container_engine: podman
Apart from this change, ``globals.yml`` should stay unchanged.
The same goes for any other config file, such as the inventory file.
.. warning::
Currently, rolling migration is not supported. You have to stop
all virtual machines running in your OpenStack. Otherwise,
migration will become unstable and can fail.
After editing ``globals.yml`` and stopping the virtual machines,
the migration can be started with the following command:
.. code-block:: console
kolla-ansible migrate-container-engine
.. warning::
During the migration, all the container volumes will be migrated
under the new container engine. Old container engine system packages will be
removed from the system and all their resources and data will be deleted.

View File

@ -87,6 +87,7 @@ workaround_ansible_issue_8743: yes
# Container engine
##################
# Set desired container engine to deploy on or migrate to
# Valid options are [ docker, podman ]
#kolla_container_engine: docker

View File

@ -491,3 +491,16 @@ class Check(KollaAnsibleMixin, Command):
playbooks = _choose_playbooks(parsed_args)
self.run_playbooks(parsed_args, playbooks, extra_vars=extra_vars)
class MigrateContainerEngine(KollaAnsibleMixin, Command):
    """Migrate the container engine of the deployed OpenStack"""

    def take_action(self, parsed_args):
        # Log message branding fixed to "OpenStack" to match the docstring.
        self.app.LOG.info(
            "Migrate the container engine of the deployed OpenStack"
        )
        # Runs ansible/migrate-container-engine.yml via the standard
        # playbook-selection helper.
        playbooks = _choose_playbooks(parsed_args, "migrate-container-engine")
        self.run_playbooks(parsed_args, playbooks)

View File

@ -73,3 +73,4 @@ kolla_ansible.cli =
mariadb-recovery = kolla_ansible.cli.commands:MariaDBRecovery
nova-libvirt-cleanup = kolla_ansible.cli.commands:NovaLibvirtCleanup
check = kolla_ansible.cli.commands:Check
migrate-container-engine = kolla_ansible.cli.commands:MigrateContainerEngine

View File

@ -3,6 +3,13 @@
vars:
logs_dir: "/tmp/logs"
tasks:
# TODO(mhiner): Currently only Docker to Podman migration is tested.
# If we want to test the other direction we have to rework this.
- name: Change container engine after the migration
set_fact:
container_engine: "podman"
when: scenario == "container-engine-migration"
- name: Print all facts
copy:
content: "{{ hostvars[inventory_hostname] | to_nice_json }}"

View File

@ -21,7 +21,7 @@
need_build_image: "{{ kolla_build_images | default(false) }}"
build_image_tag: "change_{{ zuul.change | default('none') }}"
openstack_core_enabled: "{{ openstack_core_enabled }}"
openstack_core_tested: "{{ scenario in ['core', 'cephadm', 'zun', 'cells', 'ovn', 'lets-encrypt'] }}"
openstack_core_tested: "{{ scenario in ['core', 'cephadm', 'zun', 'cells', 'ovn', 'lets-encrypt', 'container-engine-migration'] }}"
dashboard_enabled: "{{ openstack_core_enabled }}"
upper_constraints_file: "{{ ansible_env.HOME }}/src/opendev.org/openstack/requirements/upper-constraints.txt"
docker_image_tag_suffix: "{{ '-aarch64' if ansible_architecture == 'aarch64' else '' }}"
@ -566,6 +566,15 @@
chdir: "{{ kolla_ansible_src_dir }}"
when: scenario == "skyline-sso"
- name: Run test-container-engine-migration.sh script
script:
cmd: test-container-engine-migration.sh
executable: /bin/bash
chdir: "{{ kolla_ansible_src_dir }}"
environment:
KOLLA_ANSIBLE_VENV_PATH: "{{ kolla_ansible_venv_path }}"
IP_VERSION: "{{ 6 if address_family == 'ipv6' else 4 }}"
when: scenario == "container-engine-migration"
when: scenario != "bifrost"
# NOTE(yoctozepto): each host checks itself
@ -797,6 +806,11 @@
- hosts: all
any_errors_fatal: true
tasks:
- name: Change container engine after the migration
set_fact:
container_engine: podman
when: scenario == "container-engine-migration"
- name: Post-deploy/upgrade sanity checks
block:
- name: Run check-failure.sh script

View File

@ -0,0 +1,205 @@
#!/bin/bash
set -o xtrace
set -o errexit
set -o pipefail
# Enable unbuffered output
export PYTHONUNBUFFERED=1
VM_NAME="kolla_migration_test"
FIP_ADDR=
# Boot a CirrOS test server on demo-net and verify it reaches ACTIVE.
# $1 - server name
# Returns 1 (after dumping debug info) if the server is not ACTIVE.
function create_instance {
    local name=$1
    local server_create_extra
    if [[ $IP_VERSION -eq 6 ]]; then
        # NOTE(yoctozepto): CirrOS has no IPv6 metadata support, hence need to use configdrive
        server_create_extra="${server_create_extra} --config-drive True"
    fi
    openstack server create --wait --image cirros --flavor m1.tiny --key-name mykey --network demo-net ${server_create_extra} ${name}

    # If the status is not ACTIVE, print info and exit 1
    if [[ $(openstack server show ${name} -f value -c status) != "ACTIVE" ]]; then
        echo "FAILED: Instance is not active"
        openstack --debug server show ${name}
        return 1
    fi
}
# Start a stopped server and poll until it is ACTIVE.
# $1 - server name
# Polls up to 12 times, 10s apart; returns 1 with debug output on timeout.
function start_instance {
    local name=$1
    local attempts
    attempts=12
    openstack server start ${name}
    # substitution for missing --wait argument
    for i in $(seq 1 ${attempts}); do
        if [[ $(openstack server show ${name} -f value -c status) == "ACTIVE" ]]; then
            break
        elif [[ $i -eq ${attempts} ]]; then
            echo "Failed to start server after ${attempts} attempts"
            echo "Console log:"
            openstack console log show ${name} || true
            openstack --debug server show ${name}
            return 1
        else
            echo "Server is not yet started - retrying"
        fi
        sleep 10
    done
}
# Stop a running server and poll until it is SHUTOFF.
# $1 - server name
# Polls up to 12 times, 10s apart; returns 1 with debug output on timeout.
function stop_instance {
    local name=$1
    local attempts
    attempts=12
    openstack server stop ${name}
    # substitution for missing --wait argument
    for i in $(seq 1 ${attempts}); do
        if [[ $(openstack server show ${name} -f value -c status) == "SHUTOFF" ]]; then
            break
        elif [[ $i -eq ${attempts} ]]; then
            echo "Failed to stop server after ${attempts} attempts"
            echo "Console log:"
            openstack console log show ${name} || true
            openstack --debug server show ${name}
            return 1
        else
            echo "Server is not yet stopped - retrying"
        fi
        sleep 10
    done
}
# Delete the server and wait for the deletion to finish. $1 - server name
function delete_instance {
    local name=$1
    openstack server delete --wait ${name}
}

# Allocate a floating IP from the public1 network; prints its address.
function create_fip {
    openstack floating ip create public1 -f value -c floating_ip_address
}

# Release a floating IP. $1 - floating IP address
function delete_fip {
    local fip_addr=$1
    openstack floating ip delete ${fip_addr}
}

# Associate a floating IP with a server. $1 - server name, $2 - FIP address
function attach_fip {
    local instance_name=$1
    local fip_addr=$2
    openstack server add floating ip ${instance_name} ${fip_addr}
}

# Disassociate a floating IP from a server. $1 - server name, $2 - FIP address
function detach_fip {
    local instance_name=$1
    local fip_addr=$2
    openstack server remove floating ip ${instance_name} ${fip_addr}
}
# Verify network reachability: ping the address and run "hostname" over SSH.
# $1 - server name (for debug output only), $2 - address to reach
# Retries up to 12 times, 10s apart; returns 1 with debug output on failure.
function test_ssh {
    local instance_name=$1
    local fip_addr=$2
    local attempts
    attempts=12
    for i in $(seq 1 ${attempts}); do
        if ping -c1 -W1 ${fip_addr} && ssh -v -o BatchMode=yes -o StrictHostKeyChecking=no cirros@${fip_addr} hostname; then
            break
        elif [[ $i -eq ${attempts} ]]; then
            echo "Failed to access server via SSH after ${attempts} attempts"
            echo "Console log:"
            openstack console log show ${instance_name} || true
            openstack --debug server show ${instance_name}
            return 1
        else
            echo "Cannot access server - retrying"
        fi
        sleep 10
    done
}
# Create the pre-migration server, give it connectivity, verify SSH, then
# stop it so it can survive the container engine migration.
# Sets the global FIP_ADDR to the address used to reach the server.
function test_initial_vm {
    . /etc/kolla/admin-openrc.sh
    . ~/openstackclient-venv/bin/activate

    echo "TESTING: Initial server creation"
    create_instance ${VM_NAME}
    echo "SUCCESS: Initial server creation"

    if [[ $IP_VERSION -eq 4 ]]; then
        echo "TESTING: Floating ip allocation"
        FIP_ADDR=$(create_fip)
        attach_fip ${VM_NAME} ${FIP_ADDR}
        echo "SUCCESS: Floating ip allocation"
    else
        # NOTE(yoctozepto): Neutron has no IPv6 NAT support, hence no floating ip addresses
        # (removed an unused "local instance_addresses" declaration here)
        FIP_ADDR=$(openstack server show ${VM_NAME} -f yaml -c addresses|tail -1|cut -d- -f2)
    fi

    echo "TESTING: PING&SSH to initial instance"
    test_ssh ${VM_NAME} ${FIP_ADDR}
    echo "SUCCESS: PING&SSH to initial instance"

    echo "TESTING: Stopping the initial instance"
    stop_instance ${VM_NAME}
    echo "SUCCESS: Stopped the initial instance"
}
# After the migration: restart the pre-existing server, verify it is still
# reachable over the address in FIP_ADDR, then clean everything up.
function test_migrated_vm {
    . /etc/kolla/admin-openrc.sh
    . ~/openstackclient-venv/bin/activate

    echo "TESTING: Starting the migrated instance"
    start_instance ${VM_NAME}
    echo "SUCCESS: Started the migrated instance"

    echo "TESTING: PING&SSH to migrated instance"
    test_ssh ${VM_NAME} ${FIP_ADDR}
    echo "SUCCESS: PING&SSH to migrated instance"

    if [[ $IP_VERSION -eq 4 ]]; then
        echo "TESTING: Floating ip deallocation"
        detach_fip ${VM_NAME} ${FIP_ADDR}
        delete_fip ${FIP_ADDR}
        echo "SUCCESS: Floating ip deallocation"
    fi

    echo "TESTING: Server deletion"
    delete_instance ${VM_NAME}
    echo "SUCCESS: Server deletion"
}
# Flip kolla_container_engine to podman in globals.yml and run the
# kolla-ansible migrate-container-engine command against the CI inventory.
function migrate_container_engine {
    echo "MIGRATION: Migrating from Docker to Podman"
    sed -i "s/\(kolla_container_engine:\s*\).*/\1podman/" /etc/kolla/globals.yml
    RAW_INVENTORY=/etc/kolla/inventory
    source ${KOLLA_ANSIBLE_VENV_PATH}/bin/activate
    kolla-ansible migrate-container-engine -i ${RAW_INVENTORY} -vvv
    echo "SUCCESS: Migrated from Docker to Podman"
}
# Full test sequence: pre-migration VM, migration, post-migration checks.
# The caller redirects this function's output to the log file.
function test_container_engine_migration_logged {
    test_initial_vm
    migrate_container_engine
    test_migrated_vm
}
# Entry point: run the logged test sequence, report the outcome, and
# propagate the result code.
function test_container_engine_migration {
    echo "Testing container engine migration from Docker to Podman"
    local result=0
    # NOTE: capture the failure with "|| result=$?". With the previous bare
    # "result=$?", "set -o errexit" aborted the script on failure before the
    # reporting branch below could ever run.
    test_container_engine_migration_logged > /tmp/logs/ansible/test-container-engine-migration 2>&1 || result=$?
    if [[ $result != 0 ]]; then
        echo "Testing container engine migration failed. See ansible/test-container-engine-migration for details"
    else
        echo "Successfully tested container engine migration. See ansible/test-container-engine-migration for details"
    fi
    return $result
}

test_container_engine_migration

View File

@ -313,3 +313,14 @@
- ^tests/test-skyline-sso.sh
vars:
scenario: skyline-sso
- job:
name: kolla-ansible-container-engine-migration-base
parent: kolla-ansible-base
voting: false
files:
- ^ansible/migrate-container-engine.yml
- ^ansible/roles/container-engine-migration/
- ^tests/test-container-engine-migration.sh
vars:
scenario: container-engine-migration

View File

@ -529,3 +529,45 @@
nodeset: kolla-ansible-rocky9
vars:
base_distro: rocky
- job:
name: kolla-ansible-rocky9-container-engine-migration
parent: kolla-ansible-container-engine-migration-base
nodeset: kolla-ansible-rocky9
vars:
base_distro: rocky
- job:
name: kolla-ansible-rocky9-container-engine-migration-multinode
parent: kolla-ansible-container-engine-migration-base
nodeset: kolla-ansible-rocky9-multi
vars:
base_distro: rocky
- job:
name: kolla-ansible-ubuntu-container-engine-migration
parent: kolla-ansible-container-engine-migration-base
nodeset: kolla-ansible-noble
vars:
base_distro: ubuntu
- job:
name: kolla-ansible-ubuntu-container-engine-migration-multinode
parent: kolla-ansible-container-engine-migration-base
nodeset: kolla-ansible-noble-multi
vars:
base_distro: ubuntu
- job:
name: kolla-ansible-debian-container-engine-migration
parent: kolla-ansible-container-engine-migration-base
nodeset: kolla-ansible-debian-bookworm
vars:
base_distro: debian
- job:
name: kolla-ansible-debian-container-engine-migration-multinode
parent: kolla-ansible-container-engine-migration-base
nodeset: kolla-ansible-debian-bookworm-multi
vars:
base_distro: debian

View File

@ -83,6 +83,12 @@
- kolla-ansible-rocky9-skyline
- kolla-ansible-ubuntu-skyline-sso
- kolla-ansible-rocky9-skyline-sso
- kolla-ansible-rocky9-container-engine-migration
- kolla-ansible-rocky9-container-engine-migration-multinode
- kolla-ansible-ubuntu-container-engine-migration
- kolla-ansible-ubuntu-container-engine-migration-multinode
- kolla-ansible-debian-container-engine-migration
- kolla-ansible-debian-container-engine-migration-multinode
check-arm64:
jobs:
- kolla-ansible-debian-aarch64