Merge "etcd: Add support for more scenarios"

2023-11-29 11:13:18 +00:00 · 2023-11-29 11:13:18 +00:00 · e971d0c795
commit e971d0c795
parent 65c2196975 ed3b27cc92
18 changed files with 471 additions and 19 deletions
--- a/ansible/roles/etcd/defaults/main.yml
+++ b/ansible/roles/etcd/defaults/main.yml
@ -5,15 +5,18 @@ etcd_services:
    group: etcd
    enabled: true
    environment:
+      # KOLLA_BOOTSTRAP_STATUS is used to indicate whether the container should
+      # be recreated. Otherwise the kolla_container task doesn't detect that the
+      # environment has changed if variables are removed.
+      KOLLA_BOOTSTRAP_STATUS: "bootstrap completed"
+      ETCDCTL_API: "3"
+      ETCDCTL_ENDPOINTS: "{{ etcd_client_internal_endpoint }}"
+      ETCDCTL_WRITE_OUT: "json"
      ETCD_DATA_DIR: "/var/lib/etcd"
      ETCD_NAME: "{{ ansible_facts.hostname }}"
      ETCD_ADVERTISE_CLIENT_URLS: "{{ etcd_client_internal_endpoint }}"
      ETCD_LISTEN_CLIENT_URLS: "{{ etcd_client_internal_endpoint }}"
-      ETCD_INITIAL_ADVERTISE_PEER_URLS: "{{ etcd_peer_internal_endpoint }}"
      ETCD_LISTEN_PEER_URLS: "{{ etcd_peer_internal_endpoint }}"
-      ETCD_INITIAL_CLUSTER_TOKEN: "{{ etcd_cluster_token }}"
-      ETCD_INITIAL_CLUSTER: "{% for host in groups['etcd'] %}{{ hostvars[host].ansible_facts.hostname }}={{ etcd_protocol }}://{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ etcd_peer_port }}{% if not loop.last %},{% endif %}{% endfor %}"
-      ETCD_INITIAL_CLUSTER_STATE: "new"
      ETCD_OUT_FILE: "/var/log/kolla/etcd/etcd.log"
      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
      ETCD_CERT_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-cert.pem{% endif %}"
@ -52,3 +55,8 @@ etcd_extra_volumes: "{{ default_extra_volumes }}"
 ############
 etcd_client_internal_endpoint: "{{ etcd_protocol }}://{{ api_interface_address | put_address_in_context('url') }}:{{ etcd_client_port }}"
 etcd_peer_internal_endpoint: "{{ etcd_protocol }}://{{ api_interface_address | put_address_in_context('url') }}:{{ etcd_peer_port }}"
+
+###################
+# Managing members
+###################
+etcd_remove_deleted_members: "no"
--- a/ansible/roles/etcd/handlers/main.yml
+++ b/ansible/roles/etcd/handlers/main.yml
@ -1,16 +1,59 @@
 ---
- name: Restart etcd container
-  vars:
-    service_name: "etcd"
-    service: "{{ etcd_services[service_name] }}"
-  become: true
-  kolla_container:
-    action: "recreate_or_restart_container"
-    common_options: "{{ docker_common_options }}"
-    name: "{{ service.container_name }}"
-    image: "{{ service.image }}"
-    environment: "{{ service.environment }}"
-    volumes: "{{ service.volumes }}"
-    dimensions: "{{ service.dimensions }}"
+- name: Bootstrap etcd on new cluster  # first start of a brand-new cluster
+  include_tasks: 'bootstrap_cluster.yml'
  when:
    - kolla_action != "config"
+  listen:
+    - Bootstrap etcd cluster  # notified from bootstrap.yml when no cluster exists yet
+
+- name: Look up the cluster leader  # sets etcd_cluster_leader and the etcd_is_leader_* groups
+  include_tasks: 'lookup_leader.yml'
+  when:
+    - kolla_action != "config"
+  listen:  # subscribed to every etcd topic; the restart/removal handlers use its result
+    - Restart etcd container
+    - Bootstrap etcd services
+    - Bootstrap etcd cluster
+    - Check for deleted members
+
+- name: Bootstrap etcd on new services  # joins new hosts to an existing cluster
+  include_tasks: 'bootstrap_services.yml'
+  when:  # only hosts placed in etcd_had_volume_False by lookup_cluster.yml
+    - groups.etcd_had_volume_False is defined
+    - inventory_hostname in groups.etcd_had_volume_False
+    - kolla_action != "config"
+  listen:
+    - Bootstrap etcd services
+
+- name: Rolling restart of etcd non-leaders  # one include per loop item
+  include_tasks: 'restart_services.yml'
+  when:
+    - inventory_hostname not in (groups.etcd_is_leader_True | default([]))
+    - groups.etcd.index(inventory_hostname) % 4 == item  # host position in the etcd group picks its batch
+    - kolla_action != "config"
+  listen:
+    - Restart etcd container
+    - Bootstrap etcd services
+    - Bootstrap etcd cluster
+  loop:  # four batches, matching the modulo 4 in the condition
+    - 0
+    - 1
+    - 2
+    - 3
+
+- name: Restart etcd leader  # defined after the non-leader restart handler
+  include_tasks: 'restart_services.yml'
+  when:
+    - inventory_hostname in (groups.etcd_is_leader_True | default([]))  # no such group means no host matches
+    - kolla_action != "config"
+  listen:
+    - Restart etcd container
+    - Bootstrap etcd services
+    - Bootstrap etcd cluster
+
+- name: Remove deleted members  # drops cluster members that are not in the inventory
+  include_tasks: 'remove_deleted_members.yml'
+  when:
+    - kolla_action != "config"
+  listen:
+    - Check for deleted members  # notified from lookup_cluster.yml when etcd_remove_deleted_members is enabled
--- a/ansible/roles/etcd/tasks/bootstrap.yml
+++ b/ansible/roles/etcd/tasks/bootstrap.yml
@ -0,0 +1,25 @@
+---
+- import_tasks: lookup_cluster.yml  # provides etcd_cluster_exists and the etcd_had_volume_* groups
+
+# NOTE(jan.gutter): The following two tasks set facts that aren't really used.
+# Their only purpose is to trigger the handlers for bootstrapping:
+# If no etcd data volumes exist, bootstrap a new initial cluster.
+# If some volumes exist, add the new nodes to an existing cluster.
+
+- name: Determine whether a new cluster needs bootstrapping  # runs only when no cluster exists
+  set_fact:
+    etcd_bootstrap_cluster: "{% for host in groups['etcd'] %}{{ hostvars[host].ansible_facts.hostname }}={{ etcd_protocol }}://{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ etcd_peer_port }}{% if not loop.last %},{% endif %}{% endfor %}"
+  when: not (etcd_cluster_exists | bool)
+  changed_when: not (etcd_cluster_exists | bool)  # same test as 'when', so a run always reports changed
+  notify: Bootstrap etcd cluster  # handler topic defined in handlers/main.yml
+
+- name: Determine when new services need bootstrapping  # cluster exists and some hosts are new
+  set_fact:
+    etcd_bootstrap_services: "{% for host in groups['etcd_had_volume_False'] %}{{ hostvars[host].ansible_facts.hostname }}={{ etcd_protocol }}://{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ etcd_peer_port }}{% if not loop.last %},{% endif %}{% endfor %}"
+  when:
+    - etcd_cluster_exists | bool
+    - groups.etcd_had_volume_False is defined  # group exists only if some host is in it
+  changed_when:  # same tests as 'when', so a run always reports changed
+    - etcd_cluster_exists | bool
+    - groups.etcd_had_volume_False is defined
+  notify: Bootstrap etcd services  # handler topic defined in handlers/main.yml
--- a/ansible/roles/etcd/tasks/bootstrap_cluster.yml
+++ b/ansible/roles/etcd/tasks/bootstrap_cluster.yml
@ -0,0 +1,60 @@
+---
+- name: Bootstrapping etcd cluster  # starts etcd with new-cluster bootstrap settings
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  become: true
+  kolla_container:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    environment:  # spelled out in full here rather than taken from service.environment
+      KOLLA_BOOTSTRAP_STATUS: "bootstrap cluster"  # role default is "bootstrap completed"
+      ETCD_INITIAL_CLUSTER_STATE: "new"
+      ETCD_INITIAL_ADVERTISE_PEER_URLS: "{{ etcd_peer_internal_endpoint }}"
+      ETCD_INITIAL_CLUSTER_TOKEN: "{{ etcd_cluster_token }}"
+      ETCD_INITIAL_CLUSTER: "{% for host in groups['etcd'] %}{{ hostvars[host].ansible_facts.hostname }}={{ etcd_protocol }}://{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ etcd_peer_port }}{% if not loop.last %},{% endif %}{% endfor %}"
+      ETCDCTL_API: "3"
+      ETCDCTL_ENDPOINTS: "{{ etcd_client_internal_endpoint }}"
+      ETCDCTL_WRITE_OUT: "json"
+      ETCD_DATA_DIR: "/var/lib/etcd"
+      ETCD_NAME: "{{ ansible_facts.hostname }}"  # same hostname used as the member name in ETCD_INITIAL_CLUSTER
+      ETCD_ADVERTISE_CLIENT_URLS: "{{ etcd_client_internal_endpoint }}"
+      ETCD_LISTEN_CLIENT_URLS: "{{ etcd_client_internal_endpoint }}"
+      ETCD_LISTEN_PEER_URLS: "{{ etcd_peer_internal_endpoint }}"
+      ETCD_OUT_FILE: "/var/log/kolla/etcd/etcd.log"
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      ETCD_CERT_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-cert.pem{% endif %}"  # TLS paths render empty when TLS is off
+      ETCD_KEY_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-key.pem{% endif %}"
+      ETCD_PEER_CERT_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-cert.pem{% endif %}"
+      ETCD_PEER_KEY_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-key.pem{% endif %}"
+    image: "{{ service.image }}"
+    name: "{{ service.container_name }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+
+- name: Wait for etcd service port liveness  # checks the client port on the API address
+  wait_for:
+    host: "{{ api_interface_address }}"
+    port: "{{ etcd_client_port }}"
+    connect_timeout: 1
+    timeout: 60
+  register: check_etcd_port
+  until: check_etcd_port is success
+  retries: 10  # retried with a 6 second delay between attempts
+  delay: 6
+
+- name: Wait for etcd endpoints to be healthy  # polls etcdctl inside the etcd container
+  become: true
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  command: >-
+    {{ kolla_container_engine }} exec {{ service.container_name }}
+    etcdctl endpoint health
+  changed_when: false  # read-only check
+  register: result
+  until:  # command must succeed and the first JSON entry must report health
+    - result is success
+    - ((result.stdout | from_json | first)['health'] | default(False) | bool)
+  retries: 10
+  delay: 6
--- a/ansible/roles/etcd/tasks/bootstrap_services.yml
+++ b/ansible/roles/etcd/tasks/bootstrap_services.yml
@ -0,0 +1,55 @@
+---
+- name: Add new member to etcd cluster  # registers this host's name and peer URL
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  become: true
+  command: >-
+    {{ kolla_container_engine }} exec {{ service.container_name }}
+    etcdctl member add {{ ansible_facts.hostname }}
+    --peer-urls={{ etcd_protocol }}://{{ 'api' | kolla_address(inventory_hostname) | put_address_in_context('url') }}:{{ etcd_peer_port }}
+  delegate_to: "{{ etcd_cluster_leader | default(groups[service.group][0]) }}"  # run on the leader, else the first host in the service group
+
+- name: Bootstrapping etcd containers  # starts etcd with existing-cluster bootstrap settings
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  become: true
+  kolla_container:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    environment:  # spelled out in full here rather than taken from service.environment
+      KOLLA_BOOTSTRAP_STATUS: "bootstrap service"  # role default is "bootstrap completed"
+      ETCD_INITIAL_CLUSTER_STATE: "existing"  # peer list below: hosts in etcd_had_volume_True plus this host
+      ETCD_INITIAL_ADVERTISE_PEER_URLS: "{{ etcd_peer_internal_endpoint }}"
+      ETCD_INITIAL_CLUSTER_TOKEN: "{{ etcd_cluster_token }}"
+      ETCD_INITIAL_CLUSTER: "{% for host in groups['etcd_had_volume_True'] %}{{ hostvars[host].ansible_facts.hostname }}={{ etcd_protocol }}://{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ etcd_peer_port }},{% endfor %}{{ ansible_facts.hostname }}={{ etcd_protocol }}://{{ 'api' | kolla_address(inventory_hostname) | put_address_in_context('url') }}:{{ etcd_peer_port }}"
+      ETCDCTL_API: "3"
+      ETCDCTL_ENDPOINTS: "{{ etcd_client_internal_endpoint }}"
+      ETCDCTL_WRITE_OUT: "json"
+      ETCD_DATA_DIR: "/var/lib/etcd"
+      ETCD_NAME: "{{ ansible_facts.hostname }}"  # same name passed to etcdctl member add in this file
+      ETCD_ADVERTISE_CLIENT_URLS: "{{ etcd_client_internal_endpoint }}"
+      ETCD_LISTEN_CLIENT_URLS: "{{ etcd_client_internal_endpoint }}"
+      ETCD_LISTEN_PEER_URLS: "{{ etcd_peer_internal_endpoint }}"
+      ETCD_OUT_FILE: "/var/log/kolla/etcd/etcd.log"
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      ETCD_CERT_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-cert.pem{% endif %}"  # TLS paths render empty when TLS is off
+      ETCD_KEY_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-key.pem{% endif %}"
+      ETCD_PEER_CERT_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-cert.pem{% endif %}"
+      ETCD_PEER_KEY_FILE: "{% if etcd_enable_tls | bool %}/etc/etcd/certs/etcd-key.pem{% endif %}"
+    image: "{{ service.image }}"
+    name: "{{ service.container_name }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+
+- name: Wait for etcd service port liveness  # checks the client port on the API address
+  wait_for:
+    host: "{{ api_interface_address }}"
+    port: "{{ etcd_client_port }}"
+    connect_timeout: 1
+    timeout: 60
+  register: check_etcd_client_port
+  until: check_etcd_client_port is success
+  retries: 10  # retried with a 6 second delay between attempts
+  delay: 6
--- a/ansible/roles/etcd/tasks/deploy.yml
+++ b/ansible/roles/etcd/tasks/deploy.yml
@ -3,5 +3,7 @@

 - import_tasks: check-containers.yml

+- import_tasks: bootstrap.yml
+
 - name: Flush handlers
  meta: flush_handlers
--- a/ansible/roles/etcd/tasks/lookup_cluster.yml
+++ b/ansible/roles/etcd/tasks/lookup_cluster.yml
@ -0,0 +1,26 @@
+---
+- name: Ensure etcd volume  # create_volume action for the kolla_etcd volume
+  become: true
+  kolla_container:
+    action: "create_volume"
+    common_options: "{{ docker_common_options }}"
+    name: "kolla_etcd"
+  register: etcd_volume  # its changed status drives the group_by task in this file
+
+# NOTE(jan.gutter): If the play is interrupted before properly bootstrapping,
+# we will incorrectly assume that an etcd cluster exists. This likely requires
+# manual intervention to unwedge. If a volume exists we must assume there's
+# data on it.
+
+- name: Divide hosts by their etcd volume availability
+  group_by:  # yields etcd_had_volume_True (task unchanged) or etcd_had_volume_False (task changed)
+    key: etcd_had_volume_{{ etcd_volume is not changed }}
+  changed_when: false
+
+- name: Establish whether the cluster has already existed
+  set_fact:
+    etcd_cluster_exists: "{{ groups.etcd_had_volume_True is defined }}"  # true once any host is in that group
+  changed_when:  # reports changed, and so notifies, only when removal is enabled and the group exists
+    - etcd_remove_deleted_members | bool
+    - groups.etcd_had_volume_True is defined
+  notify: Check for deleted members  # handler topic defined in handlers/main.yml
--- a/ansible/roles/etcd/tasks/lookup_leader.yml
+++ b/ansible/roles/etcd/tasks/lookup_leader.yml
@ -0,0 +1,41 @@
+---
+# NOTE(jan.gutter): These tasks assume a cluster is running
+- name: Check for the etcd leader  # runs etcdctl endpoint status in the etcd container
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  become: true
+  # NOTE(jan.gutter): We need to set the ETCD environment vars here to
+  # handle an upgrade scenario from older etcd containers. These can be
+  # removed once the new workflow has been in place for a cycle or two.
+  command: >-
+    {{ kolla_container_engine }} exec
+    -e ETCDCTL_API=3
+    -e ETCDCTL_ENDPOINTS="{{ etcd_client_internal_endpoint }}"
+    -e ETCDCTL_WRITE_OUT="json"
+    {{ service.container_name }}
+    etcdctl endpoint status
+  changed_when: false  # read-only query
+  when:  # skipped on hosts outside etcd_had_volume_True
+    - inventory_hostname in (groups.etcd_had_volume_True | default([]))
+  register: etcd_endpoint_status_result  # stdout is parsed as JSON by the next task in this file
+
+- name: Divide hosts by their etcd leader status
+  vars:  # the fallbacks '' and 'none' differ, so a host with no status output never compares equal
+    etcd_endpoint_status: >-
+      {{ etcd_endpoint_status_result.stdout | default('[]') | from_json }}
+    etcd_member_id: >-
+      {{ etcd_endpoint_status[0]['Status']['header']['member_id']
+      | default('') }}
+    etcd_leader_id: >-
+      {{ etcd_endpoint_status[0]['Status']['leader']
+      | default('none') }}
+  group_by:  # yields etcd_is_leader_True or etcd_is_leader_False
+    key: etcd_is_leader_{{ etcd_member_id == etcd_leader_id }}
+  changed_when: false
+
+- name: Set the etcd cluster leader
+  set_fact:
+    etcd_cluster_leader: "{{ groups.etcd_is_leader_True | sort | first }}"  # sorted first entry, so the pick is stable
+  when: groups.etcd_is_leader_True is defined  # fact stays unset when no host reported itself as leader
+  changed_when: false
--- a/ansible/roles/etcd/tasks/remove_deleted_members.yml
+++ b/ansible/roles/etcd/tasks/remove_deleted_members.yml
@ -0,0 +1,39 @@
+---
+- name: List the etcd members  # output is parsed with from_json by the removal task in this file
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  become: true
+  command: >-
+    {{ kolla_container_engine }} exec {{ service.container_name }}
+    etcdctl member list
+  changed_when: false  # read-only query
+  run_once: true
+  delegate_to: "{{ etcd_cluster_leader | default(groups[service.group][0]) }}"  # leader, else the first host in the service group
+  register: etcd_member_list_result
+
+- name: Remove deleted members from the etcd cluster  # one etcdctl member remove per stale member
+  vars:  # inventory hostnames, member names missing from the inventory, and a name-to-ID map
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+    etcd_members_from_inventory: >-
+      {{ groups['etcd']
+      | map('extract', hostvars, 'ansible_facts')
+      | map(attribute='hostname')
+      | list }}
+    etcd_deleted_members: >-
+      {{ etcd_member_list_result.stdout | from_json
+      | json_query('members[].name')
+      | difference(etcd_members_from_inventory) }}
+    etcd_member_id: >-
+      {{ etcd_member_list_result.stdout | from_json
+      | json_query('members[].{key: name, value: ID}') | items2dict }}
+  become: true
+  command: >-
+    {{ kolla_container_engine }} exec {{ service.container_name }}
+    etcdctl member remove {{ '%x' % etcd_member_id[etcd_deleted_member] }}
+  run_once: true
+  delegate_to: "{{ etcd_cluster_leader | default(groups[service.group][0]) }}"  # leader, else the first host in the service group
+  loop: "{{ etcd_deleted_members }}"  # an empty list means nothing is removed
+  loop_control:
+    loop_var: etcd_deleted_member  # name used in the command above to look up the member ID
--- a/ansible/roles/etcd/tasks/restart_services.yml
+++ b/ansible/roles/etcd/tasks/restart_services.yml
@ -0,0 +1,25 @@
+---
+- name: Restart etcd container  # included by the restart handlers in handlers/main.yml
+  vars:
+    service_name: "etcd"
+    service: "{{ etcd_services[service_name] }}"
+  become: true
+  kolla_container:
+    action: "recreate_or_restart_container"
+    common_options: "{{ docker_common_options }}"
+    name: "{{ service.container_name }}"
+    image: "{{ service.image }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+    environment: "{{ service.environment }}"  # role default environment; this change removes its ETCD_INITIAL_* entries
+
+- name: Wait for etcd service port liveness  # checks the client port on the API address
+  wait_for:
+    host: "{{ api_interface_address }}"
+    port: "{{ etcd_client_port }}"
+    connect_timeout: 1
+    timeout: 60
+  register: check_etcd_client_port
+  until: check_etcd_client_port is success
+  retries: 10  # retried with a 6 second delay between attempts
+  delay: 6
--- a/ansible/site.yml
+++ b/ansible/site.yml
@ -458,7 +458,6 @@
  hosts:
    - etcd
    - '&enable_etcd_True'
-  serial: '{{ kolla_serial|default("0") }}'
  roles:
    - { role: etcd,
        tags: etcd }
--- a/doc/source/admin/etcd.rst
+++ b/doc/source/admin/etcd.rst
@ -0,0 +1,97 @@
+.. _etcd:
+
+=============
+Managing etcd
+=============
+
+Kolla Ansible can manage the lifecycle of an etcd cluster and supports the
+following operations:
+
+* Bootstrapping a clean multi-node etcd cluster
+* Adding a new member to the etcd cluster
+* Optionally, automatically removing a deleted node from the etcd cluster
+
+It is highly recommended to read the operator documentation for the version
+of etcd deployed in the cluster.
+
+.. note::
+
+   Once an etcd cluster is bootstrapped, the etcd service takes most of its
+   configuration from the etcd database itself.
+
+   This pattern is very different from many other Kolla Ansible services, and
+   is a source of confusion for operators unfamiliar with etcd.
+
+Cluster vs Node Bootstrapping
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Kolla Ansible distinguishes between two forms of bootstrapping in an etcd
+cluster:
+
+* Bootstrapping multiple nodes at the same time to bring up a new cluster
+* Bootstrapping a single node to add it to an existing cluster
+
+These correspond to the `new` and `existing` values for
+`ETCD_INITIAL_CLUSTER_STATE` in the upstream documentation. Once an etcd node
+has completed bootstrap, the bootstrap configuration is ignored, even if it is
+changed.
+
+Kolla Ansible will decide to perform a new cluster bootstrap if it detects that
+there is no existing data on the etcd nodes. Otherwise it assumes that there is
+a healthy etcd cluster and it will add a new node to it.
+
+Forcing Bootstrapping
+~~~~~~~~~~~~~~~~~~~~~
+
+Kolla Ansible looks for the `kolla_etcd` volume on the node. If this volume
+is available, it assumes that the bootstrap process has run on the node and
+the volume contains the required config.
+
+However, if the process was interrupted (externally, or by an error), this
+volume might be misconfigured. In order to prevent data loss, manual
+intervention is required.
+
+Before retriggering bootstrap make sure that there is no valuable data on the
+volume. This could be because the node was not in service, or because the data
+is persisted elsewhere.
+
+To retrigger a bootstrap (for either the cluster, or for a single node),
+remove the volume from all affected nodes:
+
+``docker volume rm kolla_etcd``
+
+Rerunning Kolla Ansible will then trigger the appropriate workflow and either
+a blank cluster will be bootstrapped, or an empty member will be added to
+the existing cluster.
+
+Manual Commands
+~~~~~~~~~~~~~~~
+
+In order to manage etcd manually, the ``etcdctl`` command can be used inside
+the `etcd` container. This command has been set up with the appropriate
+environment variables for integrating with automation.
+
+``etcdctl`` is configured with json output by default:
+
+.. code-block:: console
+
+   # list cluster members in a human-readable table
+   docker exec -it etcd etcdctl -w table member list
+
+Removing Dead Nodes
+~~~~~~~~~~~~~~~~~~~
+
+If ``globals.yml`` has the value ``etcd_remove_deleted_members: "yes"`` then
+etcd nodes that are not in the inventory will be removed from the etcd cluster.
+
+Any errors in the inventory can therefore cause unintended removal.
+
+To manually remove a dead node from the etcd cluster, use the following
+commands:
+
+.. code-block:: console
+
+   # list cluster members and identify dead member
+   docker exec -it etcd etcdctl -w table member list
+   # remove dead member
+   docker exec -it etcd etcdctl member remove MEMBER_ID_IN_HEX
--- a/doc/source/admin/index.rst
+++ b/doc/source/admin/index.rst
@ -9,5 +9,6 @@ Admin Guides
   tls
   acme
   mariadb-backup-and-restore
+   etcd
   production-architecture-guide
   deployment-philosophy
--- a/doc/source/user/adding-and-removing-hosts.rst
+++ b/doc/source/user/adding-and-removing-hosts.rst
@ -173,6 +173,14 @@ For each host, clean up its services:

 .. _removing-existing-compute-nodes:

+If the node is also running the `etcd` service, set
+``etcd_remove_deleted_members: "yes"`` in `globals.yml` to automatically
+remove nodes from the `etcd` cluster that have been removed from the inventory.
+
+Alternatively, the `etcd` members can be removed manually with `etcdctl`. For
+more details, please consult the `runtime reconfiguration` documentation
+section for the version of etcd in operation.
+
 Removing existing compute nodes
 -------------------------------

--- a/etc/kolla/globals.yml
+++ b/etc/kolla/globals.yml
@ -903,3 +903,10 @@ workaround_ansible_issue_8743: yes

 # this is UDP port
 #hacluster_corosync_port: 5405
+
+##############
+# etcd options
+##############
+# If `etcd_remove_deleted_members` is enabled, Kolla Ansible will automatically
+# remove etcd members from the cluster that are no longer in the inventory.
+#etcd_remove_deleted_members: "no"
--- a/releasenotes/notes/managed-etcd-72fb2d3fbba516d9.yaml
+++ b/releasenotes/notes/managed-etcd-72fb2d3fbba516d9.yaml
@ -0,0 +1,12 @@
+---
+fixes:
+  - |
+    The `etcd` tooling has been updated to better serialize restarts when
+    applying configuration or updates. Previously minor outages might occur
+    since all services were restarted in the same task.
+  - |
+    The `etcd` tooling has been updated to handle adding and removing nodes.
+    Previously this was an undocumented manual process and required creating
+    service containers. Operators can refer to the
+    `etcd admin guide <https://docs.openstack.org/kolla-ansible/latest/admin/etcd.html>`__
+    for more details.
--- a/tests/setup_gate.sh
+++ b/tests/setup_gate.sh
@ -52,7 +52,7 @@ function prepare_images {
    fi

    if [[ $SCENARIO == "cephadm" ]]; then
-        GATE_IMAGES+=",^cinder"
+        GATE_IMAGES+=",^cinder,^etcd"
    fi

    if [[ $SCENARIO == "cells" ]]; then
--- a/tests/templates/globals-default.j2
+++ b/tests/templates/globals-default.j2
@ -77,6 +77,7 @@ openstack_tag_suffix: "{{ docker_image_tag_suffix }}"
 enable_zun: "yes"
 enable_kuryr: "yes"
 enable_etcd: "yes"
+etcd_remove_deleted_members: "yes"
 docker_configure_for_zun: "yes"
 containerd_configure_for_zun: "yes"
 enable_cinder: "yes"
@ -132,6 +133,9 @@ enable_cinder: "yes"
 glance_backend_ceph: "yes"
 cinder_backend_ceph: "yes"
 nova_backend_ceph: "yes"
+# Internal etcd
+enable_etcd: "yes"
+etcd_remove_deleted_members: "yes"

 enable_ceph_rgw: "yes"
 ceph_rgw_hosts: