From 42574b2b37b36eade05cd9259bc6061e20d263a3 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Thu, 16 Apr 2020 07:44:34 -0700 Subject: [PATCH] Run ZK from containers Migration plan: * add zk* to emergency * copy data files on each node to a safe place for DR backup * make a json data backup: zk-shell localhost:2181 --run-once 'mirror / json://!tmp!zookeeper-backup.json/' * manually run a modified playbook to set up the docker infra without starting containers * rolling restart; for each node: * stop zk * split data and log files and move them to new locations * remove zk packages * start zk containers * remove from emergency; land this change. Change-Id: Ic06c9cf9604402aa8eb4bb79238021c14c5d9563 --- .zuul.yaml | 37 +++++++++++++ inventory/groups.yaml | 2 - manifests/site.pp | 26 --------- modules.env | 1 - .../test-fixtures/results.yaml | 2 - playbooks/roles/zookeeper/README.rst | 1 + .../zookeeper-compose/docker-compose.yaml | 14 +++++ playbooks/roles/zookeeper/tasks/main.yaml | 55 +++++++++++++++++++ playbooks/roles/zookeeper/templates/myid.j2 | 1 + .../roles/zookeeper/templates/zoo.cfg.j2 | 28 ++++++++++ playbooks/service-zookeeper.yaml | 6 ++ testinfra/test_zookeeper.py | 26 +++++++++ 12 files changed, 168 insertions(+), 31 deletions(-) create mode 100644 playbooks/roles/zookeeper/README.rst create mode 100644 playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml create mode 100644 playbooks/roles/zookeeper/tasks/main.yaml create mode 100644 playbooks/roles/zookeeper/templates/myid.j2 create mode 100644 playbooks/roles/zookeeper/templates/zoo.cfg.j2 create mode 100644 playbooks/service-zookeeper.yaml create mode 100644 testinfra/test_zookeeper.py diff --git a/.zuul.yaml b/.zuul.yaml index 211086e4c4..3a9d4e0e6c 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -1357,6 +1357,27 @@ - playbooks/roles/jitsi-meet/ - testinfra/test_meetpad.py +- job: + name: system-config-run-zookeeper + parent: system-config-run + description: | + Run the playbook 
for the zookeeper cluster. + nodeset: + nodes: + - name: bridge.openstack.org + label: ubuntu-bionic + - name: zk01.opendev.org + label: ubuntu-bionic + vars: + run_playbooks: + - playbooks/service-zookeeper.yaml + files: + - playbooks/install-ansible.yaml + - playbooks/group_vars/zookeeper.yaml + - ^playbooks/host_vars/zk\d+\..* + - playbooks/roles/zookeeper/ + - testinfra/test_zookeeper.py + - job: name: system-config-run-zuul-preview parent: system-config-run @@ -1783,6 +1804,19 @@ - playbooks/roles/install-docker/.* - playbooks/roles/zuul-preview/.* +- job: + name: infra-prod-service-zookeeper + parent: infra-prod-service-base + description: Run service-zookeeper.yaml playbook + vars: + playbook_name: service-zookeeper.yaml + files: + - inventory/.* + - playbooks/group_vars/zookeeper.yaml + - ^playbooks/host_vars/zk\d+\..* + - playbooks/roles/install-docker/ + - playbooks/roles/zookeeper/ + - job: name: infra-prod-service-review parent: infra-prod-service-base @@ -2131,6 +2165,7 @@ - name: opendev-buildset-registry - name: system-config-build-image-gerrit-2.13 soft: true + - system-config-run-zookeeper - system-config-run-zuul-preview - system-config-run-letsencrypt - system-config-build-image-jinja-init: @@ -2195,6 +2230,7 @@ - name: opendev-buildset-registry - name: system-config-upload-image-gerrit-2.13 soft: true + - system-config-run-zookeeper - system-config-run-zuul-preview - system-config-run-letsencrypt - system-config-upload-image-jinja-init: @@ -2273,6 +2309,7 @@ - infra-prod-service-static - infra-prod-service-backup - infra-prod-service-registry + - infra-prod-service-zookeeper - infra-prod-service-zuul-preview - infra-prod-service-review - infra-prod-service-review-dev diff --git a/inventory/groups.yaml b/inventory/groups.yaml index 486c400315..7faf5915d0 100644 --- a/inventory/groups.yaml +++ b/inventory/groups.yaml @@ -159,7 +159,6 @@ groups: - ze[0-9]*.open*.org - zm[0-9]*.open*.org - zuul[0-9]*.open*.org - - zk[0-9]*.open*.org puppet4: - 
afs[0-9]*.open*.org - afsdb[0-9]*.open*.org @@ -202,7 +201,6 @@ groups: - wiki[0-9]*.openstack.org - wiki-dev[0-9]*.openstack.org - ze[0-9]*.open*.org - - zk[0-9]*.open*.org - zm[0-9]*.open*.org - zuul01.open*.org refstack: diff --git a/manifests/site.pp b/manifests/site.pp index 91d0d31703..2d1e7c220d 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -405,32 +405,6 @@ node /^storyboard-dev\d+\.opendev\.org$/ { } -# Node-OS: xenial -node /^zk\d+\.open.*\.org$/ { - # We use IP addresses here so that zk listens on the public facing addresses - # allowing cluster members to talk to each other. Without this they listen - # on 127.0.1.1 because that is what we have in /etc/hosts for - # zk0X.openstack.org. - $zk_cluster_members = [ - '23.253.236.126', # zk01 - '172.99.117.32', # zk02 - '23.253.90.246', # zk03 - ] - class { 'openstack_project::server': } - - class { '::zookeeper': - # ID needs to be numeric, so we use regex to extra numbers from fqdn. - id => regsubst($::fqdn, '^zk(\d+)\.open.*\.org$', '\1'), - # The frequency in hours to look for and purge old snapshots, - # defaults to 0 (disabled). The number of retained snapshots can - # be separately controlled through snap_retain_count and - # defaults to the minimum value of 3. This will quickly fill the - # disk in production if not enabled. Works on ZK >=3.4. - purge_interval => 6, - servers => $zk_cluster_members, - } -} - # A machine to serve various project status updates. 
# Node-OS: xenial node /^status\d*\.open.*\.org$/ { diff --git a/modules.env b/modules.env index 19c1801907..2dff67551f 100644 --- a/modules.env +++ b/modules.env @@ -30,7 +30,6 @@ OPENSTACK_GIT_ROOT=https://opendev.org SOURCE_MODULES["https://git.drupal.org/project/puppet-drush"]="origin/1.0.x" SOURCE_MODULES["https://github.com/biemond/biemond-wildfly"]="v1.2.4" SOURCE_MODULES["https://github.com/dalen/puppet-dnsquery"]="2.0.1" -SOURCE_MODULES["https://github.com/deric/puppet-zookeeper"]="v0.5.5" SOURCE_MODULES["https://github.com/duritong/puppet-sysctl"]="v0.0.11" # initfact is a dep of biemond-wildfly SOURCE_MODULES["https://github.com/jethrocarr/puppet-initfact"]="1.0.1" diff --git a/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml b/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml index e88d4e2a53..a7d7f6968d 100644 --- a/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml +++ b/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml @@ -73,6 +73,4 @@ results: - zuul-executor zk01.openstack.org: - - puppet - - puppet4 - zookeeper diff --git a/playbooks/roles/zookeeper/README.rst b/playbooks/roles/zookeeper/README.rst new file mode 100644 index 0000000000..f7e9877497 --- /dev/null +++ b/playbooks/roles/zookeeper/README.rst @@ -0,0 +1 @@ +Install, configure, and run zookeeper servers. diff --git a/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml b/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml new file mode 100644 index 0000000000..ae6aa3a9c4 --- /dev/null +++ b/playbooks/roles/zookeeper/files/zookeeper-compose/docker-compose.yaml @@ -0,0 +1,14 @@ +# Version 2 is the latest that is supported by docker-compose in +# Ubuntu Xenial. 
+version: '2' + +services: + zk: + image: docker.io/library/zookeeper:3.5 + network_mode: host + user: "10001:10001" + volumes: + - "/var/zookeeper/conf/zoo.cfg:/conf/zoo.cfg" + - "/var/zookeeper/data:/data" + - "/var/zookeeper/datalog:/datalog" + - "/var/zookeeper/logs:/logs" diff --git a/playbooks/roles/zookeeper/tasks/main.yaml b/playbooks/roles/zookeeper/tasks/main.yaml new file mode 100644 index 0000000000..3d8e7a9e20 --- /dev/null +++ b/playbooks/roles/zookeeper/tasks/main.yaml @@ -0,0 +1,55 @@ +- name: Create Zookeeper group + group: + name: "zookeeper" + gid: 10001 + system: yes +- name: Create Zookeeper User + user: + name: "zookeeper" + uid: 10001 + comment: Zookeeper + shell: /bin/false + group: "zookeeper" + home: "/var/zookeeper" + create_home: no + system: yes +- name: Synchronize compose directory + synchronize: + src: zookeeper-compose/ + dest: /etc/zookeeper-compose/ +- name: Ensure volume directories exist + file: + state: directory + path: "/var/zookeeper/{{ item }}" + owner: zookeeper + group: zookeeper + loop: + - conf + - data + - datalog + - logs +- name: Write config + template: + src: zoo.cfg.j2 + dest: /var/zookeeper/conf/zoo.cfg +- name: Write ID file + template: + src: myid.j2 + dest: /var/zookeeper/data/myid +# Temporary until 719589 merges. 
+- name: Install docker-compose + pip: + name: docker-compose + state: present + executable: pip3 +- name: Run docker-compose pull + shell: + cmd: docker-compose pull + chdir: /etc/zookeeper-compose/ +- name: Run docker-compose up + shell: + cmd: docker-compose up -d + chdir: /etc/zookeeper-compose/ +- name: Run docker prune to cleanup unneeded images + shell: + cmd: docker image prune -f diff --git a/playbooks/roles/zookeeper/templates/myid.j2 b/playbooks/roles/zookeeper/templates/myid.j2 new file mode 100644 index 0000000000..8d72019eea --- /dev/null +++ b/playbooks/roles/zookeeper/templates/myid.j2 @@ -0,0 +1 @@ +{{ inventory_hostname | regex_replace('^zk(\\d+)\\.open.*\\.org$', '\\1') | int }} diff --git a/playbooks/roles/zookeeper/templates/zoo.cfg.j2 b/playbooks/roles/zookeeper/templates/zoo.cfg.j2 new file mode 100644 index 0000000000..63dff1b82a --- /dev/null +++ b/playbooks/roles/zookeeper/templates/zoo.cfg.j2 @@ -0,0 +1,28 @@ +dataDir=/data +dataLogDir=/datalog +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 +# When enabled, ZooKeeper auto purge feature retains the +# autopurge.snapRetainCount most recent snapshots and the +# corresponding transaction logs in the dataDir and dataLogDir +# respectively and deletes the rest. Defaults to 3. Minimum value is 3. +autopurge.snapRetainCount=3 +# The frequency in hours to look for and purge old snapshots, +# defaults to 0 (disabled). The number of retained snapshots can +# be separately controlled through snapRetainCount and +# defaults to the minimum value of 3. This will quickly fill the +# disk in production if not enabled. Works on ZK >=3.4. 
+autopurge.purgeInterval=6
+maxClientCnxns=60
+standaloneEnabled=true
+admin.enableServer=true
+clientPort=2181
+{% for host in groups['zookeeper'] %}
+server.{{ host | regex_replace('^zk(\\d+)\\.open.*\\.org$', '\\1') | int }}={{ (hostvars[host].ansible_default_ipv4.address) }}:2888:3888
+{% endfor %}
diff --git a/playbooks/service-zookeeper.yaml b/playbooks/service-zookeeper.yaml
new file mode 100644
index 0000000000..1cc78357ab
--- /dev/null
+++ b/playbooks/service-zookeeper.yaml
@@ -0,0 +1,6 @@
+- hosts: "zookeeper:!disabled"
+  name: "Configure Zookeeper"
+  serial: 1
+  roles:
+    - install-docker
+    - zookeeper
diff --git a/testinfra/test_zookeeper.py b/testinfra/test_zookeeper.py
new file mode 100644
index 0000000000..0890bfaa46
--- /dev/null
+++ b/testinfra/test_zookeeper.py
@@ -0,0 +1,26 @@
+# Copyright 2020 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+testinfra_hosts = ['zk01.opendev.org']
+
+
+def test_id_file(host):
+    """zk01's myid file must hold the ID parsed from its hostname."""
+    assert host.file('/var/zookeeper/data/myid').content == b'1\n'
+
+
+def test_zk_listening(host):
+    """The ZooKeeper client port (2181) must be accepting connections."""
+    assert host.socket("tcp://0.0.0.0:2181").is_listening