diff --git a/doc/source/systems.rst b/doc/source/systems.rst
index c58a035dad..cd1bd15735 100644
--- a/doc/source/systems.rst
+++ b/doc/source/systems.rst
@@ -29,6 +29,7 @@ Major Systems
    storyboard
    kerberos
    afs
+   tracing
    translate
    refstack
    codesearch
diff --git a/doc/source/tracing.rst b/doc/source/tracing.rst
new file mode 100644
index 0000000000..961e2da995
--- /dev/null
+++ b/doc/source/tracing.rst
@@ -0,0 +1,37 @@
+:title: Tracing
+
+.. _tracing:
+
+Tracing
+#######
+
+The Jaeger tracing server is installed on tracing.opendev.org. It is
+intended to be used by Zuul, but may be used by other services in the
+future. It displays information about Zuul operations in visual form.
+
+At a Glance
+===========
+
+:Hosts:
+  * https://tracing.opendev.org
+:Ansible:
+  * https://opendev.org/opendev/system-config
+  * :git_file:`playbooks/roles/jaeger`
+  * :git_file:`playbooks/service-tracing.yaml`
+:Projects:
+  * https://www.jaegertracing.io/
+  * https://www.jaegertracing.io/docs/latest/getting-started/
+:Bugs:
+  * https://storyboard.openstack.org/#!/project/748
+
+Overview
+========
+
+Apache is configured as a reverse proxy and there is an internal
+Badger database stored at ``/var/jaeger/badger``.
+
+Zuul sends telemetry information to Jaeger via the gRPC protocol.
+
+The internal CA (``zk-ca``) used to create ZooKeeper certs for Zuul is
+used to provide and validate client certificates for the gRPC
+connection to Jaeger as well.
diff --git a/inventory/service/group_vars/tracing.yaml b/inventory/service/group_vars/tracing.yaml
new file mode 100644
index 0000000000..304baeef08
--- /dev/null
+++ b/inventory/service/group_vars/tracing.yaml
@@ -0,0 +1,12 @@
+letsencrypt_certs:
+  tracing-opendev-org-main:
+    - tracing.opendev.org
+    - '{{ inventory_hostname }}'
+jaeger_user: jaeger
+jaeger_group: jaeger
+jaeger_uid: 10001
+jaeger_gid: 10001
+iptables_extra_allowed_groups:
+  # gRPC; only hosts in the nodepool and zuul groups may connect
+  - {'protocol': 'tcp', 'port': '4317', 'group': 'nodepool'}
+  - {'protocol': 'tcp', 'port': '4317', 'group': 'zuul'}
diff --git a/inventory/service/groups.yaml b/inventory/service/groups.yaml
index 35fd4b652f..fd173f011a 100644
--- a/inventory/service/groups.yaml
+++ b/inventory/service/groups.yaml
@@ -97,6 +97,7 @@ groups:
     - review[0-9]*.opendev.org
     - static[0-9]*.opendev.org
     - storyboard[0-9]*.opendev.org
+    - tracing[0-9]*.opendev.org
     - translate[0-9]*.open*.org
     - zuul[0-9]*.opendev.org
   mailman:
@@ -146,6 +147,8 @@ groups:
     - storyboard[0-9]*.opendev.org
   storyboard-dev:
     - storyboard-dev[0-9]*.opendev.org
+  tracing:
+    - tracing[0-9]*.opendev.org
   translate-dev:
     - translate-dev[0-9]*.open*.org
   translate:
@@ -165,6 +168,7 @@ groups:
     - static[0-9]*.opendev.org
     - storyboard-dev[0-9]*.opendev.org
     - storyboard[0-9]*.opendev.org
+    - tracing[0-9]*.opendev.org
     - translate-dev[0-9]*.open*.org
     - translate[0-9]*.open*.org
   zookeeper:
diff --git a/playbooks/roles/jaeger/README.rst b/playbooks/roles/jaeger/README.rst
new file mode 100644
index 0000000000..86994be276
--- /dev/null
+++ b/playbooks/roles/jaeger/README.rst
@@ -0,0 +1,2 @@
+Run a Jaeger (tracing) server.
+
diff --git a/playbooks/roles/jaeger/handlers/main.yaml b/playbooks/roles/jaeger/handlers/main.yaml
new file mode 100644
index 0000000000..9f136f6562
--- /dev/null
+++ b/playbooks/roles/jaeger/handlers/main.yaml
@@ -0,0 +1,4 @@
+- name: jaeger Reload apache2
+  service:
+    name: apache2
+    state: reloaded
diff --git a/playbooks/roles/jaeger/tasks/main.yaml b/playbooks/roles/jaeger/tasks/main.yaml
new file mode 100644
index 0000000000..a53cbb5617
--- /dev/null
+++ b/playbooks/roles/jaeger/tasks/main.yaml
@@ -0,0 +1,87 @@
+- name: Create jaeger group
+  group:
+    name: "{{ jaeger_group }}"
+    gid: "{{ jaeger_gid }}"
+    system: yes
+
+- name: Create jaeger user
+  user:
+    name: "{{ jaeger_user }}"
+    group: "{{ jaeger_group }}"
+    uid: "{{ jaeger_uid }}"
+    home: "/home/{{ jaeger_user }}"
+    create_home: yes
+    shell: /bin/bash
+    system: yes
+
+- name: Ensure docker-compose directory exists
+  file:
+    state: directory
+    path: /etc/jaeger-docker
+
+- name: Write docker-compose file
+  template:
+    src: docker-compose.yaml.j2
+    dest: /etc/jaeger-docker/docker-compose.yaml
+
+- name: Ensure data directory exists
+  file:
+    state: directory
+    path: /var/jaeger/badger
+    owner: "{{ jaeger_user }}"
+    group: "{{ jaeger_group }}"
+    mode: "0750"
+
+- name: Generate GRPC TLS cert
+  include_role:
+    name: zk-ca
+  vars:
+    zk_ca_cert_dir: /var/jaeger/tls
+    zk_ca_cert_dir_owner: "{{ jaeger_user }}"
+    zk_ca_cert_dir_group: "{{ jaeger_group }}"
+
+- name: Install apache2
+  apt:
+    name:
+      - apache2
+      - apache2-utils
+    state: present
+
+- name: Apache modules
+  apache2_module:
+    state: present
+    name: "{{ item }}"
+  loop:
+    - rewrite
+    - proxy
+    - proxy_http
+    - ssl
+    - headers
+
+- name: Copy apache config
+  template:
+    src: tracing.vhost.j2
+    dest: /etc/apache2/sites-enabled/000-default.conf
+    owner: root
+    group: root
+    mode: "0644"
+  notify: jaeger Reload apache2
+
+- name: Run docker-compose pull
+  shell:
+    cmd: docker-compose pull
+    chdir: /etc/jaeger-docker/
+
+- name: Run docker-compose up
+  shell:
+    cmd: docker-compose up -d
+    chdir: /etc/jaeger-docker/
+
+- name: Wait for jaeger to start
+  wait_for:
+    port: 16686
+    timeout: 60
+
+- name: Run docker prune to cleanup unneeded images
+  shell:
+    cmd: docker image prune -f
diff --git a/playbooks/roles/jaeger/templates/docker-compose.yaml.j2 b/playbooks/roles/jaeger/templates/docker-compose.yaml.j2
new file mode 100644
index 0000000000..0209f37870
--- /dev/null
+++ b/playbooks/roles/jaeger/templates/docker-compose.yaml.j2
@@ -0,0 +1,23 @@
+# Version 2 is the latest that is supported by docker-compose in
+# Ubuntu Xenial.
+version: '2'
+
+services:
+  jaeger:
+    image: docker.io/jaegertracing/all-in-one:latest
+    network_mode: host
+    restart: always
+    environment:
+      - COLLECTOR_OTLP_ENABLED=true
+      - SPAN_STORAGE_TYPE=badger
+      - BADGER_EPHEMERAL=false
+      - BADGER_DIRECTORY_VALUE=/badger/data
+      - BADGER_DIRECTORY_KEY=/badger/key
+      - BADGER_SPAN_STORE_TTL=720h  # 30 days; Go durations have no day unit
+      - COLLECTOR_GRPC_TLS_ENABLED=true
+      - COLLECTOR_GRPC_TLS_CERT=/tls/certs/cert.pem
+      - COLLECTOR_GRPC_TLS_KEY=/tls/keys/key.pem
+      - COLLECTOR_GRPC_TLS_CLIENT_CA=/tls/certs/cacert.pem
+    volumes:
+      - /var/jaeger/badger:/badger
+      - /var/jaeger/tls:/tls
diff --git a/playbooks/roles/jaeger/templates/tracing.vhost.j2 b/playbooks/roles/jaeger/templates/tracing.vhost.j2
new file mode 100644
index 0000000000..87a834ff17
--- /dev/null
+++ b/playbooks/roles/jaeger/templates/tracing.vhost.j2
@@ -0,0 +1,57 @@
+<VirtualHost *:80>
+  ServerName tracing.opendev.org
+  ServerAdmin webmaster@openstack.org
+
+  ErrorLog ${APACHE_LOG_DIR}/tracing-error.log
+
+  LogLevel warn
+
+  CustomLog ${APACHE_LOG_DIR}/tracing-access.log combined
+
+  Redirect / https://tracing.opendev.org/
+
+</VirtualHost>
+
+<VirtualHost *:443>
+  ServerName tracing.opendev.org
+  ServerAdmin webmaster@openstack.org
+
+  AllowEncodedSlashes On
+
+  ErrorLog ${APACHE_LOG_DIR}/tracing-ssl-error.log
+
+  LogLevel warn
+
+  CustomLog ${APACHE_LOG_DIR}/tracing-ssl-access.log combined
+
+  SSLEngine on
+  SSLProtocol All -SSLv2 -SSLv3
+  # Note: this list should ensure ciphers that provide forward secrecy
+  SSLCipherSuite ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:!AES256:!aNULL:!eNULL:!MD5:!DSS:!PSK:!SRP
+  SSLHonorCipherOrder on
+
+  SSLCertificateFile /etc/letsencrypt-certs/tracing.opendev.org/tracing.opendev.org.cer
+  SSLCertificateKeyFile /etc/letsencrypt-certs/tracing.opendev.org/tracing.opendev.org.key
+  SSLCertificateChainFile /etc/letsencrypt-certs/tracing.opendev.org/ca.cer
+
+  BrowserMatch "MSIE [2-6]" \
+      nokeepalive ssl-unclean-shutdown \
+      downgrade-1.0 force-response-1.0
+  # MSIE 7 and newer should be able to use keepalive
+  BrowserMatch "MSIE [17-9]" ssl-unclean-shutdown
+
+  RewriteEngine on
+
+  # Do not rewrite the /server-status URL (though by default, this
+  # is only accessible from localhost). Connect to it with:
+  #   ssh -L 8443:localhost:443 $HOSTNAME
+  #   https://localhost:8443/server-status
+  RewriteRule ^/server-status$ /server-status [L]
+
+  ProxyPass / http://localhost:16686/ retry=0
+  ProxyPassReverse / http://localhost:16686/
+  ProxyPreserveHost on
+  RequestHeader set "X-Forwarded-Proto" expr=%{REQUEST_SCHEME}
+
+</VirtualHost>
+
diff --git a/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml b/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml
index 1d5cf9d2a7..6a71ed2d81 100644
--- a/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml
+++ b/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml
@@ -253,6 +253,9 @@
 - name: letsencrypt updated storyboard01-opendev-org-main
   include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
 
+- name: letsencrypt updated tracing-opendev-org-main
+  include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
+
 - name: letsencrypt updated translate01-openstack-org-main
   include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
 
diff --git a/playbooks/service-tracing.yaml b/playbooks/service-tracing.yaml
new file mode 100644
index 0000000000..1bf53d0683
--- /dev/null
+++ b/playbooks/service-tracing.yaml
@@ -0,0 +1,6 @@
+- hosts: "tracing:!disabled"
+  name: "Base: configure tracing"
+  roles:
+    - iptables
+    - install-docker
+    - jaeger
diff --git a/testinfra/test_tracing.py b/testinfra/test_tracing.py
new file mode 100644
index 0000000000..2eabc29bda
--- /dev/null
+++ b/testinfra/test_tracing.py
@@ -0,0 +1,26 @@
+# Copyright 2022 Acme Gating, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+testinfra_hosts = ['tracing99.opendev.org']
+
+
+def test_jaeger_listening(host):
+    jaeger = host.socket("tcp://127.0.0.1:16686")
+    assert jaeger.is_listening
+
+
+def test_tracing_http(host):
+    cmd = host.run('curl https://tracing99.opendev.org')
+    assert cmd.succeeded
diff --git a/zuul.d/infra-prod.yaml b/zuul.d/infra-prod.yaml
index c190ac7f2e..20029d04e6 100644
--- a/zuul.d/infra-prod.yaml
+++ b/zuul.d/infra-prod.yaml
@@ -302,6 +302,20 @@
       - playbooks/roles/zuul-user/
       - roles/openafs-client/
 
+- job:
+    name: infra-prod-service-tracing
+    parent: infra-prod-service-base
+    description: Run service-tracing.yaml playbook.
+    vars:
+      playbook_name: service-tracing.yaml
+    files:
+      - inventory/base
+      - playbooks/service-tracing.yaml
+      - inventory/service/group_vars/tracing.yaml
+      - playbooks/roles/jaeger/
+      - playbooks/roles/install-docker/
+      - playbooks/roles/iptables/
+
 - job:
     name: infra-prod-service-borg-backup
     parent: infra-prod-service-base
diff --git a/zuul.d/project.yaml b/zuul.d/project.yaml
index 80c513bc55..88fd57315c 100644
--- a/zuul.d/project.yaml
+++ b/zuul.d/project.yaml
@@ -83,6 +83,7 @@
               - name: opendev-buildset-registry
               - name: system-config-build-image-refstack
                 soft: true
+        - system-config-run-tracing
         - system-config-run-zookeeper:
             dependencies:
               - name: opendev-buildset-registry
@@ -225,6 +226,7 @@
               - name: opendev-buildset-registry
               - name: system-config-upload-image-refstack
                 soft: true
+        - system-config-run-tracing
         - system-config-run-zookeeper:
             dependencies:
               - name: opendev-buildset-registry
@@ -499,6 +501,10 @@
                 soft: true
               - name: system-config-promote-image-gerrit-3.5
                 soft: true
+        - infra-prod-service-tracing: &infra-prod-service-tracing
+            dependencies:
+              - name: infra-prod-letsencrypt
+                soft: true
         - infra-prod-service-zookeeper: &infra-prod-service-zookeeper
             dependencies:
               - name: infra-prod-letsencrypt
@@ -606,6 +612,7 @@
         - infra-prod-service-registry: *infra-prod-service-registry
         - infra-prod-service-refstack: *infra-prod-service-refstack
         - infra-prod-service-review: *infra-prod-service-review
+        - infra-prod-service-tracing: *infra-prod-service-tracing
         - infra-prod-service-zookeeper: *infra-prod-service-zookeeper
         - infra-prod-service-zuul: *infra-prod-service-zuul
         - infra-prod-service-zuul-lb: *infra-prod-service-zuul-lb
diff --git a/zuul.d/system-config-run.yaml b/zuul.d/system-config-run.yaml
index 655d3c3219..d9d45d76ab 100644
--- a/zuul.d/system-config-run.yaml
+++ b/zuul.d/system-config-run.yaml
@@ -800,6 +800,31 @@
       - playbooks/test-paste.yaml
       - testinfra/test_paste.py
 
+- job:
+    name: system-config-run-tracing
+    parent: system-config-run
+    description: |
+      Run the playbook for the jaeger servers.
+    nodeset:
+      nodes:
+        - name: bridge.openstack.org
+          label: ubuntu-bionic
+        - name: tracing99.opendev.org
+          label: ubuntu-focal
+    vars:
+      run_playbooks:
+        - playbooks/letsencrypt.yaml
+        - playbooks/service-tracing.yaml
+    files:
+      - inventory/service/group_vars/tracing.yaml
+      - playbooks/install-ansible.yaml
+      - playbooks/letsencrypt.yaml
+      - playbooks/service-tracing.yaml
+      - playbooks/roles/jaeger/
+      - playbooks/roles/install-docker/
+      - playbooks/roles/iptables/
+      - testinfra/test_tracing.py
+
 - job:
     name: system-config-run-zookeeper
     parent: system-config-run