Add Jaeger tracing server

Change-Id: I1aa68b1d5f99364fa09776301894b922ed169a3a
This commit is contained in:
James E. Blair 2022-09-05 13:42:18 -07:00
parent 2768b7709d
commit c661fb0972
15 changed files with 306 additions and 0 deletions

View File

@ -29,6 +29,7 @@ Major Systems
storyboard
kerberos
afs
tracing
translate
refstack
codesearch

37
doc/source/tracing.rst Normal file
View File

@ -0,0 +1,37 @@
:title: Tracing
.. _tracing:
Tracing
#######
The Jaeger tracing server is installed on tracing.opendev.org. It is
intended to be used by Zuul, but may be used by other services in the
future. It displays information about Zuul operations in visual form.
At a Glance
===========
:Hosts:
* https://tracing.opendev.org
:Ansible:
* https://opendev.org/opendev/system-config
* :git_file:`playbooks/roles/jaeger`
* :git_file:`playbooks/service-tracing.yaml`
:Projects:
* https://www.jaegertracing.io/
* https://www.jaegertracing.io/docs/latest/getting-started/
:Bugs:
* https://storyboard.openstack.org/#!/project/748
Overview
========
Apache is configured as a reverse proxy and there is an internal
Badger database stored at ``/var/jaeger/badger``.
Zuul sends telemetry information to Jaeger via the gRPC protocol.
The internal CA (``zk-ca``) used to create ZooKeeper certs for Zuul is
also used to provide and validate client certificates for the gRPC
connection to Jaeger.

View File

@ -0,0 +1,12 @@
# Certificate requested for the tracing service: the public service
# name plus the name of the individual inventory host.
letsencrypt_certs:
  tracing-opendev-org-main:
    - tracing.opendev.org
    - "{{ inventory_hostname }}"

# Account (with fixed uid/gid) that the jaeger role creates and uses
# as the owner of the Jaeger data and TLS directories.
jaeger_user: jaeger
jaeger_group: jaeger
jaeger_uid: 10001
jaeger_gid: 10001

# Inventory groups allowed through the firewall to the listed ports.
iptables_extra_allowed_groups:
  # gRPC
  - protocol: tcp
    port: '4317'
    group: nodepool
  - protocol: tcp
    port: '4317'
    group: zuul

View File

@ -97,6 +97,7 @@ groups:
- review[0-9]*.opendev.org
- static[0-9]*.opendev.org
- storyboard[0-9]*.opendev.org
- tracing[0-9]*.opendev.org
- translate[0-9]*.open*.org
- zuul[0-9]*.opendev.org
mailman:
@ -146,6 +147,7 @@ groups:
- storyboard[0-9]*.opendev.org
storyboard-dev:
- storyboard-dev[0-9]*.opendev.org
# Hosts running the Jaeger tracing server. Written as a list so the
# entry has the same shape as every other group in this file.
tracing:
  - tracing[0-9]*.opendev.org
translate-dev:
- translate-dev[0-9]*.open*.org
translate:
@ -165,6 +167,7 @@ groups:
- static[0-9]*.opendev.org
- storyboard-dev[0-9]*.opendev.org
- storyboard[0-9]*.opendev.org
- tracing[0-9]*.opendev.org
- translate-dev[0-9]*.open*.org
- translate[0-9]*.open*.org
zookeeper:

View File

@ -0,0 +1,2 @@
Run a Jaeger (tracing) server.

View File

@ -0,0 +1,4 @@
# Handler: reload Apache so that an updated vhost configuration is
# picked up. Triggered via "notify: jaeger Reload apache2".
- name: jaeger Reload apache2
  service:
    state: reloaded
    name: apache2

View File

@ -0,0 +1,87 @@
# Create the dedicated system group and user for Jaeger. The names and
# numeric ids come from the jaeger_* variables so that files written
# on the host have a predictable owner.
- name: Create jaeger group
  group:
    system: true
    name: "{{ jaeger_group }}"
    gid: "{{ jaeger_gid }}"

- name: Create jaeger user
  user:
    system: true
    name: "{{ jaeger_user }}"
    uid: "{{ jaeger_uid }}"
    group: "{{ jaeger_group }}"
    shell: /bin/bash
    home: "/home/{{ jaeger_user }}"
    create_home: true
# Lay down the docker-compose project for Jaeger.
- name: Ensure docker-compose directory exists
  file:
    path: /etc/jaeger-docker
    state: directory

- name: Write docker-compose file
  template:
    dest: /etc/jaeger-docker/docker-compose.yaml
    src: docker-compose.yaml.j2

# Badger storage directory on the host, owned by the jaeger account
# and not readable by other users.
- name: Ensure data directory exists
  file:
    path: /var/jaeger/badger
    state: directory
    mode: "0750"
    owner: "{{ jaeger_user }}"
    group: "{{ jaeger_group }}"

# Reuse the zk-ca role to place TLS material under /var/jaeger/tls,
# with that directory owned by the jaeger account.
- name: Generate GRPC TLS cert
  include_role:
    name: zk-ca
  vars:
    zk_ca_cert_dir: /var/jaeger/tls
    zk_ca_cert_dir_group: "{{ jaeger_group }}"
    zk_ca_cert_dir_owner: "{{ jaeger_user }}"
# Install Apache, which fronts the Jaeger UI as a reverse proxy.
- name: Install apache2
  apt:
    name:
      - apache2
      - apache2-utils
    state: present

# Modules required by the directives used in tracing.vhost.j2
# (rewrite rules, HTTP proxying, TLS and request headers).
- name: Apache modules
  apache2_module:
    state: present
    name: "{{ item }}"
  loop:
    - rewrite
    - proxy
    - proxy_http
    - ssl
    - headers

# Replace the default site with the tracing vhost and reload Apache
# when the rendered file changes.
- name: Copy apache config
  template:
    src: tracing.vhost.j2
    dest: /etc/apache2/sites-enabled/000-default.conf
    owner: root
    group: root
    # Quoted so the mode is always handled as an octal string rather
    # than relying on the YAML parser's treatment of a leading zero;
    # this also matches the quoted "0750" used for the data directory.
    mode: "0644"
  notify: jaeger Reload apache2
# Fetch the image(s) referenced by the compose file, then (re)start
# the service in the background.
- name: Run docker-compose pull
  shell:
    chdir: /etc/jaeger-docker/
    cmd: docker-compose pull

- name: Run docker-compose up
  shell:
    chdir: /etc/jaeger-docker/
    cmd: docker-compose up -d

# Block for up to 60 seconds until port 16686 accepts connections.
- name: Wait for jaeger to start
  wait_for:
    timeout: 60
    port: 16686

# Remove dangling images left behind after the pull.
- name: Run docker prune to cleanup unneeded images
  shell:
    cmd: docker image prune -f

View File

@ -0,0 +1,23 @@
# Version 2 is the latest that is supported by docker-compose in
# Ubuntu Xenial.
version: '2'
services:
  # Single all-in-one Jaeger container using host networking, so its
  # listeners bind directly on the host.
  jaeger:
    image: docker.io/jaegertracing/all-in-one:latest
    network_mode: host
    restart: always
    environment:
      # Accept OpenTelemetry (OTLP) data in addition to native Jaeger.
      - COLLECTOR_OTLP_ENABLED=true
      # Persist spans in the embedded Badger store under /badger
      # (bind-mounted from /var/jaeger/badger below).
      - SPAN_STORAGE_TYPE=badger
      - BADGER_EPHEMERAL=false
      - BADGER_DIRECTORY_VALUE=/badger/data
      - BADGER_DIRECTORY_KEY=/badger/key
      # Keep spans for 30 days. The value is a Go duration, which has
      # no day unit ("30d" is not parseable), so express it in hours.
      - BADGER_SPAN_STORE_TTL=720h
      # TLS with client certificate validation for the OTLP gRPC
      # listener (port 4317), which is the port opened in the firewall
      # for trace senders. The plain COLLECTOR_GRPC_TLS_* settings only
      # apply to Jaeger's native gRPC collector, not to OTLP.
      - COLLECTOR_OTLP_GRPC_TLS_ENABLED=true
      - COLLECTOR_OTLP_GRPC_TLS_CERT=/tls/certs/cert.pem
      - COLLECTOR_OTLP_GRPC_TLS_KEY=/tls/keys/key.pem
      - COLLECTOR_OTLP_GRPC_TLS_CLIENT_CA=/tls/certs/cacert.pem
    volumes:
      - /var/jaeger/badger:/badger
      - /var/jaeger/tls:/tls

View File

@ -0,0 +1,57 @@
# Plain-HTTP virtual host: its only job is to redirect every request
# to the HTTPS site.
<VirtualHost *:80>
ServerName tracing.opendev.org
ServerAdmin webmaster@openstack.org
ErrorLog ${APACHE_LOG_DIR}/tracing-error.log
LogLevel warn
CustomLog ${APACHE_LOG_DIR}/tracing-access.log combined
Redirect / https://tracing.opendev.org/
</VirtualHost>
# HTTPS virtual host: terminates TLS and reverse-proxies all requests
# to the service listening on localhost:16686.
<VirtualHost *:443>
ServerName tracing.opendev.org
ServerAdmin webmaster@openstack.org
# Accept URLs containing encoded slashes (%2F) instead of rejecting them.
AllowEncodedSlashes On
ErrorLog ${APACHE_LOG_DIR}/tracing-ssl-error.log
LogLevel warn
CustomLog ${APACHE_LOG_DIR}/tracing-ssl-access.log combined
SSLEngine on
SSLProtocol All -SSLv2 -SSLv3
# Note: this list should ensure ciphers that provide forward secrecy
SSLCipherSuite ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:!AES256:!aNULL:!eNULL:!MD5:!DSS:!PSK:!SRP
SSLHonorCipherOrder on
# Certificate, key and chain are read from the tracing.opendev.org
# directory under /etc/letsencrypt-certs.
# NOTE(review): presumably populated by the letsencrypt roles - confirm.
SSLCertificateFile /etc/letsencrypt-certs/tracing.opendev.org/tracing.opendev.org.cer
SSLCertificateKeyFile /etc/letsencrypt-certs/tracing.opendev.org/tracing.opendev.org.key
SSLCertificateChainFile /etc/letsencrypt-certs/tracing.opendev.org/ca.cer
BrowserMatch "MSIE [2-6]" \
nokeepalive ssl-unclean-shutdown \
downgrade-1.0 force-response-1.0
# MSIE 7 and newer should be able to use keepalive
BrowserMatch "MSIE [17-9]" ssl-unclean-shutdown
RewriteEngine on
# Do not rewrite the /server-status URL (though by default, this
# is only accessible from localhost). Connect to it with:
# ssh -L 8443:localhost:443 $HOSTNAME
# https://localhost:8443/server-status
# NOTE(review): the ProxyPass for "/" below also covers /server-status;
# confirm that this rule really keeps it from being proxied.
RewriteRule ^/server-status$ /server-status [L]
# Proxy everything to the backend on port 16686. retry=0 disables the
# back-off period after a backend failure, so the backend is retried on
# the next request.
ProxyPass / http://localhost:16686/ retry=0
ProxyPassReverse / http://localhost:16686/
# Pass the original Host header through to the backend.
ProxyPreserveHost on
# Tell the backend which scheme (http/https) the client used.
RequestHeader set "X-Forwarded-Proto" expr=%{REQUEST_SCHEME}
</VirtualHost>

View File

@ -253,6 +253,9 @@
- name: letsencrypt updated storyboard01-opendev-org-main
include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
# Handler for the tracing-opendev-org-main certificate: runs the shared
# restart_apache tasks from the letsencrypt-create-certs role.
# NOTE(review): presumably notified when that certificate is issued or
# renewed - confirm against the letsencrypt-create-certs role.
- name: letsencrypt updated tracing-opendev-org-main
include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
- name: letsencrypt updated translate01-openstack-org-main
include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml

View File

@ -0,0 +1,6 @@
# Configure every host in the "tracing" group that is not also in the
# "disabled" group: firewall rules first, then Docker, then Jaeger.
- name: "Base: configure tracing"
  hosts: "tracing:!disabled"
  roles:
    - iptables
    - install-docker
    - jaeger

25
testinfra/test_tracing.py Normal file
View File

@ -0,0 +1,25 @@
# Copyright 2022 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
testinfra_hosts = ['tracing99.opendev.org']
def test_jaeger_listening(host):
jaeger = host.socket("tcp://127.0.0.1:16686")
assert jaeger.is_listening
def test_tracing_http(host):
cmd = host.run('curl https://tracing99.opendev.org')
assert cmd.succeeded

View File

@ -302,6 +302,20 @@
- playbooks/roles/zuul-user/
- roles/openafs-client/
# Production deployment job for the tracing service. It runs
# service-tracing.yaml and is triggered by changes to the files that
# playbook depends on.
- job:
    name: infra-prod-service-tracing
    description: Run service-tracing.yaml playbook.
    parent: infra-prod-service-base
    vars:
      playbook_name: service-tracing.yaml
    files:
      - inventory/base
      - playbooks/service-tracing.yaml
      - inventory/service/group_vars/tracing.yaml
      - playbooks/roles/jaeger/
      - playbooks/roles/install-docker/
      - playbooks/roles/iptables/
- job:
name: infra-prod-service-borg-backup
parent: infra-prod-service-base

View File

@ -83,6 +83,7 @@
- name: opendev-buildset-registry
- name: system-config-build-image-refstack
soft: true
- system-config-run-tracing
- system-config-run-zookeeper:
dependencies:
- name: opendev-buildset-registry
@ -225,6 +226,7 @@
- name: opendev-buildset-registry
- name: system-config-upload-image-refstack
soft: true
- system-config-run-tracing
- system-config-run-zookeeper:
dependencies:
- name: opendev-buildset-registry
@ -499,6 +501,10 @@
soft: true
- name: system-config-promote-image-gerrit-3.5
soft: true
# Pipeline entry for the tracing deployment job. It declares a soft
# dependency on infra-prod-letsencrypt and defines the
# &infra-prod-service-tracing anchor so the same settings can be reused
# elsewhere in this file through the *infra-prod-service-tracing alias.
# NOTE(review): a soft dependency is expected to order this job after
# infra-prod-letsencrypt only when that job is also run - confirm
# against the Zuul job dependency documentation.
- infra-prod-service-tracing: &infra-prod-service-tracing
dependencies:
- name: infra-prod-letsencrypt
soft: true
- infra-prod-service-zookeeper: &infra-prod-service-zookeeper
dependencies:
- name: infra-prod-letsencrypt
@ -606,6 +612,7 @@
- infra-prod-service-registry: *infra-prod-service-registry
- infra-prod-service-refstack: *infra-prod-service-refstack
- infra-prod-service-review: *infra-prod-service-review
- infra-prod-service-tracing: *infra-prod-service-tracing
- infra-prod-service-zookeeper: *infra-prod-service-zookeeper
- infra-prod-service-zuul: *infra-prod-service-zuul
- infra-prod-service-zuul-lb: *infra-prod-service-zuul-lb

View File

@ -800,6 +800,31 @@
- playbooks/test-paste.yaml
- testinfra/test_paste.py
# Test job for the tracing service: a two-node run (bridge plus a
# tracing99 test host) that applies the letsencrypt and service-tracing
# playbooks. It is triggered by changes to the listed files.
- job:
    name: system-config-run-tracing
    parent: system-config-run
    description: |
      Run the playbook for the jaeger servers.
    nodeset:
      nodes:
        - label: ubuntu-bionic
          name: bridge.openstack.org
        - label: ubuntu-focal
          name: tracing99.opendev.org
    vars:
      run_playbooks:
        - playbooks/letsencrypt.yaml
        - playbooks/service-tracing.yaml
    files:
      - inventory/service/group_vars/tracing.yaml
      - playbooks/install-ansible.yaml
      - playbooks/letsencrypt.yaml
      - playbooks/service-tracing.yaml
      - playbooks/roles/jaeger/
      - playbooks/roles/install-docker/
      - playbooks/roles/iptables/
      - testinfra/test_tracing.py
- job:
name: system-config-run-zookeeper
parent: system-config-run