From 185797a0e5e46fd0f68f7b423e79f732c8541d68 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Fri, 26 Jun 2020 10:23:16 +1000 Subject: [PATCH] Graphite container deployment This deploys graphite from the upstream container. We override the statsd configuration to have it listen on ipv6. Similarly we override the nginx config to listen on ipv6, enable ssl, forward port 80 to 443, and block the /admin page (we don't use it). For production we will just want to put some cinder storage in /opt/graphite/storage on the production host and figure out how to migrate the old stats. There is also a bit of cleanup that will follow, because we half-converted graphite01.opendev.org -- so everything can't be in the same group till that is gone. Testing has been added to push some stats and ensure they are seen. Change-Id: Ie843b3d90a72564ef90805f820c8abc61a71017d --- .../service/group_vars/graphite_opendev.org | 14 +++++ inventory/service/groups.yaml | 13 ++++- .../host_vars/graphite02.opendev.org.yaml | 4 ++ playbooks/roles/graphite/README.rst | 1 + playbooks/roles/graphite/handlers/main.yaml | 4 ++ playbooks/roles/graphite/tasks/main.yaml | 50 ++++++++++++++++ .../graphite/templates/docker-compose.yaml.j2 | 15 +++++ .../templates/graphite-statsd.conf.j2 | 57 ++++++++++++++++++ .../roles/graphite/templates/statsd.js.j2 | 9 +++ .../test-fixtures/results.yaml | 5 +- .../handlers/main.yaml | 3 + .../handlers/restart_graphite.yaml | 12 ++++ playbooks/service-graphite.yaml | 6 ++ testinfra/test_graphite.py | 58 +++++++++++++++++++ zuul.d/infra-prod.yaml | 16 +++++ zuul.d/project.yaml | 3 + zuul.d/system-config-run.yaml | 33 +++++++++++ 17 files changed, 299 insertions(+), 4 deletions(-) create mode 100644 inventory/service/group_vars/graphite_opendev.org create mode 100644 inventory/service/host_vars/graphite02.opendev.org.yaml create mode 100644 playbooks/roles/graphite/README.rst create mode 100644 playbooks/roles/graphite/handlers/main.yaml create mode 100644 
playbooks/roles/graphite/tasks/main.yaml create mode 100644 playbooks/roles/graphite/templates/docker-compose.yaml.j2 create mode 100644 playbooks/roles/graphite/templates/graphite-statsd.conf.j2 create mode 100644 playbooks/roles/graphite/templates/statsd.js.j2 create mode 100644 playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml create mode 100644 playbooks/service-graphite.yaml create mode 100644 testinfra/test_graphite.py diff --git a/inventory/service/group_vars/graphite_opendev.org b/inventory/service/group_vars/graphite_opendev.org new file mode 100644 index 0000000000..ccbd309b14 --- /dev/null +++ b/inventory/service/group_vars/graphite_opendev.org @@ -0,0 +1,14 @@ +iptables_extra_allowed_hosts: + - hostname: bridge.openstack.org + port: 8125 + protocol: udp + - hostname: opendev.org + port: 8125 + protocol: udp + +iptables_extra_allowed_groups: + - {'protocol': 'udp', 'port': '8125', 'group': 'firehose'} + - {'protocol': 'udp', 'port': '8125', 'group': 'mirror-update'} + - {'protocol': 'udp', 'port': '8125', 'group': 'logstash'} + - {'protocol': 'udp', 'port': '8125', 'group': 'nodepool'} + - {'protocol': 'udp', 'port': '8125', 'group': 'zuul'} diff --git a/inventory/service/groups.yaml b/inventory/service/groups.yaml index 9aa74a05cb..eb80ce26c8 100644 --- a/inventory/service/groups.yaml +++ b/inventory/service/groups.yaml @@ -65,6 +65,10 @@ groups: - grafana[0-9]*.opendev.org graphite: - graphite*.open*.org + # NOTE(ianw) : to be cleaned up once the half-puppet + # graphite01.opendev.org is gone. 
+ graphite_opendev: + - graphite02.opendev.org health: - health[0-9]*.openstack.org jvb: @@ -74,7 +78,7 @@ groups: letsencrypt: - etherpad[0-9]*.opendev.org - gitea[0-9]*.opendev.org - - graphite01.opendev.org + - graphite[0-9]*.opendev.org - grafana[0-9]*.opendev.org - insecure-ci-registry[0-9]*.opendev.org - meetpad[0-9]*.opendev.org @@ -130,7 +134,10 @@ groups: - ethercalc[0-9]*.open*.org - firehose[0-9]*.open*.org - grafana[0-9]*.open*.org - - graphite*.open*.org + # TODO(ianw) : this is a weird one we half-converted and moved + # into opendev.org in the early days of opendev. remove when + # graphite02 up. + - graphite01.opendev.org - health[0-9]*.openstack.org - kdc[0-9]*.open*.org - lists*.katacontainers.io @@ -167,7 +174,7 @@ groups: - ethercalc[0-9]*.open*.org - firehose[0-9]*.open*.org - grafana[0-9]*.open*.org - - graphite[0-9]*.open*.org + - graphite01.opendev.org - health[0-9]*.openstack.org - kdc[0-9]*.open*.org - lists*.katacontainers.io diff --git a/inventory/service/host_vars/graphite02.opendev.org.yaml b/inventory/service/host_vars/graphite02.opendev.org.yaml new file mode 100644 index 0000000000..939d0ca663 --- /dev/null +++ b/inventory/service/host_vars/graphite02.opendev.org.yaml @@ -0,0 +1,4 @@ +letsencrypt_certs: + graphite02-main: + - graphite02.opendev.org + - graphite.opendev.org diff --git a/playbooks/roles/graphite/README.rst b/playbooks/roles/graphite/README.rst new file mode 100644 index 0000000000..e045c696eb --- /dev/null +++ b/playbooks/roles/graphite/README.rst @@ -0,0 +1 @@ +Run Graphite diff --git a/playbooks/roles/graphite/handlers/main.yaml b/playbooks/roles/graphite/handlers/main.yaml new file mode 100644 index 0000000000..08b750b530 --- /dev/null +++ b/playbooks/roles/graphite/handlers/main.yaml @@ -0,0 +1,4 @@ +- name: graphite Reload apache2 + service: + name: apache2 + state: reloaded diff --git a/playbooks/roles/graphite/tasks/main.yaml b/playbooks/roles/graphite/tasks/main.yaml new file mode 100644 index 
0000000000..852f9a9b00 --- /dev/null +++ b/playbooks/roles/graphite/tasks/main.yaml @@ -0,0 +1,50 @@ +- name: Ensure docker-compose directory exists + file: + state: directory + path: /etc/graphite-docker + +- name: Write settings file + template: + src: docker-compose.yaml.j2 + dest: /etc/graphite-docker/docker-compose.yaml + +- name: Write nginx override config + template: + src: graphite-statsd.conf.j2 + dest: /etc/graphite-docker/graphite-statsd.conf + +- name: Write statsd override config + template: + src: statsd.js.j2 + dest: /etc/graphite-docker/statsd.js + +- name: Ensure storage directory exists + file: + state: directory + path: /opt/graphite/storage + +- name: Ensure log directory exists + file: + state: directory + path: /var/log/graphite + +- name: Run docker-compose pull + shell: + cmd: docker-compose pull + chdir: /etc/graphite-docker/ + +- name: Run docker-compose up + shell: + cmd: docker-compose up -d + chdir: /etc/graphite-docker/ + +- name: Run docker prune to cleanup unneeded images + shell: + cmd: docker image prune -f + +# This is handy to have on the host for checking stat ingestion +- name: Install netcat + package: + name: + - netcat + state: present diff --git a/playbooks/roles/graphite/templates/docker-compose.yaml.j2 b/playbooks/roles/graphite/templates/docker-compose.yaml.j2 new file mode 100644 index 0000000000..02154cdfe0 --- /dev/null +++ b/playbooks/roles/graphite/templates/docker-compose.yaml.j2 @@ -0,0 +1,15 @@ +# Version 2 is the latest that is supported by docker-compose in +# Ubuntu Xenial. 
+version: '2' + +services: + graphite: + restart: always + image: docker.io/graphiteapp/graphite-statsd + network_mode: host + volumes: + - /etc/graphite-docker/graphite-statsd.conf:/etc/nginx/sites-enabled/graphite-statsd.conf + - /etc/graphite-docker/statsd.js:/opt/statsd/config/udp.js + - /etc/letsencrypt-certs:/etc/letsencrypt-certs + - /opt/graphite/storage:/opt/graphite/storage + - /var/log/graphite:/var/log/ diff --git a/playbooks/roles/graphite/templates/graphite-statsd.conf.j2 b/playbooks/roles/graphite/templates/graphite-statsd.conf.j2 new file mode 100644 index 0000000000..ec7e73c7d4 --- /dev/null +++ b/playbooks/roles/graphite/templates/graphite-statsd.conf.j2 @@ -0,0 +1,57 @@ +server { + listen 80 default_server; + listen [::]:80 default_server; + server_name _; + + return 301 https://$host$request_uri; +} + +server { + listen 443 ssl; + listen [::]:443 ssl; + server_name {{ inventory_hostname }}; + + ssl_certificate /etc/letsencrypt-certs/{{ inventory_hostname }}/{{ inventory_hostname }}.cer; + ssl_certificate_key /etc/letsencrypt-certs/{{ inventory_hostname }}/{{ inventory_hostname }}.key; + root /opt/graphite/static; + index index.html; + + location /nginx_status { + stub_status on; + access_log off; + allow 127.0.0.1; + deny all; + } + + # No remote login + location /admin { + allow 127.0.0.1; + deny all; + } + + location /media { + # django admin static files + alias /usr/local/lib/python3.6/dist-packages/django/contrib/admin/media/; + } + + location /admin/auth/admin { + alias /usr/local/lib/python3.6/dist-packages/django/contrib/admin/static/admin; + } + + location /admin/auth/user/admin { + alias /usr/local/lib/python3.6/dist-packages/django/contrib/admin/static/admin; + } + + location / { + proxy_pass http://127.0.0.1:8080; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + + add_header 'Access-Control-Allow-Origin' '*'; + add_header 
'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'; + add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Type'; + add_header 'Access-Control-Allow-Credentials' 'true'; + } + +} diff --git a/playbooks/roles/graphite/templates/statsd.js.j2 b/playbooks/roles/graphite/templates/statsd.js.j2 new file mode 100644 index 0000000000..de9d7e85c3 --- /dev/null +++ b/playbooks/roles/graphite/templates/statsd.js.j2 @@ -0,0 +1,9 @@ +{ + "graphiteHost": "127.0.0.1", + "graphitePort": 2003, + "port": 8125, + "flushInterval": 10000, + "servers": [ + { server: "./servers/udp", address: "::", port: 8125, address_ipv6: true } + ] +} diff --git a/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml b/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml index 91571a98ab..ad47003481 100644 --- a/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml +++ b/playbooks/roles/install-ansible/files/inventory_plugins/test-fixtures/results.yaml @@ -18,8 +18,11 @@ results: - puppet - puppet4 - graphite.opendev.org: + graphite01.opendev.org: + - puppet + - puppet4 - graphite + - letsencrypt - puppet - webservers diff --git a/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml b/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml index 813bf47198..2de2d6302c 100644 --- a/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml +++ b/playbooks/roles/letsencrypt-create-certs/handlers/main.yaml @@ -12,6 +12,9 @@ # https://docs.ansible.com/ansible/latest/porting_guides/porting_guide_2.8.html#imports-as-handlers - name: letsencrypt updated graphite01-main include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml +- name: letsencrypt updated graphite02-main + include_tasks: roles/letsencrypt-create-certs/handlers/restart_graphite.yaml + - name: letsencrypt updated tarballs-main diff --git a/playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml 
b/playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml new file mode 100644 index 0000000000..ae8c27402d --- /dev/null +++ b/playbooks/roles/letsencrypt-create-certs/handlers/restart_graphite.yaml @@ -0,0 +1,12 @@ +- name: Check for running nginx + command: pgrep -f nginx + ignore_errors: yes + register: nginx_pids + +- name: Restart graphite container + when: nginx_pids.rc == 0 + block: + - name: Restart nginx + shell: + cmd: docker-compose restart graphite + chdir: /etc/graphite-docker diff --git a/playbooks/service-graphite.yaml b/playbooks/service-graphite.yaml new file mode 100644 index 0000000000..e7c31e7c55 --- /dev/null +++ b/playbooks/service-graphite.yaml @@ -0,0 +1,6 @@ +- hosts: "graphite_opendev:!disabled" + name: "Base: configure graphite" + roles: + - iptables + - install-docker + - graphite diff --git a/testinfra/test_graphite.py b/testinfra/test_graphite.py new file mode 100644 index 0000000000..351187943b --- /dev/null +++ b/testinfra/test_graphite.py @@ -0,0 +1,58 @@ +# Copyright 2020 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import json +import logging +import ssl +import urllib.request + +testinfra_hosts = ['graphite02.opendev.org'] + + +def test_graphite_container_web_listening(host): + graphite_http = host.socket("tcp://127.0.0.1:80") + assert graphite_http.is_listening + + graphite_https = host.socket("tcp://127.0.0.1:443") + assert graphite_https.is_listening + +def test_graphite(host): + cmd = host.run('curl --insecure ' + '--resolve graphite.opendev.org:443:127.0.0.1 ' + 'https://graphite.opendev.org') + assert 'Graphite Browser' in cmd.stdout + +def test_graphite_data(host): + # seed some data; send it over ipv6 + cmd = ('timeout 20 bash -c ' + '\'while true; do echo -n "example:$((RANDOM % 100))|c" ' + '| nc -6 -w 1 -u localhost 8125; done\'') + host.run(cmd) + + url='render?from=-10mins&until=now&target=stats.example&format=json' + + # Assert we see some non-null values for this stat + # multi-node-hosts-file has setup graphite02.opendev.org to + # resolve from hosts. + found_value = False + with urllib.request.urlopen('https://graphite02.opendev.org/%s' % (url), + context=ssl._create_unverified_context()) \ + as req: + data = json.loads(req.read().decode()) + logging.debug('got: %s' % data) + datapoints = (data[0]['datapoints']) + for p in datapoints: + if p[0] is not None: + found_value = True + + assert found_value diff --git a/zuul.d/infra-prod.yaml b/zuul.d/infra-prod.yaml index 37538e6b23..d52bbe21c2 100644 --- a/zuul.d/infra-prod.yaml +++ b/zuul.d/infra-prod.yaml @@ -526,6 +526,22 @@ - playbooks/roles/logrotate - playbooks/roles/iptables/ +- job: + name: infra-prod-service-graphite + parent: infra-prod-service-base + description: Run service-graphite.yaml playbook. 
+ vars: + playbook_name: service-graphite.yaml + files: + - inventory/ + - playbooks/service-graphite.yaml + - inventory/service/host_vars/graphite02.opendev.org.yaml + - inventory/service/group_vars/graphite + - playbooks/roles/install-docker/ + - playbooks/roles/pip3/ + - playbooks/roles/graphite/ + - playbooks/roles/iptables/ + # Run AFS changes separately so we can make sure to only do one at a time # (turns out quorum is nice to have) - job: diff --git a/zuul.d/project.yaml b/zuul.d/project.yaml index f2e376a347..1ec9e233f8 100644 --- a/zuul.d/project.yaml +++ b/zuul.d/project.yaml @@ -48,6 +48,7 @@ - name: opendev-buildset-registry - name: system-config-build-image-grafana soft: true + - system-config-run-graphite - system-config-run-review: dependencies: - name: opendev-buildset-registry @@ -132,6 +133,7 @@ - name: opendev-buildset-registry - name: system-config-upload-image-grafana soft: true + - system-config-run-graphite - system-config-run-review: dependencies: - name: opendev-buildset-registry @@ -226,6 +228,7 @@ soft: true - name: system-config-promote-image-grafana soft: true + - infra-prod-service-graphite - infra-prod-service-meetpad - infra-prod-service-mirror-update - infra-prod-service-mirror diff --git a/zuul.d/system-config-run.yaml b/zuul.d/system-config-run.yaml index af85b61dc5..98a25c9329 100644 --- a/zuul.d/system-config-run.yaml +++ b/zuul.d/system-config-run.yaml @@ -572,6 +572,39 @@ - docker/grafana/ - testinfra/test_grafana.py +- job: + name: system-config-run-graphite + parent: system-config-run + description: | + Run the playbook for the graphite servers. 
+ timeout: 3600 + required-projects: + - opendev/system-config + nodeset: + nodes: + - name: bridge.openstack.org + label: ubuntu-bionic + # NOTE(ianw): 01 is a half-puppet opendev.org + # server + - name: graphite02.opendev.org + label: ubuntu-focal + vars: + run_playbooks: + - playbooks/letsencrypt.yaml + - playbooks/service-graphite.yaml + host-vars: + graphite02.opendev.org: + host_copy_output: + '/var/log/graphite': logs + files: + - playbooks/bridge.yaml + - playbooks/letsencrypt.yaml + - playbooks/service-graphite.yaml + - playbooks/roles/graphite + - playbooks/roles/install-docker/ + - playbooks/roles/pip3/ + - testinfra/test_graphite.py + - job: name: system-config-run-meetpad parent: system-config-run-containers