diff --git a/ara/wsgi_sqlite.py b/ara/wsgi_sqlite.py new file mode 100644 index 00000000..04a4a821 --- /dev/null +++ b/ara/wsgi_sqlite.py @@ -0,0 +1,134 @@ +# Copyright (c) 2017 Red Hat, Inc. +# +# This file is part of ARA: Ansible Run Analysis. +# +# ARA is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ARA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with ARA. If not, see <http://www.gnu.org/licenses/>. + +# A WSGI script to load the ARA web application against a variable database +# location requested over HTTP. +# Can be configured using environment variables (e.g., Apache SetEnv) with the +# following variables: +# +# ARA_WSGI_USE_VIRTUALENV +# Enable virtual environment usage if ARA is installed in a virtual +# environment. +# Defaults to '0', set to '1' to enable. +# ARA_WSGI_VIRTUALENV_PATH +# When using a virtual environment, where the virtualenv is located. +# Defaults to None, set to the absolute path of your virtualenv. +# ARA_WSGI_TMPDIR_MAX_AGE +# This WSGI middleware creates temporary directories which should be +# discarded on a regular basis to avoid them accumulating. +# This is a duration, in seconds, before cleaning directories up. +# Defaults to 3600. +# ARA_WSGI_LOG_ROOT +# Absolute path on the filesystem that matches the DocumentRoot of your +# webserver vhost. +# Defaults to '/srv/static/logs'. +# ARA_WSGI_DATABASE_DIRECTORY +# Subdirectory in which ARA sqlite databases are expected to reside. 
+#       For example, 'ara-report' would expect:
+#           http://logserver/some/path/ara-report/ansible.sqlite
+#       This variable should match the 'WSGIScriptAliasMatch' pattern of your
+#       webserver vhost.
+#       Defaults to 'ara-report'
+
+import logging
+import os
+import re
+import shutil
+import six
+import time
+
+if (int(os.getenv('ARA_WSGI_USE_VIRTUALENV', 0)) == 1 and
+        os.getenv('ARA_WSGI_VIRTUALENV_PATH')):
+    activate_this = os.getenv('ARA_WSGI_VIRTUALENV_PATH')
+    # The variable is documented as the path of the virtualenv itself, so
+    # resolve the activation script when a directory is given. A direct path
+    # to activate_this.py keeps working.
+    if os.path.isdir(activate_this):
+        activate_this = os.path.join(activate_this, 'bin', 'activate_this.py')
+    # Both branches run the script in a fresh namespace that carries the
+    # script's own __file__ instead of this module's globals.
+    if six.PY2:
+        execfile(activate_this, dict(__file__=activate_this))  # nosec
+    else:
+        with open(activate_this) as script:
+            exec(script.read(), dict(__file__=activate_this))  # nosec
+
+# Process-wide defaults read from the process environment.
+# mod_wsgi hands Apache SetEnv values over in the per-request WSGI environ
+# rather than in the process environment, so application() lets the request
+# environ override these three values.
+TMPDIR_MAX_AGE = int(os.getenv('ARA_WSGI_TMPDIR_MAX_AGE', 3600))
+LOG_ROOT = os.getenv('ARA_WSGI_LOG_ROOT', '/srv/static/logs')
+DATABASE_DIRECTORY = os.getenv('ARA_WSGI_DATABASE_DIRECTORY', 'ara-report')
+
+logger = logging.getLogger('ara.wsgi_sqlite')
+if not logger.handlers:
+    logging.basicConfig(format='%(name)s:%(levelname)s:%(message)s')
+
+
+def bad_request(environ, start_response, message):
+    """Log ``message`` and answer the request with an HTTP 400 HTML page.
+
+    :param environ: WSGI environment of the request (not used here).
+    :param start_response: WSGI ``start_response`` callable.
+    :param message: short explanation, logged and embedded in the page.
+    :returns: a single-item list holding the response body as bytes.
+    """
+    logger.error('HTTP 400: %s', message)
+    body = """
+<html>
+  <head>
+    <title>400 Bad Request</title>
+  </head>
+  <body>
+    <h1>Bad Request</h1>
+    <p>%s</p>
+  </body>
+</html>""" % message
+    # WSGI response bodies are byte strings; a text string has to be encoded
+    # (on python 2 a plain str already is bytes and is left untouched).
+    if not isinstance(body, bytes):
+        body = body.encode('utf-8')
+    status = '400 Bad Request'
+    response_headers = [('Content-Type', 'text/html; charset=utf-8'),
+                        ('Content-Length', str(len(body)))]
+    start_response(status, response_headers)
+    return [body]
+
+
+def application(environ, start_response):
+    """WSGI entry point: serve the ARA web application for one database.
+
+    The requested URL is mapped onto a directory below the log root. The
+    ``ansible.sqlite`` file found there is exported through ``ARA_DATABASE``
+    before the ARA web application is created and called.
+
+    :param environ: WSGI environment of the request. ``ARA_WSGI_LOG_ROOT``,
+        ``ARA_WSGI_DATABASE_DIRECTORY`` and ``ARA_WSGI_TMPDIR_MAX_AGE`` found
+        in it take precedence over the module-level defaults.
+    :param start_response: WSGI ``start_response`` callable.
+    :returns: the response iterable, either from the ARA application or from
+        ``bad_request`` when the URL, the directory or the database is not
+        usable or when ARA fails to start.
+    """
+    tmpdir_max_age = int(environ.get('ARA_WSGI_TMPDIR_MAX_AGE',
+                                     TMPDIR_MAX_AGE))
+    log_root = os.path.abspath(environ.get('ARA_WSGI_LOG_ROOT', LOG_ROOT))
+    database_directory = environ.get('ARA_WSGI_DATABASE_DIRECTORY',
+                                     DATABASE_DIRECTORY)
+
+    # REQUEST_URI is a web server extension and not a standard WSGI key:
+    # rebuild the URL path from the standard keys when it is absent.
+    request = environ.get('REQUEST_URI')
+    if not request:
+        request = (environ.get('SCRIPT_NAME', '') +
+                   environ.get('PATH_INFO', ''))
+
+    # The directory name is matched literally, hence re.escape().
+    pattern = '/(?P<path>.*/{}/)'.format(re.escape(database_directory))
+    match = re.search(pattern, request)
+    if not match:
+        return bad_request(environ, start_response,
+                           'No "/{}/" in URL.'.format(database_directory))
+
+    path = os.path.abspath(os.path.join(log_root, match.group('path')))
+
+    # Ensure we don't escape outside the log root and we are looking at a
+    # valid directory. Both sides carry a trailing separator so that a
+    # sibling directory which merely shares the log root as a string prefix
+    # is not mistaken for a subdirectory of it.
+    inside_root = (path + os.sep).startswith(os.path.join(log_root, ''))
+    if not inside_root or not os.path.isdir(path):
+        logger.error('Directory access violation: %s', path)
+        return bad_request(environ, start_response, 'No directory found.')
+
+    database = os.path.join(path, 'ansible.sqlite')
+    if not os.path.isfile(database):
+        return bad_request(environ, start_response, 'No ARA database found.')
+
+    # ARA and Ansible (when loading configuration) both expect a directory
+    # they are able to write to, this can be safely discarded.
+    # Nothing is read from here and there is therefore no security risk.
+    # It needs to be at a known location in order to be able to clean it up
+    # so it doesn't accumulate needless directories and files.
+    # TODO: ARA 1.0 no longer requires temporary directories, clean this up.
+    tmpdir = '/tmp/ara_wsgi_sqlite'  # nosec
+    # Periodically delete this directory to avoid accumulating directories
+    # and files endlessly. The directory may be missing, or may vanish while
+    # another worker cleans it up: neither is an error.
+    try:
+        expired = time.time() - tmpdir_max_age > os.path.getmtime(tmpdir)
+    except OSError:
+        expired = False
+    if expired:
+        shutil.rmtree(tmpdir, ignore_errors=True)
+    os.environ['ANSIBLE_LOCAL_TEMP'] = os.path.join(tmpdir, '.ansible')
+    os.environ['ARA_DIR'] = os.path.join(tmpdir, '.ara')
+
+    # Path to the ARA database
+    os.environ['ARA_DATABASE'] = 'sqlite:///{}'.format(database)
+
+    # Imported here, once the environment variables above have been set.
+    from ara.webapp import create_app
+    try:
+        app = create_app()
+        app.config['APPLICATION_ROOT'] = match.group('path')
+        return app(environ, start_response)
+    except Exception as e:
+        # We're staying relatively vague on purpose to avoid disclosure:
+        # details go to the log, not to the client.
+        logger.error('ARA bootstrap failure for %s: %s', database, e)
+        return bad_request(environ, start_response, 'ARA bootstrap failure.')
+
+
+def main():
+    """Return the WSGI application callable."""
+    return application
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
new file mode 100644
index 00000000..4abdbb61
--- /dev/null
+++ b/doc/source/advanced.rst
@@ -0,0 +1,124 @@
+.. _advanced_configuration:
+
+Serving ARA sqlite databases over http
+======================================
+
+Hosting statically generated reports is not very efficient at a large scale.
+The reports are relatively small in size but can contain thousands of files if
+you are generating a report that contains thousands of tasks.
+
+However, using a centralized database (such as MySQL) might not be optimal
+either. Perhaps due to the latency or maybe because of the concurrency of the
+runs.
+It is also possible you are not interested in aggregating data in the first
+place and would rather keep individual reports.
+
+ARA ships a bundled WSGI middleware, ``wsgi_sqlite.py``.
+ +This middleware allows you to store your ``ansible.sqlite`` databases on a +web server (for example, a logserver for your CI jobs) and load these databases +on the fly without needing to generate static reports. + +It works by matching a requested URL +(ex: ``http://logserver/some/path/ara-report``) against the filesystem location +(ex: ``/srv/static/logs/some/path/ara-report/ansible.sqlite``) and loading +ARA's web application so that it reads from the database directly. + +To put this use case into perspective, it was "benchmarked" against a single +job from the OpenStack-Ansible_ project: + +- 4 playbooks +- 4647 tasks +- 4760 results +- 53 hosts, of which 39 had gathered host facts +- 416 saved files + +Generating a static report from that database takes ~1min30s on an average +machine. It weighs 63MB (27MB recursively gzipped), contains 5321 files and +5243 directories. + +This middleware allows you to host the exact same report on your web server +just by storing the sqlite database which is just one file and weighs 5.6MB. + +.. _OpenStack-Ansible: https://github.com/openstack/openstack-ansible + +wsgi_sqlite configuration +------------------------- + +Configuration for the ``wsgi_sqlite.py`` script can be done through environment +variables, for example with Apache's ``SetEnv`` directive. + +ARA_WSGI_USE_VIRTUALENV +~~~~~~~~~~~~~~~~~~~~~~~ + +Enable virtual environment usage if ARA is installed in a virtual +environment. You will need to set ``ARA_WSGI_VIRTUALENV_PATH`` if enabling +this. + +Defaults to ``0``, set to ``1`` to enable. + +ARA_WSGI_VIRTUALENV_PATH +~~~~~~~~~~~~~~~~~~~~~~~~ + +When using a virtual environment, where the virtualenv is located. +Defaults to ``None``, set to the absolute path of your virtualenv. + +ARA_WSGI_TMPDIR_MAX_AGE +~~~~~~~~~~~~~~~~~~~~~~~ + +This WSGI middleware creates temporary directories which should be +discarded on a regular basis to avoid them accumulating. 
+This is a duration, in seconds, before cleaning directories up. + +Defaults to ``3600``. + +ARA_WSGI_LOG_ROOT +~~~~~~~~~~~~~~~~~ + +Absolute path on the filesystem that matches the ``DocumentRoot`` of your +webserver vhost. + +For a ``DocumentRoot`` of ``/srv/static/logs``, this value should be +``/srv/static/logs``. + +Defaults to ``/srv/static/logs``. + +ARA_WSGI_DATABASE_DIRECTORY +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Subdirectory in which ARA sqlite databases are expected to reside. +For example, ``ara-report`` would expect: +``http://logserver/some/path/ara-report/ansible.sqlite``. + +This variable should match the ``WSGIScriptAliasMatch`` pattern of your +webserver vhost. + +Defaults to ``ara-report``. + +Using wsgi_sqlite with Apache's mod_wsgi +---------------------------------------- + +The vhost requires you to redirect requests to ``*/ara-report/*`` to the WSGI +middleware. In order to do so, the vhost must look like the following:: + + <VirtualHost *:80> + # Remember that DocumentRoot and ARA_WSGI_LOG_ROOT must match + DocumentRoot /srv/static/logs + ServerName logs.domain.tld + + ErrorLog /var/log/httpd/logs.domain.tld-error.log + LogLevel warn + CustomLog /var/log/httpd/logs.domain.tld-access.log combined + + SetEnv ARA_WSGI_TMPDIR_MAX_AGE 3600 + SetEnv ARA_WSGI_LOG_ROOT /srv/static/logs + SetEnv ARA_WSGI_DATABASE_DIRECTORY ara-report + WSGIDaemonProcess ara user=apache group=apache processes=4 threads=1 + WSGIScriptAliasMatch ^.*/ara-report /var/www/cgi-bin/ara-wsgi-sqlite + </VirtualHost> + +You'll notice the ``WSGIScriptAliasMatch`` directive pointing to the WSGI +script. 
This is bundled when installing ARA and can be copied to the location +of your choice by doing:: + + cp -p $(which ara-wsgi-sqlite) /var/www/cgi-bin/ diff --git a/doc/source/index.rst b/doc/source/index.rst index 305f1f5d..620edf24 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -12,6 +12,7 @@ Table of Contents Installation Configuration Web Server configuration + Advanced use cases Usage Contributing Manifesto: Project core values diff --git a/setup.cfg b/setup.cfg index 4528cf0b..50015dd9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,6 +51,7 @@ console_scripts = wsgi_scripts = ara-wsgi = ara.wsgi:main + ara-wsgi-sqlite = ara.wsgi_sqlite:main ara.cli = data list = ara.cli.data:DataList