Add distributed_sqlite db backend and WSGI app

This new feature is an implementation equivalent to the sqlite
middleware from 0.x but in Django instead of Flask.

It uses a WSGI middleware to map a URL to a location on
the filesystem which is then loaded by the database backend.

Fixes: https://github.com/ansible-community/ara/issues/57

Co-Authored-By: Florian Apolloner <florian@apolloner.eu>
Co-Authored-By: David Moreau-Simard <dmsimard@redhat.com>
Change-Id: I338dae2fabda2af7fd0a75c795c7b4fd6b179940
This commit is contained in:
Florian Apolloner 2019-06-14 10:00:34 +02:00 committed by David Moreau Simard
parent ca5927fc1c
commit 9b0c08f77a
No known key found for this signature in database
GPG Key ID: CBEB466764A9E621
16 changed files with 417 additions and 4 deletions

View File

@ -11,6 +11,7 @@
- ansible-role-ara-api-ubuntu-2.6
- ansible-role-ara-api-ubuntu-postgresql
- ansible-role-ara-api-ubuntu-mysql
- ansible-role-ara-api-fedora-distributed-sqlite
- ansible-role-ara-web-ubuntu
- ansible-role-ara-web-fedora
- ara-tox-linters
@ -23,6 +24,7 @@
- ansible-role-ara-api-ubuntu-2.6
- ansible-role-ara-api-ubuntu-postgresql
- ansible-role-ara-api-ubuntu-mysql
- ansible-role-ara-api-fedora-distributed-sqlite
- ansible-role-ara-web-ubuntu
- ansible-role-ara-web-fedora
- ara-tox-linters

View File

@ -85,6 +85,13 @@
pre-run: tests/install_docker.yaml
run: tests/with_mysql.yaml
# Runs tests/with_distributed_sqlite.yaml on top of the
# ansible-role-ara-api-fedora job definition.
- job:
    name: ansible-role-ara-api-fedora-distributed-sqlite
    parent: ansible-role-ara-api-fedora
    run: tests/with_distributed_sqlite.yaml
    roles:
      - zuul: zuul/zuul-jobs
# ara-web jobs
- job:
name: ansible-role-ara-web-base

View File

View File

@ -0,0 +1,34 @@
# Copyright (c) 2019 Red Hat, Inc.
#
# This file is part of ARA Records Ansible.
#
# ARA is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ARA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ARA. If not, see <http://www.gnu.org/licenses/>.
import threading
from django.db.backends.sqlite3.base import DatabaseWrapper as BaseDatabaseWrapper
# Thread-local holder for the per-request database path: DatabaseWrapper below
# reads its ``db_path`` attribute when opening a connection.
local_storage = threading.local()
class DatabaseWrapper(BaseDatabaseWrapper):
    """
    sqlite database backend whose database file can be swapped at runtime.

    Companion of ara.server.wsgi.distributed_sqlite: when ``local_storage.db_path``
    is set for the current thread, new connections are opened against that file
    instead of the configured database.
    """

    def get_new_connection(self, conn_params):
        """
        Open a new connection, honouring the thread's database override.

        ``conn_params["database"]`` is replaced only when ``local_storage.db_path``
        exists and is non-empty; the parameters are then handed to the parent
        sqlite3 backend unchanged otherwise.
        """
        override = getattr(local_storage, "db_path", None)
        if override:
            conn_params["database"] = override
        return super().get_new_connection(conn_params)

View File

@ -87,6 +87,25 @@ def get_secret_key():
SECRET_KEY = get_secret_key()
# Whether or not to enable the distributed sqlite database backend and WSGI application.
DISTRIBUTED_SQLITE = settings.get("DISTRIBUTED_SQLITE", False)

# Under which URL should requests be delegated to the distributed sqlite wsgi application
DISTRIBUTED_SQLITE_PREFIX = settings.get("DISTRIBUTED_SQLITE_PREFIX", "ara-api")

# Root directory under which databases will be found relative to the requested URLs.
# This will restrict where the WSGI application will go to seek out databases.
# For example, the URL "example.org/some/path/ara-api" would translate to
# "/var/www/logs/some/path/ara-api" instead of "/some/path/ara-api".
DISTRIBUTED_SQLITE_ROOT = settings.get("DISTRIBUTED_SQLITE_ROOT", "/var/www/logs")

# The WSGI entry point and the *default* database engine both follow
# DISTRIBUTED_SQLITE. An explicitly configured DATABASE_ENGINE always wins.
if DISTRIBUTED_SQLITE:
    WSGI_APPLICATION = "ara.server.wsgi.distributed_sqlite"
    DATABASE_ENGINE = settings.get("DATABASE_ENGINE", "ara.server.db.backends.distributed_sqlite")
else:
    WSGI_APPLICATION = "ara.server.wsgi.application"
    DATABASE_ENGINE = settings.get("DATABASE_ENGINE", "django.db.backends.sqlite3")

# We're not expecting ARA to use multiple concurrent databases.
# Make it easier for users to specify the configuration for a single database.
# DATABASE_ENGINE is resolved by the conditional above; reading it again here
# with the plain sqlite3 default would discard the distributed_sqlite default.
@ -185,7 +204,6 @@ STATIC_ROOT = settings.get("STATIC_ROOT", os.path.join(BASE_DIR, "www", "static"
MEDIA_URL = settings.get("MEDIA_URL", "/media/")
MEDIA_ROOT = settings.get("MEDIA_ROOT", os.path.join(BASE_DIR, "www", "media"))

# WSGI_APPLICATION is deliberately not assigned here: it is selected earlier in
# this module according to DISTRIBUTED_SQLITE, and assigning it again would
# silently switch the distributed sqlite WSGI application back off.
ROOT_URLCONF = "ara.server.urls"
APPEND_SLASH = False
@ -237,6 +255,9 @@ if not os.path.exists(DEFAULT_SETTINGS) and "ARA_SETTINGS" not in os.environ:
READ_LOGIN_REQUIRED=READ_LOGIN_REQUIRED,
WRITE_LOGIN_REQUIRED=WRITE_LOGIN_REQUIRED,
PAGE_SIZE=PAGE_SIZE,
DISTRIBUTED_SQLITE=DISTRIBUTED_SQLITE,
DISTRIBUTED_SQLITE_PREFIX=DISTRIBUTED_SQLITE_PREFIX,
DISTRIBUTED_SQLITE_ROOT=DISTRIBUTED_SQLITE_ROOT,
)
with open(DEFAULT_SETTINGS, "w+") as settings_file:
comment = f"""

View File

@ -15,15 +15,91 @@
# You should have received a copy of the GNU General Public License
# along with ARA. If not, see <http://www.gnu.org/licenses/>.
import logging
import os
from ara.setup.exceptions import MissingDjangoException
try:
from django.core.wsgi import get_wsgi_application
from django.core.handlers.wsgi import get_path_info, get_script_name
except ImportError as e:
raise MissingDjangoException from e
# Default to ARA's settings module unless DJANGO_SETTINGS_MODULE is already
# set in the environment. This is done before the WSGI application is built below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ara.server.settings")
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# The default WSGI application
application = get_wsgi_application()
def handle_405(start_response):
    """
    Answer a request with "405 Method Not Allowed".

    Calls the WSGI ``start_response`` callable with the status line and an HTML
    content type, then returns the response body as a single-item list of bytes.
    """
    status = "405 Method Not Allowed"
    headers = [("content-type", "text/html")]
    start_response(status, headers)
    body = b"<h1>Method Not Allowed</h1><p>This endpoint is read only.</p>"
    return [body]
def handle_404(start_response):
    """
    Answer a request with "404 Not Found".

    Calls the WSGI ``start_response`` callable with the status line and an HTML
    content type, then returns the response body as a single-item list of bytes.
    """
    status = "404 Not Found"
    headers = [("content-type", "text/html")]
    start_response(status, headers)
    body = b"<h1>Not Found</h1><p>The requested resource was not found on this server.</p>"
    return [body]
def distributed_sqlite(environ, start_response):
    """
    Custom WSGI application meant to work with ara.server.db.backends.distributed_sqlite
    in order to dynamically load different databases at runtime.

    The request path up to and including settings.DISTRIBUTED_SQLITE_PREFIX is
    mapped to a directory under settings.DISTRIBUTED_SQLITE_ROOT. When that
    directory holds an ``ansible.sqlite`` file, the request is handed to the
    default Django application with SCRIPT_NAME/PATH_INFO rewritten and the
    database path published through thread-local storage.

    Args:
        environ: the WSGI environment of the request (modified in place when
            the request is delegated).
        start_response: the WSGI ``start_response`` callable.

    Returns:
        The WSGI response iterable: the Django application's response, or the
        body produced by handle_404/handle_405 when the request is refused.
    """
    # This endpoint is read only, do not accept write requests.
    # The return matters: without it the request would fall through and be served.
    if environ["REQUEST_METHOD"] not in ("GET", "HEAD", "OPTIONS"):
        return handle_405(start_response)

    script_name = get_script_name(environ)
    path_info = get_path_info(environ)

    from django.conf import settings

    # The root under which database files are expected. Normalized so the
    # containment check below compares like with like (trailing slash, "..").
    root = os.path.abspath(settings.DISTRIBUTED_SQLITE_ROOT)
    # The prefix after which everything should be delegated (ex: /ara-api)
    prefix = settings.DISTRIBUTED_SQLITE_PREFIX

    # Static assets should always be served by the regular app
    if path_info.startswith(settings.STATIC_URL):
        return application(environ, start_response)

    if prefix not in path_info:
        logger.warning("Ignoring request: URL does not contain delegated prefix (%s)", prefix)
        return handle_404(start_response)

    # Slice path_info up until after the prefix to obtain the requested directory
    i = path_info.find(prefix) + len(prefix)
    fs_path = path_info[:i]

    # Make sure we aren't escaping outside the root and the directory exists.
    # commonpath compares whole path components, so a sibling directory such as
    # "<root>-other" is not mistaken for something inside the root.
    db_dir = os.path.abspath(os.path.join(root, fs_path.lstrip("/")))
    if os.path.commonpath([root, db_dir]) != root:
        logger.warning("Ignoring request: path is outside the root (%s)", db_dir)
        return handle_404(start_response)
    elif not os.path.exists(db_dir):
        logger.warning("Ignoring request: database directory not found (%s)", db_dir)
        return handle_404(start_response)

    # Find the database file and make sure it exists
    db_file = os.path.join(db_dir, "ansible.sqlite")
    if not os.path.exists(db_file):
        logger.warning("Ignoring request: database file not found (%s)", db_file)
        return handle_404(start_response)

    # Tell Django about the new URLs it should be using
    environ["SCRIPT_NAME"] = script_name + fs_path
    environ["PATH_INFO"] = path_info[len(fs_path) :]  # noqa: E203

    # Store the path of the database in a thread so the distributed_sqlite
    # database backend can retrieve it.
    from ara.server.db.backends.distributed_sqlite.base import local_storage

    local_storage.db_path = db_file
    try:
        return application(environ, start_response)
    finally:
        # Never let the path leak into the next request served by this thread.
        del local_storage.db_path

View File

@ -44,6 +44,12 @@ For more details, click on the configuration parameters.
+--------------------------------+--------------------------------------------------------+------------------------------------------------------+
| ARA_DEBUG_ | ``False`` | Django's DEBUG_ setting |
+--------------------------------+--------------------------------------------------------+------------------------------------------------------+
| ARA_DISTRIBUTED_SQLITE_ | ``False`` | Whether to enable distributed sqlite backend |
+--------------------------------+--------------------------------------------------------+------------------------------------------------------+
| ARA_DISTRIBUTED_SQLITE_PREFIX_ | ``ara-api`` | Prefix to delegate to the distributed sqlite backend |
+--------------------------------+--------------------------------------------------------+------------------------------------------------------+
| ARA_DISTRIBUTED_SQLITE_ROOT_ | ``/var/www/logs`` | Root under which sqlite databases are expected |
+--------------------------------+--------------------------------------------------------+------------------------------------------------------+
| ARA_ENV_ | ``default`` | Environment to load configuration for |
+--------------------------------+--------------------------------------------------------+------------------------------------------------------+
| ARA_LOGGING_ | See ARA_LOGGING_ | Logging configuration |
@ -157,8 +163,10 @@ ARA_DATABASE_ENGINE
- **Default**: ``django.db.backends.sqlite3``
- **Examples**:
- ``django.db.backends.sqlite3``
- ``django.db.backends.postgresql``
- ``django.db.backends.mysql``
- ``ara.server.db.backends.distributed_sqlite``
The Django database driver to use.
@ -245,6 +253,57 @@ Whether or not Django's debug mode should be enabled.
The Django project recommends turning this off for production use.
ARA_DISTRIBUTED_SQLITE
~~~~~~~~~~~~~~~~~~~~~~
- **Environment variable**: ``ARA_DISTRIBUTED_SQLITE``
- **Configuration file variable**: ``DISTRIBUTED_SQLITE``
- **Provided by**: ``ara.server.db.backends.distributed_sqlite`` and ``ara.server.wsgi.distributed_sqlite``
- **Type**: ``bool``
- **Default**: ``False``
Whether or not to enable the distributed sqlite database backend and WSGI application.
This feature is useful for loading different ARA sqlite databases dynamically
based on request URLs.
For more information, see: :ref:`distributed sqlite backend <distributed-sqlite-backend>`.
ARA_DISTRIBUTED_SQLITE_PREFIX
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Environment variable**: ``ARA_DISTRIBUTED_SQLITE_PREFIX``
- **Configuration file variable**: ``DISTRIBUTED_SQLITE_PREFIX``
- **Provided by**: ``ara.server.db.backends.distributed_sqlite`` and ``ara.server.wsgi.distributed_sqlite``
- **Type**: ``string``
- **Default**: ``ara-api``
The URL prefix under which requests are delegated to the distributed sqlite WSGI application.
For example, ``ara-api`` would delegate everything under ``.*/ara-api/.*``.
The directory matching this prefix must contain the ``ansible.sqlite`` database file, for example:
``/var/www/logs/some/path/ara-api/ansible.sqlite``.
For more information, see: :ref:`distributed sqlite backend <distributed-sqlite-backend>`.
ARA_DISTRIBUTED_SQLITE_ROOT
~~~~~~~~~~~~~~~~~~~~~~~~~~~
- **Environment variable**: ``ARA_DISTRIBUTED_SQLITE_ROOT``
- **Configuration file variable**: ``DISTRIBUTED_SQLITE_ROOT``
- **Provided by**: ``ara.server.db.backends.distributed_sqlite`` and ``ara.server.wsgi.distributed_sqlite``
- **Type**: ``string``
- **Default**: ``/var/www/logs``
Root directory under which databases will be found relative to the requested URLs.
This will restrict where the WSGI application will go to seek out databases.
For example, the URL ``example.org/some/path/ara-api`` would translate to
``/var/www/logs/some/path/ara-api``.
For more information, see: :ref:`distributed sqlite backend <distributed-sqlite-backend>`.
ARA_ENV
~~~~~~~

View File

@ -0,0 +1,88 @@
.. _distributed-sqlite-backend:
Distributed sqlite database backend
===================================
The ARA API server provides an optional backend that dynamically loads sqlite
databases based on the requested URL with the help of a WSGI application
middleware.
In summary, it maps a URL such as ``http://example.org/some/path/ara-api`` to
a location on the file system like ``/var/www/logs/some/path/ara-api`` and
loads an ``ansible.sqlite`` database from that directory, if it exists.
.. note::
This backend is not enabled by default and is designed with a specific range
of use cases in mind. This documentation attempts to explain if this might
be a good fit for you.
Use case
--------
Running at least one Ansible playbook with the ARA Ansible callback plugin
enabled will generate a database at ``~/.ara/server/ansible.sqlite`` by default.
sqlite, in the context of ARA, is good enough for most use cases:
- it is portable: everything the API server needs is in a single file that you can upload anywhere
- no network dependency or latency: sqlite is on your filesystem and doesn't rely on a remote database server
- relatively lightweight: Ansible's own integration tests used ~13MB for 415 playbooks, 1935 files, 12456 tasks, 12762 results, 586 hosts (and host facts)
However, since write concurrency does not scale very well with sqlite, it might
not be a good fit if you plan on having a single API server handle data for
multiple ``ansible-playbook`` commands running at the same time.
The distributed sqlite database backend and WSGI middleware provide an
alternative to work around this limitation.
This approach works best if it makes sense to logically split your playbooks
into different databases. One such example is in continuous integration (CI)
where you might have multiple jobs running Ansible playbooks concurrently.
If each CI job is recording to its own database, you probably no longer have
write concurrency issues and the database can be uploaded in your logs or as an
artifact after the job has been completed.
The file hierarchy on your log or artifact server might end up looking like
this::
/var/www/logs/
├── 1
│   ├── ara-api
│   │   └── ansible.sqlite
│   └── console.txt
├── 2
│   ├── logs.tar.gz
│   └── some
│       └── path
│           └── ara-api
│               └── ansible.sqlite
└── 3
    ├── builds.txt
    ├── dev
    │   └── ara-api
    │       └── ansible.sqlite
    └── prod
        └── ara-api
            └── ansible.sqlite
With the above example file tree, a single instance of the API server with the
distributed sqlite backend enabled would be able to respond to queries at the
following endpoints:
- http://example.org/1/ara-api
- http://example.org/2/some/path/ara-api
- http://example.org/3/dev/ara-api
- http://example.org/3/prod/ara-api
Configuration
-------------
For enabling and configuring the distributed sqlite backend, see:
- :ref:`ARA_DISTRIBUTED_SQLITE <api-configuration:ARA_DISTRIBUTED_SQLITE>`
- :ref:`ARA_DISTRIBUTED_SQLITE_PREFIX <api-configuration:ARA_DISTRIBUTED_SQLITE_PREFIX>`
- :ref:`ARA_DISTRIBUTED_SQLITE_ROOT <api-configuration:ARA_DISTRIBUTED_SQLITE_ROOT>`
When recording data to a sqlite database, the location of the database can be
defined with :ref:`ARA_DATABASE_NAME <api-configuration:ARA_DATABASE_NAME>`.

View File

@ -16,6 +16,7 @@ Table of Contents
API: Authentication and security <api-security>
API: Endpoint documentation <api-documentation>
API: Usage with built-in clients <api-usage>
API: Distributed sqlite backend <distributed-sqlite-backend>
Setting playbook names and labels <playbook-names-and-labels>
Recording arbitrary data in playbooks <ara-record>
Contributing to ARA <contributing>

View File

@ -152,8 +152,17 @@ ara_api_debug: false
# Note: If no key is provided, a random one will be generated once and persisted
ara_api_secret_key: null
# ARA_DISTRIBUTED_SQLITE - Whether to enable distributed sqlite backend
ara_api_distributed_sqlite: false

# ARA_DISTRIBUTED_SQLITE_PREFIX - Prefix to delegate to the distributed sqlite backend
ara_api_distributed_sqlite_prefix: ara-api

# ARA_DISTRIBUTED_SQLITE_ROOT - Root under which sqlite databases are expected
ara_api_distributed_sqlite_root: /var/www/logs

# ARA_DATABASE_ENGINE - Django's ENGINE database setting
# Defined exactly once: the default follows ara_api_distributed_sqlite, which is
# cast with "bool" so that string values such as "false" are not treated as true.
ara_api_database_engine: "{{ ara_api_distributed_sqlite | bool | ternary('ara.server.db.backends.distributed_sqlite', 'django.db.backends.sqlite3') }}"
# ARA_DATABASE_NAME - Django's NAME database setting
ara_api_database_name: "{{ ara_api_base_dir }}/ansible.sqlite"

View File

@ -59,6 +59,15 @@
ara_api_secret_key: "{{ generated_key.stdout }}"
no_log: yes
# Fail early when the distributed sqlite backend is enabled but the database
# engine has been overridden with something else. The condition is cast with
# "bool" so that a string value such as "false" does not trigger the check.
- name: Validate distributed sqlite configuration
  assert:
    that:
      - "ara_api_database_engine == 'ara.server.db.backends.distributed_sqlite'"
    msg: |
      The database engine should be 'ara.server.db.backends.distributed_sqlite'
      when 'ara_api_distributed_sqlite' is true.
  when: ara_api_distributed_sqlite | bool
# Put configuration in a format we can write to a file
- name: Reconcile configuration
vars:
@ -75,6 +84,9 @@
DATABASE_PORT: "{{ ara_api_database_port }}"
DATABASE_CONN_MAX_AGE: "{{ ara_api_database_conn_max_age }}"
DEBUG: "{{ ara_api_debug }}"
DISTRIBUTED_SQLITE: "{{ ara_api_distributed_sqlite }}"
DISTRIBUTED_SQLITE_PREFIX: "{{ ara_api_distributed_sqlite_prefix }}"
DISTRIBUTED_SQLITE_ROOT: "{{ ara_api_distributed_sqlite_root }}"
LOGGING: "{{ ara_api_logging }}"
LOG_LEVEL: "{{ ara_api_log_level }}"
SECRET_KEY: "{{ ara_api_secret_key }}"

View File

@ -0,0 +1,22 @@
---
# Copyright (c) 2019 Red Hat, Inc.
#
# This file is part of ARA Records Ansible.
#
# ARA Records Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ARA Records Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ARA Records Ansible. If not, see <http://www.gnu.org/licenses/>.
# Record the module:callable that the WSGI application servers should load,
# here the distributed sqlite entry point.
- name: Set WSGI application to distributed_sqlite
  set_fact:
    _ara_api_wsgi_application: 'ara.server.wsgi:distributed_sqlite'

View File

@ -8,9 +8,9 @@ RuntimeDirectory=ara-api
WorkingDirectory={{ ara_api_root_dir }}
Environment=ARA_SETTINGS={{ ara_api_settings }}
# Exactly one ExecStart= per branch; the WSGI application falls back to
# ara.server.wsgi when _ara_api_wsgi_application is not set.
{% if ara_api_venv %}
ExecStart={{ ara_api_venv_path }}/bin/gunicorn --workers=4 --bind {{ ara_api_wsgi_bind }} {{ _ara_api_wsgi_application | default('ara.server.wsgi') }}
{% else %}
ExecStart=gunicorn --workers=4 --bind {{ ara_api_wsgi_bind }} {{ _ara_api_wsgi_application | default('ara.server.wsgi') }}
{% endif %}
ExecReload=/bin/kill -s HUP $MAINPID
ExecStop=/bin/kill -s TERM $MAINPID

View File

@ -60,6 +60,8 @@
ANSIBLE_CALLBACK_PLUGINS: "{{ ara_setup_plugins.stdout }}/callback"
ANSIBLE_ACTION_PLUGINS: "{{ ara_setup_plugins.stdout }}/action"
ARA_SETTINGS: "{{ ara_api_settings }}"
ARA_API_CLIENT: "{{ ara_api_client | default('offline') }}"
ARA_API_SERVER: "{{ ara_api_server | default('http://127.0.0.1:8000') }}"
vars:
_ansible_playbook: "{{ ara_api_venv_path }}/bin/ansible-playbook -vvv"
_test_root: "{{ ara_api_source_checkout }}/tests/integration"

View File

@ -0,0 +1,80 @@
# Copyright (c) 2019 Red Hat, Inc.
#
# This file is part of ARA Records Ansible.
#
# ARA is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ARA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ARA. If not, see <http://www.gnu.org/licenses/>.
# Integration test for the distributed sqlite backend:
#   1. install the API with the default sqlite backend and record test data,
#   2. re-run the role with the distributed sqlite backend enabled,
#   3. copy the recorded database under several directories of the root,
#   4. query the API once per directory.
- name: Test the ARA API with the distributed sqlite backend
  hosts: all
  gather_facts: true
  vars:
    ara_tests_cleanup: true
    ara_api_root_dir: "{{ ansible_user_dir }}/.ara-tests"
    ara_api_distributed_sqlite_root: "{{ ansible_user_dir }}/.ara-tests"
    ara_api_wsgi_server: gunicorn
    ara_api_secret_key: testing
    ara_api_debug: true
    ara_api_log_level: DEBUG
    ara_api_server: "http://127.0.0.1:8000"
    ara_api_client: http
    # Directories, relative to the distributed sqlite root, that each receive
    # a copy of the database and are then queried through the API.
    _test_directories:
      - "tests/some/path/ara-api"
      - "tests/dev/ara-api"
      - "tests/prod/ara-api"
  tasks:
    # Generate ourselves a fresh database to run tests with
    - name: Set up the API with the default sqlite backend
      include_role:
        name: ara_api
        public: true

    # These are tasks rather than a standalone playbook to give us an easy
    # access to all the variables within the same play.
    - name: Run the test tasks
      include_tasks: test_tasks.yaml

    - name: Enable the distributed sqlite backend
      vars:
        ara_api_distributed_sqlite: true
      include_role:
        name: ara_api
        public: true

    - name: Ensure there are no pending handlers
      meta: flush_handlers

    - name: Create test directories
      file:
        path: "{{ ara_api_distributed_sqlite_root }}/{{ item }}"
        state: directory
        recurse: true
      loop: "{{ _test_directories }}"

    - name: Copy the database to the test directories
      copy:
        src: "{{ ara_api_database_name }}"
        dest: "{{ ara_api_distributed_sqlite_root }}/{{ item }}/ansible.sqlite"
        remote_src: true
      loop: "{{ _test_directories }}"

    # Uses the same server address the play configures for the API client.
    - name: Test that the API works
      uri:
        url: "{{ ara_api_server }}/{{ item }}"
        return_content: true
      register: api_test
      loop: "{{ _test_directories }}"

    - name: Print API responses for debug purposes
      debug:
        msg: "{{ item['json'] | to_nice_json }}"
      loop: "{{ api_test.results }}"