diff --git a/ansible/roles/check-services/tasks/main.yml b/ansible/roles/check-services/tasks/main.yml index c5a0e82..e0ef7ef 100644 --- a/ansible/roles/check-services/tasks/main.yml +++ b/ansible/roles/check-services/tasks/main.yml @@ -7,11 +7,19 @@ ### BUILD CONTAINER IMAGES ### - name: Build container images block: - - name: Build logscraper container image + - name: Build logscraper container image - Zuul shell: > podman build -t quay.io/logscraper:dev -f Dockerfile args: chdir: "{{ zuul.projects['opendev.org/openstack/ci-log-processing'].src_dir }}" + when: zuul is defined + + - name: Build logscraper container image - non Zuul + shell: > + podman build -t quay.io/logscraper:dev -f Dockerfile + args: + chdir: "{{ playbook_dir }}" + when: zuul is not defined - name: Get logscraper image id shell: | @@ -23,6 +31,14 @@ podman build -t quay.io/loggearman:dev -f loggearman/Dockerfile args: chdir: "{{ zuul.projects['opendev.org/openstack/ci-log-processing'].src_dir }}" + when: zuul is defined + + - name: Build loggearman container image - non Zuul + shell: > + podman build -t quay.io/loggearman:dev -f loggearman/Dockerfile + args: + chdir: "{{ playbook_dir }}" + when: zuul is not defined - name: Get loggearman image id shell: | @@ -38,7 +54,6 @@ container_images: logscraper: "{{ _logscraper_image_id.stdout }}" loggearman: "{{ _loggearman_image_id.stdout }}" - when: zuul is defined ### OPENSEARCH #### - name: Setup Opensearch diff --git a/ansible/roles/logscraper/README.rst b/ansible/roles/logscraper/README.rst index 821facc..d5f353b 100644 --- a/ansible/roles/logscraper/README.rst +++ b/ansible/roles/logscraper/README.rst @@ -60,10 +60,10 @@ and second one for getting logs from `sometenant` tenant. zuul_api_url: https://zuul.opendev.org/api/tenant/openstack insecure: False - tenant: sometenant - gearman_port: 4731 - gearman_server: someproject.org zuul_api_url: https://zuul.opendev.org/api/tenant/sometenant insecure: True + download: true + download_dir: /mnt/logscraper roles: - logscraper diff --git a/ansible/roles/logscraper/defaults/main.yml b/ansible/roles/logscraper/defaults/main.yml index 7a34351..1610ad4 100644 --- a/ansible/roles/logscraper/defaults/main.yml +++ b/ansible/roles/logscraper/defaults/main.yml @@ -15,10 +15,10 @@ container_images: # gearman_port: 4731 # gearman_server: logstash.openstack.org # zuul_api_url: https://zuul.opendev.org/api/tenant/openstack -# insecure: False +# insecure: false # - tenant: sometenant -# gearman_port: 4731 -# gearman_server: logstash.openstack.org # zuul_api_url: https://zuul.opendev.org/api/tenant/sometenant -# insecure: True +# insecure: true +# download: true +# download_dir: /mnt/logscraper/sometenant tenant_builds: [] diff --git a/ansible/roles/logscraper/tasks/service.yml b/ansible/roles/logscraper/tasks/service.yml index 03a2528..43ccb8c 100644 --- a/ansible/roles/logscraper/tasks/service.yml +++ b/ansible/roles/logscraper/tasks/service.yml @@ -1,4 +1,13 @@ --- +- name: Create logscraper download directory + file: + path: "{{ item.download_dir }}" + state: directory + recurse: true + owner: "{{ logscraper_user }}" + group: "{{ logscraper_group }}" + when: "'download_dir' in item" + - name: Generate logscraper script template: src: logscraper.sh.j2 diff --git a/ansible/roles/logscraper/templates/logscraper.sh.j2 b/ansible/roles/logscraper/templates/logscraper.sh.j2 index f416504..187eea1 100644 --- a/ansible/roles/logscraper/templates/logscraper.sh.j2 +++ b/ansible/roles/logscraper/templates/logscraper.sh.j2 @@ -8,9 +8,19 @@ --uidmap 
1000:{{ logscraper_uid }}:1 \
       --name logscraper-{{ item.tenant }} \
       --volume {{ logscraper_dir }}:{{ logscraper_dir }}:z \
+      {% if 'download_dir' in item %}
+      --volume {{ item.download_dir }}:{{ item.download_dir }}:z \
+      {% endif %}
       {{ container_images['logscraper'] }} \
+      {% if 'gearman_port' in item and 'gearman_server' in item %}
       --gearman-port {{ item.gearman_port }} \
       --gearman-server {{ item.gearman_server }} \
+      {% else %}
+      --download \
+      {% endif %}
+      {% if 'download_dir' in item %}
+      --directory {{ item.download_dir }} \
+      {% endif %}
       --checkpoint-file {{ item.checkpoint_file | default(logscraper_dir + '/checkpoint') }} \
       --follow \
       --zuul-api-url {{ item.zuul_api_url }}
diff --git a/doc/source/logscraper.rst b/doc/source/logscraper.rst
index dc98f86..c96e6ab 100644
--- a/doc/source/logscraper.rst
+++ b/doc/source/logscraper.rst
@@ -11,6 +11,38 @@ It is available by typing:
 
    logscraper --help
 
+  Fetch and push last Zuul CI job logs into gearman.
+
+  optional arguments:
+    -h, --help            show this help message and exit
+    --zuul-api-url ZUUL_API_URL
+                          URL(s) for Zuul API. Parameter can be set multiple
+                          times.
+    --job-name JOB_NAME   CI job name(s). Parameter can be set multiple times.
+                          If not set it would scrape every latest builds.
+    --gearman-server GEARMAN_SERVER
+                          Gearman host address
+    --gearman-port GEARMAN_PORT
+                          Gearman listen port. Defaults to 4730.
+    --follow              Keep polling zuul builds
+    --insecure            Skip validating SSL cert
+    --checkpoint-file CHECKPOINT_FILE
+                          File that will keep information about last uuid
+                          timestamp for a job.
+    --logstash-url LOGSTASH_URL
+                          When provided, script will check connection to
+                          Logstash service before sending to log processing
+                          system. For example: logstash.local:9999
+    --workers WORKERS     Worker processes for logscraper
+    --max-skipped MAX_SKIPPED
+                          How many job results should be checked until the
+                          last uuid written in checkpoint file is found
+    --debug               Print more information
+    --download            Download logs and do not send them to the gearman
+                          service
+    --directory DIRECTORY
+                          Directory where the logs will be stored. Defaults
+                          to: /tmp/logscraper
+
 Basic usage
 -----------
@@ -43,6 +75,12 @@ Example:
 
    logscraper --gearman-server localhost --job-name tripleo-ci-centos-8-containers-multinode --job-name openstack-tox-linters --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --zuul-api-url https://zuul.opendev.org/api/tenant/local
 
+* download logs to /mnt/logscraper. NOTE: if you are using the container service, this directory needs to be mounted into the container!
+
+.. code-block::
+
+   logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --directory /mnt/logscraper --download
+
 Containerize tool
 -----------------
@@ -62,3 +100,10 @@ to the container, for example:
 
 .. code-block::
 
    docker run -v $(pwd):/checkpoint-dir:z -d logscraper logscraper --gearman-server somehost --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --follow
+
+
+In this example, logscraper will download log files to the /mnt/logscraper directory:
+
+.. code-block::
+
+   docker run -v $(pwd):/checkpoint-dir:z -v /mnt/logscraper:/mnt/logscraper:z -d logscraper logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --directory /mnt/logscraper --download --follow
diff --git a/logscraper/logscraper.py b/logscraper/logscraper.py
index 436b2b8..5510ca1 100755
--- a/logscraper/logscraper.py
+++ b/logscraper/logscraper.py
@@ -23,18 +23,21 @@ The goal is to push recent zuul builds into log gearman processor.
 import argparse
 import gear
+import itertools
 import json
 import logging
 import multiprocessing
+import os
 import requests
 import socket
 import sys
 import time
-import urllib
 import yaml
 
+from concurrent.futures import ThreadPoolExecutor
 from distutils.version import StrictVersion as s_version
 import tenacity
+from urllib.parse import urljoin
 
 
 file_to_check = [
@@ -46,6 +49,8 @@ file_to_check = [
     "var/log/extra/logstash.txt.gz",
     "var/log/extra/errors.txt",
     "var/log/extra/errors.txt.gz",
+    "zuul-info/inventory.yaml",
+    "zuul-info/inventory.yaml.gz"
 ]
 
 # From: https://opendev.org/opendev/base-jobs/src/branch/master/roles/submit-logstash-jobs/defaults/main.yaml  # noqa
@@ -165,8 +170,7 @@ def get_arguments():
                         "set multiple times. If not set it would scrape "
                         "every latest builds.",
                         action='append')
-    parser.add_argument("--gearman-server", help="Gearman host addresss",
-                        required=True)
+    parser.add_argument("--gearman-server", help="Gearman host address")
     parser.add_argument("--gearman-port", help="Gearman listen port. "
                         "Defaults to 4730.",
                         default=4730)
@@ -181,6 +185,7 @@ def get_arguments():
                         "to log processing system. For example: "
                         "logstash.local:9999")
     parser.add_argument("--workers", help="Worker processes for logscraper",
+                        type=int,
                         default=1)
     parser.add_argument("--max-skipped", help="How many job results should be "
                         "checked until last uuid written in checkpoint file "
@@ -188,6 +193,12 @@
                         default=500)
     parser.add_argument("--debug", help="Print more information",
                         action="store_true")
+    parser.add_argument("--download", help="Download logs and do not send "
+                        "them to the gearman service",
+                        action="store_true")
+    parser.add_argument("--directory", help="Directory where the logs will "
+                        "be stored. Defaults to: /tmp/logscraper",
+                        default="/tmp/logscraper")
 
     args = parser.parse_args()
     return args
@@ -292,7 +303,7 @@
         fields["build_newrev"] = result.get("newrev", "UNKNOWN")
 
         fields["node_provider"] = "local"
-        log_url = urllib.parse.urljoin(result["log_url"], filename)
+        log_url = urljoin(result["log_url"], filename)
         fields["log_url"] = log_url
         fields["tenant"] = result["tenant"]
 
@@ -401,17 +412,45 @@ def get_last_job_results(zuul_url, insecure, max_skipped, last_uuid,
 ###############################################################################
 #                             Log scraper                                     #
 ###############################################################################
-def check_specified_files(job_result, insecure):
-    """Return list of specified files if they exists on logserver. """
""" - available_files = [] - for f in file_to_check: - if not job_result["log_url"]: - continue - response = requests_get("%s%s" % (job_result["log_url"], f), - insecure) +def save_build_info(directory, build): + with open("%s/buildinfo" % directory, "w") as text_file: + yaml.dump(build, text_file) + + +def download_file(url, directory, insecure=False): + logging.debug("Started fetching %s" % url) + filename = url.split("/")[-1] + try: + response = requests.get(url, verify=insecure, stream=True) if response.status_code == 200: - available_files.append(f) - return available_files + if directory: + with open("%s/%s" % (directory, filename), 'wb') as f: + for txt in response.iter_content(1024): + f.write(txt) + return filename + except requests.exceptions.ContentDecodingError: + logging.critical("Can not decode content from %s" % url) + + +def check_specified_files(job_result, insecure, directory=None): + """Return list of specified files if they exists on logserver. """ + + args = job_result.get("build_args") + if not job_result["log_url"]: + logging.debug("There is no file to download for build " + "uuid: %s" % job_result["uuid"]) + return + + build_log_urls = [urljoin(job_result["log_url"], s) for s in file_to_check] + + results = [] + pool = ThreadPoolExecutor(max_workers=args.workers) + for page in pool.map(download_file, build_log_urls, + itertools.repeat(directory), + itertools.repeat(insecure)): + if page: + results.append(page) + return results def setup_logging(debug): @@ -425,7 +464,12 @@ def setup_logging(debug): def run_build(build): """Submit job informations into log processing system. """ - args = build.pop("build_args") + args = build.get("build_args") + + # NOTE: if build result is "ABORTED", there is no any + # job result files to parse. Skipping that file. + if build["result"].lower() == 'aborted': + return logging.info( "Processing logs for %s | %s | %s | %s", @@ -435,18 +479,36 @@ def run_build(build): build["uuid"], ) - results = dict(files=[], jobs=[], invocation={}) + if args.download: + logging.debug("Started fetching build logs") + directory = "%s/%s" % (args.directory, build["uuid"]) + try: + if not os.path.exists(directory): + os.makedirs(directory) + except PermissionError: + logging.critical("Can not create directory %s" % directory) + except Exception as e: + logging.critical("Exception occured %s on creating dir %s" % ( + e, directory)) - lmc = LogMatcher( - args.gearman_server, - args.gearman_port, - build["result"], - build["log_url"], - {}, - ) - results["files"] = check_specified_files(build, args.insecure) + check_specified_files(build, args.insecure, directory) + save_build_info(directory, build) + else: + logging.debug("Parsing content for gearman service") + results = dict(files=[], jobs=[], invocation={}) + files = check_specified_files(build, args.insecure) + if not files: + return + results["files"] = files + lmc = LogMatcher( + args.gearman_server, + args.gearman_port, + build["result"], + build["log_url"], + {}, + ) - lmc.submitJobs("push-log", results["files"], build) + lmc.submitJobs("push-log", results["files"], build) def check_connection(logstash_url): @@ -512,6 +574,10 @@ def run(args): def main(): args = get_arguments() setup_logging(args.debug) + if args.download and args.gearman_server and args.gearman_port: + logging.critical("Can not use logscraper to send logs to gearman " + "and dowload logs. 
Choose one") + sys.exit(1) while True: run(args) if not args.follow: diff --git a/logscraper/tests/test_logscraper.py b/logscraper/tests/test_logscraper.py index 104ef6a..5a71b53 100644 --- a/logscraper/tests/test_logscraper.py +++ b/logscraper/tests/test_logscraper.py @@ -103,12 +103,24 @@ builds_result = [{ }] +class _MockedPoolMapResult: + def __init__(self, func, iterable): + self.func = func + self.iterable = iterable + + # mocked results + self._value = [self.func(i) for i in iterable] + + def get(self, timeout=0): + return self._value + + class FakeArgs(object): def __init__(self, zuul_api_url=None, gearman_server=None, gearman_port=None, follow=False, insecure=False, checkpoint_file=None, ignore_checkpoint=None, logstash_url=None, workers=None, max_skipped=None, - job_name=None): + job_name=None, download=None, directory=None): self.zuul_api_url = zuul_api_url self.gearman_server = gearman_server @@ -121,6 +133,8 @@ class FakeArgs(object): self.workers = workers self.max_skipped = max_skipped self.job_name = job_name + self.download = download + self.directory = directory class TestScraper(base.TestCase): @@ -183,7 +197,7 @@ class TestScraper(base.TestCase): logstash_url='localhost:9999') args = logscraper.get_arguments() logscraper.check_connection(args.logstash_url) - mock_socket.assert_called_once() + self.assertTrue(mock_socket.called) @mock.patch('socket.socket') def test_check_connection_wrong_host(self, mock_socket): @@ -216,7 +230,8 @@ class TestScraper(base.TestCase): 'someuuid', None) self.assertRaises(ValueError, make_fake_list, job_result) - @mock.patch('multiprocessing.pool.Pool.map') + @mock.patch('logscraper.logscraper.save_build_info') + @mock.patch('logscraper.logscraper.check_specified_files') @mock.patch('builtins.open', new_callable=mock.mock_open()) @mock.patch('os.path.isfile') @mock.patch('logscraper.logscraper.check_specified_files', @@ -229,15 +244,24 @@ class TestScraper(base.TestCase): gearman_port=4731, workers=1)) def test_run_scraping(self, mock_args, mock_submit, mock_files, - mock_isfile, mock_readfile, mock_map): + mock_isfile, mock_readfile, mock_specified_files, + mock_save_buildinfo): with mock.patch('logscraper.logscraper.get_last_job_results' ) as mock_job_results: - args = logscraper.get_arguments() - mock_job_results.return_value = [builds_result[0]] - logscraper.run_scraping(args, - 'http://somehost.com/api/tenant/tenant1') - self.assertEqual(builds_result[0], mock_map.call_args.args[1][0]) - self.assertIn("build_args", mock_map.call_args.args[1][0]) + with mock.patch('multiprocessing.pool.Pool.map', + lambda self, func, iterable, chunksize=None, + callback=None, error_callback=None: + _MockedPoolMapResult(func, iterable)): + args = logscraper.get_arguments() + mock_job_results.return_value = [builds_result[0]] + logscraper.run_scraping( + args, 'http://somehost.com/api/tenant/tenant1') + self.assertEqual(builds_result[0]['uuid'], + mock_submit.call_args.args[2]['uuid']) + self.assertTrue(mock_submit.called) + self.assertEqual(builds_result[0], + mock_specified_files.call_args.args[0]) + self.assertFalse(mock_save_buildinfo.called) @mock.patch('logscraper.logscraper.run_scraping') def test_run(self, mock_scraping): @@ -251,6 +275,38 @@ class TestScraper(base.TestCase): logscraper.run(args) self.assertEqual(3, mock_scraping.call_count) + @mock.patch('logscraper.logscraper.save_build_info') + @mock.patch('logscraper.logscraper.check_specified_files') + @mock.patch('builtins.open', new_callable=mock.mock_open()) + @mock.patch('os.path.isfile') 
+ @mock.patch('logscraper.logscraper.check_specified_files', + return_value=['job-output.txt']) + @mock.patch('logscraper.logscraper.LogMatcher.submitJobs') + @mock.patch('argparse.ArgumentParser.parse_args', + return_value=FakeArgs( + zuul_api_url=['http://somehost.com/api/tenant/tenant1'], + workers=1, download=True, directory="/tmp/testdir")) + def test_run_scraping_download(self, mock_args, mock_submit, mock_files, + mock_isfile, mock_readfile, + mock_specified_files, mock_save_buildinfo): + with mock.patch('logscraper.logscraper.get_last_job_results' + ) as mock_job_results: + with mock.patch( + 'multiprocessing.pool.Pool.map', + lambda self, func, iterable, chunksize=None, callback=None, + error_callback=None: _MockedPoolMapResult(func, iterable), + ): + args = logscraper.get_arguments() + mock_job_results.return_value = [builds_result[0]] + logscraper.run_scraping( + args, 'http://somehost.com/api/tenant/tenant1') + + self.assertFalse(mock_submit.called) + self.assertTrue(mock_specified_files.called) + self.assertEqual(builds_result[0], + mock_specified_files.call_args.args[0]) + self.assertTrue(mock_save_buildinfo.called) + class TestConfig(base.TestCase): @mock.patch('sys.exit')
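

A note on the switch to urljoin in check_specified_files above: unlike the old
"%s%s" % (job_result["log_url"], f) concatenation, urljoin only appends the
relative file name when the base URL ends with a trailing slash; otherwise it
replaces the last path segment. Zuul's log_url values normally end with a
slash, so the behaviour is unchanged here, but the difference is worth keeping
in mind. A minimal illustration (the URLs below are made up):

.. code-block::

   from urllib.parse import urljoin

   # With a trailing slash the file name is appended, matching the old
   # string concatenation:
   urljoin("https://logs.example.org/build/123/", "zuul-info/inventory.yaml")
   # -> 'https://logs.example.org/build/123/zuul-info/inventory.yaml'

   # Without it, urljoin replaces the last path segment instead:
   urljoin("https://logs.example.org/build/123", "zuul-info/inventory.yaml")
   # -> 'https://logs.example.org/build/zuul-info/inventory.yaml'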