Add option to download logs to a directory

The new option '--download' makes it possible to download the log
files together with additional Zuul information files, which can
later be used by another tool to send the content to the next log
processing service. The option was proposed in another patch set [1],
but that change is large and harder to review, so it is split out
here.

[1] https://review.opendev.org/c/openstack/ci-log-processing/+/830337
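
For quick reference, the new mode is invoked like this (mirroring the
README example added in this change; the /mnt/logscraper path is
illustrative):

  # fetch logs for the latest builds instead of pushing them to gearman
  logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack \
      --directory /mnt/logscraper --download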

Change-Id: I88d39c8ca2e186f68296e0f21551eac2c550b5b8
Author: Daniel Pawlik, 2022-02-25 09:16:48 +01:00 (committed by daniel.pawlik)
parent 5124657fcd
commit 2ed0fd0912
8 changed files with 244 additions and 43 deletions


@@ -7,11 +7,19 @@
 ### BUILD CONTAINER IMAGES ###
 - name: Build container images
   block:
-    - name: Build logscraper container image
+    - name: Build logscraper container image - Zuul
       shell: >
         podman build -t quay.io/logscraper:dev -f Dockerfile
       args:
         chdir: "{{ zuul.projects['opendev.org/openstack/ci-log-processing'].src_dir }}"
+      when: zuul is defined
+
+    - name: Build logscraper container image - non Zuul
+      shell: >
+        podman build -t quay.io/logscraper:dev -f Dockerfile
+      args:
+        chdir: "{{ playbook_dir }}"
+      when: zuul is not defined

     - name: Get logscraper image id
       shell: |
@@ -23,6 +31,14 @@
         podman build -t quay.io/loggearman:dev -f loggearman/Dockerfile
       args:
         chdir: "{{ zuul.projects['opendev.org/openstack/ci-log-processing'].src_dir }}"
+      when: zuul is defined
+
+    - name: Build loggearman container image - non Zuul
+      shell: >
+        podman build -t quay.io/loggearman:dev -f loggearman/Dockerfile
+      args:
+        chdir: "{{ playbook_dir }}"
+      when: zuul is not defined

     - name: Get loggearman image id
       shell: |
@@ -38,7 +54,6 @@
     container_images:
       logscraper: "{{ _logscraper_image_id.stdout }}"
      loggearman: "{{ _loggearman_image_id.stdout }}"
-  when: zuul is defined

 ### OPENSEARCH ####
 - name: Setup Opensearch


@@ -60,10 +60,10 @@ and second one for getting logs from `sometenant` tenant.
        zuul_api_url: https://zuul.opendev.org/api/tenant/openstack
        insecure: False
      - tenant: sometenant
-       gearman_port: 4731
-       gearman_server: someproject.org
        zuul_api_url: https://zuul.opendev.org/api/tenant/sometenant
        insecure: True
+       download: true
+       download_dir: /mnt/logscraper

   roles:
     - logscraper


@@ -15,10 +15,10 @@ container_images:
 #   gearman_port: 4731
 #   gearman_server: logstash.openstack.org
 #   zuul_api_url: https://zuul.opendev.org/api/tenant/openstack
-#   insecure: False
+#   insecure: false
 # - tenant: sometenant
-#   gearman_port: 4731
-#   gearman_server: logstash.openstack.org
 #   zuul_api_url: https://zuul.opendev.org/api/tenant/sometenant
-#   insecure: True
+#   insecure: true
+#   download: true
+#   download_dir: /mnt/logscraper/sometenant
 tenant_builds: []


@@ -1,4 +1,13 @@
 ---
+- name: Create logscraper download directory
+  file:
+    path: "{{ item.download_dir }}"
+    state: directory
+    recurse: true
+    owner: "{{ logscraper_user }}"
+    group: "{{ logscraper_group }}"
+  when: "'download_dir' in item"
+
 - name: Generate logscraper script
   template:
     src: logscraper.sh.j2


@@ -8,9 +8,19 @@
   --uidmap 1000:{{ logscraper_uid }}:1 \
   --name logscraper-{{ item.tenant }} \
   --volume {{ logscraper_dir }}:{{ logscraper_dir }}:z \
+{% if 'download_dir' in item %}
+  --volume {{ item.download_dir }}:{{ item.download_dir }}:z \
+{% endif %}
   {{ container_images['logscraper'] }} \
+{% if 'gearman_port' in item and 'gearman_server' in item %}
   --gearman-port {{ item.gearman_port }} \
   --gearman-server {{ item.gearman_server }} \
+{% else %}
+  --download \
+{% endif %}
+{% if 'download_dir' in item %}
+  --directory {{ item.download_dir }} \
+{% endif %}
   --checkpoint-file {{ item.checkpoint_file | default(logscraper_dir + '/checkpoint') }} \
   --follow \
   --zuul-api-url {{ item.zuul_api_url }}
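
To make the template's branching concrete, here is a sketch of what it renders for a tenant entry that sets download_dir but no gearman_server/gearman_port, so the --download branch is taken (the uid, directories, and image tag below are hypothetical values, not part of the change):

  # hypothetical rendering, assuming logscraper_uid=1000 and logscraper_dir=/etc/logscraper
  --uidmap 1000:1000:1 \
  --name logscraper-sometenant \
  --volume /etc/logscraper:/etc/logscraper:z \
  --volume /mnt/logscraper:/mnt/logscraper:z \
  quay.io/logscraper:dev \
  --download \
  --directory /mnt/logscraper \
  --checkpoint-file /etc/logscraper/checkpoint \
  --follow \
  --zuul-api-url https://zuul.opendev.org/api/tenant/sometenant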


@@ -11,6 +11,38 @@ It is available by typing:

   logscraper --help

+Fetch and push last Zuul CI job logs into gearman.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --zuul-api-url ZUUL_API_URL
+                        URL(s) for Zuul API. Parameter can be set multiple
+                        times.
+  --job-name JOB_NAME   CI job name(s). Parameter can be set multiple times.
+                        If not set it would scrape every latest builds.
+  --gearman-server GEARMAN_SERVER
+                        Gearman host addresss
+  --gearman-port GEARMAN_PORT
+                        Gearman listen port. Defaults to 4730.
+  --follow              Keep polling zuul builds
+  --insecure            Skip validating SSL cert
+  --checkpoint-file CHECKPOINT_FILE
+                        File that will keep information about last uuid
+                        timestamp for a job.
+  --logstash-url LOGSTASH_URL
+                        When provided, script will check connection to
+                        Logstash service before sending to log processing
+                        system. For example: logstash.local:9999
+  --workers WORKERS     Worker processes for logscraper
+  --max-skipped MAX_SKIPPED
+                        How many job results should be checked until last uuid
+                        written in checkpoint file is founded
+  --debug               Print more information
+  --download            Download logs and do not send to gearman service
+  --directory DIRECTORY
+                        Directory, where the logs will be stored. Defaults to:
+                        /tmp/logscraper
+
 Basic usage
 -----------
@@ -43,6 +75,12 @@ Example:

   logscraper --gearman-server localhost --job-name tripleo-ci-centos-8-containers-multinode --job-name openstack-tox-linters --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --zuul-api-url https://zuul.opendev.org/api/tenant/local

+* download logs to /mnt/logscraper. NOTE: if you are using container service, this directory needs to be mounted!
+
+.. code-block::
+
+   logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --directory /mnt/logscraper --download
+
 Containerize tool
 -----------------
@@ -62,3 +100,10 @@ to the container, for example:

 .. code-block::

   docker run -v $(pwd):/checkpoint-dir:z -d logscraper logscraper --gearman-server somehost --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --follow
+
+In this example, logscraper will download log files to the /mnt/logscraper directory:
+
+.. code-block::
+
+   docker run -v $(pwd):/checkpoint-dir:z -v /mnt/logscraper:/mnt/logscraper:z -d logscraper logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --directory /mnt/logscraper --download --follow


@@ -23,18 +23,21 @@ The goal is to push recent zuul builds into log gearman processor.

 import argparse
 import gear
+import itertools
 import json
 import logging
 import multiprocessing
+import os
 import requests
 import socket
 import sys
 import time
-import urllib
 import yaml

+from concurrent.futures import ThreadPoolExecutor
 from distutils.version import StrictVersion as s_version
 import tenacity
+from urllib.parse import urljoin

@@ -46,6 +49,8 @@ file_to_check = [
     "var/log/extra/logstash.txt.gz",
     "var/log/extra/errors.txt",
     "var/log/extra/errors.txt.gz",
+    "zuul-info/inventory.yaml",
+    "zuul-info/inventory.yaml.gz"
 ]

 # From: https://opendev.org/opendev/base-jobs/src/branch/master/roles/submit-logstash-jobs/defaults/main.yaml  # noqa
@@ -165,8 +170,7 @@ def get_arguments():
                         "set multiple times. If not set it would scrape "
                         "every latest builds.",
                         action='append')
-    parser.add_argument("--gearman-server", help="Gearman host addresss",
-                        required=True)
+    parser.add_argument("--gearman-server", help="Gearman host addresss")
     parser.add_argument("--gearman-port", help="Gearman listen port. "
                         "Defaults to 4730.",
                         default=4730)
@@ -181,6 +185,7 @@ def get_arguments():
                         "to log processing system. For example: "
                         "logstash.local:9999")
     parser.add_argument("--workers", help="Worker processes for logscraper",
+                        type=int,
                         default=1)
     parser.add_argument("--max-skipped", help="How many job results should be "
                         "checked until last uuid written in checkpoint file "
@@ -188,6 +193,12 @@ def get_arguments():
                         default=500)
     parser.add_argument("--debug", help="Print more information",
                         action="store_true")
+    parser.add_argument("--download", help="Download logs and do not send "
+                        "to gearman service",
+                        action="store_true")
+    parser.add_argument("--directory", help="Directory, where the logs will "
+                        "be stored. Defaults to: /tmp/logscraper",
+                        default="/tmp/logscraper")
     args = parser.parse_args()
     return args
@@ -292,7 +303,7 @@
         fields["build_newrev"] = result.get("newrev", "UNKNOWN")
         fields["node_provider"] = "local"

-        log_url = urllib.parse.urljoin(result["log_url"], filename)
+        log_url = urljoin(result["log_url"], filename)
         fields["log_url"] = log_url
         fields["tenant"] = result["tenant"]
@@ -401,17 +412,45 @@ def get_last_job_results(zuul_url, insecure, max_skipped, last_uuid,
 ###############################################################################
 #                             Log scraper                                     #
 ###############################################################################
-def check_specified_files(job_result, insecure):
-    """Return list of specified files if they exists on logserver. """
-    available_files = []
-    for f in file_to_check:
-        if not job_result["log_url"]:
-            continue
-        response = requests_get("%s%s" % (job_result["log_url"], f),
-                                insecure)
-        if response.status_code == 200:
-            available_files.append(f)
-    return available_files
+def save_build_info(directory, build):
+    with open("%s/buildinfo" % directory, "w") as text_file:
+        yaml.dump(build, text_file)
+
+
+def download_file(url, directory, insecure=False):
+    logging.debug("Started fetching %s" % url)
+    filename = url.split("/")[-1]
+    try:
+        response = requests.get(url, verify=insecure, stream=True)
+        if response.status_code == 200:
+            if directory:
+                with open("%s/%s" % (directory, filename), 'wb') as f:
+                    for txt in response.iter_content(1024):
+                        f.write(txt)
+            return filename
+    except requests.exceptions.ContentDecodingError:
+        logging.critical("Can not decode content from %s" % url)
+
+
+def check_specified_files(job_result, insecure, directory=None):
+    """Return list of specified files if they exists on logserver. """
+    args = job_result.get("build_args")
+    if not job_result["log_url"]:
+        logging.debug("There is no file to download for build "
+                      "uuid: %s" % job_result["uuid"])
+        return
+    build_log_urls = [urljoin(job_result["log_url"], s)
+                      for s in file_to_check]
+    results = []
+    pool = ThreadPoolExecutor(max_workers=args.workers)
+    for page in pool.map(download_file, build_log_urls,
+                         itertools.repeat(directory),
+                         itertools.repeat(insecure)):
+        if page:
+            results.append(page)
+    return results


 def setup_logging(debug):
@@ -425,7 +464,12 @@ def setup_logging(debug):

 def run_build(build):
     """Submit job informations into log processing system. """
-    args = build.pop("build_args")
+    args = build.get("build_args")
+    # NOTE: if build result is "ABORTED", there is no any
+    # job result files to parse. Skipping that file.
+    if build["result"].lower() == 'aborted':
+        return

     logging.info(
         "Processing logs for %s | %s | %s | %s",
@@ -435,18 +479,36 @@ def run_build(build):
         build["uuid"],
     )

-    results = dict(files=[], jobs=[], invocation={})
-
-    lmc = LogMatcher(
-        args.gearman_server,
-        args.gearman_port,
-        build["result"],
-        build["log_url"],
-        {},
-    )
-    results["files"] = check_specified_files(build, args.insecure)
-    lmc.submitJobs("push-log", results["files"], build)
+    if args.download:
+        logging.debug("Started fetching build logs")
+        directory = "%s/%s" % (args.directory, build["uuid"])
+        try:
+            if not os.path.exists(directory):
+                os.makedirs(directory)
+        except PermissionError:
+            logging.critical("Can not create directory %s" % directory)
+        except Exception as e:
+            logging.critical("Exception occured %s on creating dir %s" % (
+                e, directory))
+
+        check_specified_files(build, args.insecure, directory)
+        save_build_info(directory, build)
+    else:
+        logging.debug("Parsing content for gearman service")
+        results = dict(files=[], jobs=[], invocation={})
+        files = check_specified_files(build, args.insecure)
+        if not files:
+            return
+        results["files"] = files
+        lmc = LogMatcher(
+            args.gearman_server,
+            args.gearman_port,
+            build["result"],
+            build["log_url"],
+            {},
+        )
+        lmc.submitJobs("push-log", results["files"], build)


 def check_connection(logstash_url):
@@ -512,6 +574,10 @@ def run(args):

 def main():
     args = get_arguments()
     setup_logging(args.debug)
+    if args.download and args.gearman_server and args.gearman_port:
+        logging.critical("Can not use logscraper to send logs to gearman "
+                         "and dowload logs. Choose one")
+        sys.exit(1)
     while True:
         run(args)
         if not args.follow:
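
Worth noting about the guard above (an observation, not part of the patch): --gearman-port defaults to 4730 and is therefore always truthy, so the error fires whenever --download is combined with an explicit --gearman-server, for example:

  # exits 1 with "Can not use logscraper to send logs to gearman and dowload logs. Choose one"
  logscraper --download --gearman-server logstash.openstack.org \
      --zuul-api-url https://zuul.opendev.org/api/tenant/openstack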


@@ -103,12 +103,24 @@ builds_result = [{
 }]


+class _MockedPoolMapResult:
+    def __init__(self, func, iterable):
+        self.func = func
+        self.iterable = iterable
+
+        # mocked results
+        self._value = [self.func(i) for i in iterable]
+
+    def get(self, timeout=0):
+        return self._value
+
+
 class FakeArgs(object):
     def __init__(self, zuul_api_url=None, gearman_server=None,
                  gearman_port=None, follow=False, insecure=False,
                  checkpoint_file=None, ignore_checkpoint=None,
                  logstash_url=None, workers=None, max_skipped=None,
-                 job_name=None):
+                 job_name=None, download=None, directory=None):

         self.zuul_api_url = zuul_api_url
         self.gearman_server = gearman_server
@@ -121,6 +133,8 @@ class FakeArgs(object):
         self.workers = workers
         self.max_skipped = max_skipped
         self.job_name = job_name
+        self.download = download
+        self.directory = directory


 class TestScraper(base.TestCase):
@@ -183,7 +197,7 @@
             logstash_url='localhost:9999')
         args = logscraper.get_arguments()
         logscraper.check_connection(args.logstash_url)
-        mock_socket.assert_called_once()
+        self.assertTrue(mock_socket.called)

     @mock.patch('socket.socket')
     def test_check_connection_wrong_host(self, mock_socket):
@@ -216,7 +230,8 @@
                                      'someuuid', None)
             self.assertRaises(ValueError, make_fake_list, job_result)

-    @mock.patch('multiprocessing.pool.Pool.map')
+    @mock.patch('logscraper.logscraper.save_build_info')
+    @mock.patch('logscraper.logscraper.check_specified_files')
     @mock.patch('builtins.open', new_callable=mock.mock_open())
     @mock.patch('os.path.isfile')
     @mock.patch('logscraper.logscraper.check_specified_files',
@@ -229,15 +244,24 @@
                 gearman_port=4731,
                 workers=1))
     def test_run_scraping(self, mock_args, mock_submit, mock_files,
-                          mock_isfile, mock_readfile, mock_map):
+                          mock_isfile, mock_readfile, mock_specified_files,
+                          mock_save_buildinfo):
         with mock.patch('logscraper.logscraper.get_last_job_results'
                         ) as mock_job_results:
-            args = logscraper.get_arguments()
-            mock_job_results.return_value = [builds_result[0]]
-            logscraper.run_scraping(args,
-                                    'http://somehost.com/api/tenant/tenant1')
-            self.assertEqual(builds_result[0], mock_map.call_args.args[1][0])
-            self.assertIn("build_args", mock_map.call_args.args[1][0])
+            with mock.patch('multiprocessing.pool.Pool.map',
+                            lambda self, func, iterable, chunksize=None,
+                            callback=None, error_callback=None:
+                            _MockedPoolMapResult(func, iterable)):
+                args = logscraper.get_arguments()
+                mock_job_results.return_value = [builds_result[0]]
+                logscraper.run_scraping(
+                    args, 'http://somehost.com/api/tenant/tenant1')
+                self.assertEqual(builds_result[0]['uuid'],
+                                 mock_submit.call_args.args[2]['uuid'])
+                self.assertTrue(mock_submit.called)
+                self.assertEqual(builds_result[0],
+                                 mock_specified_files.call_args.args[0])
+                self.assertFalse(mock_save_buildinfo.called)

     @mock.patch('logscraper.logscraper.run_scraping')
     def test_run(self, mock_scraping):
@@ -251,6 +275,38 @@
             logscraper.run(args)
             self.assertEqual(3, mock_scraping.call_count)

+    @mock.patch('logscraper.logscraper.save_build_info')
+    @mock.patch('logscraper.logscraper.check_specified_files')
+    @mock.patch('builtins.open', new_callable=mock.mock_open())
+    @mock.patch('os.path.isfile')
+    @mock.patch('logscraper.logscraper.check_specified_files',
+                return_value=['job-output.txt'])
+    @mock.patch('logscraper.logscraper.LogMatcher.submitJobs')
+    @mock.patch('argparse.ArgumentParser.parse_args',
+                return_value=FakeArgs(
+                    zuul_api_url=['http://somehost.com/api/tenant/tenant1'],
+                    workers=1, download=True, directory="/tmp/testdir"))
+    def test_run_scraping_download(self, mock_args, mock_submit, mock_files,
+                                   mock_isfile, mock_readfile,
+                                   mock_specified_files, mock_save_buildinfo):
+        with mock.patch('logscraper.logscraper.get_last_job_results'
+                        ) as mock_job_results:
+            with mock.patch(
+                    'multiprocessing.pool.Pool.map',
+                    lambda self, func, iterable, chunksize=None, callback=None,
+                    error_callback=None: _MockedPoolMapResult(func, iterable),
+            ):
+                args = logscraper.get_arguments()
+                mock_job_results.return_value = [builds_result[0]]
+                logscraper.run_scraping(
+                    args, 'http://somehost.com/api/tenant/tenant1')
+
+                self.assertFalse(mock_submit.called)
+                self.assertTrue(mock_specified_files.called)
+                self.assertEqual(builds_result[0],
+                                 mock_specified_files.call_args.args[0])
+                self.assertTrue(mock_save_buildinfo.called)


 class TestConfig(base.TestCase):
     @mock.patch('sys.exit')