Add option to download logs to a directory
The new option '--download' makes it possible to download the log files together with additional Zuul information files, which can later be used by another tool to send the content to the next log processing service. The option was originally proposed in another patch set [1], which is large and makes the review more complex.

[1] https://review.opendev.org/c/openstack/ci-log-processing/+/830337

Change-Id: I88d39c8ca2e186f68296e0f21551eac2c550b5b8
parent 5124657fcd
commit 2ed0fd0912
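As a quick orientation for reviewers, here is a hedged sketch (not part of the change) of how a downstream tool could consume what `--download` produces. It assumes the layout implemented below: one `<directory>/<build uuid>` subdirectory per build, containing the fetched log files plus a `buildinfo` file written with `yaml.dump()`. The helper name `read_build_info` is illustrative only, and it assumes the dumped metadata is plain YAML that `safe_load` can parse.

.. code-block::

   import os
   import yaml

   def read_build_info(download_dir, uuid):
       # Each scraped build gets its own <download_dir>/<uuid> subdirectory;
       # "buildinfo" holds the YAML-dumped Zuul build metadata.
       with open(os.path.join(download_dir, uuid, "buildinfo")) as f:
           return yaml.safe_load(f)

   # e.g. read_build_info("/mnt/logscraper", "some-build-uuid")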
@@ -7,11 +7,19 @@
 ### BUILD CONTAINER IMAGES ###
 - name: Build container images
   block:
-    - name: Build logscraper container image
+    - name: Build logscraper container image - Zuul
       shell: >
         podman build -t quay.io/logscraper:dev -f Dockerfile
       args:
         chdir: "{{ zuul.projects['opendev.org/openstack/ci-log-processing'].src_dir }}"
+      when: zuul is defined
+
+    - name: Build logscraper container image - non Zuul
+      shell: >
+        podman build -t quay.io/logscraper:dev -f Dockerfile
+      args:
+        chdir: "{{ playbook_dir }}"
+      when: zuul is not defined

     - name: Get logscraper image id
       shell: |
@@ -23,6 +31,14 @@
         podman build -t quay.io/loggearman:dev -f loggearman/Dockerfile
       args:
         chdir: "{{ zuul.projects['opendev.org/openstack/ci-log-processing'].src_dir }}"
+      when: zuul is defined
+
+    - name: Build loggearman container image - non Zuul
+      shell: >
+        podman build -t quay.io/loggearman:dev -f loggearman/Dockerfile
+      args:
+        chdir: "{{ playbook_dir }}"
+      when: zuul is not defined

     - name: Get loggearman image id
       shell: |
@@ -38,7 +54,6 @@
       container_images:
         logscraper: "{{ _logscraper_image_id.stdout }}"
         loggearman: "{{ _loggearman_image_id.stdout }}"
-      when: zuul is defined

 ### OPENSEARCH ####
 - name: Setup Opensearch
@@ -60,10 +60,10 @@ and second one for getting logs from `sometenant` tenant.
       zuul_api_url: https://zuul.opendev.org/api/tenant/openstack
       insecure: False
     - tenant: sometenant
       gearman_port: 4731
       gearman_server: someproject.org
       zuul_api_url: https://zuul.opendev.org/api/tenant/sometenant
       insecure: True
+      download: true
+      download_dir: /mnt/logscraper
   roles:
     - logscraper
@@ -15,10 +15,10 @@ container_images:
 #    gearman_port: 4731
 #    gearman_server: logstash.openstack.org
 #    zuul_api_url: https://zuul.opendev.org/api/tenant/openstack
-#    insecure: False
+#    insecure: false
 #  - tenant: sometenant
 #    gearman_port: 4731
 #    gearman_server: logstash.openstack.org
 #    zuul_api_url: https://zuul.opendev.org/api/tenant/sometenant
-#    insecure: True
+#    insecure: true
+#    download: true
+#    download_dir: /mnt/logscraper/sometenant
 tenant_builds: []
@@ -1,4 +1,13 @@
 ---
+- name: Create logscraper download directory
+  file:
+    path: "{{ item.download_dir }}"
+    state: directory
+    recurse: true
+    owner: "{{ logscraper_user }}"
+    group: "{{ logscraper_group }}"
+  when: "'download_dir' in item"
+
 - name: Generate logscraper script
   template:
     src: logscraper.sh.j2
@@ -8,9 +8,19 @@
   --uidmap 1000:{{ logscraper_uid }}:1 \
   --name logscraper-{{ item.tenant }} \
   --volume {{ logscraper_dir }}:{{ logscraper_dir }}:z \
+{% if 'download_dir' in item %}
+  --volume {{ item.download_dir }}:{{ item.download_dir }}:z \
+{% endif %}
   {{ container_images['logscraper'] }} \
+{% if 'gearman_port' in item and 'gearman_server' in item %}
   --gearman-port {{ item.gearman_port }} \
   --gearman-server {{ item.gearman_server }} \
+{% else %}
+  --download \
+{% endif %}
+{% if 'download_dir' in item %}
+  --directory {{ item.download_dir }} \
+{% endif %}
   --checkpoint-file {{ item.checkpoint_file | default(logscraper_dir + '/checkpoint') }} \
   --follow \
   --zuul-api-url {{ item.zuul_api_url }}
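To make the template branches above concrete, here is a hedged sketch of how they render for a tenant that has no gearman settings, so the generated script falls back to `--download`. It uses a trimmed-down template containing only the option-selection logic; the real `logscraper.sh.j2` wraps this in a podman invocation, and the tenant values are illustrative.

.. code-block::

   from jinja2 import Template

   # only the option-selection logic from logscraper.sh.j2
   tmpl = Template(
       "logscraper "
       "{% if 'gearman_port' in item and 'gearman_server' in item %}"
       "--gearman-port {{ item.gearman_port }} "
       "--gearman-server {{ item.gearman_server }} "
       "{% else %}--download {% endif %}"
       "{% if 'download_dir' in item %}--directory {{ item.download_dir }} {% endif %}"
       "--zuul-api-url {{ item.zuul_api_url }}"
   )
   item = {"tenant": "sometenant",
           "download_dir": "/mnt/logscraper",
           "zuul_api_url": "https://zuul.opendev.org/api/tenant/sometenant"}
   print(tmpl.render(item=item))
   # logscraper --download --directory /mnt/logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/sometenant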
@@ -11,6 +11,38 @@ It is available by typing:

    logscraper --help

+   Fetch and push last Zuul CI job logs into gearman.
+
+   optional arguments:
+     -h, --help            show this help message and exit
+     --zuul-api-url ZUUL_API_URL
+                           URL(s) for Zuul API. Parameter can be set multiple
+                           times.
+     --job-name JOB_NAME   CI job name(s). Parameter can be set multiple times.
+                           If not set, it scrapes the latest builds.
+     --gearman-server GEARMAN_SERVER
+                           Gearman host address
+     --gearman-port GEARMAN_PORT
+                           Gearman listen port. Defaults to 4730.
+     --follow              Keep polling zuul builds
+     --insecure            Skip validating SSL cert
+     --checkpoint-file CHECKPOINT_FILE
+                           File that will keep information about last uuid
+                           timestamp for a job.
+     --logstash-url LOGSTASH_URL
+                           When provided, script will check connection to
+                           Logstash service before sending to log processing
+                           system. For example: logstash.local:9999
+     --workers WORKERS     Worker processes for logscraper
+     --max-skipped MAX_SKIPPED
+                           How many job results should be checked until the
+                           last uuid written in the checkpoint file is found
+     --debug               Print more information
+     --download            Download logs and do not send to gearman service
+     --directory DIRECTORY
+                           Directory where the logs will be stored. Defaults
+                           to: /tmp/logscraper
+

 Basic usage
 -----------
@@ -43,6 +75,12 @@ Example:

    logscraper --gearman-server localhost --job-name tripleo-ci-centos-8-containers-multinode --job-name openstack-tox-linters --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --zuul-api-url https://zuul.opendev.org/api/tenant/local

+* download logs to /mnt/logscraper. NOTE: if you are using the container service, this directory needs to be mounted into the container!
+
+.. code-block::
+
+   logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --directory /mnt/logscraper --download
+

 Containerize tool
 -----------------
@@ -62,3 +100,10 @@ to the container, for example:
 .. code-block::

    docker run -v $(pwd):/checkpoint-dir:z -d logscraper logscraper --gearman-server somehost --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --follow
+
+
+In this example, logscraper will download log files to the /mnt/logscraper directory:
+
+.. code-block::
+
+   docker run -v $(pwd):/checkpoint-dir:z -v /mnt/logscraper:/mnt/logscraper:z -d logscraper logscraper --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --directory /mnt/logscraper --download --follow
@@ -23,18 +23,21 @@ The goal is to push recent zuul builds into log gearman processor.

 import argparse
 import gear
+import itertools
 import json
 import logging
 import multiprocessing
 import os
 import requests
 import socket
 import sys
 import time
-import urllib
+import yaml

+from concurrent.futures import ThreadPoolExecutor
 from distutils.version import StrictVersion as s_version
 import tenacity
+from urllib.parse import urljoin


 file_to_check = [
@@ -46,6 +49,8 @@ file_to_check = [
     "var/log/extra/logstash.txt.gz",
     "var/log/extra/errors.txt",
     "var/log/extra/errors.txt.gz",
+    "zuul-info/inventory.yaml",
+    "zuul-info/inventory.yaml.gz"
 ]

 # From: https://opendev.org/opendev/base-jobs/src/branch/master/roles/submit-logstash-jobs/defaults/main.yaml  # noqa
@@ -165,8 +170,7 @@ def get_arguments():
                         "set multiple times. If not set it would scrape "
                         "every latest builds.",
                         action='append')
-    parser.add_argument("--gearman-server", help="Gearman host addresss",
-                        required=True)
+    parser.add_argument("--gearman-server", help="Gearman host address")
     parser.add_argument("--gearman-port", help="Gearman listen port. "
                         "Defaults to 4730.",
                         default=4730)
@@ -181,6 +185,7 @@ def get_arguments():
                         "to log processing system. For example: "
                         "logstash.local:9999")
     parser.add_argument("--workers", help="Worker processes for logscraper",
+                        type=int,
                         default=1)
     parser.add_argument("--max-skipped", help="How many job results should be "
                         "checked until last uuid written in checkpoint file "
@@ -188,6 +193,12 @@ def get_arguments():
                         default=500)
     parser.add_argument("--debug", help="Print more information",
                         action="store_true")
+    parser.add_argument("--download", help="Download logs and do not send "
+                        "to gearman service",
+                        action="store_true")
+    parser.add_argument("--directory", help="Directory where the logs will "
+                        "be stored. Defaults to: /tmp/logscraper",
+                        default="/tmp/logscraper")
     args = parser.parse_args()
     return args
@@ -292,7 +303,7 @@ class LogMatcher(object):
         fields["build_newrev"] = result.get("newrev", "UNKNOWN")

         fields["node_provider"] = "local"
-        log_url = urllib.parse.urljoin(result["log_url"], filename)
+        log_url = urljoin(result["log_url"], filename)
         fields["log_url"] = log_url
         fields["tenant"] = result["tenant"]
@@ -401,17 +412,45 @@ def get_last_job_results(zuul_url, insecure, max_skipped, last_uuid,
 ###############################################################################
 #                                Log scraper                                  #
 ###############################################################################
-def check_specified_files(job_result, insecure):
-    """Return list of specified files if they exists on logserver. """
-    available_files = []
-    for f in file_to_check:
-        if not job_result["log_url"]:
-            continue
-        response = requests_get("%s%s" % (job_result["log_url"], f),
-                                insecure)
+def save_build_info(directory, build):
+    with open("%s/buildinfo" % directory, "w") as text_file:
+        yaml.dump(build, text_file)
+
+
+def download_file(url, directory, insecure=False):
+    logging.debug("Started fetching %s" % url)
+    filename = url.split("/")[-1]
+    try:
+        response = requests.get(url, verify=insecure, stream=True)
         if response.status_code == 200:
-            available_files.append(f)
-    return available_files
+            if directory:
+                with open("%s/%s" % (directory, filename), 'wb') as f:
+                    for txt in response.iter_content(1024):
+                        f.write(txt)
+            return filename
+    except requests.exceptions.ContentDecodingError:
+        logging.critical("Can not decode content from %s" % url)
+
+
+def check_specified_files(job_result, insecure, directory=None):
+    """Return list of specified files if they exist on logserver. """
+
+    args = job_result.get("build_args")
+    if not job_result["log_url"]:
+        logging.debug("There is no file to download for build "
+                      "uuid: %s" % job_result["uuid"])
+        return
+
+    build_log_urls = [urljoin(job_result["log_url"], s) for s in file_to_check]
+
+    results = []
+    pool = ThreadPoolExecutor(max_workers=args.workers)
+    for page in pool.map(download_file, build_log_urls,
+                         itertools.repeat(directory),
+                         itertools.repeat(insecure)):
+        if page:
+            results.append(page)
+    return results


 def setup_logging(debug):
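A hedged, self-contained illustration of the fan-out pattern used by the new `check_specified_files()`: `ThreadPoolExecutor.map()` pairs each URL with the repeated `directory` and `insecure` arguments, which is how `pool.map(download_file, build_log_urls, itertools.repeat(directory), itertools.repeat(insecure))` distributes work across workers. The `fetch` stub and URLs below are illustrative, not from the repository.

.. code-block::

   import itertools
   from concurrent.futures import ThreadPoolExecutor

   def fetch(url, directory, insecure=False):
       # stand-in for download_file(); returns the path it would create
       return "%s/%s" % (directory, url.split("/")[-1])

   urls = ["https://logs.example/job-output.txt",
           "https://logs.example/zuul-info/inventory.yaml"]
   with ThreadPoolExecutor(max_workers=2) as pool:
       print(list(pool.map(fetch, urls,
                           itertools.repeat("/tmp/logscraper"),
                           itertools.repeat(False))))
   # ['/tmp/logscraper/job-output.txt', '/tmp/logscraper/inventory.yaml']

Note that, unlike this sketch, the patched function creates the executor without a context manager, so it is left for garbage collection; iterating the `map()` results still blocks until every download yields.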
@@ -425,7 +464,12 @@ def setup_logging(debug):

 def run_build(build):
     """Submit job informations into log processing system. """
-    args = build.pop("build_args")
+    args = build.get("build_args")

+    # NOTE: if the build result is "ABORTED", there are no job result
+    # files to parse, so skip the build.
+    if build["result"].lower() == 'aborted':
+        return
+
     logging.info(
         "Processing logs for %s | %s | %s | %s",
@@ -435,18 +479,36 @@ def run_build(build):
         build["uuid"],
     )

-    results = dict(files=[], jobs=[], invocation={})
+    if args.download:
+        logging.debug("Started fetching build logs")
+        directory = "%s/%s" % (args.directory, build["uuid"])
+        try:
+            if not os.path.exists(directory):
+                os.makedirs(directory)
+        except PermissionError:
+            logging.critical("Can not create directory %s" % directory)
+        except Exception as e:
+            logging.critical("Exception %s occurred on creating dir %s" % (
+                e, directory))

-    lmc = LogMatcher(
-        args.gearman_server,
-        args.gearman_port,
-        build["result"],
-        build["log_url"],
-        {},
-    )
-    results["files"] = check_specified_files(build, args.insecure)
+        check_specified_files(build, args.insecure, directory)
+        save_build_info(directory, build)
+    else:
+        logging.debug("Parsing content for gearman service")
+        results = dict(files=[], jobs=[], invocation={})
+        files = check_specified_files(build, args.insecure)
+        if not files:
+            return
+        results["files"] = files
+        lmc = LogMatcher(
+            args.gearman_server,
+            args.gearman_port,
+            build["result"],
+            build["log_url"],
+            {},
+        )

-    lmc.submitJobs("push-log", results["files"], build)
+        lmc.submitJobs("push-log", results["files"], build)


 def check_connection(logstash_url):
@@ -512,6 +574,10 @@ def run(args):

 def main():
     args = get_arguments()
     setup_logging(args.debug)
+    if args.download and args.gearman_server and args.gearman_port:
+        logging.critical("Can not use logscraper to send logs to gearman "
+                         "and download logs. Choose one")
+        sys.exit(1)
     while True:
         run(args)
         if not args.follow:
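As a design note, the guard above rejects mixing `--download` with an explicit gearman server. For comparison, here is a hedged sketch of the same constraint expressed with argparse's built-in exclusivity; this is not what the patch does (keeping the manual check avoids reshaping the pre-existing `--gearman-server` option), but it shows the standard mechanism:

.. code-block::

   import argparse

   parser = argparse.ArgumentParser()
   group = parser.add_mutually_exclusive_group()
   group.add_argument("--gearman-server")
   group.add_argument("--download", action="store_true")

   # argparse itself errors out on: --download --gearman-server somehost
   args = parser.parse_args(["--download"])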
@@ -103,12 +103,24 @@ builds_result = [{
 }]


+class _MockedPoolMapResult:
+    def __init__(self, func, iterable):
+        self.func = func
+        self.iterable = iterable
+
+        # mocked results
+        self._value = [self.func(i) for i in iterable]
+
+    def get(self, timeout=0):
+        return self._value
+
+
 class FakeArgs(object):
     def __init__(self, zuul_api_url=None, gearman_server=None,
                  gearman_port=None, follow=False, insecure=False,
                  checkpoint_file=None, ignore_checkpoint=None,
                  logstash_url=None, workers=None, max_skipped=None,
-                 job_name=None):
+                 job_name=None, download=None, directory=None):
         self.zuul_api_url = zuul_api_url
         self.gearman_server = gearman_server
@@ -121,6 +133,8 @@ class FakeArgs(object):
         self.workers = workers
         self.max_skipped = max_skipped
         self.job_name = job_name
+        self.download = download
+        self.directory = directory


 class TestScraper(base.TestCase):
@@ -183,7 +197,7 @@ class TestScraper(base.TestCase):
                              logstash_url='localhost:9999')
         args = logscraper.get_arguments()
         logscraper.check_connection(args.logstash_url)
-        mock_socket.assert_called_once()
+        self.assertTrue(mock_socket.called)

     @mock.patch('socket.socket')
     def test_check_connection_wrong_host(self, mock_socket):
@@ -216,7 +230,8 @@ class TestScraper(base.TestCase):
                              'someuuid', None)
         self.assertRaises(ValueError, make_fake_list, job_result)

-    @mock.patch('multiprocessing.pool.Pool.map')
+    @mock.patch('logscraper.logscraper.save_build_info')
+    @mock.patch('logscraper.logscraper.check_specified_files')
     @mock.patch('builtins.open', new_callable=mock.mock_open())
     @mock.patch('os.path.isfile')
     @mock.patch('logscraper.logscraper.check_specified_files',
@@ -229,15 +244,24 @@ class TestScraper(base.TestCase):
                     gearman_port=4731,
                     workers=1))
     def test_run_scraping(self, mock_args, mock_submit, mock_files,
-                          mock_isfile, mock_readfile, mock_map):
+                          mock_isfile, mock_readfile, mock_specified_files,
+                          mock_save_buildinfo):
         with mock.patch('logscraper.logscraper.get_last_job_results'
                         ) as mock_job_results:
-            args = logscraper.get_arguments()
-            mock_job_results.return_value = [builds_result[0]]
-            logscraper.run_scraping(args,
-                                    'http://somehost.com/api/tenant/tenant1')
-            self.assertEqual(builds_result[0], mock_map.call_args.args[1][0])
-            self.assertIn("build_args", mock_map.call_args.args[1][0])
+            with mock.patch('multiprocessing.pool.Pool.map',
+                            lambda self, func, iterable, chunksize=None,
+                            callback=None, error_callback=None:
+                            _MockedPoolMapResult(func, iterable)):
+                args = logscraper.get_arguments()
+                mock_job_results.return_value = [builds_result[0]]
+                logscraper.run_scraping(
+                    args, 'http://somehost.com/api/tenant/tenant1')
+        self.assertEqual(builds_result[0]['uuid'],
+                         mock_submit.call_args.args[2]['uuid'])
+        self.assertTrue(mock_submit.called)
+        self.assertEqual(builds_result[0],
+                         mock_specified_files.call_args.args[0])
+        self.assertFalse(mock_save_buildinfo.called)

     @mock.patch('logscraper.logscraper.run_scraping')
     def test_run(self, mock_scraping):
@@ -251,6 +275,38 @@ class TestScraper(base.TestCase):
             logscraper.run(args)
         self.assertEqual(3, mock_scraping.call_count)

+    @mock.patch('logscraper.logscraper.save_build_info')
+    @mock.patch('logscraper.logscraper.check_specified_files')
+    @mock.patch('builtins.open', new_callable=mock.mock_open())
+    @mock.patch('os.path.isfile')
+    @mock.patch('logscraper.logscraper.check_specified_files',
+                return_value=['job-output.txt'])
+    @mock.patch('logscraper.logscraper.LogMatcher.submitJobs')
+    @mock.patch('argparse.ArgumentParser.parse_args',
+                return_value=FakeArgs(
+                    zuul_api_url=['http://somehost.com/api/tenant/tenant1'],
+                    workers=1, download=True, directory="/tmp/testdir"))
+    def test_run_scraping_download(self, mock_args, mock_submit, mock_files,
+                                   mock_isfile, mock_readfile,
+                                   mock_specified_files, mock_save_buildinfo):
+        with mock.patch('logscraper.logscraper.get_last_job_results'
+                        ) as mock_job_results:
+            with mock.patch(
+                'multiprocessing.pool.Pool.map',
+                lambda self, func, iterable, chunksize=None, callback=None,
+                error_callback=None: _MockedPoolMapResult(func, iterable),
+            ):
+                args = logscraper.get_arguments()
+                mock_job_results.return_value = [builds_result[0]]
+                logscraper.run_scraping(
+                    args, 'http://somehost.com/api/tenant/tenant1')
+
+        self.assertFalse(mock_submit.called)
+        self.assertTrue(mock_specified_files.called)
+        self.assertEqual(builds_result[0],
+                         mock_specified_files.call_args.args[0])
+        self.assertTrue(mock_save_buildinfo.called)
+

 class TestConfig(base.TestCase):
     @mock.patch('sys.exit')