From a6bdcc96b46bf88d48875cee7c76d187e257e82d Mon Sep 17 00:00:00 2001
From: Daniel Pawlik <dpawlik@redhat.com>
Date: Wed, 27 Oct 2021 10:26:28 +0200
Subject: [PATCH] Initial project commit

This commit provides Zuul logscraper tool responsible for
getting newest logs by using Zuul API and pushing them to
log processing services.

Change-Id: Iff45fbcb6d4813cc60dc705c60e95f3045984b5c
---
 .gitignore                          |  11 +
 .stestr.conf                        |   3 +
 .zuul.yaml                          |   9 +
 Dockerfile                          |  25 ++
 LICENSE                             | 202 ++++++++++++
 README.rst                          |  44 +++
 doc/assets/openstack.png            | Bin 0 -> 923 bytes
 doc/requirements.txt                |   3 +
 doc/source/_static/openstack.png    |   1 +
 doc/source/conf.py                  |  51 +++
 doc/source/index.rst                |  28 ++
 doc/source/logscraper.rst           |  58 ++++
 logscraper/__init__.py              |   0
 logscraper/logscraper.py            | 472 ++++++++++++++++++++++++++++
 logscraper/tests/__init__.py        |   0
 logscraper/tests/base.py            |  38 +++
 logscraper/tests/test_logscraper.py | 378 ++++++++++++++++++++++
 requirements.txt                    |   4 +
 setup.cfg                           |  23 ++
 setup.py                            |  19 ++
 test-requirements.txt               |   5 +
 tox.ini                             |  30 ++
 22 files changed, 1404 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .stestr.conf
 create mode 100644 .zuul.yaml
 create mode 100644 Dockerfile
 create mode 100644 LICENSE
 create mode 100644 README.rst
 create mode 100644 doc/assets/openstack.png
 create mode 100644 doc/requirements.txt
 create mode 120000 doc/source/_static/openstack.png
 create mode 100644 doc/source/conf.py
 create mode 100644 doc/source/index.rst
 create mode 100644 doc/source/logscraper.rst
 create mode 100644 logscraper/__init__.py
 create mode 100755 logscraper/logscraper.py
 create mode 100644 logscraper/tests/__init__.py
 create mode 100644 logscraper/tests/base.py
 create mode 100644 logscraper/tests/test_logscraper.py
 create mode 100644 requirements.txt
 create mode 100644 setup.cfg
 create mode 100644 setup.py
 create mode 100644 test-requirements.txt
 create mode 100644 tox.ini

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..baac22b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+.DS_Store
+*.swp
+*~
+*.pyc
+doc/build/
+.tox/
+logscraper.egg-info/
+.eggs
+AUTHORS
+ChangeLog
+.stestr/
diff --git a/.stestr.conf b/.stestr.conf
new file mode 100644
index 0000000..c15e029
--- /dev/null
+++ b/.stestr.conf
@@ -0,0 +1,3 @@
+[DEFAULT]
+test_path=./logscraper/tests
+top_dir=./
diff --git a/.zuul.yaml b/.zuul.yaml
new file mode 100644
index 0000000..3b2eb1b
--- /dev/null
+++ b/.zuul.yaml
@@ -0,0 +1,9 @@
+---
+- project:
+    templates:
+      - build-tox-docs
+    check: &logcheck
+      jobs:
+        - openstack-tox-pep8
+        - openstack-tox-py38
+    gate: *logcheck
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3c0e61e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+# Copyright (C) 2021 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+FROM opendevorg/python-builder:3.8 as builder
+
+COPY . /tmp/src
+RUN assemble
+
+FROM opendevorg/python-base:3.8 as logscraper
+
+COPY --from=builder /output/ /output
+RUN /output/install-from-bindep
+
+ENTRYPOINT ["/usr/local/bin/logscraper"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..75b5248
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..5019f3d
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,44 @@
+Openstack CI log processing
+===========================
+
+The goal of this repository is to provide and check
+functionality of the new log processing system based on
+the Zuul log scraper tool.
+
+Zuul Log Scraper
+----------------
+
+The Zuul Log Scraper tool is responsible for periodically
+checking, via the Zuul CI API, whether new builds are available;
+if there are, it pushes the information to
+the log processing system.
+
+Testing
+-------
+
+The part of Openstack CI log processing runs a complete testing and
+continuous-integration environment, powered by `Zuul
+<https://zuul-ci.org/>`__.
+
+Any changes to logscraper script or tests will trigger jobs to
+thoroughly test those changes.
+
+Continuous Deployment
+---------------------
+Once changes are reviewed and committed, they will be applied
+automatically to the production hosts.
+
+Contributing
+============
+Contributions are welcome!
+
+Currently only unit tests are available. In the future,
+functional tests would be added.
+
+Documentation
+=============
+The latest documentation is available at
+http://docs.openstack.org/infra/ci-log-processing
+
+That documentation is generated from this repository. You can generate
+it yourself with ``tox -e docs``.
diff --git a/doc/assets/openstack.png b/doc/assets/openstack.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf6ffee7dfa70fa8587b104056edc248a986085e
GIT binary patch
literal 923
zcmV;M17!S(P)<h;3K|Lk000e1NJLTq006WA006WI0{{R33KRAK00004XF*Lt006O%
z3;baP0000WV@Og>004R>004l5008;`004mK004C`008P>0026e000+ooVrmw0002+
zP)t-s|NsC0{{H{{{r~*^{`>s?`}_X+`TqF%{`L0#^!5Gn^!@Sj{qXVp?(qEU?fmKN
z{OIcY<>>m~;`-a)`PbX{&(!$N)Az>A_QcBcvbyrCv+<y+@t&ygoTu=crtp}f@0OwO
zlb`RAo$imD?S+!<gpuuoknDht?0$>ucZTY7g6eL5>27`LZG7o#dg*C->1cT9W_IXe
za_C`l=U;H=U2f)8XXR34<xykhP-5gvU*twv<3(BHLRI29PT?|0;4w$wFh<}oMc^+)
z;4DJlDnQ>SJ>Mlf-Xb~PAUNJ1H{KpK-5oUD95me<Gu;|8usV0)0006iNkl<Zc-rmV
z*;2wl5J1s^Xi!{06h#Gb!(G9B0VM7lnE(G9UwxXyauT}r-XF+$uu>_9o;HSvh=_<t
zcA2s1nc+4)miZakFR!-R!NdLVdI;LB)p<WYIag`+2I24<1jm(JOmm^}7)t4}QHW)p
zJ?)2b>YvWWFqbYuSzVMO%@aqVw2mgC-gLD$ap<>N#JjK54dvC1_ygJK3q<!ftmfiP
zD6yNO4ZZLp68*AZLoa`dM1L;Z-W}Va%(gA>4{u*6df&644}_u*K!XMi8Z>Coph1HM
z4f?;J2Oqz0kSaPj*;!lpeQP@>L0ahN-ST+G&B4xidG|RL^i$P0?((ZoDWG5LqfU>}
zM(eN1qt7QDAO20ACyst!a(-mF^qw^OcFs9^?lx(3r|2AA>?DnDCxC8)1`YcAx4>d!
z)81As?;M@4ZACn_v6wMLM5MLsh-YuaOW>(q<WsXY^ZM1Lz3aUx=jN%s>xidbty|RI
zq0n!SL%#Arg9Z&6G-%MEL4yVj8Z>Coph1HM4f;=`HU58E<w=0bGZ!KvlAX$5i}95Q
z8nh0y%3rFhoC#1leW7yBMdw73%GoWaQ*!yVrsQ1CBc9zc`N?1~XwaZRg9Z&6G-%ME
zK|4fiyg^pwUA-!AMb>%$w91>lRo<zt^Y(g`djeE$WYD?$Lg$tdo%>vj$yJU65u0;*
x&DfiBj4}Sah#f`cAD-?g+y+HNL_|cY`vSS>h+?DDzpelP002ovPDHLkV1h<0;D7)C

literal 0
HcmV?d00001

diff --git a/doc/requirements.txt b/doc/requirements.txt
new file mode 100644
index 0000000..4a62566
--- /dev/null
+++ b/doc/requirements.txt
@@ -0,0 +1,3 @@
+sphinx!=1.6.6,!=1.6.7 # BSD
+openstackdocstheme>=1.32.0 # Apache-2.0
+zuul-sphinx>=0.1.1
diff --git a/doc/source/_static/openstack.png b/doc/source/_static/openstack.png
new file mode 120000
index 0000000..d76d85b
--- /dev/null
+++ b/doc/source/_static/openstack.png
@@ -0,0 +1 @@
+../../assets/openstack.png
\ No newline at end of file
diff --git a/doc/source/conf.py b/doc/source/conf.py
new file mode 100644
index 0000000..126b022
--- /dev/null
+++ b/doc/source/conf.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+#
+# system-config documentation build configuration file
+import datetime
+import os
+import sys
+# -- General configuration ------------------------------------------------
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('.'))
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['zuul_sphinx']
+# We have roles split between zuul-suitable roles at top level roles/*
+# (automatically detected by zuul-sphinx) and playbook-specific roles
+# (might have plugins, etc that make them unsuitable as potential zuul
+# roles).  Document both.
+zuul_role_paths = ['playbooks/roles']
+# The suffix of source filenames.
+source_suffix = '.rst'
+# The master toctree document.
+master_doc = 'index'
+# General information about the project.
+project = u'Openstack System Documentation'
+copyright = ('%d, Openstack Contributors.' % datetime.date.today().year)
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+# -- Options for HTML output ----------------------------------------------
+# This static content is used by the logo below
+html_static_path = [
+        '_static/',
+    ]
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'alabaster'
+html_theme_options = {
+        'logo': 'openstack.png'
+    }
+# -- Options for LaTeX output ---------------------------------------------
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+  ('index', 'system-config.tex', u'system-config Documentation',
+   u'OpenStack CI Log Processing team', 'manual'),
+]
diff --git a/doc/source/index.rst b/doc/source/index.rst
new file mode 100644
index 0000000..a36ab83
--- /dev/null
+++ b/doc/source/index.rst
@@ -0,0 +1,28 @@
+Openstack CI Log Processing
+===========================
+
+This documentation covers the installation and maintenance of the
+Openstack CI Log Processing system.
+
+Security policy
+---------------
+
+If you find or suspect a security issue with any Openstack CI Log
+Processing services, please inform the administrators via email at
+service-incident@lists.opendev.org.
+
+Contents:
+
+.. sidebar:: HOWTOs
+
+   * :doc:`logscraper`
+
+.. toctree::
+   :maxdepth: 2
+
+   logscraper
+
+Indices and tables
+==================
+
+* :ref:`search`
diff --git a/doc/source/logscraper.rst b/doc/source/logscraper.rst
new file mode 100644
index 0000000..306a102
--- /dev/null
+++ b/doc/source/logscraper.rst
@@ -0,0 +1,58 @@
+Logscraper
+==========
+
+The logscraper tool can be run as a one-shot log scrape or
+as a periodic check for newly available log jobs.
+
+The tool has a help option that shows the available options.
+It is available by typing:
+
+.. code-block::
+
+   logscraper --help
+
+
+Basic usage
+-----------
+
+Based on the use case, we can run logscraper.
+
+Example:
+
+* periodically check if there are new logs for the `openstack` tenant:
+
+.. code-block::
+
+  logscraper --gearman-server somehost --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /tmp/results-checkpoint.txt --follow
+
+* one-shot fetch of logs from the `zuul` tenant:
+
+.. code-block::
+
+  logscraper --gearman-server localhost --zuul-api-url https://zuul.opendev.org/api/tenant/zuul --checkpoint-file /tmp/zuul-result-timestamp.txt
+
+* periodically scrape logs from tenants: `openstack`, `zuul` and `local`
+
+.. code-block::
+
+  logscraper --gearman-server localhost --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --zuul-api-url https://zuul.opendev.org/api/tenant/zuul --zuul-api-url https://zuul.opendev.org/api/tenant/local --checkpoint-file /tmp/someresults.txt --follow
+
+
+Containerize tool
+-----------------
+
+Instead of using the `pip` tool, you can build your own container image
+that contains the logscraper tool, for example:
+
+.. code-block::
+
+   docker build -t logscraper -f Dockerfile .
+
+Then you can execute commands that are described above.
+
+NOTE: if you want to use parameter `--checkpoint-file`, you need to mount a volume
+to the container, for example:
+
+.. code-block::
+
+   docker run -v $(pwd):/checkpoint-dir:z -d logscraper logscraper --gearman-server somehost --zuul-api-url https://zuul.opendev.org/api/tenant/openstack --checkpoint-file /checkpoint-dir/checkpoint.txt --follow
diff --git a/logscraper/__init__.py b/logscraper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/logscraper/logscraper.py b/logscraper/logscraper.py
new file mode 100755
index 0000000..d0ac434
--- /dev/null
+++ b/logscraper/logscraper.py
@@ -0,0 +1,472 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2021 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+The goal is to push recent zuul builds into log gearman processor.
+
+[ CLI ] -> [ Config ] -> [ ZuulFetcher ] -> [ LogPublisher ]
+"""
+
+
+import argparse
+import gear
+import json
+import logging
+import multiprocessing
+import requests
+import socket
+import sys
+import time
+import urllib
+import yaml
+
+from distutils.version import StrictVersion as s_version
+
+GEARMAN_SERVER = None
+GEARMAN_PORT = None
+
+file_to_check = [
+    "job-output.txt.gz",
+    "job-output.txt",
+    "postci.txt",
+    "postci.txt.gz",
+    "var/log/extra/logstash.txt",
+    "var/log/extra/logstash.txt.gz",
+    "var/log/extra/errors.txt",
+    "var/log/extra/errors.txt.gz",
+]
+
+# From: https://opendev.org/opendev/base-jobs/src/branch/master/roles/submit-logstash-jobs/defaults/main.yaml # noqa
+logstash_processor_config = """
+files:
+  - name: job-output.txt
+    tags:
+      - console
+      - console.html
+  - name: grenade.sh.txt
+    tags:
+      - console
+      - console.html
+  - name: devstacklog.txt(?!.*summary)
+    tags:
+      - console
+      - console.html
+  - name: apache/keystone.txt
+    tags:
+      - screen
+      - oslofmt
+  - name: apache/horizon_error.txt
+    tags:
+      - apacheerror
+  # TODO(clarkb) Add swift proxy logs here.
+  - name: syslog.txt
+    tags:
+      - syslog
+  - name: tempest.txt
+    tags:
+      - screen
+      - oslofmt
+  - name: javelin.txt
+    tags:
+      - screen
+      - oslofmt
+  # Neutron index log files (files with messages from all test cases)
+  - name: dsvm-functional-index.txt
+    tags:
+      - oslofmt
+  - name: dsvm-fullstack-index.txt
+    tags:
+      - oslofmt
+  - name: screen-s-account.txt
+    tags:
+      - screen
+      - apachecombined
+  - name: screen-s-container.txt
+    tags:
+      - screen
+      - apachecombined
+  - name: screen-s-object.txt
+    tags:
+      - screen
+      - apachecombined
+  # tripleo logs
+  - name: postci.txt
+    tags:
+      - console
+      - postci
+  - name: var/log/extra/logstash.txt
+    tags:
+      - console
+      - postci
+  - name: var/log/extra/errors.txt
+    tags:
+      - console
+      - errors
+  # wildcard logs
+  - name: devstack-gate-.*.txt
+    tags:
+      - console
+      - console.html
+  # NOTE(mriedem): Logs that are known logstash index OOM killers are
+  # blacklisted here until fixed.
+  # screen-monasca-persister.txt: https://storyboard.openstack.org/#!/story/2003911
+  # screen-ovn-northd.txt: https://bugs.launchpad.net/networking-ovn/+bug/1795069
+  - name: screen-(?!(peakmem_tracker|dstat|karaf|kubelet|mistral-engine|monasca-persister|monasca-api|ovn-northd|q-svc)).*.txt
+    tags:
+      - screen
+      - oslofmt
+"""  # noqa
+
+
+###############################################################################
+#                                    CLI                                      #
+###############################################################################
+def get_arguments():
+    parser = argparse.ArgumentParser(description="Fetch and push last Zuul "
+                                     "CI job logs into gearman.")
+    parser.add_argument("--zuul-api-url", help="URL(s) for Zuul API. Parameter"
+                        " can be set multiple times.",
+                        required=True,
+                        action='append')
+    parser.add_argument("--gearman-server", help="Gearman host addresss",
+                        required=True)
+    parser.add_argument("--gearman-port", help="Gearman listen port. "
+                        "Defaults to 4731.",
+                        default=4731)
+    parser.add_argument("--follow", help="Keep polling zuul builds",
+                        action="store_true")
+    parser.add_argument("--insecure", help="Skip validating SSL cert",
+                        action="store_false")
+    parser.add_argument("--checkpoint-file", help="File that will keep "
+                        "information about last uuid timestamp for a job.")
+    parser.add_argument("--logstash-url", help="When provided, script will "
+                        "check connection to Logstash service before sending "
+                        "to log processing system. For example: "
+                        "logstash.local:9999")
+    parser.add_argument("--workers", help="Worker processes for logscraper",
+                        default=1)
+    parser.add_argument("--max-skipped", help="How many job results should be "
+                        "checked until last uuid written in checkpoint file "
+                        "is founded",
+                        default=500)
+    parser.add_argument("--debug", help="Print more information",
+                        action="store_true")
+    args = parser.parse_args()
+    return args
+
+
+###############################################################################
+#                      Configuration of this process                          #
+###############################################################################
+class Config:
+    def __init__(self, args, zuul_api_url):
+        self.checkpoint = None
+        url_path = zuul_api_url.split("/")
+        if url_path[-3] != "api" and url_path[-2] != "tenant":
+            print(
+                "ERROR: zuul-api-url needs to be in the form "
+                "of: https://<fqdn>/api/tenant/<tenant-name>"
+            )
+            sys.exit(1)
+        self.tenant = url_path[-1]
+
+        self.filename = "%s-%s" % (args.checkpoint_file, self.tenant)
+        try:
+            with open(self.filename) as f:
+                self.checkpoint = f.readline()
+        except Exception:
+            logging.exception("Can't load the checkpoint. Creating file")
+
+    def save(self, job_uuid):
+        try:
+            with open(self.filename, 'w') as f:
+                f.write(job_uuid)
+        except Exception as e:
+            raise("Can not write status to the checkpoint file %s" % e)
+
+
+###############################################################################
+#                             Log Processing                                  #
+###############################################################################
class LogMatcher(object):
    """Translate Zuul build results into gearman jobs for log processing."""

    def __init__(self, server, port, success, log_url, host_vars):
        self.client = gear.Client()
        self.client.addServer(server, port)
        self.hosts = host_vars
        self.success = success
        self.log_url = log_url

    def submitJobs(self, jobname, files, result):
        """Submit one background gearman job per log file.

        Returns a list of dicts with the gearman handle and the raw
        submitted payload for each file.
        """
        self.client.waitForServer(90)
        submitted = []
        for log_file in files:
            payload = json.dumps(self.makeOutput(log_file, result))
            payload = payload.encode("utf8")
            job = gear.TextJob(jobname, payload)
            self.client.submitJob(job, background=True)
            submitted.append(dict(handle=job.handle, arguments=payload))
        return submitted

    def makeOutput(self, file_object, result):
        """Build the gearman payload envelope for a single log file."""
        event = self.makeEvent(file_object, result)
        return {
            "retry": False,
            "event": event,
            "source_url": event["fields"]["log_url"],
        }

    def makeEvent(self, file_object, result):
        """Attach logstash processor tags matching this log file."""
        out_event = {"fields": self.makeFields(file_object, result)}
        matched_tags = []
        config_files = yaml.safe_load(logstash_processor_config)
        # Compare both the raw filename and its un-gzipped name against
        # each configured entry; first match wins.
        plain_name = file_object.replace(".gz", "")
        for entry in config_files["files"]:
            if file_object in entry["name"] or plain_name in entry["name"]:
                matched_tags = entry["tags"]
                break
        out_event["tags"] = [file_object] + matched_tags
        return out_event

    def makeFields(self, filename, result):
        """Map a Zuul build result dict to the logstash field dict."""
        fields = {
            "build_node": "zuul-executor",
            "filename": filename,
            "build_name": result["job_name"],
            # Anything that is not an explicit SUCCESS is a FAILURE.
            "build_status": (
                "SUCCESS" if result["result"] == "SUCCESS" else "FAILURE"
            ),
            "project": result["project"],
            "voting": int(result["voting"]),
            "build_set": result["buildset"],
            "build_queue": result["pipeline"],
            "build_ref": result["ref"],
            "build_branch": result.get("branch", "UNKNOWN"),
            "build_zuul_url": "N/A",
        }

        if "change" in result:
            fields["build_change"] = result["change"]
            fields["build_patchset"] = result["patchset"]
        elif "newrev" in result:
            fields["build_newrev"] = result.get("newrev", "UNKNOWN")

        fields["node_provider"] = "local"
        fields["log_url"] = urllib.parse.urljoin(result["log_url"], filename)
        fields["tenant"] = result["tenant"]

        if "executor" in result and "hostname" in result["executor"]:
            fields["zuul_executor"] = result["executor"]["hostname"]

        # NOTE(review): this stores the *buildset* uuid, not the build
        # uuid; the unit tests codify this behavior, so it is kept as-is.
        fields["build_uuid"] = result["buildset"]["uuid"]

        return fields
+
+
+###############################################################################
+#                             Fetch zuul builds                               #
+###############################################################################
def parse_version(zuul_version_txt):
    """Parse the zuul version returned by the different services:

    >>> parse_version("4.6.0-1.el7")
    StrictVersion ('4.6')
    >>> parse_version("4.10.2.dev6 22f04be1")
    StrictVersion ('4.10.2')
    >>> parse_version("4.10.2.dev6 22f04be1") > parse_version("4.6.0-1.el7")
    True
    >>> parse_version("4.6.0-1.el7") > parse_version("4.7.0")
    False
    """
    if not zuul_version_txt:
        # No version reported at all -> None.
        return
    # Strip the rpm package suffix ("-1.el7"), then the pip development
    # suffix (".dev6 ..."), before handing the rest to StrictVersion.
    base_version = zuul_version_txt.split("-")[0].split(".dev")[0]
    try:
        return s_version(base_version)
    except Exception:
        raise ValueError("Invalid zuul version: %s" % zuul_version_txt)
+
+
def _zuul_complete_available(zuul_url, insecure):
    """Return the "&complete=true" query suffix when the API supports it.

    Queries <zuul_url>/status and parses the reported zuul version;
    the parameter is only understood by Zuul >= 4.7.0.

    :param zuul_url: base Zuul API url without a trailing slash
    :param insecure: forwarded as requests' ``verify=`` argument.
        NOTE(review): passed unchanged, so insecure=True turns
        certificate verification *on* — presumably the --insecure
        argparse flag is defined so this works out; confirm against
        get_arguments().
    :returns: "&complete=true" or None
    :raises requests.HTTPError: when the status endpoint errors out
    """
    url = zuul_url + "/status"
    zuul_status = requests.get(url, verify=insecure)
    zuul_status.raise_for_status()
    zuul_version = parse_version(zuul_status.json().get("zuul_version"))
    if zuul_version and zuul_version >= s_version("4.7.0"):
        return "&complete=true"
+
+
def get_builds(zuul_url, insecure):
    """Yield unique build dictionaries from the Zuul builds API.

    Pages through /builds using limit/skip, de-duplicating builds that
    can appear twice when new results land between two requests.

    :param zuul_url: base Zuul API url
    :param insecure: forwarded as requests' ``verify=`` argument
    """
    pos, size = 0, 100
    zuul_url = zuul_url.rstrip("/")
    # BUG FIX: "extra" was only assigned inside the `if` branch, so
    # scraping a Zuul older than 4.7.0 (where _zuul_complete_available
    # returns None) raised UnboundLocalError on the base_url line.
    extra = _zuul_complete_available(zuul_url, insecure) or ""
    base_url = zuul_url + "/builds?limit=" + str(size) + extra

    known_builds = set()
    while True:
        url = base_url + "&skip=" + str(pos)
        logging.info("Getting job results %s", url)
        jobs_result = requests.get(url, verify=insecure)
        jobs_result.raise_for_status()

        for job in jobs_result.json():
            # It is important here to check we didn't yield builds twice,
            # as this can happen when using skip if new build get reported
            # between the two requests.
            if job["uuid"] not in known_builds:
                yield job
            known_builds.add(job["uuid"])
            pos += 1
+            pos += 1
+
+
def get_last_job_results(zuul_url, insecure, max_skipped, last_uuid):
    """Yield builds until we find the last uuid.

    Stops after max_skipped + 1 builds have been yielded, or as soon as
    the previously-checkpointed build uuid shows up.
    """
    for yielded, build in enumerate(get_builds(zuul_url, insecure)):
        if yielded > max_skipped:
            break
        if build["uuid"] == last_uuid:
            break
        yield build
+
+
+###############################################################################
+#                              Log scraper                                    #
+###############################################################################
def check_specified_files(job_result):
    """Return the subset of file_to_check that exists on the log server.

    Issues a GET per candidate file under the build's log_url and keeps
    the ones answering HTTP 200.
    """
    available_files = []
    # The log_url test is loop-invariant: a build without a log url
    # (e.g. an aborted one) can never expose any files, so return early
    # instead of re-checking it for every candidate file.
    if not job_result["log_url"]:
        return available_files
    for log_file in file_to_check:
        response = requests.get("%s%s" % (job_result["log_url"], log_file))
        if response.status_code == 200:
            available_files.append(log_file)
    return available_files
+
+
def setup_logging(debug):
    """Configure the root logger; debug adds timestamps and DEBUG level."""
    config = {"level": logging.INFO}
    if debug:
        config = {"format": "%(asctime)s %(message)s",
                  "level": logging.DEBUG}
    logging.basicConfig(**config)
    logging.debug("Zuul Job Scraper is starting...")
+
+
def run_build(build):
    """Push one build's available log files into the log processing system."""
    logging.info(
        "Processing logs for %s | %s | %s | %s",
        build["job_name"],
        build["end_time"],
        build["result"],
        build["uuid"],
    )

    results = dict(files=[], jobs=[], invocation={})

    # GEARMAN_SERVER / GEARMAN_PORT are module globals set by main().
    matcher = LogMatcher(
        GEARMAN_SERVER,
        GEARMAN_PORT,
        build["result"],
        build["log_url"],
        {},
    )
    results["files"] = check_specified_files(build)
    matcher.submitJobs("push-log", results["files"], build)
+
+
def check_connection(logstash_url):
    """Return True when the Logstash TCP endpoint is reachable.

    Used to check the service is up before pushing results.

    :param logstash_url: "<host>:<port>" string
    :raises ValueError: when the url has no ":<port>" part or the port
        is not an integer
    """
    host, port = logstash_url.split(':')
    # BUG FIX: socket.connect_ex() requires an integer port; passing the
    # string from split() raised a TypeError at runtime.
    port = int(port)
    logging.debug("Checking connection to %s on port %s", host, port)
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex((host, port)) == 0
+
+
def run_scraping(args, zuul_api_url):
    """Get latest job results and push them into log processing service.

    On the end, write newest uuid into checkpoint file, so in the future
    script will not push log duplication.
    """
    config = Config(args, zuul_api_url)

    builds = []
    for build in get_last_job_results(zuul_api_url, args.insecure,
                                      args.max_skipped, config.checkpoint):
        logging.debug("Working on build %s", build['uuid'])
        # Enrich the API result with information the log processor needs.
        build["tenant"] = config.tenant
        builds.append(build)

    logging.info("Processing %d builds", len(builds))

    if args.logstash_url and not check_connection(args.logstash_url):
        logging.critical("Can not connect to logstash %s. "
                         "Is it up?", args.logstash_url)
        return

    if builds:
        # BUG FIX: the pool was never closed/joined, leaking worker
        # processes on every iteration when running with --follow.
        with multiprocessing.Pool(int(args.workers)) as pool:
            try:
                pool.map(run_build, builds)
            finally:
                # Builds come newest-first; remember the most recent uuid
                # even when some submissions failed.
                config.save(builds[0]['uuid'])
+
+
def run(args):
    """Scrape each configured Zuul tenant API url in turn."""
    for api_url in args.zuul_api_url:
        logging.info("Starting checking logs for %s", api_url)
        run_scraping(args, api_url)
+
+
def main():
    """Entry point: parse arguments, then scrape once or loop forever."""
    global GEARMAN_SERVER
    global GEARMAN_PORT

    args = get_arguments()
    setup_logging(args.debug)

    GEARMAN_SERVER = args.gearman_server
    GEARMAN_PORT = args.gearman_port

    keep_running = True
    while keep_running:
        run(args)
        keep_running = args.follow
        if keep_running:
            # Wait a couple of minutes before polling the API again.
            time.sleep(120)
+
+
# Allow running the scraper directly as a script (in addition to the
# console_scripts entry point declared in setup.cfg).
if __name__ == "__main__":
    main()
diff --git a/logscraper/tests/__init__.py b/logscraper/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/logscraper/tests/base.py b/logscraper/tests/base.py
new file mode 100644
index 0000000..e30da99
--- /dev/null
+++ b/logscraper/tests/base.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2021 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+
+import fixtures
+import testtools
+
+
class TestCase(testtools.TestCase):
    """Base test case wiring up stdout/stderr/log capture fixtures."""

    # Env-var values of OS_STDOUT_CAPTURE / OS_STDERR_CAPTURE treated
    # as "capture enabled".
    true = ('True', 'true', '1', 'yes')

    def setUp(self):
        super(TestCase, self).setUp()
        if os.environ.get('OS_STDOUT_CAPTURE') in self.true:
            out_stream = self.useFixture(
                fixtures.StringStream('stdout')).stream
            self.useFixture(fixtures.MonkeyPatch('sys.stdout', out_stream))
        if os.environ.get('OS_STDERR_CAPTURE') in self.true:
            err_stream = self.useFixture(
                fixtures.StringStream('stderr')).stream
            self.useFixture(fixtures.MonkeyPatch('sys.stderr', err_stream))
        # Log capture defaults to on; only 'False'/'0' disable it.
        if os.environ.get('OS_LOG_CAPTURE') not in ('False', '0'):
            self.useFixture(fixtures.LoggerFixture(nuke_handlers=False,
                                                   level=None))
diff --git a/logscraper/tests/test_logscraper.py b/logscraper/tests/test_logscraper.py
new file mode 100644
index 0000000..311f4f0
--- /dev/null
+++ b/logscraper/tests/test_logscraper.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2021 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import json
+
+from logscraper import logscraper
+from logscraper.tests import base
+from unittest import mock
+
+
# Canned Zuul /builds API responses shared by the test cases below:
# [0] a SUCCESS build, [1] a FAILURE build, [2] an ABORTED build with
# log_url=None (aborted jobs normally provide no logs).
builds_result = [{
    'uuid': 'a0f8968bf8534409bb998e079b41d658',
    'job_name': 'openstack-tox-py38',
    'result': 'SUCCESS',
    'held': False,
    'start_time': '2021-11-04T08:21:19',
    'end_time': '2021-11-04T08:26:26',
    'duration': 307.0,
    'voting': True,
    'log_url': 'https://t.com/openstack/a0f8968/',
    'nodeset': 'ubuntu-focal',
    'error_detail': None,
    'final': True,
    'artifacts': [],
    'provides': [],
    'project': 'openstack/tempest',
    'branch': 'master',
    'pipeline': 'check',
    'change': 806255,
    'patchset': '9',
    'ref': 'refs/changes/55/806255/9',
    'newrev': None,
    'ref_url': 'https://review.opendev.org/806255',
    'event_id': 'cfa1a0a471f3447ca9b81b20132234bd',
    'buildset': {
        'uuid': 'bf11828235c649ff859ad87d7c4aa525'
    }
}, {
    'uuid': '39828646e9b847b6b8560df93838c405',
    'job_name': 'tripleo-centos-8',
    'result': 'FAILURE',
    'held': False,
    'start_time': '2021-11-04T08:17:46',
    'end_time': '2021-11-04T08:27:49',
    'duration': 603.0,
    'voting': True,
    'log_url': 'https://t.com/tripleo-8/3982864/',
    'nodeset': 'centos-8-stream',
    'error_detail': None,
    'final': True,
    'artifacts': [],
    'provides': [],
    'project': 'openstack/tripleo-ansible',
    'branch': 'master',
    'pipeline': 'check',
    'change': 816445,
    'patchset': '1',
    'ref': 'refs/changes/45/816445/1',
    'newrev': None,
    'ref_url': 'https://review.opendev.org/816445',
    'event_id': '0b8a45988023464fba508d72e51e23ad',
    'buildset': {
        'uuid': '4a0ffebe30a94efe819fffc03cf33ea4'
    }
}, {
    'uuid': 'a3fbc73ce599466e9ae1645f6b708f1b',
    'job_name': 'openstack-tox-lower-constraints',
    'result': 'ABORTED',
    'held': False,
    'start_time': '2021-11-04T08:04:34',
    'end_time': '2021-11-04T08:04:52',
    'duration': 18,
    'voting': True,
    'log_url': None,
    'nodeset': 'ubuntu-bionic',
    'error_detail': None,
    'final': True,
    'artifacts': [],
    'provides': [],
    'project': 'openstack/nova',
    'branch': 'stable/victoria',
    'pipeline': 'check',
    'change': 816486,
    'patchset': '1',
    'ref': 'refs/changes/86/816486/1',
    'newrev': None,
    'ref_url': 'https://review.opendev.org/816486',
    'event_id': '7be89d6aae0944949c3e1b7c811794b0',
    'buildset': {'uuid': 'bd044dfe3ecc484fbbf74fdeb7fb56aa'}
}]
+
+
class FakeArgs(object):
    """Stand-in for the argparse namespace returned by get_arguments()."""

    def __init__(self, zuul_api_url=None, gearman_server=None,
                 gearman_port=None, follow=False, insecure=False,
                 checkpoint_file=None, ignore_checkpoint=None,
                 logstash_url=None, workers=None, max_skipped=None):
        # Copy every argument onto the instance under the same name.
        vars(self).update(
            zuul_api_url=zuul_api_url,
            gearman_server=gearman_server,
            gearman_port=gearman_port,
            follow=follow,
            insecure=insecure,
            checkpoint_file=checkpoint_file,
            ignore_checkpoint=ignore_checkpoint,
            logstash_url=logstash_url,
            workers=workers,
            max_skipped=max_skipped,
        )
+
+
class TestScraper(base.TestCase):
    """Unit tests for the module-level scraper helpers in logscraper."""

    def test_parse_version(self):
        # Both the rpm ("-1.el7") and pip (".devN ...") suffixes must be
        # stripped before version comparison.
        ver1 = logscraper.parse_version('4.6.0-1.el7')
        ver2 = logscraper.parse_version('4.10.2.dev6-22f04be1')
        ver3 = logscraper.parse_version('4.10.2.dev6 22f04be1')
        self.assertEqual('4.6', ver1)
        self.assertEqual('4.10.2', ver2)
        self.assertEqual('4.10.2', ver3)
        # Unparseable text raises rather than returning garbage.
        self.assertRaises(ValueError,
                          logscraper.parse_version, '123412test123')

    @mock.patch('socket.socket')
    def test_check_connection(self, mock_socket):
        # socket is mocked, so only the fact that a socket gets created
        # for a well-formed "host:port" url is asserted here.
        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='somehost.com',
                gearman_server='localhost',
                logstash_url='localhost:9999')
            args = logscraper.get_arguments()
            logscraper.check_connection(args.logstash_url)
            mock_socket.assert_called_once()

    @mock.patch('socket.socket')
    def test_check_connection_wrong_host(self, mock_socket):
        # A url without a ":<port>" part must raise ValueError (from the
        # host/port unpacking), before any socket work happens.
        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='somehost.com',
                gearman_server='localhost',
                logstash_url='localhost')
            args = logscraper.get_arguments()
            self.assertRaises(ValueError, logscraper.check_connection,
                              args.logstash_url)

    # NOTE(review): patching GEARMAN_PORT/GEARMAN_SERVER with
    # return_value= replaces the module globals with MagicMocks;
    # presumably the globals exist at import time for patch() to find
    # them — confirm against the module top section.
    @mock.patch('multiprocessing.pool.Pool.map')
    @mock.patch('builtins.open', new_callable=mock.mock_open())
    @mock.patch('os.path.isfile')
    @mock.patch('logscraper.logscraper.GEARMAN_PORT',
                return_value=4731)
    @mock.patch('logscraper.logscraper.GEARMAN_SERVER',
                return_value='localhost')
    @mock.patch('logscraper.logscraper.check_specified_files',
                return_value=['job-output.txt'])
    @mock.patch('logscraper.logscraper.LogMatcher.submitJobs')
    @mock.patch('argparse.ArgumentParser.parse_args',
                return_value=FakeArgs(
                    zuul_api_url=['http://somehost.com/api/tenant/tenant1'],
                    gearman_server='localhost',
                    workers=1))
    def test_run_scraping(self, mock_args, mock_submit, mock_files,
                          mock_server, mock_port, mock_isfile, mock_readfile,
                          mock_map):
        # One fresh build must be handed to the multiprocessing pool.
        with mock.patch('logscraper.logscraper.get_last_job_results'
                        ) as mock_job_results:
            args = logscraper.get_arguments()
            mock_job_results.return_value = [builds_result[0]]
            logscraper.run_scraping(args,
                                    'http://somehost.com/api/tenant/tenant1')
            self.assertEqual(builds_result[0], mock_map.call_args.args[1][0])

    @mock.patch('logscraper.logscraper.run_scraping')
    def test_run(self, mock_scraping):
        # run() iterates every configured tenant url, scraping each once.
        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url=['http://somehost.com/api/tenant/tenant1',
                              'http://somehost.com/api/tenant/tenant2',
                              'http://somehost.com/api/tenant/tenant3'],
                gearman_server='localhost')
            args = logscraper.get_arguments()
            logscraper.run(args)
            self.assertEqual(3, mock_scraping.call_count)
+
+
class TestConfig(base.TestCase):
    """Tests for Config url validation and checkpoint persistence."""

    @mock.patch('sys.exit')
    def test_save(self, mock_sys):
        # Assume that url is wrong so it raise IndexError
        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='somehost.com',
                gearman_server='localhost')
            args = logscraper.get_arguments()
            self.assertRaises(IndexError, logscraper.Config, args,
                              args.zuul_api_url)
        # url without tenant
        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='https://somehost.com',
                gearman_server='localhost')
            args = logscraper.get_arguments()
            logscraper.Config(args, args.zuul_api_url)
            # sys.exit is mocked, so Config continues past validation;
            # only the exit call itself is asserted.
            mock_sys.assert_called()

        # correct url without job name
        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='http://somehost.com/api/tenant/sometenant',
                gearman_server='localhost',
                checkpoint_file='/tmp/testfile')
            args = logscraper.get_arguments()
            with mock.patch('builtins.open',
                            new_callable=mock.mock_open()
                            ) as mock_file:
                some_config = logscraper.Config(args, args.zuul_api_url)
                some_config.save('123412312341234')
                # The checkpoint filename is "<checkpoint_file>-<tenant>".
                mock_file.assert_called_with('/tmp/testfile-sometenant', 'w')
+
+
class TestLogMatcher(base.TestCase):
    """Tests asserting the exact gearman payload built by LogMatcher."""

    @mock.patch('gear.TextJob')
    @mock.patch('gear.Client.submitJob')
    @mock.patch('gear.BaseClient.waitForServer')
    def test_submitJobs(self, mock_gear, mock_gear_client, mock_gear_job):
        # SUCCESS build: full payload including change/patchset fields.
        result = builds_result[0]
        result['files'] = ['job-output.txt']
        result['tenant'] = 'sometenant'
        # NOTE: build_uuid carries the *buildset* uuid — this mirrors the
        # current makeFields() implementation.
        parsed_job = {
            "build_branch": "master",
            "build_change": 806255,
            "build_name": "openstack-tox-py38",
            "build_node": "zuul-executor",
            "build_patchset": "9",
            "build_queue": "check",
            "build_ref": "refs/changes/55/806255/9",
            "build_set": {"uuid": "bf11828235c649ff859ad87d7c4aa525"},
            "build_status": "SUCCESS",
            "build_uuid": "bf11828235c649ff859ad87d7c4aa525",
            "build_zuul_url": "N/A",
            "filename": "job-output.txt",
            "log_url": "https://t.com/openstack/a0f8968/job-output.txt",
            "node_provider": "local",
            "project": "openstack/tempest",
            "tenant": "sometenant",
            "voting": 1}

        expected_gear_job = {"retry": False, "event": {
            "fields": parsed_job,
            "tags": ["job-output.txt", "console", "console.html"]},
            "source_url": "https://t.com/openstack/a0f8968/job-output.txt"}

        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='http://somehost.com/api/tenant/sometenant',
                gearman_server='localhost',
                gearman_port='4731')
            args = logscraper.get_arguments()
            lmc = logscraper.LogMatcher(args.gearman_server, args.gearman_port,
                                        result['result'], result['log_url'],
                                        {})
            lmc.submitJobs('push-log', result['files'], result)
            mock_gear_client.assert_called_once()
            # The second TextJob argument is the utf8-encoded json payload.
            self.assertEqual(
                expected_gear_job,
                json.loads(mock_gear_job.call_args.args[1].decode('utf-8'))
            )

    @mock.patch('gear.TextJob')
    @mock.patch('gear.Client.submitJob')
    @mock.patch('gear.BaseClient.waitForServer')
    def test_submitJobs_failure(self, mock_gear, mock_gear_client,
                                mock_gear_job):
        # Take a job result whose result is "FAILURE"
        result = builds_result[1]
        result['files'] = ['job-output.txt']
        result['tenant'] = 'sometenant'
        parsed_job = {
            'build_branch': 'master',
            'build_change': 816445,
            'build_name': 'tripleo-centos-8',
            'build_node': 'zuul-executor',
            'build_patchset': '1',
            'build_queue': 'check',
            'build_ref': 'refs/changes/45/816445/1',
            'build_set': {'uuid': '4a0ffebe30a94efe819fffc03cf33ea4'},
            'build_status': 'FAILURE',
            'build_uuid': '4a0ffebe30a94efe819fffc03cf33ea4',
            'build_zuul_url': 'N/A',
            'filename': 'job-output.txt',
            'log_url': 'https://t.com/tripleo-8/3982864/job-output.txt',
            'node_provider': 'local',
            'project': 'openstack/tripleo-ansible',
            'tenant': 'sometenant',
            'voting': 1}

        expected_gear_job = {"retry": False, "event": {
            "fields": parsed_job,
            "tags": ["job-output.txt", "console", "console.html"]},
            "source_url": "https://t.com/tripleo-8/3982864/job-output.txt"}

        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='http://somehost.com/api/tenant/sometenant',
                gearman_server='localhost',
                gearman_port='4731')
            args = logscraper.get_arguments()
            lmc = logscraper.LogMatcher(args.gearman_server, args.gearman_port,
                                        result['result'], result['log_url'],
                                        {})
            lmc.submitJobs('push-log', result['files'], result)
            mock_gear_client.assert_called_once()
            self.assertEqual(
                expected_gear_job,
                json.loads(mock_gear_job.call_args.args[1].decode('utf-8'))
            )

    @mock.patch('gear.TextJob')
    @mock.patch('gear.Client.submitJob')
    @mock.patch('gear.BaseClient.waitForServer')
    def test_submitJobs_aborted(self, mock_gear, mock_gear_client,
                                mock_gear_job):
        # Take job result that build_status is "ABORTED"
        result = builds_result[2]
        result['files'] = ['job-output.txt']
        result['tenant'] = 'sometenant'
        # ABORTED maps to FAILURE in makeFields(); log_url is None, so
        # urljoin degrades the file url to just the filename.
        parsed_job = {
            'build_branch': 'stable/victoria',
            'build_change': 816486,
            'build_name': 'openstack-tox-lower-constraints',
            'build_node': 'zuul-executor',
            'build_patchset': '1',
            'build_queue': 'check',
            'build_ref': 'refs/changes/86/816486/1',
            'build_set': {'uuid': 'bd044dfe3ecc484fbbf74fdeb7fb56aa'},
            'build_status': 'FAILURE',
            'build_uuid': 'bd044dfe3ecc484fbbf74fdeb7fb56aa',
            'build_zuul_url': 'N/A',
            'filename': 'job-output.txt',
            'log_url': 'job-output.txt',
            'node_provider': 'local',
            'project': 'openstack/nova',
            'tenant': 'sometenant',
            'voting': 1}

        # NOTE: normally ABORTED jobs does not provide log_url,
        # so source_url will be just a file to iterate.
        # In the logscraper, aborted jobs are just skipped.
        expected_gear_job = {"retry": False, "event": {
            "fields": parsed_job,
            "tags": ["job-output.txt", "console", "console.html"]},
            "source_url": "job-output.txt"}

        with mock.patch('argparse.ArgumentParser.parse_args') as mock_args:
            mock_args.return_value = FakeArgs(
                zuul_api_url='http://somehost.com/api/tenant/sometenant',
                gearman_server='localhost',
                gearman_port='4731')
            args = logscraper.get_arguments()
            lmc = logscraper.LogMatcher(args.gearman_server, args.gearman_port,
                                        result['result'], result['log_url'],
                                        {})
            lmc.submitJobs('push-log', result['files'], result)
            mock_gear_client.assert_called_once()
            self.assertEqual(
                expected_gear_job,
                json.loads(mock_gear_job.call_args.args[1].decode('utf-8'))
            )
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fb80fc9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+pbr>=1.6         # Apache-2.0
+gear<0.17
+requests<2.27    # Apache-2.0
+PyYAML<6.1      # MIT
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..f435c35
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,23 @@
+[metadata]
+name = logscraper
+summary = OpenStack CI Log Processing tool
+description-file =
+    README.rst
+author = Openstack Contributors
+author-email = openstack-discuss@lists.openstack.org
+home-page = http://docs.openstack.org/infra/ci-log-processing
+classifier =
+    Environment :: OpenStack
+    Intended Audience :: Information Technology
+    Intended Audience :: System Administrators
+    License :: OSI Approved :: Apache Software License
+    Operating System :: POSIX :: Linux
+    Programming Language :: Python
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.6
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+
+[entry_points]
+console_scripts =
+    logscraper = logscraper.logscraper:main
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..1c3f5de
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2021 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import setuptools
+
# All packaging metadata lives in setup.cfg; pbr reads it (and git
# history for versioning) at build time.
setuptools.setup(
    setup_requires=['pbr'],
    pbr=True)
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 0000000..19c2fb9
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1,5 @@
+hacking<4.1.1    # Apache-2.0
+flake8<3.8.5
+pep8<1.7.2
+testtools<2.5.1  # MIT
+stestr<3.3       # Apache-2.0
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..789b1af
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,30 @@
+[tox]
+envlist = linters,docs,py38
+
+[testenv]
+basepython = python3
+usedevelop = True
+deps = -r{toxinidir}/requirements.txt
+       -r{toxinidir}/test-requirements.txt
+allowlist_externals = find
+commands =
+  find . -type f -name "*.pyc" -delete
+  find . -type d -name '__pycache__' -delete
+  stestr run {posargs}
+
+[testenv:pep8]
+commands = flake8
+
+[testenv:linters]
+commands = flake8
+
+[testenv:venv]
+commands = {posargs}
+
+[flake8]
+show-source = True
+exclude = .venv,.tox,dist,doc,build,*.egg
+
+[testenv:docs]
+deps = -r{toxinidir}/doc/requirements.txt
+commands = sphinx-build -W -E -b html doc/source doc/build/html