Initial commit

Add the initial proof of concept engagement-stats utility and
sufficient boilerplate to produce and test a Python package of it.

Change-Id: I43e962ee9c11c830ef503675f5ca3bc5da927262
Jeremy Stanley, 2020-05-09 15:43:59 +00:00
commit fd6cd1da6c (parent 7477753eec)
15 changed files with 860 additions and 0 deletions

.gitignore (new file, 61 lines)

@@ -0,0 +1,61 @@
# Add patterns in here to exclude files created by tools integrated with this
# repository, such as test frameworks from the project's recommended workflow,
# rendered documentation and package builds.
#
# Don't add patterns to exclude files created by preferred personal tools
# (editors, IDEs, your operating system itself even). These should instead be
# maintained outside the repository, for example in a ~/.gitignore file added
# with:
#
# git config --global core.excludesfile '~/.gitignore'
# Bytecompiled Python
*.py[cod]
# C extensions
*.so
# Packages
*.egg*
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
# Installer logs
pip-log.txt
# Unit test / coverage reports
cover/
.coverage*
!.coveragerc
.tox
nosetests.xml
.testrepository
.stestr
.venv
# Translations
*.mo
# Complexity
output/*.html
output/*/index.html
# Sphinx
doc/build
# pbr generates these
AUTHORS
ChangeLog
# Files created by releasenotes build
releasenotes/build

.zuul.yaml (new file, 9 lines)

@@ -0,0 +1,9 @@
- project:
templates:
- publish-opendev-tox-docs
check:
jobs:
- tox-linters
gate:
jobs:
- tox-linters

README.rst (new file, 6 lines)

@@ -0,0 +1,6 @@
OpenDev Engagement
==================
Tooling to generate coarse-grained reports of aggregate
collaboration activity from publicly available APIs and archives
provided by OpenDev hosted services.

doc/requirements.txt (new file, 1 line)

@@ -0,0 +1 @@
sphinx

(SVG image omitted) New file, 86 lines, 9.4 KiB: the OpenDev logo artwork added for the documentation theme (conf.py below points the Sphinx theme logo at opendev.svg under its static path).

doc/source/conf.py (new file, 13 lines)

@@ -0,0 +1,13 @@
import os
import sys
sys.path.insert(0, os.path.abspath('.'))
source_suffix = '.rst'
master_doc = 'index'
project = 'OpenDev Engagement'
copyright = ('OpenDev Contributors')
exclude_patterns = ['_build']
pygments_style = 'sphinx'
html_static_path = ['_static/']
html_theme = 'alabaster'
html_theme_options = {'logo': 'opendev.svg'}

doc/source/index.rst (new file, 1 line)

@@ -0,0 +1 @@
.. include:: ../../README.rst

engagement/__init__.py (new empty file, 0 lines)

engagement/stats.py (new executable file, 583 lines)

@@ -0,0 +1,583 @@
# Copyright OpenDev Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language
# governing permissions and limitations under the License.
import csv
import datetime
import html.parser
import json
import os
import re
import sys
import urllib.parse
import requests
import yaml
def requester(
url, params={}, headers={}, mode='live', recording=None, verbose=0):
"""A requests wrapper to consistently retry HTTPS queries"""
    # We key recordings of queries on their URL and rendered parameters, as a
    # single string so the key stays hashable and YAML-serializable (the
    # rendering may not be stable in Python<3.6 due to lack of dict ordering)
    if mode == 'replay':
        # In replay mode, use recorded results for all queries
        text = recording['%s %s' % (url, params)]
else:
# In live or record modes, actually use the remote API instead
retry = requests.Session()
retry.mount("https://", requests.adapters.HTTPAdapter(max_retries=3))
response = retry.get(url=url, params=params, headers=headers)
text = response.text
if verbose >= 2:
print("Queried: %s" % response.url)
    if mode == 'record':
        # In record mode, also save a copy of the query results to replay
        recording['%s %s' % (url, params)] = text
    return text
def decode_json(raw):
"""Trap JSON decoding failures and provide more detailed errors"""
# Gerrit's REST API prepends a JSON-breaker to avoid XSS vulnerabilities
if raw.startswith(")]}'"):
trimmed = raw[4:]
else:
trimmed = raw
# Try to decode and bail with much detail if it fails
try:
decoded = json.loads(trimmed)
    except Exception:
        # The raw text carries no URL or status detail here, so report the
        # payload which failed to decode
        print('\nJSON decoding failed; response body was:\n\n    %s\n'
              % trimmed, file=sys.stderr)
        raise
return decoded
def query_gerrit(method, params={}, mode='live', recording=None, verbose=0):
"""Query the Gerrit REST API and make or replay a recording"""
url = 'https://review.opendev.org/%s' % method
result = requester(
url,
params=params,
headers={'Accept': 'application/json'},
mode=mode,
recording=recording,
verbose=verbose)
return decode_json(result)
def from_gerrit_time(when):
"""Translate a Gerrit date/time string into a naive datetime object."""
return datetime.datetime.strptime(when.split('.')[0], '%Y-%m-%d %H:%M:%S')
def to_gerrit_time(when):
"""Translate a datetime object into a Gerrit date/time string."""
return when.strftime('%Y-%m-%d %H:%M:%S')
def get_projects(recording=None, verbose=0):
"""Return a sorted list of all namespaced code projects in Gerrit"""
all_projects = query_gerrit(
'projects/', params={'type': 'code'}, recording=recording,
verbose=verbose)
projects = list()
for (project, details) in all_projects.items():
if '/' in project:
projects.append(project)
return sorted(projects)
def usage_error():
"""Write a generic usage message to stderr and exit nonzero"""
sys.stderr.write(
'ERROR: specify report period like YEAR, YEAR-H[1-2], YEAR-Q[1-4],\n'
' or YEAR-[01-12], optionally prefixed by record- or replay-\n'
' if you want to make a recording or reuse a prior recording\n')
sys.exit(1)
def parse_report_period(when):
"""Parse a supplied report period string, returning a tuple of
after and before datetime objects"""
monthly = re.compile(r'^(\d{4})-(\d{2})$')
quarterly = re.compile(r'^(\d{4})-q([1-4])$', re.IGNORECASE)
    halfyearly = re.compile(r'^(\d{4})-h([1-2])$', re.IGNORECASE)
yearly = re.compile(r'^\d{4}$')
if monthly.match(when):
start_year = int(monthly.match(when).group(1))
start_month = int(monthly.match(when).group(2))
end_year = start_year + start_month // 12
end_month = 1 + start_month % 12
elif quarterly.match(when):
start_year = int(quarterly.match(when).group(1))
start_month = 1 + 3 * (int(quarterly.match(when).group(2)) - 1)
end_year = start_year + (start_month + 2) // 12
end_month = 1 + (start_month + 2) % 12
elif halfyearly.match(when):
start_year = int(halfyearly.match(when).group(1))
start_month = 1 + 6 * (int(halfyearly.match(when).group(2)) - 1)
end_year = start_year + (start_month + 5) // 12
end_month = 1 + (start_month + 5) % 12
elif yearly.match(when):
start_year = int(yearly.match(when).group())
start_month = 1
end_year = start_year + 1
end_month = 1
else:
usage_error()
after = datetime.datetime(start_year, start_month, 1)
before = datetime.datetime(end_year, end_month, 1)
return after, before
def parse_command_line():
"""Parse the command line to obtain the report period, then return it"""
if len(sys.argv) == 2:
return sys.argv[1]
else:
usage_error()
def report_times(report, after, before):
"""Add timestamp values to provided report"""
report['times'] = dict()
report['times']['after'] = to_gerrit_time(after)
report['times']['before'] = to_gerrit_time(before)
report['times']['generated'] = to_gerrit_time(datetime.datetime.utcnow())
return report
def get_ml_index(verbose=0):
sites = yaml.safe_load(
requester('http://lists.opendev.org/archives.yaml', verbose=verbose))
return sites
def get_ml_archive(listname, site, yearmonth, verbose=0):
year, month = yearmonth
monthname = datetime.date(1, month, 1).strftime('%B')
return requester('http://%s/pipermail/%s/%s-%s.txt' % (
site,
listname,
year,
monthname,
), verbose=verbose)
def add_ml_activity(ml_activity, site, archive):
if archive:
for line in archive.split('\n'):
# Take care to avoid incorrectly matching on lines which
# begin with the word From inside the message body
fromline = re.match(
r'From ([^ ]+) at ([0-9A-Za-z\.-]+\.[0-9A-Za-z\.-]+) '
r'(Sun|Mon|Tue|Wed|Thu|Fri|Sat) '
r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
r'[ 123][0-9] [012][0-9]:[0-9]{2}:[0-9]{2} [0-9]{4}$',
line)
if fromline:
localpart, domainpart = fromline.groups()[:2]
domainpart = domainpart.lower()
address = '%s@%s' % (localpart, domainpart)
if address.lower() in (
'build.starlingx@gmail.com',
'hudson@openstack.org',
'info@bitergia.com',
'infra-root@openstack.org',
'jenkins@openstack.org',
'no-reply@openstack.org',
'readthedocs@readthedocs.org',
'review@openstack.org',
'zuul@opendev.org',
'zuul@openstack.org',
) or domainpart in (
'bugs.launchpad.net',
'lists.airshipit.org',
'lists.katacontainers.io',
'lists.opendev.org',
'lists.openinfra.dev',
'lists.openstack.org',
'lists.starlingx.io',
'lists.zuul-ci.org',
'review.opendev.org',
'review.openstack.org',
'storyboard.openstack.org',
'storyboard.opendev.org',
'zuul.opendev.org',
'zuul.openstack.org',
):
continue
if address in ml_activity[site]:
ml_activity[site][address] += 1
else:
ml_activity[site][address] = 1
if address in ml_activity['_total']:
ml_activity['_total'][address] += 1
else:
ml_activity['_total'][address] = 1
def add_all_ml_activity(ml_activity, sites, yearmonth, verbose=0):
for site in sites:
if site not in ml_activity:
ml_activity[site] = {}
for listname in sites[site]:
archive = get_ml_archive(
listname, site, yearmonth, verbose=verbose)
add_ml_activity(ml_activity, site, archive)
class ChannelsListParser(html.parser.HTMLParser):
def __init__(self):
self.channels = list()
super().__init__()
def handle_starttag(self, tag, attrs):
if tag == 'a' and attrs[0][1].startswith('%23'):
self.channels.append(urllib.parse.unquote(attrs[0][1].strip('/')))
def get_channels_list(verbose=0):
parser = ChannelsListParser()
parser.feed(
requester('https://meetings.opendev.org/irclogs/', verbose=verbose))
return parser.channels
class LogsListParser(html.parser.HTMLParser):
def __init__(self):
self.logs = list()
super().__init__()
def handle_starttag(self, tag, attrs):
if tag == 'a' and attrs[0][1].startswith('%23'):
self.logs.append(attrs[0][1])
def get_channel_logs(channel, yearmonth, verbose=0):
year, month = yearmonth
channel = urllib.parse.quote(channel)
logs = ''
parser = LogsListParser()
parser.feed(requester(
'https://meetings.opendev.org/irclogs/%s/' % channel, verbose=verbose))
for day in range(1, 32):
if '%s.%d-%02d-%02d.log.html' % (
channel, year, month, day) not in parser.logs:
continue
result = requester(
'https://meetings.opendev.org/irclogs/%s/%s.%d-%02d-%02d.log' % (
channel,
channel,
year,
month,
day,
), verbose=verbose)
if result:
logs += result
return logs.split('\n')
def add_chat_activity(chat_activity, logs, namespace, verbose=0):
if logs:
for line in logs.split('\n'):
logline = re.match(r'[0-9T:-]{19} <([^ ]+)> ', line)
if logline:
nick = logline.group(1).strip('@')
if nick in (
'opendevmeet',
'opendevreview',
'opendevstatus',
'openstack',
'openstackgerrit',
'openstackstatus',
):
continue
if namespace not in chat_activity:
chat_activity[namespace] = {}
if verbose >= 1:
print("Adding namespace: %s" % namespace)
if nick in chat_activity['_all_channels']:
chat_activity['_all_channels'][nick] += 1
else:
chat_activity['_all_channels'][nick] = 1
if verbose >= 1:
print("Found chat nick: %s" % nick)
if nick in chat_activity[namespace]:
chat_activity[namespace][nick] += 1
else:
chat_activity[namespace][nick] = 1
def main(verbose=0):
"""Utility entry point"""
argument = parse_command_line()
if argument.startswith('record-'):
mode = 'record'
argument = argument[len(mode)+1:]
elif argument.startswith('replay-'):
mode = 'replay'
argument = argument[len(mode)+1:]
else:
mode = 'live'
recordfn = 'recordings/%s.yaml' % argument
if mode == 'record':
recording = {}
elif mode == 'replay':
        recording = yaml.safe_load(open(recordfn))
else:
recording = None
after, before = parse_report_period(argument)
changes = dict()
# Shard querying by project, to help with the inherent instability of
# result pagination from the Gerrit API
for project in get_projects(recording=recording, verbose=verbose):
if verbose >= 1:
print("Checking project: %s" % project)
offset = 0
# Loop due to unavoidable query result pagination
while offset >= 0:
# We only constrain the query by the after date, as changes created
# between the before and after date may have been updated more
# recently with a new revision or comment
new_changes = query_gerrit('changes/', params={
'q': 'project:%s after:{%s}' % (
project, to_gerrit_time(after)),
'no-limit': '1',
'start': offset,
'o': ['ALL_REVISIONS', 'MESSAGES', 'SKIP_DIFFSTAT'],
}, recording=recording, verbose=verbose)
# Since we redundantly query ranges with offsets to help combat
# pagination instability, we must deduplicate results
for change in new_changes:
if change['id'] not in changes:
changes[change['id']] = change
# Offset additional pages by half the returned entry count to help
# avoid missing changes due to pagination instability
if new_changes and new_changes[-1].get('_more_changes', False):
                offset += len(new_changes) // 2
else:
offset = -1
report = {
'chat_namespaces': dict(),
'ml_sites': dict(),
'repo_namespaces': dict(),
}
report_times(report, after, before)
committers = dict()
projects_active = dict()
reviewers = dict()
for change in changes.values():
namespace = change['project'].split("/")[0]
if namespace not in report['repo_namespaces']:
report['repo_namespaces'][namespace] = {
'changes_created': 0,
'changes_merged': 0,
'review_automated': 0,
'reviewer_messages': 0,
'revisions_pushed': 0,
}
if namespace not in projects_active:
projects_active[namespace] = set()
if after < from_gerrit_time(change['created']) < before:
# Note that the changes are not returned in chronological
# order, so we have to test all of them and can't short-circuit
# after the first change which was created too late
report['repo_namespaces'][namespace]['changes_created'] += 1
projects_active[namespace].add(change['project'])
if namespace not in committers:
committers[namespace] = set()
committers[namespace].add(change['owner']['_account_id'])
if verbose >= 2:
print("Found created change: %s" % change['_number'])
if ('submitted' in change and after < from_gerrit_time(
change['submitted']) < before):
report['repo_namespaces'][namespace]['changes_merged'] += 1
projects_active[namespace].add(change['project'])
if verbose >= 2:
print("Found merged change: %s" % change['_number'])
for revision in change['revisions'].values():
if after < from_gerrit_time(revision['created']) < before:
report['repo_namespaces'][namespace]['revisions_pushed'] += 1
projects_active[namespace].add(change['project'])
if namespace not in committers:
committers[namespace] = set()
committers[namespace].add(revision['uploader']['_account_id'])
if verbose >= 2:
print("Found change revision: %s,%s" % (
change['_number'], revision['_number']))
for message in change['messages']:
if after < from_gerrit_time(message['date']) < before:
if ('tag' in message and message['tag'].startswith(
'autogenerated:')):
report['repo_namespaces'][namespace][
'review_automated'] += 1
projects_active[namespace].add(change['project'])
if verbose >= 2:
print("Found automated comment: %s,%s,%s (%s)" % (
change['_number'],
message['_revision_number'],
message['id'],
message['date']))
elif not message['message'].startswith(
'Uploaded patch set'):
report['repo_namespaces'][namespace][
'reviewer_messages'] += 1
projects_active[namespace].add(change['project'])
if namespace not in reviewers:
reviewers[namespace] = set()
reviewers[namespace].add(message['author']['_account_id'])
if verbose >= 2:
print("Found reviewer comment: %s,%s,%s (%s)" % (
change['_number'],
message['_revision_number'],
message['id'],
message['date']))
all_committers = set()
for namespace in committers:
report['repo_namespaces'][namespace]['committers'] = len(
committers[namespace])
all_committers = all_committers.union(committers[namespace])
all_reviewers = set()
for namespace in reviewers:
report['repo_namespaces'][namespace]['reviewers'] = len(
reviewers[namespace])
all_reviewers = all_reviewers.union(reviewers[namespace])
for namespace in projects_active:
report['repo_namespaces'][namespace]['projects_active'] = len(
projects_active[namespace])
ml_activity = {'_total': {}}
for scalar_month in range(
after.year * 12 + after.month,
before.year * 12 + before.month):
yearmonth = ((scalar_month - 1) // 12, scalar_month % 12 or 12)
add_all_ml_activity(
ml_activity, get_ml_index(), yearmonth, verbose=verbose)
report['ml_sites'] = {}
for site in ml_activity:
report['ml_sites'][site] = {'posts': 0, 'senders': 0}
for posts in ml_activity[site].values():
report['ml_sites'][site]['posts'] += posts
report['ml_sites'][site]['senders'] += 1
chat_activity = {'_all_channels': {}}
channels = get_channels_list(verbose=verbose)
for channel in channels:
namespace = channel.split('-')[0].strip('#')
for scalar_month in range(
after.year * 12 + after.month,
before.year * 12 + before.month):
yearmonth = ((scalar_month - 1) // 12, scalar_month % 12 or 12)
for logs in get_channel_logs(channel, yearmonth, verbose=verbose):
add_chat_activity(
chat_activity, logs, namespace, verbose=verbose)
for namespace in chat_activity:
report['chat_namespaces'][namespace] = {
'messages': sum(chat_activity[namespace].values()),
'nicks': len(chat_activity[namespace]),
}
report['totals'] = dict()
report['totals']['active_repo_namespaces'] = len(report['repo_namespaces'])
report['totals']['committers'] = len(all_committers)
report['totals']['reviewers'] = len(all_reviewers)
additive_keys = (
'changes_created',
'changes_merged',
'projects_active',
'review_automated',
'reviewer_messages',
'revisions_pushed',
)
for key in additive_keys:
report['totals'][key] = 0
# Operate on a copy of the keys since we'll be altering the dict
for namespace in list(report['repo_namespaces'].keys()):
# Cull inactive namespaces from the report
if not report['repo_namespaces'][namespace]['projects_active']:
del report['repo_namespaces'][namespace]
continue
        # Sum the additive keys into the report totals
for key in additive_keys:
report['totals'][key] += report['repo_namespaces'][namespace][key]
report['totals']['mailing_list_posts'] = (
report['ml_sites']['_total']['posts'])
report['totals']['mailing_list_senders'] = (
report['ml_sites']['_total']['senders'])
del report['ml_sites']['_total']
report['totals']['mailing_list_sites'] = len(report['ml_sites'])
report['totals']['chat_messages_logged'] = sum(
chat_activity['_all_channels'].values())
report['totals']['chat_nicknames_logged'] = len(
chat_activity['_all_channels'])
del report['chat_namespaces']['_all_channels']
report['totals']['chat_channel_namespaces'] = len(
report['chat_namespaces'])
# Write a recording if requested
if mode == 'record':
os.makedirs(os.path.dirname(recordfn), exist_ok=True)
open(recordfn, 'w').write(yaml.dump(recording))
# Write the full YAML structured data report
os.makedirs('reports', exist_ok=True)
open('reports/%s.yaml' % argument, 'w').write(yaml.dump(report))
# Write the one-dimensional CSV tabular reports
for tabname in ('times', 'totals'):
table = [[argument, tabname]]
for rowname in report[tabname]:
table.append([rowname, report[tabname][rowname]])
csv.writer(open('reports/%s_%s.csv' % (
argument, tabname), 'w')).writerows(table)
# Write the two-dimensional CSV tabular reports
for tabname in ('chat_namespaces', 'ml_sites', 'repo_namespaces'):
table = [[argument]]
for colname in report[tabname]:
table[0].append(colname)
for rowname in report[tabname][colname]:
row_updated = False
for row in table[1:]:
if row[0] == rowname:
row.append(report[tabname][colname][rowname])
row_updated = True
break
if not row_updated:
table.append(
[rowname, report[tabname][colname][rowname]])
csv.writer(open('reports/%s_%s.csv' % (
argument, tabname), 'w')).writerows(table)
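
The report period given on the command line is the single knob for the whole run: an optional record- or replay- prefix selects the recording mode, and the remainder is parsed by parse_report_period() into an after/before datetime window. A minimal sketch of that behavior, assuming the package is installed so that engagement.stats is importable (an illustration, not part of the committed file):

# Illustrative only: exercises helpers defined in engagement/stats.py above
from engagement.stats import decode_json, parse_report_period, to_gerrit_time

# Gerrit's XSS-breaker prefix is stripped before JSON decoding
print(decode_json(')]}\'\n{"status": "ok"}'))

# Each supported period string maps to an after/before pair of naive
# datetimes; "before" is the first moment of the following period
for period in ('2020', '2020-h1', '2020-q2', '2020-05'):
    after, before = parse_report_period(period)
    print(period, to_gerrit_time(after), to_gerrit_time(before))

# Expected output:
# {'status': 'ok'}
# 2020 2020-01-01 00:00:00 2021-01-01 00:00:00
# 2020-h1 2020-01-01 00:00:00 2020-07-01 00:00:00
# 2020-q2 2020-04-01 00:00:00 2020-07-01 00:00:00
# 2020-05 2020-05-01 00:00:00 2020-06-01 00:00:00

A record- prefixed run additionally writes the query results it captured to recordings/<period>.yaml, which a later replay- prefixed run reads back for the Gerrit queries instead of hitting the live API.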

pyproject.toml (new file, 3 lines)

@@ -0,0 +1,3 @@
[build-system]
requires = ["pbr>=5.8.0", "setuptools>=36.6.0", "wheel"]
build-backend = "pbr.build"

requirements.txt (new file, 2 lines)

@@ -0,0 +1,2 @@
pyyaml
requests

setup.cfg (new file, 44 lines)

@@ -0,0 +1,44 @@
[metadata]
name = opendev-engagement
summary = Engagement statistics for OpenDev services
long_description = file: README.rst
long_description_content_type = text/x-rst; charset=UTF-8
author = OpenDev Contributors
author_email = service-discuss@lists.opendev.org
url = https://docs.opendev.org/opendev/engagement/
project_urls =
Browse Source = https://opendev.org/opendev/engagement
Bug Reporting = https://storyboard.openstack.org/#!/project/opendev/engagement
Documentation = https://docs.opendev.org/opendev/engagement/
Git Clone URL = https://opendev.org/opendev/engagement
License Texts = https://opendev.org/opendev/engagement/src/branch/master/LICENSE
keywords = contributor statistics
license = Apache License, Version 2.0
platforms = POSIX/Unix
classifier =
Development Status :: 5 - Production/Stable
Environment :: Console
Intended Audience :: Developers
Intended Audience :: Information Technology
License :: OSI Approved :: Apache Software License
Operating System :: POSIX :: Linux
Programming Language :: Python :: 3
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: Implementation :: CPython
Topic :: Software Development :: Testing
Topic :: Utilities
[options]
python_requires = >=3.6
[files]
packages =
engagement
[entry_points]
console_scripts =
engagement-stats = engagement.stats:main
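
The console_scripts entry point above is what turns the module into a command: installing the package creates an engagement-stats executable that simply calls engagement.stats.main(). A hypothetical equivalent invocation from Python (an illustration, not part of the committed file):

# Equivalent to running "engagement-stats 2020-q1" from the shell
import sys

from engagement.stats import main

sys.argv = ['engagement-stats', '2020-q1']  # main() reads the period from argv
main()  # writes reports/2020-q1.yaml plus CSV tables alongside it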

setup.py (new executable file, 19 lines)

@@ -0,0 +1,19 @@
#!/usr/bin/env python
# Copyright OpenDev Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import setuptools
setuptools.setup(pbr=True)

test-requirements.txt (new file, 1 line)

@@ -0,0 +1 @@
flake8

tox.ini (new file, 31 lines)

@@ -0,0 +1,31 @@
[tox]
minversion = 3.1
envlist = linters, py3
skipsdist = True
ignore_basepython_conflict = True
[testenv]
basepython = python3
usedevelop = True
deps = -r{toxinidir}/test-requirements.txt
# TODO(fungi): work out a representative replay call with a suitable but
# small test payload and target results comparison
commands =
python setup.py test --slowest --testr-args='{posargs}'
[testenv:linters]
commands = flake8
[testenv:docs]
whitelist_externals = rm
deps = -r{toxinidir}/doc/requirements.txt
commands =
rm -fr doc/build
sphinx-build -W -b html doc/source doc/build/html
[testenv:stats]
commands = engagement-stats {posargs}
[flake8]
show-source = True
exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build