Merge "Drop tools/owners.py"

2019-05-23 07:57:48 +00:00 · 2019-05-23 07:57:48 +00:00 · 01dfccbd99
parent 62c06cc3e9 4cb523cdc9
commit 01dfccbd99
1 changed files with 0 additions and 719 deletions
--- a/tools/owners.py
+++ b/tools/owners.py
@@ -1,719 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright (c) 2016 OpenStack Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS
-# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-# express or implied. See the License for the specific language
-# governing permissions and limitations under the License.
-
-# Description: When run using OpenStack's Gerrit server, this builds
-# YAML representations of aggregate change owner details and change
-# counts for each governance project-team, as well as a combined set
-# for all teams.
-
-# Rationale: The OpenStack Technical Committee and Project Team Lead
-# elections need electorate rolls taken from "Active Technical
-# Contributors" to any repos under official project-teams over a
-# particular timeframe. Similarly, the OpenStack Foundation gives
-# summit registration discount codes to contributors meeting similar
-# criteria. The Gerrit REST API provides access to all the data
-# necessary to identify these individuals.
-
-# Use: The results end up in files named for each
-# official governance project-team (or "all") ending with a .yaml
-# extension. At the time of writing, it takes approximately 30
-# minutes to run on a well-connected machine with 70-80ms round-trip
-# latency to review.opendev.org.
-
-# An example for generating the March 2016 technical election rolls:
-#
-#     $ virtualenv venv
-#     [...]
-#     $ ./venv/bin/pip install pyyaml requests
-#     [...]
-#     $ ./venv/bin/python tools/owners.py -a 2015-03-04 \
-#           -b 2016-03-04 -i 11131 -i 22816 -o owners \
-#           -r march-2016-elections
-#     MISSING: ansible-build-image
-#     MERGING DUPLICATE ACCOUNT: 8074 into 2467
-#     [...blah, blah, blah...wait for completion...]
-#
-# TODO(fungi): Add a pass which will correctly generate the
-# stable_branch_maintenance.* files. In the meantime, to properly
-# generate the SBM PTL electorate, run a second time with a
-# different -o of sbm, adding the -n and -s options, and then copy
-# the full electorate over like:
-#
-#     $ ./venv/bin/python tools/owners.py -a 2015-03-04 \
-#           -b 2016-03-04 -i 11131 -i 22816 -o sbm \
-#           -r march-2016-elections -n -s 'branch:^stable/.*'
-#     [...wait for completion again...]
-#     $ cp sbm/_electorate.txt owners/stable_branch_maintenance.txt
-#     $ cp sbm/_all_owners.yaml owners/stable_branch_maintenance.yaml
-#
-# Once complete, make a compressed tarball of the owners directory
-# and send it attached to a PGP/MIME signed message to the appointed
-# election officials. The various *.txt files are lists of the
-# preferred addresses of all valid voters for the various PTL
-# elections (whose team names correspond to the file names),
-# suitable for passing directly to CIVS. The similarly named *.yaml
-# files are detailed structured data about the same sets of voters,
-# for use in validating the address lists. The _electorate.txt file
-# is the equivalent address list for the TC election voters, and its
-# corresponding structured data is in _all_owners.yaml.
-
-# You can also do interesting analysis on _all_owners.yaml, for
-# example:
-#
-#     $ ./venv/bin/python
-#     >>> import yaml
-#     >>>
-#     >>> o = yaml.safe_load(open('owners/_all_owners.yaml'))
-#     >>> for c in range(5):
-#     ...     print('Owners of at least %s changes: %s' % (
-#     ...         c+1,
-#     ...         len({k: v for k, v in o.items() if v['count'] > c})))
-#     ...
-#     Owners of at least 1 changes: 3239
-#     Owners of at least 2 changes: 2352
-#     Owners of at least 3 changes: 1924
-#     Owners of at least 4 changes: 1682
-#     Owners of at least 5 changes: 1504
-
-
-from __future__ import print_function
-import argparse
-import csv
-import datetime
-import json
-import os
-import sys
-
-import requests
-import yaml
-
-try:
-    from string import maketrans
-except ImportError:  # Python3
-    maketrans = bytes.maketrans
-
-
-def dumper(data, stream):
-    """Serialize data onto stream as block-style, UTF-8 encoded YAML,
-    so that every output file shares the same formatting"""
-    yaml_options = {
-        'allow_unicode': True,
-        'default_flow_style': False,
-        'encoding': 'utf-8',
-        'stream': stream,
-    }
-    return yaml.safe_dump(data, **yaml_options)
-
-
-def normalize_email(email):
-    """Lower-case the domain part of E-mail addresses to better spot
-    duplicate entries, since the domain part is case-insensitive
-    courtesy of DNS while the local part is not necessarily
-
-    The address is split on its last "@" so that a local part which
-    itself contains "@" is preserved; a string without any "@" is
-    returned unchanged rather than raising an exception.
-
-    :param email: the address to normalize
-    :returns: the address with only its domain part lower-cased
-    """
-    local, separator, domain = email.rpartition('@')
-    if not separator:
-        # Nothing identifiable as a domain, so leave it untouched
-        return email
-    return '%s@%s' % (local, domain.lower())
-
-
-def normalize_project(project):
-    """Replace spaces and hyphens with underscores in project teams
-    and then lower-case them, for more convenient filenames
-
-    Plain string replacement is used instead of a translation table
-    because table construction differs between Python 2 and 3.
-
-    :param project: the project-team name as listed in governance
-    :returns: the normalized name
-    """
-    return project.replace(' ', '_').replace('-', '_').lower()
-
-
-def date_merged(change, after=None, before=None):
-    """Return the merge timestamp of a change, or None if unusable
-
-    The timestamp is the change's "submitted" value with everything
-    from the first period onward removed. None is returned when the
-    change has no such value, when the timestamp is not earlier than
-    before, or when it is earlier than after."""
-
-    submitted = change.get('submitted')
-
-    if not submitted:
-        # A merged change lacking a submission date is not usable, so
-        # report it and let the caller skip it
-        print(
-            'SKIPPING DATELESS MERGE: change %s for account %s'
-            % (change['_number'], change['owner']['_account_id']),
-            file=sys.stderr)
-        return None
-
-    # Gerrit always just reports .000000000 for the subsecond values,
-    # so drop them
-    stamp = submitted.partition('.')[0]
-
-    # The before cutoff is tested first; the after test is only a
-    # sanity check since "after" is also used in the Gerrit query
-    if (before and stamp >= before) or (after and stamp < after):
-        return None
-
-    return stamp
-
-
-def requester(url, params=None, headers=None):
-    """A requests wrapper to consistently retry HTTPS queries
-
-    :param url: the URL to fetch with a GET request
-    :param params: optional mapping of query string parameters
-    :param headers: optional mapping of request headers
-    :returns: the response object returned by requests
-    """
-
-    # The session is used as a context manager so that its connection
-    # pool is released once the response has been received
-    with requests.Session() as session:
-        # Try up to 3 times
-        session.mount(
-            "https://", requests.adapters.HTTPAdapter(max_retries=3))
-        return session.get(url=url, params=params, headers=headers)
-
-
-def decode_json(raw):
-    """Trap JSON decoding failures and provide more detailed errors
-
-    :param raw: a response object providing text and url attributes
-    :returns: the decoded JSON document
-    :raises ValueError: re-raised, after a diagnostic has been printed
-        to stderr, when the response body is not valid JSON
-    """
-
-    # Gerrit's REST API prepends a JSON-breaker to avoid XSS vulnerabilities
-    text = raw.text
-    if text.startswith(")]}'"):
-        text = text[4:]
-
-    # Try to decode and bail with much detail if it fails; only decoding
-    # errors are trapped so that interrupts are not misreported
-    try:
-        return json.loads(text)
-    except ValueError:
-        print('\nrequest returned %s error to query:\n\n    %s\n'
-              '\nwith detail:\n\n    %s\n' % (raw, raw.url, text),
-              file=sys.stderr)
-        raise
-
-
-def query_gerrit(method, params=None):
-    """Query the Gerrit REST API
-
-    :param method: the API path appended to the base URL, for example
-        'projects/' or 'changes/'
-    :param params: optional mapping of query string parameters
-    :returns: the decoded JSON response
-    """
-
-    # The base URL to Gerrit REST API
-    GERRIT_API_URL = 'https://review.opendev.org/'
-
-    raw = requester(GERRIT_API_URL + method, params=params,
-                    headers={'Accept': 'application/json'})
-    return decode_json(raw)
-
-
-def get_from_cgit(project, obj, params=None):
-    """Retrieve a file from the cgit interface
-
-    :param project: the repository name, for example
-        'openstack/governance'
-    :param obj: the path of the file within the repository
-    :param params: optional mapping of query string parameters
-    :returns: the file content parsed as YAML
-    """
-
-    url = 'http://git.openstack.org/cgit/' + project + '/plain/' + obj
-    raw = requester(url, params=params,
-                    headers={'Accept': 'application/json'})
-    return yaml.safe_load(raw.text)
-
-
-def lookup_member(email):
-    """A requests wrapper for querying the OSF member directory API
-
-    :param email: the E-mail address to search for
-    :returns: the decoded JSON response from the directory
-    """
-
-    # The OpenStack foundation member directory lookup API endpoint;
-    # no trailing slash, since the path below supplies the separator
-    MEMBER_LOOKUP_URL = 'https://openstackid-resources.openstack.org'
-
-    # URL pattern for querying foundation members by E-mail address
-    raw = requester(
-            MEMBER_LOOKUP_URL + '/api/public/v1/members',
-            params={'filter[]': [
-                'group_slug==foundation-members',
-                'email==' + email,
-                ]},
-            headers={'Accept': 'application/json'},
-            )
-
-    return decode_json(raw)
-
-
-def usage(argv):
-    """Build the option parser and apply it to argv, leaving out the
-    program name in argv[0]"""
-    description = (
-        "When run using OpenDev's Gerrit server, this builds "
-        "YAML representations of aggregate change owner details and change "
-        "counts for each governance project-team, as well as a combined set "
-        "for all teams. Before and after dates/times should be supplied in "
-        "formats Gerrit accepts: https://review.opendev.org/Documentation/"
-        "user-search.html#search-operators")
-    parser = argparse.ArgumentParser(description=description)
-    add_option = parser.add_argument
-
-    add_option("-a", "--after", help="Start date for matching merges")
-    add_option("-b", "--before", help="End date for matching merges")
-    add_option("-c", "--config", help="Path to script configuration")
-    add_option("-i", "--ignore", action='append',
-               help="Account Id numbers to skip")
-    add_option("-n", "--no-extra-atcs", action='store_true',
-               dest='no_extra_atcs', help='Omit "extra ATCs"')
-    add_option("-o", "--outdir", help="Create an output directory")
-    add_option("-r", "--ref", help="Specify a Governance refname")
-    add_option("-s", "--sieve", help="Add Gerrit query parameters")
-
-    arguments = argv[1:]
-    return parser.parse_args(arguments)
-
-
-def main(argv=sys.argv):
-    """Build the owner, electorate and invite datasets and write them
-
-    Settings are taken from command-line options first, then from the
-    optional YAML configuration file, then from built-in defaults.
-    The governance project lists are fetched from cgit, merged changes
-    for each of their repos are fetched from Gerrit, and change owners
-    are aggregated both globally and per project-team (accounts which
-    share an E-mail address are merged). Unless disabled, "extra ATCs"
-    are added, then foundation membership is looked up for every owner
-    and the results are written into the output directory.
-
-    :param argv: the full argument vector, including the program name
-    """
-
-    # Record the start time for use later
-    start = datetime.datetime.utcnow()
-
-    options = usage(argv)
-
-    # If we're supplied a configuration file, use it; an empty file
-    # parses as None, which is treated as no configuration at all
-    if options.config:
-        with open(options.config) as config_file:
-            config = yaml.safe_load(config_file) or {}
-    # Otherwise, use nothing
-    else:
-        config = {}
-
-    # Start of the match timeframe for change merges
-    if options.after:
-        after = options.after
-    elif 'after' in config:
-        after = config['after']
-    else:
-        after = None
-
-    # End of the match timeframe for change merges
-    if options.before:
-        before = options.before
-    elif 'before' in config:
-        before = config['before']
-    else:
-        before = None
-
-    # Owner Ids for whom to ignore changes
-    if options.ignore:
-        ignore = [int(i) for i in options.ignore]
-    elif 'ignore' in config:
-        ignore = config['ignore']
-    else:
-        ignore = []
-
-    # Whether to omit "extra ATCs"
-    if options.no_extra_atcs:
-        no_extra_atcs = options.no_extra_atcs
-    elif 'no-extra-atcs' in config:
-        no_extra_atcs = config['no-extra-atcs']
-    else:
-        no_extra_atcs = False
-
-    # Output file directory
-    if options.outdir:
-        outdir = options.outdir
-    elif 'outdir' in config:
-        outdir = config['outdir']
-    else:
-        outdir = '.'
-    if not os.path.isdir(outdir):
-        os.makedirs(outdir)
-
-    # Governance Git repository ref object for reference lists
-    if options.ref:
-        ref = options.ref
-    elif 'ref' in config:
-        ref = config['ref']
-    else:
-        ref = 'refs/heads/master'
-
-    # Gerrit change query additions
-    if options.sieve:
-        sieve = options.sieve
-    elif 'sieve' in config:
-        sieve = config['sieve']
-    else:
-        sieve = None
-
-    # The query identifying relevant changes
-    match = 'status:merged'
-    if after:
-        match = '%s after:"%s"' % (match, after)
-    if sieve:
-        match = '%s %s' % (match, sieve)
-
-    # Retrieve the governance projects list, needs a Git refname as a
-    # parameter
-    # TODO(fungi): make this a configurable option so that you can
-    # for example supply a custom project list for running elections
-    # in unofficial teams
-    gov_projects = get_from_cgit('openstack/governance',
-                                 'reference/projects.yaml',
-                                 {'h': ref})
-
-    # The set of retired or removed "legacy" projects from governance
-    # are merged into the main dict if their retired-on date falls
-    # later than the after parameter for the qualifying time period
-    # TODO(fungi): make this a configurable option
-    old_projects = get_from_cgit('openstack/governance',
-                                 'reference/legacy.yaml',
-                                 {'h': ref})
-    for project in old_projects:
-        old_deliverables = old_projects[project]['deliverables']
-        for deliverable in old_deliverables:
-            # A deliverable's own retirement date takes precedence
-            # over the one recorded for its whole team
-            if 'retired-on' in old_deliverables[deliverable]:
-                retired = old_deliverables[deliverable]['retired-on']
-            elif 'retired-on' in old_projects[project]:
-                retired = old_projects[project]['retired-on']
-            else:
-                retired = None
-            if retired:
-                retired = retired.isoformat()
-                if after and after > retired:
-                    continue
-                if project not in gov_projects:
-                    gov_projects[project] = {'deliverables': {}}
-                if deliverable in gov_projects[project]['deliverables']:
-                    print(
-                        'Skipping duplicate/partially retired '
-                        'deliverable: %s' % deliverable, file=sys.stderr)
-                    continue
-                gov_projects[project]['deliverables'][deliverable] = (
-                    old_deliverables[deliverable])
-
-    # A mapping of short (no prefix) to full repo names existing in
-    # Gerrit, used to handle repos which have a different namespace
-    # in governance during transitions and also to filter out repos
-    # listed in governance which don't actually exist
-    ger_repos = {x.split('/')[-1]: x for x in query_gerrit('projects/')}
-
-    # This will be populated with change owners mapped to the
-    # project-teams maintaining their respective Git repositories
-    projects = {}
-
-    # This will be populated with all change owners and their
-    # account details
-    owners = {}
-
-    # This will be populated with discovered duplicate owners
-    duplicates = {}
-
-    # This will be populated with all individual E-mail addresses of
-    # change owners, to facilitate finding and merging duplicate
-    # accounts
-    all_emails = {}
-
-    # Iterate over all governance project-teams; their names are only
-    # normalized later, at filename generation time
-    for project in gov_projects:
-        # This will be populated with change owner Ids and counts
-        projects[project] = {}
-
-        # Governance project-teams have one or more deliverables
-        for deliverable in gov_projects[project]['deliverables']:
-            # Each deliverable can have multiple repos
-            repos = gov_projects[project]['deliverables'][deliverable]['repos']
-
-            # Operate on repo short-names (no namespace) to avoid
-            # potential namespace mismatches between governance
-            # and Gerrit
-            for repo in [r.split('/')[-1] for r in repos]:
-                # Only process repos which actually exist in Gerrit,
-                # otherwise spew a warning if skipping
-                if repo not in ger_repos:
-                    print('MISSING: %s' % repo, file=sys.stderr)
-                else:
-                    # Query for an arbitrary change set and get
-                    # detailed account information about the most
-                    # recent patchset, paginating at 100 changes
-                    offset = 0
-                    changes = []
-                    while offset >= 0:
-                        changes += query_gerrit('changes/', params={
-                            'q': 'project:%s %s' % (ger_repos[repo], match),
-                            'n': '100',
-                            'start': offset,
-                            'o': [
-                                'CURRENT_COMMIT',
-                                'CURRENT_REVISION',
-                                'DETAILED_ACCOUNTS',
-                                ],
-                            })
-                        if changes and changes[-1].get('_more_changes', False):
-                            offset += 100
-                        else:
-                            offset = -1
-
-                    # Iterate over each matched change in the repo
-                    for change in changes:
-                        # Get the merge date and skip if it's
-                        # outside any requested date range
-                        merged = date_merged(change, after, before)
-                        if not merged:
-                            continue
-
-                        # We index owners by their unique Gerrit
-                        # account Id numbers
-                        owner = change['owner']['_account_id']
-
-                        # If this owner is in the blacklist of Ids
-                        # to skip, then move on to the next change
-                        if owner in ignore:
-                            continue
-
-                        # Seen this owner already?
-                        new_owner = owner
-                        new = False
-                        if owner in duplicates:
-                            owner = duplicates[owner]
-                        elif owner not in owners:
-                            new = True
-
-                        # For new additions, initialize this as
-                        # their first and record specific account
-                        # details
-                        if new:
-                            # Get the set of all E-mail addresses
-                            # Gerrit knows for this owner's account
-                            emails = query_gerrit(
-                                'accounts/%s/emails'
-                                % change['owner']['_account_id'])
-
-                            # Find duplicate addresses and merge
-                            # accounts when that happens
-                            for email in emails:
-                                address = normalize_email(email['email'])
-                                if address in all_emails:
-                                    owner = all_emails[address]
-                                    duplicates[new_owner] = owner
-                                    print(
-                                        'MERGING DUPLICATE ACCOUNT: %s into %s'
-                                        % (new_owner, owner), file=sys.stderr)
-                                    break
-
-                        # For newly found non-duplicate owners,
-                        # initialize the global change count,
-                        # newest/oldest merged dates, and an empty
-                        # list where extra E-mail addresses can be
-                        # added; also track their full name and
-                        # Gerrit username
-                        if new and owner == new_owner:
-                            # TODO(fungi): this is a prime candidate
-                            # to become a struct, or maybe a class
-                            owners[owner] = {
-                                'count': 1,
-                                'extra': [],
-                                'name': change['owner'].get('name'),
-                                'newest': merged,
-                                'oldest': merged,
-                                'username': change['owner'].get('username'),
-                            }
-
-                        # If we've seen this owner on another change
-                        # in any repo then just iterate their global
-                        # change counter and update newest/oldest
-                        # dates
-                        else:
-                            owners[owner]['count'] += 1
-                            if merged > owners[owner]['newest']:
-                                owners[owner]['newest'] = merged
-                            elif merged < owners[owner]['oldest']:
-                                owners[owner]['oldest'] = merged
-
-                        # We only want to add addresses if this is a
-                        # new owner or a new duplicate
-                        if new:
-                            # Iterate over each E-mail address
-                            for email in emails:
-                                # Normalize the address before
-                                # performing any matching since
-                                # Gerrit doesn't do a great job of
-                                # this on its own
-                                address = normalize_email(email['email'])
-
-                                # Track this in the full list of all
-                                # known E-mail addresses
-                                all_emails[address] = owner
-
-                                # Whether Gerrit considers this the
-                                # preferred E-mail address
-                                preferred = email.get('preferred', False)
-
-                                # Store the preferred E-mail address
-                                # under its own key since it has a
-                                # special status, but only if this
-                                # is not a duplicate account
-                                if preferred and owner == new_owner:
-                                    owners[owner]['preferred'] = address
-
-                                    # If this was already added to
-                                    # the extras list due to an
-                                    # additional pre-normalized
-                                    # copy, remove it there
-                                    if address in owners[owner]['extra']:
-                                        owners[owner]['extra'].remove(address)
-
-                                # Store a list of non-preferred
-                                # addresses, deduplicating them in
-                                # case they match post-normalization
-                                # and treating duplicate preferred
-                                # addresses as non-preferred
-                                else:
-                                    if ((address not in owners[owner]['extra'])
-                                            and (address != owners[owner].get(
-                                                'preferred', ''))):
-                                        owners[owner]['extra'].append(address)
-
-                        # If we've seen this owner on another change
-                        # in a repo under this project-team then
-                        # just iterate their team change counter and
-                        # update newest/oldest dates
-                        if owner in projects[project]:
-                            projects[project][owner]['count'] += 1
-                            if merged > projects[project][owner]['newest']:
-                                projects[project][owner]['newest'] = merged
-                            elif merged < projects[project][owner]['oldest']:
-                                projects[project][owner]['oldest'] = merged
-
-                        # ...otherwise initialize this as their
-                        # first
-                        else:
-                            # TODO(fungi): another potential struct
-                            projects[project][owner] = {
-                                'count': 1,
-                                'newest': merged,
-                                'oldest': merged,
-                            }
-
-    # The negated counter will be used as a makeshift account Id
-    # for non-code contributors; those with owned changes use their
-    # Gerrit account Id instead
-    counter = 1
-
-    # Use the before time as the only contribution time for non-code
-    # contributors, falling back on the script start time if before
-    # was not specified
-    if before:
-        # A bare date is ten characters long, so give it a time
-        if len(before) == 10:
-            stamp = before + ' 00:00:00'
-        else:
-            stamp = before
-    else:
-        stamp = start.isoformat(sep=' ').split('.')[0]
-
-    # Iterate over all extra-atcs entries
-    if not no_extra_atcs:
-        for project in gov_projects:
-            for extra_atc in gov_projects[project].get('extra-atcs', []):
-                name = extra_atc['name']
-                email = extra_atc['email']
-                address = normalize_email(email)
-                if address in all_emails:
-                    owner = all_emails[address]
-                else:
-                    owner = -counter
-                    all_emails[address] = owner
-                    owners[owner] = {
-                        'count': -1,
-                        'extra': [],
-                        'name': name,
-                        'newest': stamp,
-                        'oldest': stamp,
-                        'preferred': address,
-                        'username': '_non_code_contributor',
-                    }
-                if owner not in projects[project]:
-                    projects[project][owner] = {
-                        'count': -1,
-                        'newest': stamp,
-                        'oldest': stamp,
-                    }
-                counter += 1
-
-    # This will hold an address list for TC electorate rolls
-    electorate = []
-
-    # A table of owners for summit invites
-    invites = []
-
-    # A fresh pass through the owners to build some other datasets
-    for owner in owners:
-        # Sort extra E-mail address lists for ease of comparison
-        owners[owner]['extra'].sort()
-
-        # Build the data used for an invite
-        if 'name' not in owners[owner] or not owners[owner]['name']:
-            print(
-                'SKIPPING MALFORMED OWNER: no fullname found for account %s' %
-                owner, file=sys.stderr)
-            continue
-        if 'preferred' not in owners[owner]:
-            if 'extra' in owners[owner] and owners[owner]['extra']:
-                owners[owner]['preferred'] = owners[owner]['extra'][0]
-                owners[owner]['extra'] = owners[owner]['extra'][1:]
-                print(
-                    'MISSING PREFERRED EMAIL: used first extra address as '
-                    'account %s preferred' % owner, file=sys.stderr)
-            else:
-                print(
-                    'SKIPPING MALFORMED OWNER: no preferred or extra '
-                    'addresses found for account %s' % owner, file=sys.stderr)
-                continue
-        for email in [owners[owner]['preferred']] + owners[owner]['extra']:
-            member = lookup_member(email)
-            if member['data']:
-                owners[owner]['member'] = member['data'][0]['id']
-                # The first address with a membership settles it, so
-                # don't spend further queries on the remaining ones
-                break
-        name = owners[owner]['name']
-        # Only Python 2 unicode objects need encoding for the csv
-        # module; native strings are written as they are
-        if not isinstance(name, str):
-            name = name.encode('utf-8')
-        invite = [owners[owner].get('member', '0')]
-        invite.append(name)
-        invite.append(owners[owner]['preferred'])
-        invite += owners[owner]['extra']
-        invites.append(invite)
-
-        # Append preferred addresses to the TC electorate for members only
-        if 'member' in owners[owner]:
-            electorate.append(owners[owner]['preferred'] + '\n')
-
-    # Write out a YAML file covering all change owners
-    with open(os.path.join(outdir, '_all_owners.yaml'), 'w') as fd:
-        dumper(owners, stream=fd)
-
-    # Write out a YAML file covering tracked duplicate accounts
-    with open(os.path.join(outdir, '_duplicate_owners.yaml'), 'w') as fd:
-        dumper(duplicates, stream=fd)
-
-    # Write out the TC electoral roll for CIVS
-    with open(os.path.join(outdir, '_electorate.txt'), 'w') as fd:
-        fd.writelines(electorate)
-
-    # Write out a CSV file appropriate for the invite2summit tool
-    with open(os.path.join(outdir, '_invites.csv'), 'w') as fd:
-        csv.writer(fd).writerows(invites)
-
-    # Make another pass through the projects so they can be dumped
-    # to our output files
-    for project in projects:
-
-        # This will hold team-specific info for writing
-        output = {}
-
-        # This will hold an address list for PTL electoral rolls
-        electorate = []
-
-        # Use a normalized project name for output file names
-        normalized_project = normalize_project(project)
-
-        # Iterate over each change owner for the current team
-        for owner in projects[project]:
-            # Copy the global owner details into our output since
-            # we're going to modify some
-            output[owner] = dict(owners[owner])
-
-            # Replace the owner change count and newest/oldest
-            # merged dates with the team-specific value rather than
-            # using the count from the global set
-            for field in ('count', 'newest', 'oldest'):
-                output[owner][field] = projects[project][owner][field]
-
-            # Append preferred member addresses to the PTL electoral rolls
-            if 'member' in owners[owner]:
-                electorate.append(owners[owner]['preferred'] + '\n')
-
-        # Write out a team-specific YAML file
-        team_yaml = os.path.join(outdir, '%s.yaml' % normalized_project)
-        with open(team_yaml, 'w') as fd:
-            dumper(output, stream=fd)
-
-        # Write out a team-specific electoral roll for CIVS
-        team_roll = os.path.join(outdir, '%s.txt' % normalized_project)
-        with open(team_roll, 'w') as fd:
-            fd.writelines(electorate)
-
-# Run main() only when executed as a script, not when imported
-if __name__ == "__main__":
-    main()