Modernize ATCs script to Gerrit 2.8, programs.yaml

* tools/atc/README: Simplify instructions to use the mysql client's
tab-separated-values stdout, and remove obsolete manual steps which
have been automated.

* tools/atc/email-stats.sh: The functionality this shell script
previously provided has now been translated to Python and folded
into the inner script.

* tools/atc/email_stats.py: Implement the old email-stats.sh outer
loop, make it possible to run in a virtualenv, and make previously
hard-coded constants into command-line parameters with sane and
dynamically-determined defaults. Update field order and contents to
cope with differences in Gerrit 2.8, use a proper YAML parser on the
programs.yaml which has also changed format since the previous run,
and adjust for the improved extra-atcs file format as well.

Change-Id: I2deef2e6766ff998190e66737e7cec9cdcd5459c
This commit is contained in:
Jeremy Stanley 2014-06-26 02:35:29 +00:00
parent 69e247d873
commit 19c0ee2d3c
3 changed files with 109 additions and 85 deletions

View File

@ -1,36 +1,24 @@
These are the scripts used to create the ATC lists for use in PTL These are the scripts used to create the ATC lists for use in PTL
elections and Summit invitations. elections and Summit invitations.
0) Write a patch to email_stats.py so steps 1-2 are not necessary. 1) Run the following queries on review.openstack.org:
1) Edit email_stats.py to set your gerrit username.
2) Edit email_stats.py to set the start_date and end_date.
3) Run the following queries on review.openstack.org:
SELECT * FROM accounts sudo -H mysql -e 'SELECT * FROM accounts;' reviewdb > accounts.tab
INTO OUTFILE '/tmp/accounts.csv' sudo -H mysql -e 'SELECT * FROM account_external_ids;' reviewdb > emails.tab
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n';
SELECT * FROM account_external_ids 2) Copy those files to this directory.
INTO OUTFILE '/tmp/emails.csv' 3) Run:
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n';
4) Copy those files to this directory.
5) Run:
mkdir out mkdir out
./email-stats.sh virtualenv venv
. venv/bin/activate
pip install paramiko requests pyyaml
./email_stats.py --begin <BEGINDATE>
# optionally specify --end, --keyfile and --user
DATE=`date --iso` DATE=`date --iso`
mkdir $DATE mv out $DATE
for f in out/*.csv ; do
fromdos $f
iconv -f ISO-8859-1 -t UTF-8 -o $DATE/`basename $f` $f
done
cat $DATE/*.csv | sort | uniq > $DATE/all.csv cat $DATE/*.csv | sort | uniq > $DATE/all.csv
6) You can use diff.py to get the new ATCs since the previous run: 4) You can use diff.py to get the new ATCs since the previous run:
./diff.py $OLD-DATE/all.csv $DATE/all.csv $DATE/new.csv ./diff.py $OLD-DATE/all.csv $DATE/all.csv $DATE/new.csv

View File

@ -1,17 +0,0 @@
#!/bin/sh
# Projects counting for code contribution
for project in $(
wget -qO- \
https://git.openstack.org/cgit/openstack/governance/plain/reference/programs.yaml \
| grep '^ *- [A-Za-z_-]\+/[A-Za-z_-]\+$' \
| sed 's/^ *- //'
) ; do
python email_stats.py -p $project -o out/$( basename $project ).csv
done
# Confirmed list of non-code contributors
wget -qO- \
https://git.openstack.org/cgit/openstack/governance/plain/reference/extra-atcs \
| sed -e 's/#.*//' -e 's/^\s*//' -e 's/\s*$//' -e '/^$/d' \
-e 's/[^:]*: \(.*\) (\(.*\)) .*/,\1,\2/' > out/non-code-contributors.csv

141
tools/atc/email_stats.py Normal file → Executable file
View File

@ -1,6 +1,6 @@
#!/usr/bin/python #!/usr/bin/env python
# Copyright (C) 2013 OpenStack Foundation # Copyright (C) 2013-2014 OpenStack Foundation
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -18,16 +18,27 @@
# #
# Soren Hansen wrote the original version of this script. # Soren Hansen wrote the original version of this script.
# James Blair hacked it up to include email addresses from gerrit. # James Blair hacked it up to include email addresses from gerrit.
# Jeremy Stanley overhauled it for gerrit 2.8 and our governance repo.
import csv
import datetime import datetime
import json import json
import optparse import optparse
import paramiko import os
import csv import os.path
import re import re
import paramiko
import requests
import yaml
MAILTO_RE = re.compile('mailto:(.*)') MAILTO_RE = re.compile('mailto:(.*)')
USERNAME_RE = re.compile('username:(.*)') USERNAME_RE = re.compile('username:(.*)')
# Matches one extra-atcs entry of the form
#   "<prefix>: <full name> (<email>) [<anything>]"
# and captures the full name and the email address, in that order.
# Raw string so the regex escapes are not treated as (invalid) string escapes.
EXTRA_ATC_RE = re.compile(
    r'[^:]*: ([^\(]*) \(([^@]*@[^\)]*)\) \[[^\[]*\]')

# Governance repository files listing the official projects and the
# additional (non-code) contributors.
PROGRAMS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain'
                '/reference/programs.yaml')
EXTRA_ATCS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain'
                  '/reference/extra-atcs')
class Account(object): class Account(object):
@ -46,35 +57,38 @@ def get_account(accounts, num):
return a return a
def main(): def project_stats(project, output, begin, end, keyfile, user):
accounts = {} accounts = {}
for row in csv.reader(open('accounts.csv')): for row in open('accounts.tab'):
num = int(row[-1]) if not row.startswith('registered_on'):
name = row[1] row = row.split('\t')
email = row[2] num = int(row[13])
a = get_account(accounts, num) name = row[1]
a.full_name = name email = row[2]
if email and email != '\\N': a = get_account(accounts, num)
a.emails.append(email) a.full_name = name
if email and email != 'NULL':
a.emails.append(email)
for row in csv.reader(open('emails.csv')): for row in open('emails.tab'):
num, email, pw, external = row if not row.startswith('account_id'):
num = int(num) num, email, pw, external = row.split('\t')
a = get_account(accounts, num) num = int(num)
if email and email != '\\N' and email not in a.emails: a = get_account(accounts, num)
a.emails.append(email) if email and email != 'NULL' and email not in a.emails:
m = MAILTO_RE.match(external) a.emails.append(email)
if m: m = MAILTO_RE.match(external)
if m.group(1) not in a.emails: if m:
a.emails.append(m.group(1)) if m.group(1) not in a.emails:
m = USERNAME_RE.match(external) a.emails.append(m.group(1))
if m: m = USERNAME_RE.match(external)
if a.username: if m:
print a.num if a.username:
print a.username print a.num
raise Exception("Already a username") print a.username
a.username = m.group(1) raise Exception("Already a username")
a.username = m.group(1)
username_accounts = {} username_accounts = {}
for a in accounts.values(): for a in accounts.values():
@ -82,29 +96,23 @@ def main():
atcs = [] atcs = []
optparser = optparse.OptionParser() QUERY = "project:%s status:merged" % project
optparser.add_option(
'-p', '--project', default='nova',
help='Project to generate stats for')
optparser.add_option(
'-o', '--output', default='out.csv', help='Output file')
options, args = optparser.parse_args()
QUERY = "project:%s status:merged" % options.project
client = paramiko.SSHClient() client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.load_system_host_keys() client.load_system_host_keys()
client.connect( client.connect(
'review.openstack.org', port=29418, 'review.openstack.org', port=29418,
key_filename='/home/corvus/.ssh/id_rsa', username='CHANGME') key_filename=os.path.expanduser(keyfile), username=user)
stdin, stdout, stderr = client.exec_command( stdin, stdout, stderr = client.exec_command(
'gerrit query %s --all-approvals --format JSON' % QUERY) 'gerrit query %s --all-approvals --format JSON' % QUERY)
done = False done = False
last_sortkey = '' last_sortkey = ''
start_date = datetime.datetime(2012, 9, 27, 0, 0, 0) start_date = datetime.datetime(int(begin[0:4]), int(begin[4:6]),
end_date = datetime.datetime(2013, 7, 30, 0, 0, 0) int(begin[6:8]), 0, 0, 0)
end_date = datetime.datetime(int(end[0:4]), int(end[4:6]), int(end[6:8]),
0, 0, 0)
count = 0 count = 0
earliest = datetime.datetime.now() earliest = datetime.datetime.now()
@ -142,13 +150,58 @@ def main():
'gerrit query %s resume_sortkey:%s --all-approvals' 'gerrit query %s resume_sortkey:%s --all-approvals'
' --format JSON' % (QUERY, last_sortkey)) ' --format JSON' % (QUERY, last_sortkey))
print 'project: %s' % options.project print 'project: %s' % project
print 'examined %s changes' % count print 'examined %s changes' % count
print 'earliest timestamp: %s' % earliest print 'earliest timestamp: %s' % earliest
writer = csv.writer(open(options.output, 'w')) writer = csv.writer(open(output, 'w'))
for a in atcs: for a in atcs:
writer.writerow([a.username, a.full_name] + a.emails) writer.writerow([a.username, a.full_name] + a.emails)
print print
def get_projects(url):
    """Return the repository names listed in a programs.yaml document.

    The YAML fetched from ``url`` is read as a mapping of program name to a
    dict whose ``projects`` entry is a list of dicts, each with a ``repo``
    key.  Every ``repo`` value is returned in a single flat list.

    Raises ``requests.HTTPError`` if the server answers with an error status.
    """
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of parsing an error page as YAML.
    response.raise_for_status()
    # The document arrives over the network and only plain data is needed,
    # so use safe_load rather than yaml.load (which can construct arbitrary
    # Python objects from the input).
    programs_yaml = yaml.safe_load(response.text)

    projects = []
    for program in programs_yaml.values():
        projects.extend(project['repo'] for project in program['projects'])
    return projects
def get_extra_atcs(url):
    """Return the meaningful lines of the extra-atcs file at ``url``.

    Empty lines and lines starting with ``#`` are dropped; every other line
    is returned unmodified, in file order.

    Raises ``requests.HTTPError`` if the server answers with an error status.
    """
    response = requests.get(url)
    # Without this an error page body would be handed back as ATC entries.
    response.raise_for_status()
    return [line for line in response.text.split('\n')
            if line and not line.startswith('#')]
def main():
    """Generate the per-project ATC CSV files and the extra-ATCs CSV.

    Parses the command line, calls project_stats() for every repository
    returned by get_projects(PROGRAMS_URL), writing ``out/<name>.csv`` where
    ``<name>`` is the last path component of the repository, and then writes
    the parsed entries from EXTRA_ATCS_URL to ``out/extra-atcs.csv``.  The
    ``out`` directory is not created here and must already exist.

    Exits with a usage error if --begin is missing, if either date is not
    eight digits, or if no SSH username can be determined.  Raises
    ValueError if an extra-atcs line does not match EXTRA_ATC_RE.
    """
    # Dates travel as YYYYMMDD strings; the end date defaults to today (UTC).
    today = datetime.datetime.utcnow().strftime('%Y%m%d')

    optparser = optparse.OptionParser()
    optparser.add_option(
        '-b', '--begin', help='begin date (e.g. 20131017)')
    optparser.add_option(
        '-e', '--end', default=today, help='end date (default is today)')
    optparser.add_option(
        '-k', '--keyfile', default='~/.ssh/id_rsa',
        help='SSH key (default is ~/.ssh/id_rsa)')
    # .get() so that an unset $USER yields a usage error below rather than a
    # KeyError before option parsing even starts.
    optparser.add_option(
        '-u', '--user', default=os.environ.get('USER'),
        help='SSH username (default is $USER)')
    options, args = optparser.parse_args()

    # project_stats() slices the dates as [0:4], [4:6], [6:8], so reject
    # anything that is not exactly eight digits up front.
    if not options.begin:
        optparser.error('--begin is required (e.g. --begin 20131017)')
    for flag, value in (('--begin', options.begin), ('--end', options.end)):
        if len(value) != 8 or not value.isdigit():
            optparser.error('%s must be a date of the form YYYYMMDD' % flag)
    if not options.user:
        optparser.error('--user is required when $USER is not set')

    for project in get_projects(PROGRAMS_URL):
        output = 'out/%s.csv' % project.split('/')[-1]
        project_stats(project, output, options.begin, options.end,
                      options.keyfile, options.user)

    with open('out/extra-atcs.csv', 'w') as extra_file:
        writer = csv.writer(extra_file)
        for atc in get_extra_atcs(EXTRA_ATCS_URL):
            match = EXTRA_ATC_RE.match(atc)
            if match is None:
                raise ValueError('unparseable extra-atcs line: %r' % atc)
            # Leading empty column: these entries have no Gerrit username,
            # keeping the layout aligned with the per-project CSV rows.
            writer.writerow([''] + list(match.groups()))
if __name__ == "__main__": if __name__ == "__main__":
main() main()