Modernize ATCs script to Gerrit 2.8, programs.yaml
* tools/atc/README: Simplify instructions to use mysqlclient
  tab-separated-values stdout, and remove obsolete manual steps which
  have been automated.
* tools/atc/email-stats.sh: The functionality this shell script
  previously provided has now been translated to Python and folded into
  the inner script.
* tools/atc/email_stats.py: Implement the old email-stats.sh outer loop,
  make it possible to run in a virtualenv, and make previously hard-coded
  constants into command-line parameters with sane and
  dynamically-determined defaults. Update field order and contents to
  cope with differences in Gerrit 2.8, use a proper YAML parser on the
  programs.yaml which has also changed format since the previous run, and
  adjust for the improved extra-atcs file format as well.

Change-Id: I2deef2e6766ff998190e66737e7cec9cdcd5459c
This commit is contained in:
parent
69e247d873
commit
19c0ee2d3c
@ -1,36 +1,24 @@
|
||||
These are the scripts used to create the ATC lists for use in PTL
|
||||
elections and Summit invitations.
|
||||
|
||||
0) Write a patch to email_stats.py so steps 1-2 are not necessary.
|
||||
1) Edit email_stats.py to set your gerrit username.
|
||||
2) Edit email_stats.py to set the start_date and end_date.
|
||||
3) Run the following queries on review.openstack.org:
|
||||
1) Run the following queries on review.openstack.org:
|
||||
|
||||
SELECT * FROM accounts
|
||||
INTO OUTFILE '/tmp/accounts.csv'
|
||||
FIELDS TERMINATED BY ','
|
||||
ENCLOSED BY '"'
|
||||
LINES TERMINATED BY '\n';
|
||||
sudo -H mysql -e 'SELECT * FROM accounts;' reviewdb > accounts.tab
|
||||
sudo -H mysql -e 'SELECT * FROM account_external_ids;' reviewdb > emails.tab
|
||||
|
||||
SELECT * FROM account_external_ids
|
||||
INTO OUTFILE '/tmp/emails.csv'
|
||||
FIELDS TERMINATED BY ','
|
||||
ENCLOSED BY '"'
|
||||
LINES TERMINATED BY '\n';
|
||||
|
||||
4) Copy those files to this directory.
|
||||
5) Run:
|
||||
2) Copy those files to this directory.
|
||||
3) Run:
|
||||
|
||||
mkdir out
|
||||
./email-stats.sh
|
||||
virtualenv venv
|
||||
. venv/bin/activate
|
||||
pip install paramiko requests pyyaml
|
||||
./email_stats.py --begin <BEGINDATE>
|
||||
# <BEGINDATE> is formatted YYYYMMDD (e.g. 20131017); optionally specify --end, --keyfile and --user
|
||||
DATE=`date --iso`
|
||||
mkdir $DATE
|
||||
for f in out/*.csv ; do
|
||||
fromdos $f
|
||||
iconv -f ISO-8859-1 -t UTF-8 -o $DATE/`basename $f` $f
|
||||
done
|
||||
mv out $DATE
|
||||
cat $DATE/*.csv | sort | uniq > $DATE/all.csv
|
||||
|
||||
6) You can use diff.py to get the new ATCs since the previous run:
|
||||
4) You can use diff.py to get the new ATCs since the previous run:
|
||||
|
||||
./diff.py $OLD-DATE/all.csv $DATE/all.csv $DATE/new.csv
|
||||
|
@ -1,17 +0,0 @@
|
||||
#!/bin/sh
# Write one CSV per governance project (code contributors) plus one CSV of
# confirmed non-code contributors into ./out.

# Both steps below write into out/, so make sure it exists first.
mkdir -p out

# Projects counting for code contribution
# NOTE(review): the grep pattern only accepts names made of letters, '_' and
# '-', so a repository whose name contains a digit or '.' is skipped --
# confirm that this is intended.
for project in $(
    wget -qO- \
        https://git.openstack.org/cgit/openstack/governance/plain/reference/programs.yaml \
    | grep '^ *- [A-Za-z_-]\+/[A-Za-z_-]\+$' \
    | sed 's/^ *- //'
) ; do
    # Quote the expansions so an unexpected character in a project name can
    # never be word-split or globbed by the shell.
    python email_stats.py -p "$project" -o "out/$( basename "$project" ).csv"
done

# Confirmed list of non-code contributors
# The sed program drops comments, surrounding whitespace and empty lines,
# then rewrites "<prefix>: <name> (<email>) <rest>" as ",<name>,<email>".
wget -qO- \
    https://git.openstack.org/cgit/openstack/governance/plain/reference/extra-atcs \
    | sed -e 's/#.*//' -e 's/^\s*//' -e 's/\s*$//' -e '/^$/d' \
        -e 's/[^:]*: \(.*\) (\(.*\)) .*/,\1,\2/' > out/non-code-contributors.csv
|
103
tools/atc/email_stats.py
Normal file → Executable file
103
tools/atc/email_stats.py
Normal file → Executable file
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (C) 2013 OpenStack Foundation
|
||||
# Copyright (C) 2013-2014 OpenStack Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -18,16 +18,27 @@
|
||||
#
|
||||
# Soren Hansen wrote the original version of this script.
|
||||
# James Blair hacked it up to include email addresses from gerrit.
|
||||
# Jeremy Stanley overhauled it for gerrit 2.8 and our governance repo.
|
||||
|
||||
import csv
|
||||
import datetime
|
||||
import json
|
||||
import optparse
|
||||
import paramiko
|
||||
import csv
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
|
||||
import paramiko
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
# Raw strings are used for every pattern so that regex escapes such as "\("
# are never interpreted (or warned about) as string escape sequences.

# Each pattern captures, as group 1, whatever follows its scheme prefix.
MAILTO_RE = re.compile(r'mailto:(.*)')
USERNAME_RE = re.compile(r'username:(.*)')

# One extra-atcs entry: group 1 is the text between the first ": " and the
# " (" that opens the address, group 2 is the parenthesised e-mail address.
# A trailing "[...]" field must be present but is not captured.
# NOTE(review): presumably "<program>: <name> (<email>) [<expiry>]" --
# confirm against the governance repository's extra-atcs file.
EXTRA_ATC_RE = re.compile(
    r'[^:]*: ([^\(]*) \(([^@]*@[^\)]*)\) \[[^\[]*\]')

# Governance documents downloaded at run time.
PROGRAMS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain'
                '/reference/programs.yaml')
EXTRA_ATCS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain'
                  '/reference/extra-atcs')
|
||||
|
||||
|
||||
class Account(object):
|
||||
@ -46,23 +57,26 @@ def get_account(accounts, num):
|
||||
return a
|
||||
|
||||
|
||||
def main():
|
||||
def project_stats(project, output, begin, end, keyfile, user):
|
||||
accounts = {}
|
||||
|
||||
for row in csv.reader(open('accounts.csv')):
|
||||
num = int(row[-1])
|
||||
for row in open('accounts.tab'):
|
||||
if not row.startswith('registered_on'):
|
||||
row = row.split('\t')
|
||||
num = int(row[13])
|
||||
name = row[1]
|
||||
email = row[2]
|
||||
a = get_account(accounts, num)
|
||||
a.full_name = name
|
||||
if email and email != '\\N':
|
||||
if email and email != 'NULL':
|
||||
a.emails.append(email)
|
||||
|
||||
for row in csv.reader(open('emails.csv')):
|
||||
num, email, pw, external = row
|
||||
for row in open('emails.tab'):
|
||||
if not row.startswith('account_id'):
|
||||
num, email, pw, external = row.split('\t')
|
||||
num = int(num)
|
||||
a = get_account(accounts, num)
|
||||
if email and email != '\\N' and email not in a.emails:
|
||||
if email and email != 'NULL' and email not in a.emails:
|
||||
a.emails.append(email)
|
||||
m = MAILTO_RE.match(external)
|
||||
if m:
|
||||
@ -82,29 +96,23 @@ def main():
|
||||
|
||||
atcs = []
|
||||
|
||||
optparser = optparse.OptionParser()
|
||||
optparser.add_option(
|
||||
'-p', '--project', default='nova',
|
||||
help='Project to generate stats for')
|
||||
optparser.add_option(
|
||||
'-o', '--output', default='out.csv', help='Output file')
|
||||
options, args = optparser.parse_args()
|
||||
|
||||
QUERY = "project:%s status:merged" % options.project
|
||||
QUERY = "project:%s status:merged" % project
|
||||
|
||||
client = paramiko.SSHClient()
|
||||
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||
client.load_system_host_keys()
|
||||
client.connect(
|
||||
'review.openstack.org', port=29418,
|
||||
key_filename='/home/corvus/.ssh/id_rsa', username='CHANGME')
|
||||
key_filename=os.path.expanduser(keyfile), username=user)
|
||||
stdin, stdout, stderr = client.exec_command(
|
||||
'gerrit query %s --all-approvals --format JSON' % QUERY)
|
||||
|
||||
done = False
|
||||
last_sortkey = ''
|
||||
start_date = datetime.datetime(2012, 9, 27, 0, 0, 0)
|
||||
end_date = datetime.datetime(2013, 7, 30, 0, 0, 0)
|
||||
start_date = datetime.datetime(int(begin[0:4]), int(begin[4:6]),
|
||||
int(begin[6:8]), 0, 0, 0)
|
||||
end_date = datetime.datetime(int(end[0:4]), int(end[4:6]), int(end[6:8]),
|
||||
0, 0, 0)
|
||||
|
||||
count = 0
|
||||
earliest = datetime.datetime.now()
|
||||
@ -142,13 +150,58 @@ def main():
|
||||
'gerrit query %s resume_sortkey:%s --all-approvals'
|
||||
' --format JSON' % (QUERY, last_sortkey))
|
||||
|
||||
print 'project: %s' % options.project
|
||||
print 'project: %s' % project
|
||||
print 'examined %s changes' % count
|
||||
print 'earliest timestamp: %s' % earliest
|
||||
writer = csv.writer(open(options.output, 'w'))
|
||||
writer = csv.writer(open(output, 'w'))
|
||||
for a in atcs:
|
||||
writer.writerow([a.username, a.full_name] + a.emails)
|
||||
print
|
||||
|
||||
|
||||
def get_projects(url):
    """Return the repository names listed in a programs YAML document.

    :param url: location of the YAML document to download.
    :returns: list holding the ``repo`` value of every item found in the
        ``projects`` list of each top-level entry.
    :raises requests.exceptions.HTTPError: if the server answers with an
        error status.
    :raises KeyError: if an entry lacks ``projects`` or an item lacks
        ``repo``.
    """
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of parsing an error page as YAML.
    response.raise_for_status()

    # SECURITY: the document arrives over the network, so it is parsed with
    # safe_load; plain yaml.load may construct arbitrary Python objects.
    programs_yaml = yaml.safe_load(response.text)

    projects = []
    for program in programs_yaml:
        projects.extend(
            project['repo']
            for project in programs_yaml[program]['projects'])
    return projects
|
||||
|
||||
|
||||
def get_extra_atcs(url):
    """Return the non-blank, non-comment lines of an extra-atcs document.

    :param url: location of the plain-text document to download.
    :returns: list of lines with surrounding whitespace removed; lines that
        are empty after stripping, or whose first non-blank character is
        ``#``, are omitted.
    :raises requests.exceptions.HTTPError: if the server answers with an
        error status.
    """
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of treating an error page as data.
    response.raise_for_status()

    extra_atcs = []
    for line in response.text.split('\n'):
        # Stripping first means indented comments, whitespace-only lines and
        # a trailing "\r" from CRLF line endings are all handled.
        line = line.strip()
        if line and not line.startswith('#'):
            extra_atcs.append(line)
    return extra_atcs
|
||||
|
||||
|
||||
def _is_valid_date(value):
    """Return True if *value* is an eight-digit YYYYMMDD calendar date."""
    if value is None or len(value) != 8 or not value.isdigit():
        return False
    try:
        datetime.datetime(int(value[0:4]), int(value[4:6]), int(value[6:8]))
    except ValueError:
        return False
    return True


def main():
    """Parse the command line and write the ATC CSV files under ``out/``.

    One ``out/<name>.csv`` is produced by ``project_stats`` for every
    repository returned by ``get_projects``, followed by
    ``out/extra-atcs.csv`` holding one row per parseable line returned by
    ``get_extra_atcs``.

    Invalid command-line input is reported through ``optparser.error``,
    which prints a usage message and exits.
    """
    # Imported locally because this function is the only user of sys.
    import sys

    today = datetime.datetime.utcnow().strftime('%Y%m%d')

    optparser = optparse.OptionParser()
    optparser.add_option(
        '-b', '--begin', help='begin date (e.g. 20131017)')
    optparser.add_option(
        '-e', '--end', default=today, help='end date (default is today)')
    optparser.add_option(
        '-k', '--keyfile', default='~/.ssh/id_rsa',
        help='SSH key (default is ~/.ssh/id_rsa)')
    optparser.add_option(
        # .get() avoids a KeyError at start-up when $USER is not set.
        '-u', '--user', default=os.environ.get('USER'),
        help='SSH username (default is $USER)')
    options, _ = optparser.parse_args()

    # Validate up front: the dates are sliced as YYYYMMDD further down the
    # call chain, where a bad value would only surface as a traceback after
    # the network requests have already been made.
    if options.begin is None:
        optparser.error('--begin is required (e.g. --begin 20131017)')
    for flag, value in (('--begin', options.begin), ('--end', options.end)):
        if not _is_valid_date(value):
            optparser.error(
                '%s must be a YYYYMMDD date, got %r' % (flag, value))
    if not options.user:
        optparser.error('no SSH username: set $USER or pass --user')

    # Every output file lives in out/, so create it if it is missing.
    if not os.path.isdir('out'):
        os.makedirs('out')

    for project in get_projects(PROGRAMS_URL):
        output = 'out/%s.csv' % project.split('/')[-1]
        project_stats(project, output, options.begin, options.end,
                      options.keyfile, options.user)

    # The with-block guarantees the CSV is flushed and closed on exit.
    with open('out/extra-atcs.csv', 'w') as extra_file:
        writer = csv.writer(extra_file)
        for atc in get_extra_atcs(EXTRA_ATCS_URL):
            match = EXTRA_ATC_RE.match(atc)
            if match is None:
                # Report and skip a malformed line rather than aborting the
                # whole run with an AttributeError on None.
                sys.stderr.write(
                    'skipping unparseable extra-atcs line: %r\n' % atc)
                continue
            # The leading empty column stands in for the username field.
            writer.writerow([''] + list(match.groups()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Loading…
Reference in New Issue
Block a user