Modernize ATCs script to Gerrit 2.8, programs.yaml
* tools/atc/README: Simplify instructions to use mysqlclient tab-separated-values stdout, and remove obsolete manual steps which have been automated.
* tools/atc/email-stats.sh: The functionality this shell script previously provided has now been translated to Python and folded into the inner script.
* tools/atc/email_stats.py: Implement the old email-stats.sh outer loop, make it possible to run in a virtualenv, and turn previously hard-coded constants into command-line parameters with sane, dynamically determined defaults. Update field order and contents to cope with differences in Gerrit 2.8, use a proper YAML parser on programs.yaml (which has also changed format since the previous run), and adjust for the improved extra-atcs file format as well.

Change-Id: I2deef2e6766ff998190e66737e7cec9cdcd5459c
This commit is contained in:
parent
69e247d873
commit
19c0ee2d3c
@ -1,36 +1,24 @@
|
|||||||
These are the scripts used to create the ATC lists for use in PTL
|
These are the scripts used to create the ATC lists for use in PTL
|
||||||
elections and Summit invitations.
|
elections and Summit invitations.
|
||||||
|
|
||||||
0) Write a patch to email_stats.py so steps 1-2 are not necessary.
|
1) Run the following queries on review.openstack.org:
|
||||||
1) Edit email_stats.py to set your gerrit username.
|
|
||||||
2) Edit email_stats.py to set the start_date and end_date.
|
|
||||||
3) Run the following queries on review.openstack.org:
|
|
||||||
|
|
||||||
SELECT * FROM accounts
|
sudo -H mysql -e 'SELECT * FROM accounts;' reviewdb > accounts.tab
|
||||||
INTO OUTFILE '/tmp/accounts.csv'
|
sudo -H mysql -e 'SELECT * FROM account_external_ids;' reviewdb > emails.tab
|
||||||
FIELDS TERMINATED BY ','
|
|
||||||
ENCLOSED BY '"'
|
|
||||||
LINES TERMINATED BY '\n';
|
|
||||||
|
|
||||||
SELECT * FROM account_external_ids
|
2) Copy those files to this directory.
|
||||||
INTO OUTFILE '/tmp/emails.csv'
|
3) Run:
|
||||||
FIELDS TERMINATED BY ','
|
|
||||||
ENCLOSED BY '"'
|
|
||||||
LINES TERMINATED BY '\n';
|
|
||||||
|
|
||||||
4) Copy those files to this directory.
|
|
||||||
5) Run:
|
|
||||||
|
|
||||||
mkdir out
|
mkdir out
|
||||||
./email-stats.sh
|
virtualenv venv
|
||||||
|
. venv/bin/activate
|
||||||
|
pip install paramiko requests pyyaml
|
||||||
|
./email_stats.py --begin <BEGINDATE>
|
||||||
|
# optionally specify --end, --keyfile and --user
|
||||||
DATE=`date --iso`
|
DATE=`date --iso`
|
||||||
mkdir $DATE
|
mv out $DATE
|
||||||
for f in out/*.csv ; do
|
|
||||||
fromdos $f
|
|
||||||
iconv -f ISO-8859-1 -t UTF-8 -o $DATE/`basename $f` $f
|
|
||||||
done
|
|
||||||
cat $DATE/*.csv | sort | uniq > $DATE/all.csv
|
cat $DATE/*.csv | sort | uniq > $DATE/all.csv
|
||||||
|
|
||||||
6) You can use diff.py to get the new ATCs since the previous run:
|
4) You can use diff.py to get the new ATCs since the previous run:
|
||||||
|
|
||||||
./diff.py $OLD-DATE/all.csv $DATE/all.csv $DATE/new.csv
|
./diff.py $OLD-DATE/all.csv $DATE/all.csv $DATE/new.csv
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
# Projects counting for code contribution
|
|
||||||
for project in $(
|
|
||||||
wget -qO- \
|
|
||||||
https://git.openstack.org/cgit/openstack/governance/plain/reference/programs.yaml \
|
|
||||||
| grep '^ *- [A-Za-z_-]\+/[A-Za-z_-]\+$' \
|
|
||||||
| sed 's/^ *- //'
|
|
||||||
) ; do
|
|
||||||
python email_stats.py -p $project -o out/$( basename $project ).csv
|
|
||||||
done
|
|
||||||
|
|
||||||
# Confirmed list of non-code contributors
|
|
||||||
wget -qO- \
|
|
||||||
https://git.openstack.org/cgit/openstack/governance/plain/reference/extra-atcs \
|
|
||||||
| sed -e 's/#.*//' -e 's/^\s*//' -e 's/\s*$//' -e '/^$/d' \
|
|
||||||
-e 's/[^:]*: \(.*\) (\(.*\)) .*/,\1,\2/' > out/non-code-contributors.csv
|
|
141
tools/atc/email_stats.py
Normal file → Executable file
141
tools/atc/email_stats.py
Normal file → Executable file
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Copyright (C) 2013 OpenStack Foundation
|
# Copyright (C) 2013-2014 OpenStack Foundation
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
@ -18,16 +18,27 @@
|
|||||||
#
|
#
|
||||||
# Soren Hansen wrote the original version of this script.
|
# Soren Hansen wrote the original version of this script.
|
||||||
# James Blair hacked it up to include email addresses from gerrit.
|
# James Blair hacked it up to include email addresses from gerrit.
|
||||||
|
# Jeremy Stanley overhauled it for gerrit 2.8 and our governance repo.
|
||||||
|
|
||||||
|
import csv
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import optparse
|
import optparse
|
||||||
import paramiko
|
import os
|
||||||
import csv
|
import os.path
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import paramiko
|
||||||
|
import requests
|
||||||
|
import yaml
|
||||||
|
|
||||||
MAILTO_RE = re.compile('mailto:(.*)')
|
MAILTO_RE = re.compile('mailto:(.*)')
|
||||||
USERNAME_RE = re.compile('username:(.*)')
|
USERNAME_RE = re.compile('username:(.*)')
|
||||||
|
EXTRA_ATC_RE = re.compile('[^:]*: ([^\(]*) \(([^@]*@[^\)]*)\) \[[^\[]*\]')
|
||||||
|
PROGRAMS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain'
|
||||||
|
'/reference/programs.yaml')
|
||||||
|
EXTRA_ATCS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain'
|
||||||
|
'/reference/extra-atcs')
|
||||||
|
|
||||||
|
|
||||||
class Account(object):
|
class Account(object):
|
||||||
@ -46,35 +57,38 @@ def get_account(accounts, num):
|
|||||||
return a
|
return a
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def project_stats(project, output, begin, end, keyfile, user):
|
||||||
accounts = {}
|
accounts = {}
|
||||||
|
|
||||||
for row in csv.reader(open('accounts.csv')):
|
for row in open('accounts.tab'):
|
||||||
num = int(row[-1])
|
if not row.startswith('registered_on'):
|
||||||
name = row[1]
|
row = row.split('\t')
|
||||||
email = row[2]
|
num = int(row[13])
|
||||||
a = get_account(accounts, num)
|
name = row[1]
|
||||||
a.full_name = name
|
email = row[2]
|
||||||
if email and email != '\\N':
|
a = get_account(accounts, num)
|
||||||
a.emails.append(email)
|
a.full_name = name
|
||||||
|
if email and email != 'NULL':
|
||||||
|
a.emails.append(email)
|
||||||
|
|
||||||
for row in csv.reader(open('emails.csv')):
|
for row in open('emails.tab'):
|
||||||
num, email, pw, external = row
|
if not row.startswith('account_id'):
|
||||||
num = int(num)
|
num, email, pw, external = row.split('\t')
|
||||||
a = get_account(accounts, num)
|
num = int(num)
|
||||||
if email and email != '\\N' and email not in a.emails:
|
a = get_account(accounts, num)
|
||||||
a.emails.append(email)
|
if email and email != 'NULL' and email not in a.emails:
|
||||||
m = MAILTO_RE.match(external)
|
a.emails.append(email)
|
||||||
if m:
|
m = MAILTO_RE.match(external)
|
||||||
if m.group(1) not in a.emails:
|
if m:
|
||||||
a.emails.append(m.group(1))
|
if m.group(1) not in a.emails:
|
||||||
m = USERNAME_RE.match(external)
|
a.emails.append(m.group(1))
|
||||||
if m:
|
m = USERNAME_RE.match(external)
|
||||||
if a.username:
|
if m:
|
||||||
print a.num
|
if a.username:
|
||||||
print a.username
|
print a.num
|
||||||
raise Exception("Already a username")
|
print a.username
|
||||||
a.username = m.group(1)
|
raise Exception("Already a username")
|
||||||
|
a.username = m.group(1)
|
||||||
|
|
||||||
username_accounts = {}
|
username_accounts = {}
|
||||||
for a in accounts.values():
|
for a in accounts.values():
|
||||||
@ -82,29 +96,23 @@ def main():
|
|||||||
|
|
||||||
atcs = []
|
atcs = []
|
||||||
|
|
||||||
optparser = optparse.OptionParser()
|
QUERY = "project:%s status:merged" % project
|
||||||
optparser.add_option(
|
|
||||||
'-p', '--project', default='nova',
|
|
||||||
help='Project to generate stats for')
|
|
||||||
optparser.add_option(
|
|
||||||
'-o', '--output', default='out.csv', help='Output file')
|
|
||||||
options, args = optparser.parse_args()
|
|
||||||
|
|
||||||
QUERY = "project:%s status:merged" % options.project
|
|
||||||
|
|
||||||
client = paramiko.SSHClient()
|
client = paramiko.SSHClient()
|
||||||
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||||
client.load_system_host_keys()
|
client.load_system_host_keys()
|
||||||
client.connect(
|
client.connect(
|
||||||
'review.openstack.org', port=29418,
|
'review.openstack.org', port=29418,
|
||||||
key_filename='/home/corvus/.ssh/id_rsa', username='CHANGME')
|
key_filename=os.path.expanduser(keyfile), username=user)
|
||||||
stdin, stdout, stderr = client.exec_command(
|
stdin, stdout, stderr = client.exec_command(
|
||||||
'gerrit query %s --all-approvals --format JSON' % QUERY)
|
'gerrit query %s --all-approvals --format JSON' % QUERY)
|
||||||
|
|
||||||
done = False
|
done = False
|
||||||
last_sortkey = ''
|
last_sortkey = ''
|
||||||
start_date = datetime.datetime(2012, 9, 27, 0, 0, 0)
|
start_date = datetime.datetime(int(begin[0:4]), int(begin[4:6]),
|
||||||
end_date = datetime.datetime(2013, 7, 30, 0, 0, 0)
|
int(begin[6:8]), 0, 0, 0)
|
||||||
|
end_date = datetime.datetime(int(end[0:4]), int(end[4:6]), int(end[6:8]),
|
||||||
|
0, 0, 0)
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
earliest = datetime.datetime.now()
|
earliest = datetime.datetime.now()
|
||||||
@ -142,13 +150,58 @@ def main():
|
|||||||
'gerrit query %s resume_sortkey:%s --all-approvals'
|
'gerrit query %s resume_sortkey:%s --all-approvals'
|
||||||
' --format JSON' % (QUERY, last_sortkey))
|
' --format JSON' % (QUERY, last_sortkey))
|
||||||
|
|
||||||
print 'project: %s' % options.project
|
print 'project: %s' % project
|
||||||
print 'examined %s changes' % count
|
print 'examined %s changes' % count
|
||||||
print 'earliest timestamp: %s' % earliest
|
print 'earliest timestamp: %s' % earliest
|
||||||
writer = csv.writer(open(options.output, 'w'))
|
writer = csv.writer(open(output, 'w'))
|
||||||
for a in atcs:
|
for a in atcs:
|
||||||
writer.writerow([a.username, a.full_name] + a.emails)
|
writer.writerow([a.username, a.full_name] + a.emails)
|
||||||
print
|
print
|
||||||
|
|
||||||
|
|
||||||
|
def get_projects(url):
    """Return the repository names listed in a programs.yaml document.

    Downloads the YAML document at *url* and collects the 'repo' value of
    every entry in the 'projects' list of every top-level program.

    Raises requests.HTTPError if the server answers with an error status,
    so that an error page is never parsed as if it were the program list.
    """
    response = requests.get(url)
    response.raise_for_status()
    # safe_load() only builds plain Python data, which is all this file
    # contains; plain yaml.load() can construct arbitrary objects from the
    # fetched text and requires an explicit Loader on newer PyYAML.
    programs_yaml = yaml.safe_load(response.text)
    return [
        project['repo']
        for program in programs_yaml.values()
        for project in program['projects']
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def get_extra_atcs(url):
    """Return the entry lines of the extra-atcs file at *url*.

    Downloads the file and returns its lines with surrounding whitespace
    removed, leaving out blank lines and lines that start with '#'.

    Raises requests.HTTPError if the server answers with an error status.
    """
    response = requests.get(url)
    response.raise_for_status()
    extra_atcs = []
    for line in response.text.split('\n'):
        # Strip before testing so CRLF line endings, whitespace-only lines
        # and indented comments are not handed back as entries.
        line = line.strip()
        if line and not line.startswith('#'):
            extra_atcs.append(line)
    return extra_atcs
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Write one ATC CSV per governance project plus the extra-ATCs CSV.

    Parses the command-line options, then calls project_stats() for every
    repository returned by get_projects(PROGRAMS_URL), writing each result
    to out/<last path component of the repo>.csv.  Finally the entries
    returned by get_extra_atcs(EXTRA_ATCS_URL) are written to
    out/extra-atcs.csv with an empty first (username) column.

    The out/ directory must already exist.  Exits with a usage error if no
    begin date or no SSH username is available, and raises ValueError if
    an extra-atcs entry does not match EXTRA_ATC_RE.
    """
    # Default end date: today's UTC date in the YYYYMMDD form that the
    # --begin and --end options take.
    today = datetime.datetime.utcnow().strftime('%Y%m%d')

    optparser = optparse.OptionParser()
    optparser.add_option(
        '-b', '--begin', help='begin date (e.g. 20131017)')
    optparser.add_option(
        '-e', '--end', default=today, help='end date (default is today)')
    optparser.add_option(
        '-k', '--keyfile', default='~/.ssh/id_rsa',
        help='SSH key (default is ~/.ssh/id_rsa)')
    # .get() keeps an unset $USER from raising KeyError while the options
    # are still being declared, which would also break an explicit --user.
    optparser.add_option(
        '-u', '--user', default=os.environ.get('USER'),
        help='SSH username (default is $USER)')
    options, args = optparser.parse_args()

    # --begin has no default; report that as a usage error up front rather
    # than failing later when the date string is sliced.
    if not options.begin:
        optparser.error('a begin date is required (-b/--begin YYYYMMDD)')
    if not options.user:
        optparser.error('no SSH username: set $USER or pass -u/--user')

    for project in get_projects(PROGRAMS_URL):
        output = 'out/%s.csv' % project.split('/')[-1]
        project_stats(project, output, options.begin, options.end,
                      options.keyfile, options.user)

    # The with-block makes sure the CSV is flushed and closed on exit.
    with open('out/extra-atcs.csv', 'w') as extra_file:
        writer = csv.writer(extra_file)
        for atc in get_extra_atcs(EXTRA_ATCS_URL):
            match = EXTRA_ATC_RE.match(atc)
            if match is None:
                # Name the offending line instead of failing with an
                # AttributeError on None.
                raise ValueError('unparseable extra-atcs entry: %r' % atc)
            writer.writerow([''] + list(match.groups()))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
Loading…
Reference in New Issue
Block a user