diff --git a/tools/atc/README b/tools/atc/README index 4a606990f8..01277f7cac 100644 --- a/tools/atc/README +++ b/tools/atc/README @@ -1,36 +1,24 @@ These are the scripts used to create the ATC lists for use in PTL elections and Summit invitations. -0) Write a patch to email_stats.py so steps 1-2 are not necessary. -1) Edit email_stats.py to set your gerrit username. -2) Edit email_stats.py to set the start_date and end_date. -3) Run the following queries on review.openstack.org: +1) Run the following queries on review.openstack.org: - SELECT * FROM accounts - INTO OUTFILE '/tmp/accounts.csv' - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\n'; + sudo -H mysql -e 'SELECT * FROM accounts;' reviewdb > accounts.tab + sudo -H mysql -e 'SELECT * FROM account_external_ids;' reviewdb > emails.tab - SELECT * FROM account_external_ids - INTO OUTFILE '/tmp/emails.csv' - FIELDS TERMINATED BY ',' - ENCLOSED BY '"' - LINES TERMINATED BY '\n'; - -4) Copy those files to this directory. -5) Run: +2) Copy those files to this directory. +3) Run: mkdir out - ./email-stats.sh + virtualenv venv + . venv/bin/activate + pip install paramiko requests pyyaml + ./email_stats.py --begin + # optionally specify --end, --keyfile and --user DATE=`date --iso` - mkdir $DATE - for f in out/*.csv ; do - fromdos $f - iconv -f ISO-8859-1 -t UTF-8 -o $DATE/`basename $f` $f - done + mv out $DATE cat $DATE/*.csv | sort | uniq > $DATE/all.csv -6) You can use diff.py to get the new ATCs since the previous run: +4) You can use diff.py to get the new ATCs since the previous run: ./diff.py $OLD-DATE/all.csv $DATE/all.csv $DATE/new.csv diff --git a/tools/atc/email-stats.sh b/tools/atc/email-stats.sh deleted file mode 100755 index 522948704d..0000000000 --- a/tools/atc/email-stats.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -# Projects counting for code contribution -for project in $( - wget -qO- \ - https://git.openstack.org/cgit/openstack/governance/plain/reference/programs.yaml \ - | grep '^ *- [A-Za-z_-]\+/[A-Za-z_-]\+$' \ - | sed 's/^ *- //' -) ; do - python email_stats.py -p $project -o out/$( basename $project ).csv -done - -# Confirmed list of non-code contributors -wget -qO- \ -https://git.openstack.org/cgit/openstack/governance/plain/reference/extra-atcs \ -| sed -e 's/#.*//' -e 's/^\s*//' -e 's/\s*$//' -e '/^$/d' \ --e 's/[^:]*: \(.*\) (\(.*\)) .*/,\1,\2/' > out/non-code-contributors.csv diff --git a/tools/atc/email_stats.py b/tools/atc/email_stats.py old mode 100644 new mode 100755 index c2ecd3aa2c..503f4073c8 --- a/tools/atc/email_stats.py +++ b/tools/atc/email_stats.py @@ -1,6 +1,6 @@ -#!/usr/bin/python +#!/usr/bin/env python -# Copyright (C) 2013 OpenStack Foundation +# Copyright (C) 2013-2014 OpenStack Foundation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,16 +18,27 @@ # # Soren Hansen wrote the original version of this script. # James Blair hacked it up to include email addresses from gerrit. +# Jeremy Stanley overhauled it for gerrit 2.8 and our governance repo. +import csv import datetime import json import optparse -import paramiko -import csv +import os +import os.path import re +import paramiko +import requests +import yaml + MAILTO_RE = re.compile('mailto:(.*)') USERNAME_RE = re.compile('username:(.*)') +EXTRA_ATC_RE = re.compile('[^:]*: ([^\(]*) \(([^@]*@[^\)]*)\) \[[^\[]*\]') +PROGRAMS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain' + '/reference/programs.yaml') +EXTRA_ATCS_URL = ('https://git.openstack.org/cgit/openstack/governance/plain' + '/reference/extra-atcs') class Account(object): @@ -46,35 +57,38 @@ def get_account(accounts, num): return a -def main(): +def project_stats(project, output, begin, end, keyfile, user): accounts = {} - for row in csv.reader(open('accounts.csv')): - num = int(row[-1]) - name = row[1] - email = row[2] - a = get_account(accounts, num) - a.full_name = name - if email and email != '\\N': - a.emails.append(email) + for row in open('accounts.tab'): + if not row.startswith('registered_on'): + row = row.split('\t') + num = int(row[13]) + name = row[1] + email = row[2] + a = get_account(accounts, num) + a.full_name = name + if email and email != 'NULL': + a.emails.append(email) - for row in csv.reader(open('emails.csv')): - num, email, pw, external = row - num = int(num) - a = get_account(accounts, num) - if email and email != '\\N' and email not in a.emails: - a.emails.append(email) - m = MAILTO_RE.match(external) - if m: - if m.group(1) not in a.emails: - a.emails.append(m.group(1)) - m = USERNAME_RE.match(external) - if m: - if a.username: - print a.num - print a.username - raise Exception("Already a username") - a.username = m.group(1) + for row in open('emails.tab'): + if not row.startswith('account_id'): + num, email, pw, external = row.split('\t') + num = int(num) + a = get_account(accounts, num) + if email and email != 'NULL' and email not in a.emails: + a.emails.append(email) + m = MAILTO_RE.match(external) + if m: + if m.group(1) not in a.emails: + a.emails.append(m.group(1)) + m = USERNAME_RE.match(external) + if m: + if a.username: + print a.num + print a.username + raise Exception("Already a username") + a.username = m.group(1) username_accounts = {} for a in accounts.values(): @@ -82,29 +96,23 @@ def main(): atcs = [] - optparser = optparse.OptionParser() - optparser.add_option( - '-p', '--project', default='nova', - help='Project to generate stats for') - optparser.add_option( - '-o', '--output', default='out.csv', help='Output file') - options, args = optparser.parse_args() - - QUERY = "project:%s status:merged" % options.project + QUERY = "project:%s status:merged" % project client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.load_system_host_keys() client.connect( 'review.openstack.org', port=29418, - key_filename='/home/corvus/.ssh/id_rsa', username='CHANGME') + key_filename=os.path.expanduser(keyfile), username=user) stdin, stdout, stderr = client.exec_command( 'gerrit query %s --all-approvals --format JSON' % QUERY) done = False last_sortkey = '' - start_date = datetime.datetime(2012, 9, 27, 0, 0, 0) - end_date = datetime.datetime(2013, 7, 30, 0, 0, 0) + start_date = datetime.datetime(int(begin[0:4]), int(begin[4:6]), + int(begin[6:8]), 0, 0, 0) + end_date = datetime.datetime(int(end[0:4]), int(end[4:6]), int(end[6:8]), + 0, 0, 0) count = 0 earliest = datetime.datetime.now() @@ -142,13 +150,58 @@ def main(): 'gerrit query %s resume_sortkey:%s --all-approvals' ' --format JSON' % (QUERY, last_sortkey)) - print 'project: %s' % options.project + print 'project: %s' % project print 'examined %s changes' % count print 'earliest timestamp: %s' % earliest - writer = csv.writer(open(options.output, 'w')) + writer = csv.writer(open(output, 'w')) for a in atcs: writer.writerow([a.username, a.full_name] + a.emails) print + +def get_projects(url): + programs_yaml = yaml.load(requests.get(url).text) + projects = [] + for program in programs_yaml: + for project in programs_yaml[program]['projects']: + projects.append(project['repo']) + return projects + + +def get_extra_atcs(url): + extra_atcs = [] + for line in requests.get(url).text.split('\n'): + if line and not line.startswith('#'): + extra_atcs.append(line) + return extra_atcs + + +def main(): + today = ''.join( + '%02d' % x for x in datetime.datetime.utcnow().utctimetuple()[:3]) + + optparser = optparse.OptionParser() + optparser.add_option( + '-b', '--begin', help='begin date (e.g. 20131017)') + optparser.add_option( + '-e', '--end', default=today, help='end date (default is today)') + optparser.add_option( + '-k', '--keyfile', default='~/.ssh/id_rsa', + help='SSH key (default is ~/.ssh/id_rsa)') + optparser.add_option( + '-u', '--user', default=os.environ['USER'], + help='SSH username (default is $USER)') + options, args = optparser.parse_args() + + for project in get_projects(PROGRAMS_URL): + output = 'out/%s.csv' % project.split('/')[-1] + project_stats(project, output, options.begin, options.end, + options.keyfile, options.user) + + writer = csv.writer(open('out/extra-atcs.csv', 'w')) + for atc in get_extra_atcs(EXTRA_ATCS_URL): + writer.writerow([''] + list(EXTRA_ATC_RE.match(atc).groups())) + + if __name__ == "__main__": main()