Add tools being used to make sense of gerrit account inconsistencies
The first tool has been used to "retire" accounts that have preferred email addresses without a matching external id. The second is being used to make sense of whether or not we can do a bulk retirement of accounts with email conflicts in their external ids. The third is a script that can be used to remove external ids from accounts in bulk based on their email addresses. Change-Id: Idf22cfc9f2bac7d3921e006c40faef4585c2d977
This commit is contained in:
parent
3f1d67b99f
commit
112cbc6cfe
308
tools/gerrit-account-inconsistencies/audit-users.py
Normal file
308
tools/gerrit-account-inconsistencies/audit-users.py
Normal file
@ -0,0 +1,308 @@
|
|||||||
|
# Script to query Gerrit users by email address to debug accounts with email
|
||||||
|
# address conflicts. The idea here is we'll identify which users are active
|
||||||
|
# and need proper manipulation to correct and which are inactive and can
|
||||||
|
# be retired.
|
||||||
|
#
|
||||||
|
# The input list of emails can be generated by a gerrit config consistency
|
||||||
|
# check against external ids.
|
||||||
|
#
|
||||||
|
# This script should also identify when accounts are inactive according to
|
||||||
|
# Gerrit and not just by our "have they pushed or reviewed code in the last
|
||||||
|
# year" metric. Accounts that are already inactive can be safely retired too.
|
||||||
|
|
||||||
|
# This script builds and operates on a datastructure that looks like this.
|
||||||
|
# john.doe@example.com:
|
||||||
|
# 1234:
|
||||||
|
# active: True
|
||||||
|
# recently_used: True
|
||||||
|
# recent_change: '2021-01-23 17:31:25.000000000'
|
||||||
|
# recent_review: None
|
||||||
|
# 5678:
|
||||||
|
# active: False
|
||||||
|
# recently_used: False
|
||||||
|
# recent_change: None
|
||||||
|
# recent_review: '2019-03-05 12:15:34.000000000'
|
||||||
|
# active:
|
||||||
|
# - 1234
|
||||||
|
# inactive:
|
||||||
|
# - 5678
|
||||||
|
# recently_used:
|
||||||
|
# - 1234
|
||||||
|
# nonrecently_used:
|
||||||
|
# - 5678
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import getpass
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# strptime() format for Gerrit timestamps once they have been trimmed from
# nanosecond to microsecond precision.
TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
# Gerrit reports timestamps in UTC, so "now" is taken in UTC too. The tzinfo
# is dropped so the value can be subtracted from the naive datetimes that
# strptime() produces with TIME_FORMAT.
TODAY = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
# Width of the "recently used" window: one year.
DELTAT = datetime.timedelta(days=365)
# Start of the "recently used" window.
SINCET = TODAY - DELTAT
|
||||||
|
|
||||||
|
|
||||||
|
def query_gerrit(loc, query, auth=None):
    """GET a review.opendev.org REST endpoint and return the decoded JSON.

    loc is the endpoint path without a leading slash (e.g. 'accounts'),
    query is passed to requests as the URL parameters, and auth, when
    truthy, is handed to requests and switches the request to the 'a/'
    prefixed form of the endpoint.

    Proper data needs an authenticated admin user; unauthenticated use is
    only a first pass. json.JSONDecodeError propagates when the response
    body is not JSON after the prefix is removed.
    """
    path = 'a/' + loc if auth else loc
    response = requests.get('https://review.opendev.org/%s/' % path,
                            params=query, auth=auth)
    # The first five characters are the gerrit json prefix, not payload.
    return json.loads(response.text[5:])
|
||||||
|
|
||||||
|
def get_account_detail(account_id, auth=None):
    """Return the decoded 'accounts/<account_id>/detail' response.

    Running without auth is only meant for quicker debugging cycles;
    proper data should be generated authenticated and as admin.
    """
    endpoint = 'accounts/%s/detail' % account_id
    no_params = {}
    return query_gerrit(endpoint, no_params, auth)
|
||||||
|
|
||||||
|
def get_account_sshkeys(account_id, auth=None):
    """Return the decoded 'accounts/<account_id>/sshkeys' response.

    Running without auth is only meant for quicker debugging cycles;
    proper data should be generated authenticated and as admin. When no
    auth was supplied, an undecodable response is treated as "no keys" and
    an empty list is returned. With auth, the json.JSONDecodeError is
    re-raised.
    """
    endpoint = 'accounts/%s/sshkeys' % account_id
    try:
        return query_gerrit(endpoint, {}, auth)
    except json.JSONDecodeError:
        if not auth:
            # This handles the lack-of-auth error from the query above.
            return []
        raise
|
||||||
|
|
||||||
|
def get_account_externalids(account_id, auth=None):
    """Return the decoded 'accounts/<account_id>/external.ids' response.

    Running without auth is only meant for quicker debugging cycles;
    proper data should be generated authenticated and as admin. When no
    auth was supplied, an undecodable response is treated as "no external
    ids" and an empty list is returned. With auth, the
    json.JSONDecodeError is re-raised.
    """
    endpoint = 'accounts/%s/external.ids' % account_id
    try:
        return query_gerrit(endpoint, {}, auth)
    except json.JSONDecodeError:
        if not auth:
            # This handles the lack-of-auth error from the query above.
            return []
        raise
|
||||||
|
|
||||||
|
def recently_used(timestamp):
    """Return True when timestamp is less than DELTAT older than TODAY.

    An account counts as recently used if it has reviewed or pushed code
    within the last year. timestamp is a Gerrit timestamp string.
    """
    # Gerrit apparently gives us nanoseconds which strptime can't parse, so
    # the last three digits are dropped before parsing.
    parsed = datetime.datetime.strptime(timestamp[:-3], TIME_FORMAT)
    age = TODAY - parsed
    return age < DELTAT
|
||||||
|
|
||||||
|
def read_email_list():
    """Read 'email_list.txt' and return a dict keyed by email address.

    The file holds one email address per line. Surrounding whitespace is
    stripped and each address maps to a fresh empty dict for later steps to
    fill in. Blank lines are skipped so that a stray empty or trailing line
    does not create a bogus '' entry that would then be queried against
    Gerrit.
    """
    users = {}
    with open('email_list.txt') as f:
        for line in f:
            email = line.strip()
            if email:
                users[email] = {}
    return users
|
||||||
|
|
||||||
|
def check_recent_changes(account_id, account_info, auth):
    """Record the newest owned and newest reviewed change for an account.

    Runs an 'owner:<id>' and then a 'reviewedby:<id>' change query and
    stores the 'updated' value of the first result under 'recent_change'
    and 'recent_review' respectively, or None when the query returned
    nothing. 'recently_used' is set to True in account_info when either
    timestamp passes recently_used(); it is never reset to False here.
    """
    # Gerrit appears to do a reverse sort giving you the newest results
    # first. Since we only care about the most recent activity we ask for a
    # single result (n = 1). No 'after:' date restriction is applied, so
    # the newest match is recorded however old it is.
    lookups = (('owner', 'recent_change'), ('reviewedby', 'recent_review'))
    for operator, key in lookups:
        params = {'q': '%s:%s' % (operator, account_id), 'n': 1}
        results = query_gerrit('changes', params, auth)
        if not results:
            account_info[key] = None
            continue
        account_info[key] = results[0]['updated']
        if recently_used(account_info[key]):
            account_info['recently_used'] = True
|
||||||
|
|
||||||
|
def _record_account(users, email, account, active, auth=None):
    """Gather credentials and activity for one account found under email.

    Creates users[email][<account id>] and files the account id into the
    'active' or 'inactive' list (per the active flag) and into the
    'recently_used' or 'nonrecently_used' list (per its activity).
    """
    account_id = str(account['_account_id'])
    info = {'recently_used': False,
            'active': active,
            'username': None,
            'sshkeys': None,
            'openids': []}
    users[email][account_id] = info
    users[email]['active' if active else 'inactive'].append(account_id)

    detail = get_account_detail(account_id, auth)
    if 'username' in detail:
        info['username'] = detail['username']
    if get_account_sshkeys(account_id, auth):
        info['sshkeys'] = True
    for eid in get_account_externalids(account_id, auth):
        # We only care about login.ubuntu urls now
        if 'login.ubuntu' in eid['identity']:
            r = requests.head(eid['identity'])
            if r.status_code == 200:
                # If there is an openid and it is valid we add it
                # to the list of valid openids
                info['openids'].append(eid['identity'])

    check_recent_changes(account_id, info, auth)

    bucket = 'recently_used' if info['recently_used'] else 'nonrecently_used'
    users[email][bucket].append(account_id)


def get_user_activity(users, auth=None):
    """Fill in users with per-account details for every email address.

    For each email key the 'active', 'inactive', 'recently_used' and
    'nonrecently_used' lists are created, then Gerrit is asked for the
    active and the inactive accounts matching that email. Emails with
    fewer than two matching accounts are reported and left with empty
    lists. Otherwise every account gets an entry keyed by its account id
    (as a string) holding 'active', 'username', 'sshkeys', 'openids',
    'recently_used', 'recent_change' and 'recent_review'. Active accounts
    are processed before inactive ones.
    """
    for email in users:
        users[email]['active'] = []
        users[email]['inactive'] = []
        users[email]['recently_used'] = []
        users[email]['nonrecently_used'] = []

        active_query = {'q': 'email:%s is:active' % email}
        active_j = query_gerrit('accounts', active_query, auth)
        inactive_query = {'q': 'email:%s is:inactive' % email}
        inactive_j = query_gerrit('accounts', inactive_query, auth)
        if len(active_j + inactive_j) < 2:
            # Using an admin account to query this info seems to address
            # this problem, but we'll leave this here as a double check.
            print("Email %s only has one account" % email)
            continue

        for active, accounts in ((True, active_j), (False, inactive_j)):
            for account in accounts:
                _record_account(users, email, account, active, auth)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Credentials are optional here: leaving either prompt empty runs all
    # of the queries unauthenticated.
    query_user = input('Username: ')
    query_pass = getpass.getpass('Password: ')
    auth = (query_user, query_pass) if query_user and query_pass else None

    users = read_email_list()
    get_user_activity(users, auth=auth)

    # TODO there are probably better ways to present this data.

    # Report 1: emails that have at least one inactive account.
    print()
    print('Users with inactive accounts. We may just be able to retire these.'
          '\nThen remove their external ids.')
    print('Email active accounts|inactive accounts')
    for email, entry in users.items():
        if entry['inactive']:
            print('%s %s|%s' % (email,
                                ','.join(entry['active']),
                                ','.join(entry['inactive'])))

    # Report 2: accounts with no username, ssh keys, valid openid, changes
    # or reviews at all.
    print()
    print('Users without username, ssh keys, valid openid, and no changes or reviews')
    print('Email accounts with creds or activity|accounts without creds or activity')
    for email, entry in users.items():
        with_creds = []
        without_creds = []
        for account_id in entry['recently_used'] + entry['nonrecently_used']:
            info = entry[account_id]
            if (info['username'] or info['sshkeys'] or
                    info['recent_change'] or info['recent_review'] or
                    info['openids']):
                with_creds.append(account_id)
            else:
                without_creds.append(account_id)
        if without_creds:
            print('%s %s|%s' % (email,
                                ','.join(with_creds),
                                ','.join(without_creds)))

    # Report 3: same as report 2 but ignoring openids.
    print()
    print('Users without username, sshkeys and zero changes pushed or reviews')
    print('Email accounts with usage|accounts without usage')
    for email, entry in users.items():
        with_usage = []
        without_usage = []
        for account_id in entry['recently_used'] + entry['nonrecently_used']:
            info = entry[account_id]
            if (info['username'] or info['sshkeys'] or
                    info['recent_change'] or info['recent_review']):
                with_usage.append(account_id)
            else:
                without_usage.append(account_id)
        if without_usage:
            print('%s %s|%s' % (email,
                                ','.join(with_usage),
                                ','.join(without_usage)))

    # Report 4: emails with no recently used account at all, split by
    # whether each account has a username or ssh keys.
    print()
    print('Non recently used Users without username or ssh keys')
    print('Email accounts with creds|accounts without creds')
    for email, entry in users.items():
        if entry['recently_used'] or not entry['nonrecently_used']:
            continue
        with_creds = []
        without_creds = []
        for account_id in entry['nonrecently_used']:
            info = entry[account_id]
            if info['username'] or info['sshkeys']:
                with_creds.append(account_id)
            else:
                without_creds.append(account_id)
        # Only report when at least one account lacks credentials.
        if with_creds != entry['nonrecently_used']:
            print('%s %s|%s' % (email,
                                ','.join(with_creds),
                                ','.join(without_creds)))

    # Report 5: emails whose accounts are all non recently used.
    print()
    print('Non recently used Users')
    print('Email non recent accounts')
    for email, entry in users.items():
        if entry['nonrecently_used'] and not entry['recently_used']:
            print('%s %s' % (email, ','.join(entry['nonrecently_used'])))

    # Report 6: emails with at least one recently used account.
    print()
    print('Recently used Users')
    print('Email recent accounts|nonrecent accounts')
    for email, entry in users.items():
        if entry['recently_used']:
            print('%s %s|%s' % (email,
                                ','.join(entry['recently_used']),
                                ','.join(entry['nonrecently_used'])))

    # Report 7: emails for which no account was classified at all.
    print()
    print('Emails that need further investigation')
    for email, entry in users.items():
        if not (entry['recently_used'] or entry['nonrecently_used']):
            print(email)
|
@ -0,0 +1,71 @@
|
|||||||
|
# This script reads a file with this format:
|
||||||
|
#
|
||||||
|
# email_addr account_id
|
||||||
|
#
|
||||||
|
# It will then remove all external ids with that email addr
|
||||||
|
# in them from the account specified.
|
||||||
|
# Note the account_ids and emails both may be non unique depending
|
||||||
|
# on the gerrit account situation. We iterate over each line in this
|
||||||
|
# file one at a time to avoid problems with deduping in datastructures.
|
||||||
|
|
||||||
|
import getpass
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
|
||||||
|
def get_external_ids(account_id, auth):
    """Return the decoded external ids response for account_id.

    Performs an authenticated GET of the account's 'external.ids' endpoint
    on review.opendev.org using the given requests auth value.
    """
    url = 'https://review.opendev.org/a/accounts/%s/external.ids' % account_id
    response = requests.get(url, auth=auth)
    # The first five characters are the gerrit json prefix, not payload.
    return json.loads(response.text[5:])
|
||||||
|
|
||||||
|
|
||||||
|
def is_active(account_id, auth):
    """Return False when the account detail has a truthy 'inactive' field.

    Performs an authenticated GET of the account's 'detail' endpoint on
    review.opendev.org. An account whose detail has no 'inactive' key, or a
    falsy one, is reported as active (True).
    """
    url = 'https://review.opendev.org/a/accounts/%s/detail' % account_id
    response = requests.get(url, auth=auth)
    # The first five characters are the gerrit json prefix, not payload.
    detail = json.loads(response.text[5:])
    return not ('inactive' in detail and detail['inactive'])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    query_user = input('Username: ')
    query_pass = getpass.getpass('Password: ')
    if query_user and query_pass:
        auth = (query_user, query_pass)
    else:
        print("This script requires authentication")
        # SystemExit is raised directly rather than calling exit(), which
        # is a helper installed by the site module.
        raise SystemExit(1)

    with open('external_id_cleanups.txt') as f:
        # Lines are handled one at a time because both the emails and the
        # account ids may repeat in the input.
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # crashing on the unpack below halfway through a run.
                continue
            # A line with the wrong number of fields still raises
            # ValueError here so malformed input is not silently ignored.
            (email, account_id) = fields
            print(email + ' ' + account_id)

            # Only accounts that are already inactive are cleaned up.
            if is_active(account_id, auth):
                print('This account is active. Skipping.')
                continue

            j = get_external_ids(account_id, auth)
            print('external IDs: ' + str(j))

            # Collect the identity of every external id that carries
            # exactly this email address.
            eids_to_remove = []
            for eid in j:
                if 'email_address' in eid and eid['email_address'] == email:
                    eids_to_remove.append(eid['identity'])

            if eids_to_remove:
                print('Removing these external IDs: ' + str(eids_to_remove))
                url = 'https://review.opendev.org' \
                      '/a/accounts/%s/external.ids:delete' % account_id
                print(url)
                r = requests.post(url, json=eids_to_remove, auth=auth)
                # The response is printed as-is for the operator to check.
                print(r.status_code)
                print(r.text)
            else:
                print('No matching external ids')
|
47
tools/gerrit-account-inconsistencies/retire-user.sh
Normal file
47
tools/gerrit-account-inconsistencies/retire-user.sh
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# Script to "retire" a gerrit account given its All-Users ref, eg:
#   refs/users/34/1234
# This script should be run within the root of an All-Users repo.
#
# This will remove the preferred email from the account to fix
# issues where the preferred email has no corresponding external id
# and set the account to inactive.
#
# The commit message heredoc should be edited appropriately before
# running this script.

set -ex
# Abort with a usage message when no ref is given; otherwise the fetch
# and push below would run without the intended refspec.
REF=${1:?usage: retire-user.sh refs/users/NN/NNNN}

git fetch origin "$REF"
git checkout FETCH_HEAD

sed -i -e '/^\tpreferredEmail = .*/d' account.config
# Gerrit accounts are active by default and don't have active record
# entries when active.
if ! grep 'active = false' account.config ; then
    echo -e "\tactive = false" >> account.config
fi

git add account.config
# The delimiter is quoted so that any $ or backticks in an edited commit
# message are taken literally instead of being expanded by the shell.
git commit -F - << 'EOF'
Retire this account

Set the account to inactive and remove its preferred email address.
This account appears to be an old style third party CI account. One
which the Gerrit admins manually added it as a system account. For
a while now we've asked third party CI operators to transition to
openid based accounts to reduce our workload. These third party CI
systems don't appear currently active and retiring them will fix
Gerrit consistency errors. If necessary they can create more modern
openid based accounts for their CI systems.

We are doing this to fix these Gerrit consistency errors:

Account 'ABXY' has no external ID for its preferred email 'ABXY@example.com'
EOF

#echo '## Verify this commit is correct with git show HEAD'
#echo "## If things look good run git push origin HEAD:$REF"
git show HEAD
git push origin "HEAD:$REF"
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user