Merge "Add tools being used to make sense of gerrit account inconsistencies"
This commit is contained in:
commit
40471d7ec0
308
tools/gerrit-account-inconsistencies/audit-users.py
Normal file
308
tools/gerrit-account-inconsistencies/audit-users.py
Normal file
@ -0,0 +1,308 @@
|
||||
# Script to query Gerrit users by email address to debug accounts with email
|
||||
# address conflicts. The idea here is we'll identify which users are active
|
||||
# and need proper manipulation to correct and which are inactive and can
|
||||
# be retired.
|
||||
#
|
||||
# The input list of emails can be generated by a gerrit config consistency
|
||||
# check against external ids.
|
||||
#
|
||||
# This script should also identify when accounts are inactive according to
|
||||
# Gerrit and not just by our "have they pushed or reviewed code in the last
|
||||
# year" metric. Accounts that are already inactive can be safely retired too.
|
||||
|
||||
# This script builds and operates on a datastructure that looks like this.
|
||||
#   john.doe@example.com:
#     1234:
#       active: True
#       recently_used: True
#       recent_change: '2021-01-23 17:31:25.000000000'
#       recent_review: None
#     5678:
#       active: False
#       recently_used: False
#       recent_change: None
#       recent_review: '2019-03-05 12:15:34.000000000'
#     active:
#       - 1234
#     inactive:
#       - 5678
#     recently_used:
#       - 1234
#     nonrecently_used:
#       - 5678
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import getpass
|
||||
import requests
|
||||
|
||||
# Gerrit timestamps look like '2021-01-23 17:31:25.000000000'. strptime's %f
# accepts at most six fractional digits, so the trailing three nanosecond
# digits have to be trimmed before parsing with this format.
TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
# Gerrit reports its timestamps in UTC, so "now" is taken in UTC as well.
# tzinfo is dropped because strptime() with TIME_FORMAT yields naive
# datetimes, and naive and aware values cannot be subtracted from each other.
TODAY = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
# Size of the window within which account activity counts as "recent".
DELTAT = datetime.timedelta(days=365)
# Oldest point in time that still counts as recent.
SINCET = TODAY - DELTAT
|
||||
|
||||
|
||||
def query_gerrit(loc, query, auth=None, timeout=60):
    """GET a review.opendev.org REST endpoint and return the decoded JSON.

    loc: endpoint path relative to the server root, e.g. 'changes' or
        'accounts/1234/detail'.
    query: dict of URL query parameters, passed to requests as ``params``.
    auth: optional (username, password) tuple. When truthy the request goes
        to the authenticated 'a/' flavour of the endpoint.
    timeout: seconds requests waits on the server before raising a timeout
        error. Without it a stalled connection would block the run forever.

    Returns whatever JSON document the server sent.

    Raises json.JSONDecodeError when the body, minus its first five
    characters, is not valid JSON. The HTTP status code is not inspected.
    """
    # Need to do this authenticated and as admin. Start with first pass just
    # normal user, then switch to admin and rerun.
    if auth:
        loc = 'a/' + loc
    r = requests.get('https://review.opendev.org/%s/' % loc,
                     params=query, auth=auth, timeout=timeout)
    # Strip off the gerrit json prefix: Gerrit puts the line ")]}'" in front
    # of JSON responses, which is five characters including the newline.
    return json.loads(r.text[5:])
|
||||
|
||||
def get_account_detail(account_id, auth=None):
    """Return the decoded 'accounts/<account_id>/detail' response.

    account_id: Gerrit account id, interpolated into the endpoint path.
    auth: optional (username, password) tuple handed on to query_gerrit().

    Proper data should be generated with admin credentials; running without
    auth is only meant for quicker debugging cycles.
    """
    endpoint = 'accounts/%s/detail' % account_id
    no_params = {}
    return query_gerrit(endpoint, no_params, auth)
|
||||
|
||||
def get_account_sshkeys(account_id, auth=None):
    """Return the decoded 'accounts/<account_id>/sshkeys' response.

    account_id: Gerrit account id, interpolated into the endpoint path.
    auth: optional (username, password) tuple handed on to query_gerrit().

    Proper data should be generated with admin credentials; running without
    auth is only meant for quicker debugging cycles. In that unauthenticated
    mode an undecodable response yields an empty list; with auth the
    json.JSONDecodeError is re-raised.
    """
    endpoint = 'accounts/%s/sshkeys' % account_id
    try:
        return query_gerrit(endpoint, {}, auth)
    except json.JSONDecodeError:
        if not auth:
            # Presumably the server answered with an authentication error
            # instead of JSON; treat that as "no keys visible".
            return []
        raise
|
||||
|
||||
def get_account_externalids(account_id, auth=None):
    """Return the decoded 'accounts/<account_id>/external.ids' response.

    account_id: Gerrit account id, interpolated into the endpoint path.
    auth: optional (username, password) tuple handed on to query_gerrit().

    Proper data should be generated with admin credentials; running without
    auth is only meant for quicker debugging cycles. In that unauthenticated
    mode an undecodable response yields an empty list; with auth the
    json.JSONDecodeError is re-raised.
    """
    endpoint = 'accounts/%s/external.ids' % account_id
    try:
        return query_gerrit(endpoint, {}, auth)
    except json.JSONDecodeError:
        if not auth:
            # Presumably the server answered with an authentication error
            # instead of JSON; treat that as "no external ids visible".
            return []
        raise
|
||||
|
||||
def recently_used(timestamp):
    """Tell whether a Gerrit timestamp lies less than DELTAT before TODAY.

    timestamp: string such as '2021-01-23 17:31:25.000000000'.

    An account counts as recently used when it has reviewed or pushed code
    inside that window.
    """
    # Gerrit hands out nanoseconds, which strptime cannot parse; cutting the
    # last three digits leaves microseconds.
    trimmed = timestamp[:-3]
    last_activity = datetime.datetime.strptime(trimmed, TIME_FORMAT)
    age = TODAY - last_activity
    return age < DELTAT
|
||||
|
||||
def read_email_list(path='email_list.txt'):
    """Load the email addresses to audit, one per line.

    path: file to read; defaults to 'email_list.txt' in the current
        directory.

    Returns a dict mapping each address (surrounding whitespace stripped) to
    a fresh empty dict. Blank lines are skipped so that a trailing newline or
    spacer line does not turn into an empty-string "email" entry.
    """
    users = {}
    with open(path) as f:
        for line in f:
            email = line.strip()
            if email:
                users[email] = {}
    return users
|
||||
|
||||
def check_recent_changes(account_id, account_info, auth):
    """Record an account's newest owned and newest reviewed change.

    account_id: Gerrit account id used in the change queries.
    account_info: dict updated in place. 'recent_change' and 'recent_review'
        are set to the 'updated' value of the first change returned for the
        owner: and reviewedby: queries respectively, or to None when the
        query returns nothing. 'recently_used' is set to True when either
        timestamp passes recently_used(); it is never reset here.
    auth: credentials handed on to query_gerrit().
    """
    # Gerrit appears to sort newest first, and only the latest activity
    # matters, hence n = 1. No after: restriction is applied, so the newest
    # change is recorded even when it is older than the cutoff.
    lookups = (('owner:%s', 'recent_change'),
               ('reviewedby:%s', 'recent_review'))
    for template, field in lookups:
        params = {'q': template % account_id, 'n': 1}
        results = query_gerrit('changes', params, auth)
        if not results:
            account_info[field] = None
            continue
        account_info[field] = results[0]['updated']
        if recently_used(account_info[field]):
            account_info['recently_used'] = True
|
||||
|
||||
def _record_account(email_info, account, active, auth):
    """Collect one account's details and file them under email_info."""
    account_id = str(account['_account_id'])
    info = {'recently_used': False,
            'active': active,
            'username': None,
            'sshkeys': None,
            'openids': []}
    email_info[account_id] = info
    email_info['active' if active else 'inactive'].append(account_id)

    detail = get_account_detail(account_id, auth)
    if 'username' in detail:
        info['username'] = detail['username']
    if get_account_sshkeys(account_id, auth):
        info['sshkeys'] = True
    for eid in get_account_externalids(account_id, auth):
        # We only care about login.ubuntu urls now
        if 'login.ubuntu' in eid['identity']:
            # Bounded wait so one unresponsive openid URL cannot stall the
            # whole audit.
            r = requests.head(eid['identity'], timeout=60)
            if r.status_code == 200:
                # If there is an openid and it is valid we add it
                # to the list of valid openids
                info['openids'].append(eid['identity'])

    check_recent_changes(account_id, info, auth)

    if info['recently_used']:
        email_info['recently_used'].append(account_id)
    else:
        email_info['nonrecently_used'].append(account_id)


def get_user_activity(users, auth=None):
    """Fill in per-account activity data for every email in users.

    users: dict keyed by email address; each value is a dict that is updated
        in place. It gains the list entries 'active', 'inactive',
        'recently_used' and 'nonrecently_used' (account id strings) plus one
        entry per account id holding 'recently_used', 'active', 'username',
        'sshkeys', 'openids', 'recent_change' and 'recent_review'.
    auth: optional (username, password) tuple used for all Gerrit queries.

    Emails matching fewer than two accounts are reported on stdout and left
    with empty lists. Returns None.
    """
    for email, email_info in users.items():
        email_info['active'] = []
        email_info['inactive'] = []
        email_info['recently_used'] = []
        email_info['nonrecently_used'] = []

        active_query = {'q': 'email:%s is:active' % email}
        active_j = query_gerrit('accounts', active_query, auth)
        inactive_query = {'q': 'email:%s is:inactive' % email}
        inactive_j = query_gerrit('accounts', inactive_query, auth)
        if len(active_j + inactive_j) < 2:
            # Using an admin account to query this info seems to address
            # this problem, but we'll leave this here as a double check.
            print("Email %s only has one account" % email)
            continue

        # Active accounts first, then inactive ones, matching the order in
        # which the per-email lists were always populated.
        for account in active_j:
            _record_account(email_info, account, True, auth)
        for account in inactive_j:
            _record_account(email_info, account, False, auth)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    query_user = input('Username: ')
    query_pass = getpass.getpass('Password: ')
    # Query anonymously unless both a username and a password were entered.
    auth = (query_user, query_pass) if query_user and query_pass else None

    users = read_email_list()
    get_user_activity(users, auth=auth)

    # TODO there are probably better ways to present this data.

    # Report 1: emails that have at least one inactive account.
    print()
    print('Users with inactive accounts. We may just be able to retire these.'
          '\nThen remove their external ids.')
    print('Email active accounts|inactive accounts')
    for email, info in users.items():
        if info['inactive']:
            print(email + ' ' + ','.join(info['active']) + '|'
                  + ','.join(info['inactive']))

    # Report 2: accounts with no username, ssh keys, changes, reviews or
    # valid openid.
    print()
    print('Users without username, ssh keys, valid openid, and no changes or reviews')
    print('Email accounts with creds or activity|accounts without creds or activity')
    for email, info in users.items():
        all_accounts = info['recently_used'] + info['nonrecently_used']
        accounts_with_creds = []
        accounts_without_creds = []
        for account_id in all_accounts:
            acct = info[account_id]
            if (acct['username'] or acct['sshkeys'] or
                    acct['recent_change'] or acct['recent_review'] or
                    acct['openids']):
                accounts_with_creds.append(account_id)
            else:
                accounts_without_creds.append(account_id)
        if accounts_without_creds:
            print(email + ' ' +
                  ','.join(accounts_with_creds) + '|' +
                  ','.join(accounts_without_creds))

    # Report 3: same as report 2 but ignoring openids.
    print()
    print('Users without username, sshkeys and zero changes pushed or reviews')
    print('Email accounts with usage|accounts without usage')
    for email, info in users.items():
        all_accounts = info['recently_used'] + info['nonrecently_used']
        accounts_with_usage = []
        accounts_without_usage = []
        for account_id in all_accounts:
            acct = info[account_id]
            if (acct['username'] or acct['sshkeys'] or
                    acct['recent_change'] or acct['recent_review']):
                accounts_with_usage.append(account_id)
            else:
                accounts_without_usage.append(account_id)
        if accounts_without_usage:
            print(email + ' ' +
                  ','.join(accounts_with_usage) + '|' +
                  ','.join(accounts_without_usage))

    # Report 4: emails with no recently used account, split by whether each
    # account has a username or ssh keys.
    print()
    print('Non recently used Users without username or ssh keys')
    print('Email accounts with creds|accounts without creds')
    for email, info in users.items():
        if info['recently_used'] or not info['nonrecently_used']:
            continue
        accounts_with_creds = []
        accounts_without_creds = []
        for account_id in info['nonrecently_used']:
            acct = info[account_id]
            if acct['username'] or acct['sshkeys']:
                accounts_with_creds.append(account_id)
            else:
                accounts_without_creds.append(account_id)
        # Only print when at least one account lacks creds.
        if accounts_with_creds != info['nonrecently_used']:
            print(email + ' ' +
                  ','.join(accounts_with_creds) + '|' +
                  ','.join(accounts_without_creds))

    # Report 5: emails whose accounts are all non recently used.
    print()
    print('Non recently used Users')
    print('Email non recent accounts')
    for email, info in users.items():
        if not info['recently_used'] and info['nonrecently_used']:
            print(email + ' ' + ','.join(info['nonrecently_used']))

    # Report 6: emails with at least one recently used account.
    print()
    print('Recently used Users')
    print('Email recent accounts|nonrecent accounts')
    for email, info in users.items():
        if info['recently_used']:
            print(email + ' ' + ','.join(info['recently_used']) + '|'
                  + ','.join(info['nonrecently_used']))

    # Report 7: emails for which no account was recorded at all.
    print()
    print('Emails that need further investigation')
    for email, info in users.items():
        if not (info['recently_used'] or info['nonrecently_used']):
            print(email)
|
@ -0,0 +1,71 @@
|
||||
# This script reads a file with this format:
|
||||
#
|
||||
# email_addr account_id
|
||||
#
|
||||
# It will then remove all external ids with that email addr
|
||||
# in them from the account specified.
|
||||
# Note the account_ids and emails both may be non unique depending
|
||||
# on the gerrit account situation. We iterate over each line in this
|
||||
# file one at a time to avoid problems with deduping in datastructures.
|
||||
|
||||
import getpass
|
||||
import json
|
||||
import requests
|
||||
|
||||
def get_external_ids(account_id, auth):
    """Return the decoded external.ids listing for a Gerrit account.

    account_id: account id interpolated into the authenticated endpoint URL.
    auth: (username, password) tuple passed to requests.

    Raises json.JSONDecodeError when the body, minus its first five
    characters, is not valid JSON.
    """
    url = ('https://review.opendev.org'
           '/a/accounts/%s/external.ids' % account_id)
    response = requests.get(url, auth=auth)
    # Drop the gerrit json prefix before decoding.
    payload = response.text[5:]
    return json.loads(payload)
|
||||
|
||||
|
||||
def is_active(account_id, auth):
    """Return False when the account's detail record flags it inactive.

    account_id: account id interpolated into the authenticated endpoint URL.
    auth: (username, password) tuple passed to requests.

    A missing or falsy 'inactive' field is treated as active.
    """
    url = ('https://review.opendev.org'
           '/a/accounts/%s/detail' % account_id)
    response = requests.get(url, auth=auth)
    # Drop the gerrit json prefix before decoding.
    detail = json.loads(response.text[5:])
    return not detail.get('inactive')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Reads 'external_id_cleanups.txt' ("email_addr account_id" per line) and,
    # for every inactive account listed, deletes the external ids whose
    # email_address equals the given email. Active accounts are skipped.
    query_user = input('Username: ')
    query_pass = getpass.getpass('Password: ')
    if query_user and query_pass:
        auth = (query_user, query_pass)
    else:
        print("This script requires authentication")
        # exit() is a helper added by the site module and may be absent;
        # raising SystemExit directly always works.
        raise SystemExit(1)

    with open('external_id_cleanups.txt') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to unpack, and unpacking an empty list
                # would abort the run with ValueError.
                continue
            # Lines with any other field count still raise ValueError here;
            # a malformed entry should stop a destructive run.
            (email, account_id) = fields
            print(email + ' ' + account_id)

            if is_active(account_id, auth):
                print('This account is active. Skipping.')
                continue

            j = get_external_ids(account_id, auth)
            print('external IDs: ' + str(j))

            eids_to_remove = [
                eid['identity'] for eid in j
                if 'email_address' in eid and eid['email_address'] == email
            ]

            if eids_to_remove:
                print('Removing these external IDs: ' + str(eids_to_remove))
                url = 'https://review.opendev.org' \
                      '/a/accounts/%s/external.ids:delete' % account_id
                print(url)
                r = requests.post(url, json=eids_to_remove, auth=auth)
                # The outcome is only reported, not acted upon.
                print(r.status_code)
                print(r.text)
            else:
                print('No matching external ids')
|
47
tools/gerrit-account-inconsistencies/retire-user.sh
Normal file
47
tools/gerrit-account-inconsistencies/retire-user.sh
Normal file
@ -0,0 +1,47 @@
|
||||
# Script to "retire" a gerrit account given its All-Users ref, eg:
|
||||
# refs/users/34/1234
|
||||
# This script should be run within the root of an All-Users repo.
|
||||
#
|
||||
# This will remove the preferred email from the account to fix
|
||||
# issues where the preferred email has no corresponding external id
|
||||
# and set the account to inactive.
|
||||
#
|
||||
# The commit message heredoc should be edited appropriately before
|
||||
# running this script.
|
||||
|
||||
set -ex

# Refuse to run without a ref; otherwise the fetch and push below would be
# issued with an empty refspec.
if [ -z "$1" ]; then
    echo "usage: $0 <All-Users ref, eg refs/users/34/1234>" >&2
    exit 1
fi
REF=$1

# Quote the ref everywhere so it always reaches git as a single argument.
git fetch origin "$REF"
git checkout FETCH_HEAD

sed -i -e '/^\tpreferredEmail = .*/d' account.config
# Gerrit accounts are active by default and don't have active record
# entries when active.
if ! grep 'active = false' account.config ; then
    echo -e "\tactive = false" >> account.config
fi

git add account.config
# The commit message is read from the heredoc on stdin; edit it to fit the
# account being retired before running this script.
git commit -F - << EOF
Retire this account

Set the account to inactive and remove its preferred email address.
This account appears to be an old style third party CI account. One
which the Gerrit admins manually added it as a system account. For
a while now we've asked third party CI operators to transition to
openid based accounts to reduce our workload. These third party CI
systems don't appear currently active and retiring them will fix
Gerrit consistency errors. If necessary they can create more modern
openid based accounts for their CI systems.

We are doing this to fix these Gerrit consistency errors:

Account 'ABXY' has no external ID for its preferred email 'ABXY@example.com'
EOF

#echo '## Verify this commit is correct with git show HEAD'
#echo "## If things look good run git push origin HEAD:$REF"
git show HEAD
git push origin "HEAD:$REF"
|
||||
|
Loading…
Reference in New Issue
Block a user