add --anonymize flag to the summarize command
This flag can help us look at numbers without being distracted by who is making the contributions, which is useful for understanding if a single organization is contributing disproportionately relative to others. Change-Id: I6fcaf4cc19441f162ec6b9c6f3ee8bd780878a76 Signed-off-by: Doug Hellmann <doug@doughellmann.com>
This commit is contained in:
parent
f9c70b6751
commit
b9aae47c6c
@ -64,3 +64,58 @@ class TestSummarizeBy(base.TestCase):
|
||||
('A',): 2,
|
||||
}
|
||||
self.assertEqual(expected, results)
|
||||
|
||||
|
||||
class TestAnonymize(base.TestCase):
    """Tests for summarize.Anonymizer and summarize.anonymize()."""

    def test_anonymizer(self):
        """Each distinct value maps to one stable, numbered label."""
        masker = summarize.Anonymizer('Field')
        # Every value is looked up twice to show that a repeated lookup
        # returns the same label and does not advance the counter.
        lookups = [
            ('anything', 'Field 1'),
            ('anything', 'Field 1'),
            ('anything else', 'Field 2'),
            ('anything else', 'Field 2'),
        ]
        for value, label in lookups:
            self.assertEqual(label, masker(value))

    def test_not_needed(self):
        """Rows pass through unchanged when no grouped field is masked."""
        rows = [('a', 'b', 1)]
        result = list(summarize.anonymize(('Field1', 'Field2'), rows))
        self.assertEqual(rows, result)

    def test_organization(self):
        """Only the 'Organization' column is replaced."""
        rows = [
            ('a', 'b', 2),
            ('c', 'd', 1),
        ]
        result = list(
            summarize.anonymize(['Organization', 'Field2'], rows)
        )
        self.assertEqual(
            [
                ('Organization 1', 'b', 2),
                ('Organization 2', 'd', 1),
            ],
            result,
        )

    def test_name(self):
        """Only the 'Name' column is replaced."""
        rows = [
            ('a', 'b', 2),
            ('c', 'd', 1),
        ]
        result = list(summarize.anonymize(['Field1', 'Name'], rows))
        self.assertEqual(
            [
                ('a', 'Name 1', 2),
                ('c', 'Name 2', 1),
            ],
            result,
        )

    def test_email(self):
        """Only the 'Email' column is replaced."""
        rows = [
            ('a', 'b', 2),
            ('c', 'd', 1),
        ]
        result = list(summarize.anonymize(['Field1', 'Email'], rows))
        self.assertEqual(
            [
                ('a', 'Email 1', 2),
                ('c', 'Email 2', 1),
            ],
            result,
        )
|
||||
|
@ -11,6 +11,7 @@
|
||||
# under the License.
|
||||
|
||||
import collections
|
||||
import itertools
|
||||
import logging
|
||||
|
||||
from goal_tools.who_helped import contributions
|
||||
@ -33,6 +34,41 @@ def _count_distinct(by_names, to_count, data_source):
|
||||
return {k: len(v) for k, v in counts.items()}
|
||||
|
||||
|
||||
class Anonymizer:
    """Track unique values for a field while masking them.

    Calling an instance with a value returns a label of the form
    '<field> <n>'. The first distinct value seen gets n == 1, the next
    distinct value n == 2, and so on. Repeating a value returns the
    label it was given the first time.
    """

    def __init__(self, field):
        """Remember the field name and start with no values seen.

        :param field: text used as the prefix of every generated label
        """
        self.field = field
        # Maps each original value to the label already handed out for it.
        self.cache = {}
        # Source of the numeric suffix; the first label ends in 1.
        self.counter = itertools.count(1)

    def __repr__(self):
        return 'Anonymizer({!r})'.format(self.field)

    def __call__(self, value):
        """Return the masked label for *value*, creating it if needed.

        :param value: original value; it is used as a dict key
        :returns: the label string associated with *value*
        """
        label = self.cache.get(value)
        if label is None:
            # Stored labels are always strings, so None can only mean
            # this value has not been seen before.
            sequence = next(self.counter)
            label = '{} {}'.format(self.field, sequence)
            self.cache[value] = label
        return label
|
||||
|
||||
|
||||
def anonymize(group_by, data):
    """Turn the fields with identifying information into anonymous strings.

    Columns whose name in *group_by* is 'Organization', 'Name' or
    'Email' are passed through an Anonymizer for that field; every
    other column, and the trailing count column, is left as it is.

    This is a generator function: nothing is evaluated until the
    result is iterated.

    :param group_by: column names, in the same order as the leading
        values of each row
    :param data: iterable of rows, one value per *group_by* name
        followed by the count
    :returns: generator yielding one tuple per input row
    """

    def _unchanged(value):
        return value

    # One shared Anonymizer per identifying field, so a value keeps the
    # same label on every row it appears in.
    maskers = {
        name: Anonymizer(name)
        for name in ('Organization', 'Name', 'Email')
    }

    transforms = []
    for column in group_by:
        transforms.append(maskers.get(column, _unchanged))
    transforms.append(_unchanged)  # for the count field

    for row in data:
        yield tuple(
            transform(item) for transform, item in zip(transforms, row)
        )
|
||||
|
||||
|
||||
class SummarizeContributions(report.ContributionsReportBase):
|
||||
"Summarize a contribution report."
|
||||
|
||||
@ -54,6 +90,13 @@ class SummarizeContributions(report.ContributionsReportBase):
|
||||
help=('combination of unique values to count '
|
||||
'(may be repeated), defaults to counting each contribution'),
|
||||
)
|
||||
parser.add_argument(
|
||||
'--anonymize', '--anon',
|
||||
dest='anonymize',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help='mask organization and personal identifying information',
|
||||
)
|
||||
return parser
|
||||
|
||||
def take_action(self, parsed_args):
|
||||
@ -74,6 +117,9 @@ class SummarizeContributions(report.ContributionsReportBase):
|
||||
key=lambda x: (x[-1], x[:-1]), # by count first
|
||||
))
|
||||
|
||||
if parsed_args.anonymize:
|
||||
output_rows = anonymize(group_by, output_rows)
|
||||
|
||||
columns = tuple(group_by) + (to_count_column,)
|
||||
|
||||
return (columns, output_rows)
|
||||
|
Loading…
Reference in New Issue
Block a user