add --anonymize flag to the summarize command
This flag can help us look at numbers without being distracted by who is making the contributions, which is useful for understanding if a single organization is contributing disproportionately relative to others. Change-Id: I6fcaf4cc19441f162ec6b9c6f3ee8bd780878a76 Signed-off-by: Doug Hellmann <doug@doughellmann.com>
This commit is contained in:
parent
f9c70b6751
commit
b9aae47c6c
@ -64,3 +64,58 @@ class TestSummarizeBy(base.TestCase):
|
|||||||
('A',): 2,
|
('A',): 2,
|
||||||
}
|
}
|
||||||
self.assertEqual(expected, results)
|
self.assertEqual(expected, results)
|
||||||
|
|
||||||
|
|
||||||
|
class TestAnonymize(base.TestCase):
    """Tests for summarize.Anonymizer and summarize.anonymize()."""

    def test_anonymizer(self):
        # A repeated value must keep its label; a new value gets the
        # next number in sequence.
        mask = summarize.Anonymizer('Field')
        calls = [
            ('anything', 'Field 1'),
            ('anything', 'Field 1'),
            ('anything else', 'Field 2'),
            ('anything else', 'Field 2'),
        ]
        for value, label in calls:
            self.assertEqual(label, mask(value))

    def test_not_needed(self):
        # Neither grouping field is identifying, so rows pass through
        # untouched.
        rows = [('a', 'b', 1)]
        masked = list(summarize.anonymize(('Field1', 'Field2'), rows))
        self.assertEqual(rows, masked)

    def test_organization(self):
        # Only the 'Organization' column is replaced.
        rows = [('a', 'b', 2), ('c', 'd', 1)]
        masked = list(
            summarize.anonymize(['Organization', 'Field2'], rows)
        )
        self.assertEqual(
            [('Organization 1', 'b', 2), ('Organization 2', 'd', 1)],
            masked,
        )

    def test_name(self):
        # Only the 'Name' column is replaced.
        rows = [('a', 'b', 2), ('c', 'd', 1)]
        masked = list(summarize.anonymize(['Field1', 'Name'], rows))
        self.assertEqual(
            [('a', 'Name 1', 2), ('c', 'Name 2', 1)],
            masked,
        )

    def test_email(self):
        # Only the 'Email' column is replaced.
        rows = [('a', 'b', 2), ('c', 'd', 1)]
        masked = list(summarize.anonymize(['Field1', 'Email'], rows))
        self.assertEqual(
            [('a', 'Email 1', 2), ('c', 'Email 2', 1)],
            masked,
        )
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from goal_tools.who_helped import contributions
|
from goal_tools.who_helped import contributions
|
||||||
@ -33,6 +34,41 @@ def _count_distinct(by_names, to_count, data_source):
|
|||||||
return {k: len(v) for k, v in counts.items()}
|
return {k: len(v) for k, v in counts.items()}
|
||||||
|
|
||||||
|
|
||||||
|
class Anonymizer:
    """Replace each distinct value of a field with a numbered label.

    Calling the instance with a value returns a string of the form
    ``"<field> <n>"``. The first distinct value seen gets ``n == 1``,
    the next distinct value ``n == 2``, and so on. A value that was
    seen before always maps back to the label it was first given.
    """

    def __init__(self, field):
        # Label prefix, e.g. 'Organization'.
        self.field = field
        # Maps original value -> label already handed out for it.
        self.cache = {}
        # Source of the numeric suffix for the next unseen value.
        self.counter = itertools.count(1)

    def __repr__(self):
        return 'Anonymizer({})'.format(repr(self.field))

    def __call__(self, value):
        """Return the anonymous label for *value*, creating it if new."""
        if value in self.cache:
            return self.cache[value]
        label = '{0} {1}'.format(self.field, next(self.counter))
        self.cache[value] = label
        return label
|
||||||
|
|
||||||
|
|
||||||
|
def anonymize(group_by, data):
    """Turn the fields with identifying information into anonymous strings.

    This is a generator. Each row of *data* is expected to hold one
    value per name in *group_by*, in the same order, followed by the
    count. Values in columns named 'Organization', 'Name' or 'Email'
    are replaced by labels produced by ``Anonymizer``; all other
    values are yielded unchanged.

    :param group_by: names of the leading columns of each row
    :param data: iterable of rows (any iterable of values)
    :returns: yields one tuple per input row
    """
    def _unchanged(value):
        return value

    fields = tuple(group_by)
    # One masker per identifying column that is actually present. The
    # dict keeps a single instance per field name, so a name repeated
    # in group_by shares one numbering sequence.
    maskers = {
        field: Anonymizer(field)
        for field in fields
        if field in ('Organization', 'Name', 'Email')
    }
    modifiers = [maskers.get(field, _unchanged) for field in fields]

    for row in data:
        row = tuple(row)
        masked = tuple(
            modify(value) for modify, value in zip(modifiers, row)
        )
        # zip() stops at the shorter input, so append whatever follows
        # the group_by columns (the count, plus any extra columns)
        # unchanged instead of silently dropping it.
        yield masked + row[len(masked):]
|
||||||
|
|
||||||
|
|
||||||
class SummarizeContributions(report.ContributionsReportBase):
|
class SummarizeContributions(report.ContributionsReportBase):
|
||||||
"Summarize a contribution report."
|
"Summarize a contribution report."
|
||||||
|
|
||||||
@ -54,6 +90,13 @@ class SummarizeContributions(report.ContributionsReportBase):
|
|||||||
help=('combination of unique values to count '
|
help=('combination of unique values to count '
|
||||||
'(may be repeated), defaults to counting each contribution'),
|
'(may be repeated), defaults to counting each contribution'),
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--anonymize', '--anon',
|
||||||
|
dest='anonymize',
|
||||||
|
default=False,
|
||||||
|
action='store_true',
|
||||||
|
help='mask organization and personal identifying information',
|
||||||
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def take_action(self, parsed_args):
|
def take_action(self, parsed_args):
|
||||||
@ -74,6 +117,9 @@ class SummarizeContributions(report.ContributionsReportBase):
|
|||||||
key=lambda x: (x[-1], x[:-1]), # by count first
|
key=lambda x: (x[-1], x[:-1]), # by count first
|
||||||
))
|
))
|
||||||
|
|
||||||
|
if parsed_args.anonymize:
|
||||||
|
output_rows = anonymize(group_by, output_rows)
|
||||||
|
|
||||||
columns = tuple(group_by) + (to_count_column,)
|
columns = tuple(group_by) + (to_count_column,)
|
||||||
|
|
||||||
return (columns, output_rows)
|
return (columns, output_rows)
|
||||||
|
Loading…
Reference in New Issue
Block a user