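Change mail regexp to use named groups

Replace the positional groups in MAIL_BOX_PATTERN with named groups and
build each email record directly from the match's groupdict(). The email
body is now kept on records that reference a blueprint instead of being
stored under a separate 'email:<primary_key>' runtime-storage key.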

Change-Id: Ib39c605c8071833a44de3398583f6160fec775c2
This commit is contained in:
Herman Narkaytis
2013-09-25 17:04:46 +04:00
parent 2ea04f368b
commit 8820e840cf
4 changed files with 16 additions and 35 deletions

View File

@@ -912,7 +912,6 @@ def get_commit_report(records):
@exception_handler() @exception_handler()
def blueprint_report(module, blueprint_name): def blueprint_report(module, blueprint_name):
memory_storage_inst = get_vault()['memory_storage'] memory_storage_inst = get_vault()['memory_storage']
runtime_storage_inst = get_vault()['runtime_storage']
blueprint_id = module + ':' + blueprint_name blueprint_id = module + ':' + blueprint_name
@@ -931,9 +930,6 @@ def blueprint_report(module, blueprint_name):
activity = [] activity = []
for record in memory_storage_inst.get_records(record_ids): for record in memory_storage_inst.get_records(record_ids):
_extend_record(record) _extend_record(record)
if record['record_type'] == 'email':
record['body'] = (runtime_storage_inst.get_by_key('email:%s' %
record['primary_key']))
activity.append(record) activity.append(record)
activity.sort(key=lambda x: x['date']) activity.sort(key=lambda x: x['date'])

View File

@@ -80,9 +80,9 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst):
bp_iterator = lp.log(repo) bp_iterator = lp.log(repo)
bp_iterator_typed = _record_typer(bp_iterator, 'bp') bp_iterator_typed = _record_typer(bp_iterator, 'bp')
processed_mail_iterator = record_processor_inst.process( processed_bp_iterator = record_processor_inst.process(
bp_iterator_typed) bp_iterator_typed)
runtime_storage_inst.set_records(processed_mail_iterator) runtime_storage_inst.set_records(processed_bp_iterator)
vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root) vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
vcs_inst.fetch() vcs_inst.fetch()

View File

@@ -33,11 +33,12 @@ EMAIL_HEADER_PATTERN = ('From \S+(?: at \S+)?\s+'
MAIL_BOX_PATTERN = re.compile( MAIL_BOX_PATTERN = re.compile(
'^' + EMAIL_HEADER_PATTERN + '^' + EMAIL_HEADER_PATTERN +
'From: (\S+(?: at \S+))(?:\W+(\w+(?:\s\w+)*))?.*?\n' 'From: (?P<author_email>\S+(?: at \S+))'
'Date: (.*?)\n' '(?:\W+(?P<author_name>\w+(?:\s\w+)*))?.*?\n'
'Subject: (.*?)(?=\n\S+:)' 'Date: (?P<date>.*?)\n'
'.*?Message-ID: (\S+)\n' 'Subject: (?P<subject>.*?)(?=\n\S+:)'
'\n(.*?)\n' '.*?Message-ID: (?P<message_id>\S+)\n'
'\n(?P<body>.*?)\n'
'(?=' + EMAIL_HEADER_PATTERN + 'From: )', '(?=' + EMAIL_HEADER_PATTERN + 'From: )',
flags=re.MULTILINE | re.DOTALL) flags=re.MULTILINE | re.DOTALL)
@@ -90,30 +91,17 @@ def _retrieve_mails(uri):
content += TRAILING_RECORD content += TRAILING_RECORD
for rec in re.finditer(MAIL_BOX_PATTERN, content): for rec in re.finditer(MAIL_BOX_PATTERN, content):
email = rec.groupdict()
author_email = rec.group(1).replace(' at ', '@', 1) email['author_email'] = email['author_email'].replace(' at ', '@', 1)
if not utils.check_email_validity(author_email): if not utils.check_email_validity(email['author_email']):
continue continue
author_name = rec.group(2) email['date'] = int(email_utils.mktime_tz(
date = int(email_utils.mktime_tz( email_utils.parsedate_tz(email['date'])))
email_utils.parsedate_tz(rec.group(3))))
subject = rec.group(4)
message_id = rec.group(5)
body = rec.group(6)
email = {
'message_id': message_id,
'author_name': author_name,
'author_email': author_email,
'subject': subject,
'date': date,
'body': body,
}
for pattern_name, pattern in MESSAGE_PATTERNS.iteritems(): for pattern_name, pattern in MESSAGE_PATTERNS.iteritems():
collection = set() collection = set()
for item in re.finditer(pattern, body): for item in re.finditer(pattern, email['body']):
groups = item.groupdict() groups = item.groupdict()
item_id = groups['id'] item_id = groups['id']
if 'module' in groups: if 'module' in groups:

View File

@@ -282,11 +282,8 @@ class RecordProcessor(object):
self._update_record_and_user(record) self._update_record_and_user(record)
self._guess_module(record) self._guess_module(record)
if record.get('blueprint_id'): if not record.get('blueprint_id'):
self.runtime_storage_inst.set_by_key( del record['body']
'email:%s' % record['primary_key'], record['body'])
del record['body']
yield record yield record