diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf index 847a15fca..370ad532d 100644 --- a/etc/stackalytics.conf +++ b/etc/stackalytics.conf @@ -58,3 +58,6 @@ # Name of file to store python profiler data. This option works for dashboard only # collect_profiler_stats = + +# How many member profiles to look ahead after the last +# members_look_ahead = 250 diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py index 476bcb2eb..3218f7ecb 100644 --- a/stackalytics/processor/config.py +++ b/stackalytics/processor/config.py @@ -65,4 +65,6 @@ OPTS = [ cfg.StrOpt('collect-profiler-stats', help='Name of file to store python profiler data. This option ' 'works for dashboard only'), + cfg.IntOpt('members-look-ahead', default=250, + help='How many member profiles to look ahead after the last'), ] diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py index b1773b461..b55597674 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -175,7 +175,8 @@ def _process_mail_list(uri, runtime_storage_inst, record_processor_inst): def _process_member_list(uri, runtime_storage_inst, record_processor_inst): member_iterator = mps.log(uri, runtime_storage_inst, - cfg.CONF.days_to_update_members) + cfg.CONF.days_to_update_members, + cfg.CONF.members_look_ahead) member_iterator_typed = _record_typer(member_iterator, 'member') processed_member_iterator = record_processor_inst.process( member_iterator_typed) diff --git a/stackalytics/processor/mps.py b/stackalytics/processor/mps.py index f6ca05fde..b4be2a9e5 100644 --- a/stackalytics/processor/mps.py +++ b/stackalytics/processor/mps.py @@ -29,8 +29,6 @@ NAME_AND_DATE_PATTERN = r'

(?P[^<]*)[\s\S]*?' \ COMPANY_PATTERN = r'Date\sJoined[\s\S]*?(?P[^<]*)' \ r'[\s\S]*?From\s(?P[\s\S]*?)\(Current\)' -CNT_EMPTY_MEMBERS = 50 - def _convert_str_fields_to_unicode(result): for field, value in six.iteritems(result): @@ -69,7 +67,7 @@ def _retrieve_member(uri, member_id, html_parser): return member -def log(uri, runtime_storage_inst, days_to_update_members): +def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead): LOG.debug('Retrieving new openstack.org members') last_update_members_date = runtime_storage_inst.get_by_key( @@ -90,7 +88,7 @@ def log(uri, runtime_storage_inst, days_to_update_members): cur_index = last_member_index + 1 html_parser = six.moves.html_parser.HTMLParser() - while cnt_empty < CNT_EMPTY_MEMBERS: + while cnt_empty < members_look_ahead: profile_uri = uri + str(cur_index) member = _retrieve_member(profile_uri, str(cur_index), html_parser)