Fixed processing of company names for members
1) Commas are replaced with spaces 2) Sequences of whitespace are collapsed into a single space 3) Three aliases were added Change-Id: Ie7e15f73c027943e43a17a9b6245ad2fd1f6f36a
This commit is contained in:
parent
86a8393d6a
commit
e40cb6857c
|
@ -5879,7 +5879,7 @@
|
|||
{
|
||||
"domains": [""],
|
||||
"company_name": "*independent",
|
||||
"aliases": ["None", "Non", "l-", ".", "****", "1", "aaa", "-", "dsadsadsadsad", "I dont have one", "company", "n/a", "Self", "Student", "home", "Home Based", "Independent", "Independen", "Independant", "MyHome", "HomeOffice", "Self Employeed", "Self Employed", "myself", "Self-employeed", "individual", "Individual Contributor", "Unaffiliated", "没有", "Null", "Univerisity", "fsfsf", "xxx"]
|
||||
"aliases": ["None", "Non", "l-", ".", "****", "1", "aaa", "-", "dsadsadsadsad", "I dont have one", "company", "n/a", "Self", "Student", "home", "Home Based", "Independent", "Independen", "Independant", "MyHome", "HomeOffice", "Self Employeed", "Self Employed", "myself", "Self-employeed", "individual", "Individual Contributor", "Unaffiliated", "没有", "Null", "Univerisity", "fsfsf", "xxx", "no job"]
|
||||
},
|
||||
{
|
||||
"domains": ["360.cn"],
|
||||
|
@ -6229,7 +6229,7 @@
|
|||
{
|
||||
"domains": ["hp.com"],
|
||||
"company_name": "HP",
|
||||
"aliases": ["HP Cloud", "HP ES GD China", "HP, IBM", "HP Software", "HP Storage Division", "Hewlett Packard", "Hewlett-Packard Company", "Hewlett-Packard", "Hewllet-Packard", "HP R and D", "HP Cloud OS", "HP Networking", "hewelett-packard company", "HewlettPackard", "Hewlett-Pack"]
|
||||
"aliases": ["HP Cloud", "HP ES GD China", "HP, IBM", "HP Software", "HP Storage Division", "Hewlett Packard", "Hewlett-Packard Company", "Hewlett-Packard", "Hewllet-Packard", "HP R and D", "HP Cloud OS", "HP Networking", "hewelett-packard company", "HewlettPackard", "Hewlett-Pack", "Hewlitt-Packard"]
|
||||
},
|
||||
{
|
||||
"domains": ["huawei.com"],
|
||||
|
@ -6239,7 +6239,7 @@
|
|||
{
|
||||
"domains": ["ibm.com", "linux.vnet.ibm.com"],
|
||||
"company_name": "IBM",
|
||||
"aliases": ["IBM Australia", "IBM Canada", "IBM Canada Ltd", "IBM China", "IBM Corporation", "IBM India Pvt Ltd", "IBM India Pvt. Ltd.", "IBM Japan, Ltd.", "IBM Research", "IBM Research - China", "IBM Research Lab, India", "IBM Deutschland Research & Development GmbH", "International Business Machines Corporation"]
|
||||
"aliases": ["IBM Australia", "IBM Canada", "IBM Canada Ltd", "IBM China", "IBM Corporation", "IBM India Pvt Ltd", "IBM India Pvt. Ltd.", "IBM Japan, Ltd.", "IBM Research", "IBM Research - China", "IBM Research Lab, India", "IBM Deutschland Research & Development GmbH", "International Business Machines Corporation", "IBM UK Ltd"]
|
||||
},
|
||||
{
|
||||
"domains": ["ifca.unican.es"],
|
||||
|
|
|
@ -179,7 +179,8 @@ def _get_changed_member_records(runtime_storage_inst, record_processor_inst):
|
|||
if record['record_type'] == 'member' and 'company_name' in record:
|
||||
company_draft = record['company_draft']
|
||||
company_name = record_processor_inst.domains_index.get(
|
||||
utils.normalize_company_name(company_draft)) or company_draft
|
||||
utils.normalize_company_name(company_draft)) or (
|
||||
utils.normalize_company_draft(company_draft))
|
||||
|
||||
if company_name != record['company_name']:
|
||||
record['company_name'] = company_name
|
||||
|
|
|
@ -426,7 +426,7 @@ class RecordProcessor(object):
|
|||
company_draft = record['company_draft']
|
||||
|
||||
company_name = self.domains_index.get(utils.normalize_company_name(
|
||||
company_draft)) or company_draft
|
||||
company_draft)) or (utils.normalize_company_draft(company_draft))
|
||||
|
||||
# author_email is a key to create new user
|
||||
record['author_email'] = user_id
|
||||
|
|
|
@ -210,3 +210,9 @@ def normalize_company_name(name):
|
|||
regex += '|' + '((^|\\s)(' + '|'.join(BAD_NAME_SUFFIXES_WITH_STOPS) + '))'
|
||||
name = re.sub(re.compile(regex, re.IGNORECASE), '', name)
|
||||
return ''.join([c.lower() for c in name if c.isalnum()])
|
||||
|
||||
|
||||
def normalize_company_draft(name):
    """Tidy a free-form company name string.

    Commas are replaced with spaces, every run of whitespace is collapsed
    into a single space, and leading/trailing whitespace is removed, so
    ``"IBM Japan, Ltd."`` becomes ``"IBM Japan Ltd."``.

    :param name: raw company name string
    :returns: the tidied company name string
    """
    # A comma becomes a space (not nothing) so "HP,IBM" stays two words.
    name = name.replace(',', ' ')
    # Collapse whitespace runs, including those left behind by commas.
    name = re.sub(r'\s+', ' ', name)
    # Without stripping, a leading/trailing comma or space leaves a stray
    # space, and "Acme," would normalize differently from "Acme".
    return name.strip()
|
||||
|
|
Loading…
Reference in New Issue