Fix processing of company names for members
1) Replace commas with spaces 2) Collapse sequences of spaces into one space 3) Add three aliases Change-Id: Ie7e15f73c027943e43a17a9b6245ad2fd1f6f36a
This commit is contained in:
parent
86a8393d6a
commit
e40cb6857c
|
@ -5879,7 +5879,7 @@
|
||||||
{
|
{
|
||||||
"domains": [""],
|
"domains": [""],
|
||||||
"company_name": "*independent",
|
"company_name": "*independent",
|
||||||
"aliases": ["None", "Non", "l-", ".", "****", "1", "aaa", "-", "dsadsadsadsad", "I dont have one", "company", "n/a", "Self", "Student", "home", "Home Based", "Independent", "Independen", "Independant", "MyHome", "HomeOffice", "Self Employeed", "Self Employed", "myself", "Self-employeed", "individual", "Individual Contributor", "Unaffiliated", "没有", "Null", "Univerisity", "fsfsf", "xxx"]
|
"aliases": ["None", "Non", "l-", ".", "****", "1", "aaa", "-", "dsadsadsadsad", "I dont have one", "company", "n/a", "Self", "Student", "home", "Home Based", "Independent", "Independen", "Independant", "MyHome", "HomeOffice", "Self Employeed", "Self Employed", "myself", "Self-employeed", "individual", "Individual Contributor", "Unaffiliated", "没有", "Null", "Univerisity", "fsfsf", "xxx", "no job"]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"domains": ["360.cn"],
|
"domains": ["360.cn"],
|
||||||
|
@ -6229,7 +6229,7 @@
|
||||||
{
|
{
|
||||||
"domains": ["hp.com"],
|
"domains": ["hp.com"],
|
||||||
"company_name": "HP",
|
"company_name": "HP",
|
||||||
"aliases": ["HP Cloud", "HP ES GD China", "HP, IBM", "HP Software", "HP Storage Division", "Hewlett Packard", "Hewlett-Packard Company", "Hewlett-Packard", "Hewllet-Packard", "HP R and D", "HP Cloud OS", "HP Networking", "hewelett-packard company", "HewlettPackard", "Hewlett-Pack"]
|
"aliases": ["HP Cloud", "HP ES GD China", "HP, IBM", "HP Software", "HP Storage Division", "Hewlett Packard", "Hewlett-Packard Company", "Hewlett-Packard", "Hewllet-Packard", "HP R and D", "HP Cloud OS", "HP Networking", "hewelett-packard company", "HewlettPackard", "Hewlett-Pack", "Hewlitt-Packard"]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"domains": ["huawei.com"],
|
"domains": ["huawei.com"],
|
||||||
|
@ -6239,7 +6239,7 @@
|
||||||
{
|
{
|
||||||
"domains": ["ibm.com", "linux.vnet.ibm.com"],
|
"domains": ["ibm.com", "linux.vnet.ibm.com"],
|
||||||
"company_name": "IBM",
|
"company_name": "IBM",
|
||||||
"aliases": ["IBM Australia", "IBM Canada", "IBM Canada Ltd", "IBM China", "IBM Corporation", "IBM India Pvt Ltd", "IBM India Pvt. Ltd.", "IBM Japan, Ltd.", "IBM Research", "IBM Research - China", "IBM Research Lab, India", "IBM Deutschland Research & Development GmbH", "International Business Machines Corporation"]
|
"aliases": ["IBM Australia", "IBM Canada", "IBM Canada Ltd", "IBM China", "IBM Corporation", "IBM India Pvt Ltd", "IBM India Pvt. Ltd.", "IBM Japan, Ltd.", "IBM Research", "IBM Research - China", "IBM Research Lab, India", "IBM Deutschland Research & Development GmbH", "International Business Machines Corporation", "IBM UK Ltd"]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"domains": ["ifca.unican.es"],
|
"domains": ["ifca.unican.es"],
|
||||||
|
|
|
@ -179,7 +179,8 @@ def _get_changed_member_records(runtime_storage_inst, record_processor_inst):
|
||||||
if record['record_type'] == 'member' and 'company_name' in record:
|
if record['record_type'] == 'member' and 'company_name' in record:
|
||||||
company_draft = record['company_draft']
|
company_draft = record['company_draft']
|
||||||
company_name = record_processor_inst.domains_index.get(
|
company_name = record_processor_inst.domains_index.get(
|
||||||
utils.normalize_company_name(company_draft)) or company_draft
|
utils.normalize_company_name(company_draft)) or (
|
||||||
|
utils.normalize_company_draft(company_draft))
|
||||||
|
|
||||||
if company_name != record['company_name']:
|
if company_name != record['company_name']:
|
||||||
record['company_name'] = company_name
|
record['company_name'] = company_name
|
||||||
|
|
|
@ -426,7 +426,7 @@ class RecordProcessor(object):
|
||||||
company_draft = record['company_draft']
|
company_draft = record['company_draft']
|
||||||
|
|
||||||
company_name = self.domains_index.get(utils.normalize_company_name(
|
company_name = self.domains_index.get(utils.normalize_company_name(
|
||||||
company_draft)) or company_draft
|
company_draft)) or (utils.normalize_company_draft(company_draft))
|
||||||
|
|
||||||
# author_email is a key to create new user
|
# author_email is a key to create new user
|
||||||
record['author_email'] = user_id
|
record['author_email'] = user_id
|
||||||
|
|
|
@ -210,3 +210,9 @@ def normalize_company_name(name):
|
||||||
regex += '|' + '((^|\\s)(' + '|'.join(BAD_NAME_SUFFIXES_WITH_STOPS) + '))'
|
regex += '|' + '((^|\\s)(' + '|'.join(BAD_NAME_SUFFIXES_WITH_STOPS) + '))'
|
||||||
name = re.sub(re.compile(regex, re.IGNORECASE), '', name)
|
name = re.sub(re.compile(regex, re.IGNORECASE), '', name)
|
||||||
return ''.join([c.lower() for c in name if c.isalnum()])
|
return ''.join([c.lower() for c in name if c.isalnum()])
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_company_draft(name):
    """Tidy a free-form company name string.

    Commas are replaced with spaces, every run of whitespace is collapsed
    into a single space, and leading/trailing whitespace is removed.

    :param name: company name string to clean up
    :returns: the cleaned-up name
    """
    # Replace (rather than delete) commas so "HP,IBM" does not fuse into
    # "HPIBM"; the extra spaces this creates are collapsed below.
    name = name.replace(',', ' ')
    name = re.sub(r'\s+', ' ', name)
    # A leading/trailing comma or space would otherwise survive as a stray
    # edge space, making "Acme," and "Acme" compare as different names.
    return name.strip()
|
||||||
|
|
Loading…
Reference in New Issue