Fix McDonnell.

This commit is contained in:
John Wiseman 2019-12-18 23:06:58 -08:00
parent 6b01d921d9
commit c2a56ce98b

View File

@ -42,6 +42,9 @@ def process_changes(db_path, queue):
# Matches a trailing " - <letters/digits/spaces>, <two letters>" suffix at the
# end of a string, e.g. " - Dallas, TX".
# NOTE(review): presumably used to strip a city/state suffix -- confirm
# against the callers, which are not visible here.
CITY_STATE_CLEAN_RE = re.compile(r' +- +[a-zA-Z0-9 ]+, [A-Za-z]{2}$')
# DO title-case these tokens.
TITLE_CASE = [ TITLE_CASE = [
'AIR', 'AIR',
'CO', 'CO',
@ -55,6 +58,8 @@ TITLE_CASE = [
'SAN' 'SAN'
] ]
# DO NOT title-case these tokens.
NOT_TITLE_CASE = [
    'TIS-B',
]
@ -62,6 +67,12 @@ NOT_TITLE_CASE = [
# Matches any single ASCII digit.
# NOTE(review): presumably tokens containing a digit are exempt from
# title-casing -- confirm against title_case(), which is not visible here.
TITLE_CASE_EXCEPTION_RE = re.compile('[0-9]')
# Whole-token replacements: each key is a lower-cased token and each value is
# the exact spelling to emit in its place.
SUBSTITUTIONS = {
    'mcdonnell': 'McDonnell',
}
def contains_upper_and_lower(s):
    """Return True if *s* mixes uppercase and lowercase characters.

    Args:
        s: The string to inspect.

    Returns:
        True only when *s* has at least one uppercase character and at least
        one lowercase character. False for an empty string or a string whose
        cased characters are all the same case.
    """
    return any(c.isupper() for c in s) and any(c.islower() for c in s)
@ -82,6 +93,7 @@ def fix_type(s):
if s is not None: if s is not None:
tokens = [p for p in s.split(' ') if p] tokens = [p for p in s.split(' ') if p]
tokens = [title_case(t) for t in tokens] tokens = [title_case(t) for t in tokens]
tokens = [SUBSTITUTIONS.get(t.lower(), t) for t in tokens]
s = ' '.join(tokens) s = ' '.join(tokens)
return s return s