diff --git a/cleanbasestation.py b/cleanbasestation.py index 0c420dc..7ec6841 100644 --- a/cleanbasestation.py +++ b/cleanbasestation.py @@ -42,6 +42,9 @@ def process_changes(db_path, queue): CITY_STATE_CLEAN_RE = re.compile(r' +- +[a-zA-Z0-9 ]+, [A-Za-z]{2}$') + +# DO title-case these tokens. + TITLE_CASE = [ 'AIR', 'CO', @@ -55,6 +58,8 @@ TITLE_CASE = [ 'SAN' ] +# DO NOT title-case these tokens. + NOT_TITLE_CASE = [ 'TIS-B' ] @@ -62,6 +67,12 @@ NOT_TITLE_CASE = [ TITLE_CASE_EXCEPTION_RE = re.compile('[0-9]') + +SUBSTITUTIONS = { + 'mcdonnell': 'McDonnell' +} + + def contains_upper_and_lower(s): return any(c.isupper() for c in s) and any(c.islower() for c in s) @@ -82,6 +93,7 @@ def fix_type(s): if s is not None: tokens = [p for p in s.split(' ') if p] tokens = [title_case(t) for t in tokens] + tokens = [SUBSTITUTIONS.get(t.lower(), t) for t in tokens] s = ' '.join(tokens) return s