Folds SUBSTITUTIONS into RE_SUBSTITUTIONS.

This commit is contained in:
John Wiseman 2019-12-27 16:42:36 -08:00
parent 7d983c6910
commit 259159e838

View File

@ -68,12 +68,9 @@ NOT_TITLE_CASE = [
TITLE_CASE_EXCEPTION_RE = re.compile('[0-9]') TITLE_CASE_EXCEPTION_RE = re.compile('[0-9]')
SUBSTITUTIONS = {
'mcdonnell': 'McDonnell'
}
RE_SUBSTITUTIONS = [ RE_SUBSTITUTIONS = [
# "mcdonnell" -> "McDonnell"
[re.compile(r'\bmcdonnell\b', re.IGNORECASE), 'McDonnell'],
# "AS.350-B-1" -> "AS 350 B1" # "AS.350-B-1" -> "AS 350 B1"
[re.compile(r'\bAS.?350.?B.?1'), 'AS 350 B1'], [re.compile(r'\bAS.?350.?B.?1'), 'AS 350 B1'],
[re.compile(r'\bAS.?350.?B.?2'), 'AS 350 B2'], [re.compile(r'\bAS.?350.?B.?2'), 'AS 350 B2'],
@ -106,15 +103,15 @@ def title_case(s):
return s.title() return s.title()
# TODO: MCDONNELL -> McDonnell
def fix_type(s): def fix_type(s):
orig_s = s
if s is not None: if s is not None:
tokens = [p for p in s.split(' ') if p] tokens = [p for p in s.split(' ') if p]
tokens = [title_case(t) for t in tokens] tokens = [title_case(t) for t in tokens]
tokens = [SUBSTITUTIONS.get(t.lower(), t) for t in tokens]
s = ' '.join(tokens) s = ' '.join(tokens)
s = do_re_substitutions(s) s = do_re_substitutions(s)
# if s != orig_s:
# print([orig_s, s])
return s return s