Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys, re, collections
# Regex fragments used to assemble the three date patterns below.
# Fragments containing backslashes are raw strings so that sequences such as
# \d reach the regex engine unchanged instead of being treated as (invalid)
# string escapes.
months_str = "January|February|March|April|May|June|July|August|September|October|November|December"
# Three-letter abbreviations derived from the full names ("Jan", "Feb", ...).
months_short = '|'.join(s[:3] for s in months_str.split("|"))
# Day of month 1-31 with an optional leading zero and optional ordinal suffix.
# Two-digit alternatives come first so "12" is preferred over "1"; values
# such as "00" or "39" are rejected.
day = r"([12]\d|3[01]|0?[1-9])(th|st|nd|rd)?"
# Separator between day and month: a single space or " of ".
of = r"(\ |\ of\ )"
# Full month names are listed before abbreviations so "December" wins over "Dec".
mon_fmt = "(%s|%s)" % (months_str, months_short)
# Year written with 1, 2 or 4 digits.
year = r"(\d|\d\d|\d{4})"
# Separator before the year: ", " or "," or one whitespace character.
opt_comma = r"(\,\s|\,|\s)"
parts = {'day': day, 'of': of, 'mon_fmt': mon_fmt, 'year': year, 'comma': opt_comma}

# In every pattern the outermost group wraps the whole match, so the first
# element of each re.findall() tuple is the complete date text.

# 12th of Dec, 1991
date1_re = r"(\b%(day)s%(of)s%(mon_fmt)s%(comma)s?%(year)s\b)" % parts
date1_compiled = re.compile(date1_re, re.IGNORECASE)

# 12/12/1991
# The first and last fields both use the year fragment because the field order
# is not fixed (12/16/1773 and 1773/12/16 are both accepted).  The named
# back-reference forces both separators to be the same character, so
# "12/16-1773" is not treated as a date.
date2_re = r"(\b%(year)s(?P<sep>[-./])(\d|\d\d)(?P=sep)%(year)s\b)" % parts
date2_compiled = re.compile(date2_re, re.IGNORECASE)

# Dec 12, 1991
date3_re = r"(\b%(mon_fmt)s %(day)s%(comma)s%(year)s\b)" % parts
date3_compiled = re.compile(date3_re, re.IGNORECASE)
def find_dates(test):
    """Count the date-like substrings in *test*.

    The text is scanned independently with each of the three module-level
    compiled patterns (date1_compiled, date2_compiled, date3_compiled) and
    the per-pattern match counts are added together, so a span matched by
    more than one pattern is counted once per pattern.

    Args:
        test: The string to scan.

    Returns:
        The total number of matches as an int; 0 when nothing matches.
    """
    patterns = (date1_compiled, date2_compiled, date3_compiled)
    # Only the number of matches is needed, so the matched text is not kept.
    return sum(len(pattern.findall(test)) for pattern in patterns)
# Sample inputs covering the three supported date layouts, with and without
# surrounding punctuation or whitespace, plus a few out-of-range values.
tests = [
    "December 16, 1773", ".December 16, 1773.", "On June 16, 1773 ", " Jan 16th, 1773",
    "December 1st, 1773", ",December 2nd, 1773,", "December 3rd, 1773", "December 4th, 1773",
    "Dec 2nd, 1773", "May 3rd, 1773", "Feb 1st, 1773", ".Feb 4th, 1773,", " Feb 14th, 1773 ",
    "Feb 14th, 1773", "Dec 16, 1773.", ".Jun 16th, 1773", ", Dec 16, 1773.", ".On Dec 16, 1773.",
    ".12/16/1773.", "12/16/1773", "12-16-1773", "12.16.1773.", "12.16.73",
    "1773.12.16", "1773.1.1", ",1773.12.1.", "1773.1.12",
    "1773-12-16", "1773-1-1", "1773-12-1", ",1773-1-12,",
    "1773/12/16", "1773/1/1", "1773/12/1", "1773/1/12",
    "12th of December 1773", "12th of Dec 1773", "1 of December 1773", "12 of December 1773",
    "1/1/1", "2/2/2012", "2012/9/1", "2013/12/01", "01/02/1201",
    "120 of December 1773", "0 of December 1773",
    "01/12/1201", "41/12/1900",
]

# Print one line per sample: "++" when at least one date was found, otherwise
# a blank marker, followed by the sample text.
for testcase in tests:
    marker = "++" if find_dates(testcase) else " "
    # A single pre-formatted argument produces the same output under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (marker, testcase))
Advertisement
Add Comment
Please, Sign In to add comment