runnig

date matching regex

Jun 26th, 2013
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.35 KB | None | 0 0
  1. import sys, re, collections
  2.  
  3. months_str = "January|February|March|April|May|June|July|August|September|October|November|December"
  4. months_short = '|'.join(s[:3] for s in months_str.split("|"))
  5.  
  6. day = "((0\d|[1-9])|[1-3]\d)(th|st|nd|rd)?"
  7. of = "(\ |\ of\ )"
  8. mon_fmt = "(%s|%s)" % (months_str, months_short)
  9. year = "(\d|\d\d|\d{4})"
  10. opt_comma = "(\,\s|\,|\s)"
  11. parts = {'day':day, 'of':of, 'mon_fmt':mon_fmt, 'year':year, 'comma': opt_comma}
  12.  
  13. # 12th of Dec, 1991
  14. date1_re = r"(\b%(day)s%(of)s%(mon_fmt)s%(comma)s?%(year)s\b)" % parts
  15. date1_compiled = re.compile(date1_re, re.IGNORECASE)
  16.  
  17. # 12/12/1991
  18. date2_re = r"(\b%(year)s[-./](\d|\d\d)[-./]%(year)s\b)" % parts
  19. date2_compiled = re.compile(date2_re, re.IGNORECASE)
  20.  
  21. # Dec 12, 1991
  22. date3_re = r"(\b%(mon_fmt)s %(day)s%(comma)s%(year)s\b)" % parts
  23. date3_compiled = re.compile(date3_re, re.IGNORECASE)
  24.  
  25. def find_dates(test):
  26.  
  27.     dates = []
  28.     num_dates = 0
  29.    
  30.     for m in re.findall(date1_compiled, test):
  31.         date = '1', m[0]
  32.         num_dates += 1
  33.        
  34.     for m in re.findall(date2_compiled, test):
  35.         date = '2', m[0]
  36.         num_dates += 1
  37.        
  38.     for m in re.findall(date3_compiled, test):
  39.         date = '3', m[0]
  40.         num_dates += 1
  41.    
  42.     return num_dates
  43.  
  44. tests = ["December 16, 1773", ".December 16, 1773.", "On June 16, 1773 ", " Jan 16th, 1773",
  45.         "December 1st, 1773", ",December 2nd, 1773,", "December 3rd, 1773", "December 4th, 1773",
  46.         "Dec 2nd, 1773", "May 3rd, 1773",  "Feb 1st, 1773", ".Feb 4th, 1773,", " Feb 14th, 1773 ",                  
  47.         "Feb 14th, 1773", "Dec 16, 1773.",  ".Jun 16th, 1773",  ", Dec 16, 1773.",  ".On Dec 16, 1773.",
  48.         ".12/16/1773.",  "12/16/1773",   "12-16-1773",   "12.16.1773.",  "12.16.73",  
  49.         "1773.12.16", "1773.1.1", ",1773.12.1.", "1773.1.12",
  50.         "1773-12-16", "1773-1-1", "1773-12-1", ",1773-1-12,",
  51.         "1773/12/16", "1773/1/1", "1773/12/1", "1773/1/12",
  52.         "12th of December 1773",  "12th of Dec 1773", "1 of December 1773",  "12 of December 1773",        
  53.         "1/1/1", "2/2/2012", "2012/9/1", "2013/12/01", "01/02/1201",
  54.         "120 of December 1773",     "0 of December 1773",    
  55.         "01/12/1201", "41/12/1900"
  56.          ]
  57.          
  58. for testcase in tests:
  59.     found = find_dates(testcase)
  60.     found = "  " if not found else "++"
  61.     print found, testcase
Advertisement
Add Comment
Please, Sign In to add comment