Advertisement
Guest User

Untitled

a guest
Jan 6th, 2015
479
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.87 KB | None | 0 0
  1. import json
  2. import re
  3. import webapp2
  4. from bs4 import BeautifulSoup
  5. from datetime import datetime
  6. import httplib
  7. import logging
  8. from google.appengine.ext import ndb
  9. from google.appengine.api import mail
  10.  
  11.  
  12. class VBStatus(ndb.Model):
  13.     year = ndb.IntegerProperty()
  14.     month = ndb.IntegerProperty()
  15.     status = ndb.BooleanProperty()
  16.  
  17.  
  18. class VBHandler(webapp2.RequestHandler):
  19.     @staticmethod
  20.     def get_month(month):
  21.         options = {
  22.             1: "january",
  23.             2: "february",
  24.             3: "march",
  25.             4: "april",
  26.             5: "may",
  27.             6: "june",
  28.             7: "july",
  29.             8: "august",
  30.             9: "september",
  31.             10: "october",
  32.             11: "november",
  33.             12: "december",
  34.         }
  35.         return options[month]
  36.  
  37.     @staticmethod
  38.     def get_urls(year, month):
  39.         fiscal_year = (year + 1) if month >= 10 else year
  40.         return [
  41.             "/content/visas/english/law-and-policy/bulletin/" +\
  42.             "{fiscal_year}/visa-bulletin-for-{month}-{year}.html".format(
  43.                 fiscal_year=fiscal_year,
  44.                 year=year,
  45.                 month=VBHandler.get_month(month)
  46.             ),
  47.             "/content/visas/english/law-and-policy/bulletin/" +\
  48.             "{fiscal_year}/visa-bulletin-{month}-{year}.html".format(
  49.                 fiscal_year=fiscal_year,
  50.                 year=year,
  51.                 month=VBHandler.get_month(month)
  52.             ),
  53.         ]
  54.  
  55.     @staticmethod
  56.     def parse(soup, table_name):
  57.         row = 0
  58.         column = 0
  59.         result = {}
  60.         visa_type = None
  61.         countries = []
  62.         classes = []
  63.         for tr in soup.find("div", {"class": table_name}).find("table").find("tbody").findAll("tr"):
  64.             for td in tr.findAll("td"):
  65.                 content = ''.join(td.findAll(text=True)).strip()
  66.                 if row == 0 and column == 0:
  67.                     visa_type = VBHandler.normalize_visa_type(content)
  68.                 elif column == 0:
  69.                     visa_class = VBHandler.normalize_visa_class(visa_type, content)
  70.                     classes.append(visa_class)
  71.                     result[visa_class] = {}
  72.                 elif row == 0:
  73.                     countries.append(VBHandler.normalize_country(content))
  74.                 else:
  75.                     date_str = content
  76.                     visa_date = ''
  77.                     if date_str != "C":
  78.                         date = datetime.strptime(date_str, "%d%b%y")
  79.                         visa_date = date.strftime('%Y-%m-%d')
  80.                     result[classes[row-1]][countries[column-1]] = visa_date
  81.                 column += 1
  82.             row += 1
  83.             column = 0
  84.         return {'type': visa_type, 'classes': result}
  85.  
  86.     @staticmethod
  87.     def normalize_visa_type(visa_type):
  88.         visa_type = visa_type.lower()
  89.         if re.search("family", visa_type):
  90.             return "family"
  91.         if re.search("employment", visa_type):
  92.             return "employment"
  93.  
  94.     @staticmethod
  95.     def normalize_visa_class(visa_type, visa_class):
  96.         visa_class = visa_class.lower()
  97.         if visa_type == "employment":
  98.             if re.search("1st", visa_class):
  99.                 return "eb1"
  100.             if re.search("2nd", visa_class):
  101.                 return "eb2"
  102.             if re.search("3rd", visa_class):
  103.                 return "eb3"
  104.             if re.search('4th', visa_class):
  105.                 return 'eb4'
  106.             if re.search('5th', visa_class):
  107.                 return 'eb5'
  108.             if re.search('other', visa_class):
  109.                 return 'eb_ow'
  110.             if re.search('religious', visa_class):
  111.                 return 'eb_crw'
  112.         if visa_type == 'family':
  113.             return visa_class
  114.  
  115.     @staticmethod
  116.     def normalize_country(country):
  117.         country = country.lower()
  118.         if re.search("chargeability", country):
  119.             return "row"
  120.         if re.search("china", country):
  121.             return "china"
  122.         if re.search("india", country):
  123.             return "india"
  124.         if re.search("mexico", country):
  125.             return "mexico"
  126.         if re.search("philippines", country):
  127.             return "philippines"
  128.         return country
  129.  
  130.     def upload_to_parse(self, year, month, vb_data, full_url, force):
  131.         try:
  132.             param_data = {
  133.                 "year": year,
  134.                 "month": month,
  135.                 "full_url": full_url,
  136.                 "data": vb_data,
  137.                 "force": force,
  138.             }
  139.             connection = httplib.HTTPSConnection('api.parse.com', 443)
  140.             connection.connect()
  141.             connection.request(
  142.                 'POST',
  143.                 '/1/functions/updateVisaBulletin',
  144.                 json.dumps(param_data),
  145.                 {
  146.                     "X-Parse-Application-Id": "YOUR_APP_ID",
  147.                     "X-Parse-REST-API-Key": "YOUR_API_KEY",
  148.                     "Content-Type": "application/json"
  149.                 }
  150.             )
  151.             return json.loads(connection.getresponse().read())
  152.         except:
  153.             return {
  154.                 "result": {
  155.                     "error": "fail"
  156.                 }
  157.             }
  158.  
  159.     def get_vb(self, year, month, vb_status, force):
  160.         self.response.content_type = 'application/json'
  161.         urls = VBHandler.get_urls(year, month)
  162.  
  163.         resp = None
  164.         full_url = None
  165.         for url in urls:
  166.             try:
  167.                 headers = {
  168.                     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
  169.                 }
  170.                 conn = httplib.HTTPConnection("travel.state.gov")
  171.                 conn.request("GET", url, None, headers)
  172.                 resp = conn.getresponse()
  173.                 if not resp or resp.status != 200:
  174.                     logging.warning("VB not found {year}-{month} @ {url}".format(year=year, month=month, url=url))
  175.                     # try next url
  176.                     continue
  177.                 full_url = "http://travel.state.gov" + url
  178.                 break
  179.             except:
  180.                 # try next url
  181.                 continue
  182.  
  183.         if not resp or resp.status != 200:
  184.             self.response.write(json.dumps({
  185.                 "result": "not found",
  186.             }))
  187.             return
  188.  
  189.         data = resp.read()
  190.         soup = BeautifulSoup(data)
  191.  
  192.         family_based = VBHandler.parse(soup, "visabulletinmaintable")
  193.         employment_based = VBHandler.parse(soup, "visabulletinemploymenttable")
  194.         vb_data = {
  195.             family_based['type']: family_based['classes'],
  196.             employment_based['type']: employment_based['classes'],
  197.         }
  198.  
  199.         upload_result = self.upload_to_parse(year, month, vb_data, full_url, force)
  200.         subject = None
  201.         if 'id' in upload_result['result']:
  202.             subject = "VB Uploaded {year}-{month}: {result}".format(year=year, month=month, result=json.dumps(upload_result))
  203.             logging.info(subject)
  204.             vb_status.status = True
  205.         else:
  206.             subject = "Failed to upload {year}-{month}: {result}".format(year=year, month=month, result=json.dumps(upload_result))
  207.             logging.error(subject)
  208.             vb_status.status = False
  209.         vb_status.put()
  210.         sender_address = "YOUR_SENDER_EMAIL"
  211.         body = json.dumps({
  212.             "year": year,
  213.             "month": month,
  214.             "full_url": full_url,
  215.             "data": vb_data,
  216.             "upload_result": upload_result,
  217.         })
  218.         mail.send_mail(sender_address, "YOUR_EMAIL", subject, body)
  219.         self.response.write(body)
  220.  
  221.     def get(self):
  222.         force = bool(self.request.get('force'))
  223.         if not self.request.get('year') or not self.request.get('month'):
  224.             now = datetime.now()
  225.             year = int(now.strftime("%Y"))
  226.             month = int(now.strftime("%m")) + 1     # check next month
  227.             if month == 13:     # convert to January
  228.                 year += 1
  229.                 month = 1
  230.         else:
  231.             year = int(self.request.get('year'))
  232.             month = int(self.request.get('month'))
  233.  
  234.         query = VBStatus.query(VBStatus.year == year, VBStatus.month == month).fetch(1)
  235.         vb_status = None
  236.         for result in query:
  237.             vb_status = result
  238.             if vb_status.status and not force:
  239.                 logging.info("Already uploaded {year}-{month}".format(year=year, month=month))
  240.                 self.response.write("Already uploaded {year}-{month}".format(year=year, month=month))
  241.                 return
  242.  
  243.         if vb_status is None:
  244.             vb_status = VBStatus(parent=ndb.Key('VB', 'whatever'))
  245.             vb_status.year = year
  246.             vb_status.month = month
  247.             vb_status.status = False
  248.  
  249.         self.get_vb(year, month, vb_status, force)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement