Advertisement
Guest User

Untitled

a guest
Apr 11th, 2017
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.54 KB | None | 0 0
  1. from __future__ import print_function
  2. import requests
  3. import json
  4. import re
  5. import time
  6. import mysql.connector
  7. import linecache
  8. import itertools
  9. from time import gmtime, strftime
  10.  
  11. db = mysql.connector.connect(user='root', password ='Kielce83', database='indeedProject')
  12. cursor = db.cursor()
  13.  
  14. start = time.clock()
  15. getDate = strftime("%a, %d %b %Y", gmtime())
  16.  
  17. #Format &q='exact+phrase'
  18.  
  19. api_url = 'http://api.indeed.com/ads/apisearch?publisher=6311497045529956&v=2&limit=100000&format=json'
  20.  
  21. with open("jobs_full.txt", "r") as f:
  22.     SearchTerms = f.read().splitlines()
  23.    
  24. Regions = set(['Northeast', 'Southeast', 'Midwest', 'Southwest', 'West'])
  25.  
  26. print (SearchTerms)
  27.  
  28.  
  29. keyList = ["First Line Supervisor of Agricultural Crop and Horticultural Workers", "First Line Supervisor of Animal Husbandry and Animal Care Workers", "First Line Supervisor of Landscaping Lawn Service and Groundskeeping Workers", "Food and Tobacco Roaster Baker and Drying Machine Operator and Tender", "First Line Supervisor of Construction Trades and Extraction Workers", "Helper Brickmasons Blockmasons Stonemasons and Tile Marble Setters", "First Line Supervisor of Office and Administrative Support Workers", "Secretary and Administrative Assistant Except Legal Medical and Executive", "Adult Basic and Secondary Education and Literacy Teacher and Instructor", "Middle School Teacher Except Special and Career Technical Education", "Secondary School Teacher Except Special and Career Technical Education", "Postal Service Mail Sorters Processors and Processing Machine Operator", "Low Vision Therapist Orientation and Mobility Specialist and Vision Rehabilitation Therapist", "Separating Filtering Clarifying Precipitating and Still Machine Setters Operator and Tender", "Lifeguard Ski Patrol and Other Recreational Protective Service Worker", "Computer Numerically Controlled Machine Tool Programmer Metal and Plastic", "Crushing Grinding and Polishing Machine Setter Operator and Tender", "Cutting Punching and Press Machine Setter Operator and Tender Metal and Plastic", "Drilling and Boring Machine Tool Setter Operator and Tender Metal and Plastic", "Electrical and Electronics Repairer Commercial and Industrial Equipment", "Extruding and Drawing Machine Setter Operator and Tender Metal and Plastic", "Extruding and Forming Machine Setters Operator and Tender Synthetic and Glass Fibers", "Extruding Forming Pressing and Compacting Machine Setter Operator and Tenders", "Grinding Lapping Polishing and Buffing Machine Tool Setter Operator and Tender Metal and Plastic", "Heat Treating Equipment Setter Operator and Tender Metal and Plastic", "Lathe and Turning Machine Tool Setter Operator and Tender Metal and Plastic", "Milling and Planing Machine Setter Operator and Tender Metal and Plastic", "Molding Coremaking and Casting Machine Setter Operator and Tender Metal and Plastic", "Multiple Machine Tool Setter Operator and Tender Metal and Plastic", "Plating and Coating Machine Setter Operator and Tender Metal and Plastic", "Telecommunications Equipment Installer and Repairer Except Line Installer", "Textile Winding Twisting and Drawing Out Machine Setter Operator and Tender", "Door To Door Sales Worker News and Street Vendor and Related Worker", "Sales Representative Wholesale and Manufacturing Except Technical and Scientific Products", "Sales Representative Wholesale and Manufacturing Technical and Scientific Products", "Health and Safety Engineer Except Mining Safety Engineer and Inspector", "Ambulance Drivers and Attendant Except Emergency Medical Technician", "Electrical and Electronics Installer and Repairer Transportation Equipment", "First Line Supervisor of Helpers Laborers and Material Movers Hand", "First Line Supervisor of Transportation and Material Moving Machine and Vehicle Operators", "Transportation Vehicle Equipment and Systems Inspector Except Aviation", "Atmospheric Earth Marine and Space Sciences Teacher Postsecondary", "Education Administrator Preschool and Childcare Center or Program", "Cleaning Washing and Metal Pickling Equipment Operator and Tender"]
  30.  
  31. valuesList = ["First Line Supervisor of Agricultural Crop and Horticultural Wor", "First Line Supervisor of Animal Husbandry and Animal Care Worker", "First Line Supervisor of Landscaping Lawn Service and Groundskee", "Food and Tobacco Roaster Baker and Drying Machine Operator and T", "First Line Supervisor of Construction Trades and Extraction Work", "Helper Brickmasons Blockmasons Stonemasons and Tile Marble Sette", "First Line Supervisor of Office and Administrative Support Worke", "Secretary and Administrative Assistant Except Legal Medical and", "Adult Basic and Secondary Education and Literacy Teacher and Ins", "Middle School Teacher Except Special and Career Technical Educat", "Secondary School Teacher Except Special and Career Technical Edu", "Postal Service Mail Sorters Processors and Processing Machine Op", "Low Vision Therapist Orientation and Mobility Specialist and Vis", "Separating Filtering Clarifying Precipitating and Still Machine", "Lifeguard Ski Patrol and Other Recreational Protective Service W", "Computer Numerically Controlled Machine Tool Programmer Metal an", "Crushing Grinding and Polishing Machine Setter Operator and Tend", "Cutting Punching and Press Machine Setter Operator and Tender Me", "Drilling and Boring Machine Tool Setter Operator and Tender Meta", "Electrical and Electronics Repairer Commercial and Industrial Eq", "Extruding and Drawing Machine Setter Operator and Tender Metal a", "Extruding and Forming Machine Setters Operator and Tender Synthe", "Extruding Forming Pressing and Compacting Machine Setter Operato", "Grinding Lapping Polishing and Buffing Machine Tool Setter Opera", "Heat Treating Equipment Setter Operator and Tender Metal and Pla", "Lathe and Turning Machine Tool Setter Operator and Tender Metal", "Milling and Planing Machine Setter Operator and Tender Metal an","Molding Coremaking and Casting Machine Setter Operator and Tend", "Multiple Machine Tool Setter Operator and Tender Metal and Plast", "Plating and Coating Machine Setter Operator and Tender Metal an","Telecommunications Equipment Installer and Repairer Except Lin","Textile Winding Twisting and Drawing Out Machine Setter Operato","Door To Door Sales Worker News and Street Vendor and Related Wor", "Sales Representative Wholesale and Manufacturing Except Technica", "Sales Representative Wholesale and Manufacturing Technical and S", "Health and Safety Engineer Except Mining Safety Engineer and Ins", "Ambulance Drivers and Attendant Except Emergency Medical Technic", "Electrical and Electronics Installer and Repairer Transportatio","First Line Supervisor of Helpers Laborers and Material Movers Ha", "First Line Supervisor of Transportation and Material Moving Mach", "Transportation Vehicle Equipment and Systems Inspector Except Av", "Atmospheric Earth Marine and Space Sciences Teacher Postsecondar", "Education Administrator Preschool and Childcare Center or Progra", "Cleaning Washing and Metal Pickling Equipment Operator and Tende"]
  32.  
  33. writeResults = open("totalResults.txt", "a")
  34. totSum = 0
  35. j = 0
  36. supersum = 0
  37. for Term in SearchTerms:
  38.  
  39.     # check for long table name and substitute shorter name
  40.     while j < len(keyList):
  41.         if (Term == keyList[j]):
  42.             Term = valuesList[j]
  43.             j = j+1
  44.  
  45.    
  46.     # write sum after each category
  47.     if (Term == '-'):
  48.         writeResults.write(repr(totSum) + '\n')
  49.         supersum = totSum + supersum
  50.         totSum = 0
  51.        
  52.     else:
  53.  
  54.         urlfirst = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=0'  + '&fromage=1'
  55.  
  56.         underscoreTerm = Term.replace(" ", "_")
  57.        
  58.         grabforNum = requests.get(urlfirst)
  59.         json_content = json.loads(grabforNum.content)
  60.         print(json_content["totalResults"])
  61.         results = json_content["totalResults"]
  62.  
  63.     # must match the actual number of job results to the lower of the 25 increment or the last page will repeat over and over
  64.  
  65.        
  66.         writeResults.write(repr(results) + '\n')
  67.  
  68.     #add results to new total sum for category
  69.         totSum = totSum + results
  70.  
  71.        
  72.    
  73.         if (results > 1025):
  74.            
  75.             for Region in Regions:
  76.        
  77.                 urlfirst = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=0'  + '&fromage=1' + '&l=' + Region
  78.                    
  79.                 grabforNum = requests.get(urlfirst)
  80.                 json_content = json.loads(grabforNum.content)
  81.                 regionResults = json_content["totalResults"]
  82.                
  83.                 for number in range(-25, regionResults - 25, 25):
  84.                    
  85.                     url = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=' + str(number + 25) + '&fromage=1' + '&l=' + Region
  86.                        
  87.                     response = requests.get(url)
  88.                        
  89.                     jsonResponse=json.loads(response.content)
  90.                     jsonData = jsonResponse["results"]
  91.                        
  92.                     for item in jsonData:
  93.                     #to grab non strings use str(item.get("totalResults")
  94.                     #f.write (str(numresults))
  95.                            
  96.                         date = item.get("date").encode("utf-8")
  97.                         postDate = date[5:16]
  98.                         jobtitle = item.get("jobtitle").encode("utf-8")
  99.                         company = item.get("company").encode("utf-8")
  100.                         formattedLocationFull = item.get("formattedLocationFull").encode("utf-8")
  101.                         url = item.get("url").encode("utf-8")
  102.                         latitude = str(item.get("latitude"))
  103.                         longitude = str(item.get("longitude"))
  104.                     #region = item.get("l")
  105.                        
  106.                         cursor.execute("insert into " + underscoreTerm + " (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude)" "values (%s, %s, %s, %s, %s, %s, %s)" , (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude))
  107.                     #cursor.execute("insert into totalResults (postDate, jobtitle" + str(results) + ")" "values (%s, %s)" , (postDate, jobtitle, results))
  108.  
  109.                        
  110.  
  111.                        
  112.                         db.commit()
  113.                            
  114.                         print (results, 'left---Completed' , url)
  115.                         results-=1
  116.         else:
  117.        
  118.             for number in range(-25, results - 25, 25):
  119.                    
  120.                 url = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=' + str(number + 25) + '&fromage=1'
  121.                
  122.                 response = requests.get(url)
  123.                
  124.                 jsonResponse=json.loads(response.content)
  125.                 jsonData = jsonResponse["results"]
  126.                
  127.                 for item in jsonData:
  128.                 #to grab non strings use str(item.get("totalResults")
  129.                 #f.write (str(numresults))
  130.                    
  131.                     date = item.get("date").encode("utf-8")
  132.                     postDate = date[5:16]
  133.                     jobtitle = item.get("jobtitle").encode("utf-8")
  134.                     company = item.get("company").encode("utf-8")
  135.                     formattedLocationFull = item.get("formattedLocationFull").encode("utf-8")
  136.                     url = item.get("url").encode("utf-8")
  137.                     latitude = str(item.get("latitude"))
  138.                     longitude = str(item.get("longitude"))
  139.                 #region = item.get("l")
  140.  
  141.                     cursor.execute("insert into " + underscoreTerm + " (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude)" "values (%s, %s, %s, %s, %s, %s, %s)" , (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude))
  142.                     table_name = "results"
  143.                 #cursor.execute("insert into " + table_name + " (postDate, jobtitle" + str(results) + ")" "values (%s, %s)" , (postDate, jobtitle, results))    
  144.                     db.commit()
  145.                    
  146.                     print (results, 'left---Completed' , url)
  147.                     results-=1
  148.  
  149.  
  150.  
  151.  
  152. writeResults.close()
  153.  
  154.  
  155. with open("totalResults.txt", "r") as h:
  156.     getTotals = h.read().splitlines()
  157.  
  158.    
  159.  
  160. for total in getTotals:
  161.     termcat = " "
  162.     termcat = termcat + "," + total
  163.    
  164.  
  165. cursor.execute ("insert into totalResults values (" + getDate + termcat + ")")
  166. db.commit()
  167.  
  168.  
  169. cursor.close()
  170. db.close()
  171. f.close()
  172. writeResults.close()
  173.  
  174.  
  175. elapsed = (time.clock() - start)
  176. print (elapsed)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement