Advertisement
Guest User

Untitled

a guest
Mar 7th, 2017
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.70 KB | None | 0 0
from __future__ import print_function

import json
import os
import re
import time

import mysql.connector
import requests

  8. db = mysql.connector.connect(user='root', password ='Kielce83', database='indeedProject')
  9. cursor = db.cursor()
  10.  
  11. start = time.clock()
  12.  
  13. #Format &q='exact+phrase'
  14.  
  15. api_url = 'http://api.indeed.com/ads/apisearch?publisher=6311497045529956&v=2&limit=100000&format=json'
  16.  
  17. with open("jobs.txt", "r") as f:
  18.     SearchTerms = f.read().splitlines()
  19.    
  20. Regions = set(['Northeast', 'South', 'Southwest', 'Midwest', 'West'])
  21.  
  22. print (SearchTerms)
  23.  
  24.  
  25. for Term in SearchTerms:
  26.        
  27.     urlfirst = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=0'  + '&fromage=1'
  28.  
  29.     underscoreTerm = Term.replace(" ", "_")
  30.        
  31.     grabforNum = requests.get(urlfirst)
  32.     json_content = json.loads(grabforNum.content)
  33.     print(json_content["totalResults"])
  34.     results = json_content["totalResults"]
  35.  
  36.     # must match the actual number of job results to the lower of the 25 increment or the last page will repeat over and over
  37.  
  38.  
  39.     if (results > 1025):
  40.            
  41.         for Region in Regions:
  42.        
  43.             urlfirst = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=0'  + '&fromage=1' + '&l=' + Region
  44.                    
  45.             grabforNum = requests.get(urlfirst)
  46.             json_content = json.loads(grabforNum.content)
  47.             regionResults = json_content["totalResults"]
  48.                
  49.             for number in range(-25, regionResults - 25, 25):
  50.                    
  51.                 url = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=' + str(number + 25) + '&fromage=1' + '&l=' + Region
  52.                        
  53.                 response = requests.get(url)
  54.                        
  55.                 jsonResponse=json.loads(response.content)
  56.                 jsonData = jsonResponse["results"]
  57.                        
  58.                 for item in jsonData:
  59.                     #to grab non strings use str(item.get("totalResults")
  60.                     #f.write (str(numresults))
  61.                            
  62.                     date = item.get("date").encode("utf-8")
  63.                     postDate = date[5:16]
  64.                     jobtitle = item.get("jobtitle").encode("utf-8")
  65.                     company = item.get("company").encode("utf-8")
  66.                     formattedLocationFull = item.get("formattedLocationFull").encode("utf-8")
  67.                     url = item.get("url").encode("utf-8")
  68.                     latitude = str(item.get("latitude"))
  69.                     longitude = str(item.get("longitude"))
  70.                     #region = item.get("l")
  71.                        
  72.                     cursor.execute("insert into " + underscoreTerm + " (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude)" "values (%s, %s, %s, %s, %s, %s, %s)" , (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude))
  73.                            
  74.                     db.commit()
  75.  
  76.                     print(Term)        
  77.                     print (results, 'left---Completed' , url)
  78.                     results-=1
  79.     else:
  80.        
  81.         for number in range(-25, results - 25, 25):
  82.                    
  83.             url = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=' + str(number + 25) + '&fromage=1'
  84.                
  85.             response = requests.get(url)
  86.                
  87.             jsonResponse=json.loads(response.content)
  88.             jsonData = jsonResponse["results"]
  89.                
  90.             for item in jsonData:
  91.                 #to grab non strings use str(item.get("totalResults")
  92.                 #f.write (str(numresults))
  93.                    
  94.                 date = item.get("date").encode("utf-8")
  95.                 postDate = date[5:16]
  96.                 jobtitle = item.get("jobtitle").encode("utf-8")
  97.                 company = item.get("company").encode("utf-8")
  98.                 formattedLocationFull = item.get("formattedLocationFull").encode("utf-8")
  99.                 url = item.get("url").encode("utf-8")
  100.                 latitude = str(item.get("latitude"))
  101.                 longitude = str(item.get("longitude"))
  102.                 #region = item.get("l")
  103.  
  104.                 cursor.execute("insert into " + underscoreTerm + " (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude)" "values (%s, %s, %s, %s, %s, %s, %s)" , (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude))
  105.                    
  106.                 db.commit()
  107.  
  108.                 print(Term)    
  109.                 print (results, 'left---Completed' , url)
  110.                 results-=1
  111.  
  112. cursor.close()
  113. db.close()
  114. f.close()
  115.  
  116. elapsed = (time.clock() - start)
  117. print (elapsed)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement