Advertisement
Guest User

Untitled

a guest
Mar 21st, 2017
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.43 KB | None | 0 0
from __future__ import print_function

import itertools
import json
import linecache
import re
import time
from contextlib import closing

import mysql.connector
import requests

  10. db = mysql.connector.connect(user='root', password ='Kielce83', database='indeedProject')
  11. cursor = db.cursor()
  12.  
  13. start = time.clock()
  14.  
  15. #Format &q='exact+phrase'
  16.  
  17. api_url = 'http://api.indeed.com/ads/apisearch?publisher=6311497045529956&v=2&limit=100000&format=json'
  18.  
  19. with open("jobs.txt", "r") as f:
  20.     SearchTerms = f.read().splitlines()
  21.    
  22. Regions = set(['Northeast', 'Southeast', 'Midwest', 'Southwest', 'West'])
  23.  
  24. print (SearchTerms)
  25.  
  26.  
  27. for Term in SearchTerms:
  28.        
  29.     urlfirst = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=0'  + '&fromage=1'
  30.  
  31.     underscoreTerm = Term.replace(" ", "_")
  32.        
  33.     grabforNum = requests.get(urlfirst)
  34.     json_content = json.loads(grabforNum.content)
  35.     print(json_content["totalResults"])
  36.     results = json_content["totalResults"]
  37.  
  38.     # must match the actual number of job results to the lower of the 25 increment or the last page will repeat over and over
  39.  
  40.     writeResults = open("totalResults.txt", "a")
  41.     writeResults.write(repr(results) + '\n')
  42.    
  43.     if (results > 1025):
  44.            
  45.         for Region in Regions:
  46.        
  47.             urlfirst = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=0'  + '&fromage=1' + '&l=' + Region
  48.                    
  49.             grabforNum = requests.get(urlfirst)
  50.             json_content = json.loads(grabforNum.content)
  51.             regionResults = json_content["totalResults"]
  52.                
  53.             for number in range(-25, regionResults - 25, 25):
  54.                    
  55.                 url = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=' + str(number + 25) + '&fromage=1' + '&l=' + Region
  56.                        
  57.                 response = requests.get(url)
  58.                        
  59.                 jsonResponse=json.loads(response.content)
  60.                 jsonData = jsonResponse["results"]
  61.                        
  62.                 for item in jsonData:
  63.                     #to grab non strings use str(item.get("totalResults")
  64.                     #f.write (str(numresults))
  65.                            
  66.                     date = item.get("date").encode("utf-8")
  67.                     postDate = date[5:16]
  68.                     jobtitle = item.get("jobtitle").encode("utf-8")
  69.                     company = item.get("company").encode("utf-8")
  70.                     formattedLocationFull = item.get("formattedLocationFull").encode("utf-8")
  71.                     url = item.get("url").encode("utf-8")
  72.                     latitude = str(item.get("latitude"))
  73.                     longitude = str(item.get("longitude"))
  74.                     #region = item.get("l")
  75.                        
  76.                     cursor.execute("insert into " + underscoreTerm + " (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude)" "values (%s, %s, %s, %s, %s, %s, %s)" , (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude))
  77.                     #cursor.execute("insert into totalResults (postDate, jobtitle" + str(results) + ")" "values (%s, %s)" , (postDate, jobtitle, results))
  78.                            
  79.                     db.commit()
  80.                            
  81.                     print (results, 'left---Completed' , url)
  82.                     results-=1
  83.     else:
  84.        
  85.         for number in range(-25, results - 25, 25):
  86.                    
  87.             url = api_url + '&co=us' + '&q=' + Term + '&latlong=1' + '&start=' + str(number + 25) + '&fromage=1'
  88.                
  89.             response = requests.get(url)
  90.                
  91.             jsonResponse=json.loads(response.content)
  92.             jsonData = jsonResponse["results"]
  93.                
  94.             for item in jsonData:
  95.                 #to grab non strings use str(item.get("totalResults")
  96.                 #f.write (str(numresults))
  97.                    
  98.                 date = item.get("date").encode("utf-8")
  99.                 postDate = date[5:16]
  100.                 jobtitle = item.get("jobtitle").encode("utf-8")
  101.                 company = item.get("company").encode("utf-8")
  102.                 formattedLocationFull = item.get("formattedLocationFull").encode("utf-8")
  103.                 url = item.get("url").encode("utf-8")
  104.                 latitude = str(item.get("latitude"))
  105.                 longitude = str(item.get("longitude"))
  106.                 #region = item.get("l")
  107.  
  108.                 cursor.execute("insert into " + underscoreTerm + " (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude)" "values (%s, %s, %s, %s, %s, %s, %s)" , (postDate, jobtitle, company, formattedLocationFull, url, latitude, longitude))
  109.                 table_name = "results"
  110.                 #cursor.execute("insert into " + table_name + " (postDate, jobtitle" + str(results) + ")" "values (%s, %s)" , (postDate, jobtitle, results))    
  111.                 db.commit()
  112.                    
  113.                 print (results, 'left---Completed' , url)
  114.                 results-=1
  115.  
  116.  
  117. writeResults.close()
  118.  
  119. # Calculate sum of totalResults.txt and store in sum.txt
  120.  
  121.    
  122.    
  123.        
  124.  
  125. #category1sum = map(int, category1)
  126.    
  127. #writeSumFinal.write(repr(sum(category1)) + '\n')
  128. #writeSumFinal.write(repr(sum(category2)) + '\n')
  129.  
  130.    
  131.  
  132.  
  133. cursor.close()
  134. db.close()
  135. f.close()
  136. writeResults.close()
  137.  
  138.  
  139. elapsed = (time.clock() - start)
  140. print (elapsed)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement