Advertisement
illwill

truepeoplesearch.com scraper

Mar 12th, 2018
330
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.91 KB | None | 0 0
  1. # import libraries
  2. import urllib2
  3. import sys
  4. import csv
  5. import re
  6. from datetime import datetime
  7. from bs4 import BeautifulSoup
  8. from urllib2 import Request, urlopen, HTTPError, URLError
  9. CBLUE   = '\33[34m'
  10. CRED = '\033[91m'
  11. CGRN = '\033[92m'
  12. CEND = '\033[0m'
  13. data = []
  14. phone = sys.argv[1]
  15.  
  16. def decodeEmail(e):
  17.     de = ""
  18.     k = int(e[:2], 16)
  19.  
  20.     for i in range(2, len(e)-1, 2):
  21.         de += chr(int(e[i:i+2], 16)^k)
  22.  
  23.     return de
  24. #######################################################################
  25. #truepeoplesearch.com scraper
  26. print("\n[?]"+ CBLUE + " truepeoplesearch.com" + CEND)
  27. page = 'https://www.truepeoplesearch.com/results?phoneno='+phone
  28. try:
  29.     request = urllib2.Request(page)
  30.     request.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36')
  31.     page = urllib2.urlopen(request)
  32.     soup = BeautifulSoup(page, 'lxml')
  33.  
  34.  
  35.     if not soup.find('div', attrs={'class':'row pl-1 record-count'}):     #check if any results , if so continue
  36.         information = []
  37.         for person in soup.find_all('div', class_='card card-block shadow-form card-summary'):
  38.             url = person['data-detail-link']
  39.             name = person.find('div', class_='h4').text.replace("\n","").strip()
  40.             age = person.find('span', text='Age ').find_next('span').text.replace("\n","").strip()
  41.             location = person.find('span', text='Lives in ').find_next('span').text
  42.             information.append([name, age, location, url])
  43.  
  44.         for name,age,location,url in information:
  45.             print (CGRN+"[+] "+CRED + "CallerID: "+ CEND+ name +CRED + " Age: "+ CEND+ age)
  46.             print (CGRN+"[+] "+CRED + "Location: "+ CEND+ location)
  47.  
  48.              
  49.  
  50.             spider = "https://www.truepeoplesearch.com" + url
  51.             request = urllib2.Request(spider)
  52.             request.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36')
  53.             page2 = urllib2.urlopen(request)
  54.             soup2 = BeautifulSoup(page2, 'lxml')
  55.             addy = soup2.find('a', href=re.compile(r'/results\?streetaddress=.*'))
  56.             location = addy.text.strip().replace("\n",", ")
  57.             print (CGRN+"[+] "+CRED + "LastAddr: "+ CEND+ location)
  58.             for emails in soup2.find_all('a', class_='__cf_email__'):
  59.                 email = emails['data-cfemail']
  60.                 email = decodeEmail(email)
  61.                 print(CGRN+"[+] "+CRED + "EmailAdr: " + CEND+ email)
  62.             print ("")
  63.     else:
  64.         print(CRED + "[+] " + CEND + "No Data")                         #we aint found shit.
  65.  
  66. except HTTPError:
  67.     print (CRED + '[x] '+CEND+'No Data.')
  68. except URLError:
  69.     print (CRED + '[x] '+CEND+'We failed to reach a server.')
  70. #######################################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement