Guest User

Untitled

a guest
Feb 14th, 2019
671
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.83 KB | None | 0 0
import base64
import httplib
import os
import re
import smtplib
import string
import sys
import urllib2
#import argparse
from collections import deque
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urlparse import urljoin
from urlparse import urlsplit

import requests
from bs4 import BeautifulSoup

  17. with open("google_urls.txt", "r") as ins:
  18.     for google_url in ins:
  19.         #parser = argparse.ArgumentParser(description='Lead Gen Script - V1.0')
  20.         #parser.add_argument('-l','--link', help='Google URL', required=True)
  21.         #parser.add_argument('-c','--climit', help='Contact limit', required=True)
  22.         #args = vars(parser.parse_args())
  23.        
  24.         #google_url = str(args['link'])
  25.         #google_url = str(raw_input("Load thy Fireball: "))
  26.         google_links = [google_url]
  27.         links = []
  28.        
  29.         #google_url = str(raw_input("Google Maps URL: "))
  30.        
  31.         hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
  32.                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  33.                'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
  34.                'Accept-Encoding': 'none',
  35.                'Accept-Language': 'en-US,en;q=0.8',
  36.                'Connection': 'keep-alive'}
  37.        
  38.         def getGoogleLinks(xurl):
  39.             req = urllib2.Request(xurl, headers=hdr)
  40.             html_page = urllib2.urlopen(req)
  41.             soup = BeautifulSoup(html_page, "html.parser")
  42.        
  43.             for link in soup.findAll('a', attrs={'href': re.compile("^/search")}):
  44.                 google_links.append(link.get('href'))
  45.        
  46.             return google_links
  47.        
  48.         google_links = getGoogleLinks(google_url)
  49.         google_links[:] = [yurl for yurl in google_links if 'rllag' in yurl]
  50.         nn = len(google_links)
  51.         for b in range(nn):
  52.             if b != 0:
  53.                 google_links[b] = 'https://www.google.com'+google_links[b]
  54.         print(111,google_links)
  55.         print("Lead Gen Script - V1.0")
  56.         print("----------------------\n")
  57.        
  58.         from_email = "racinecountyeyenews@gmail.com"
  59.         contact_limit=100 #CHANGE THIS TO THE DESIRED CONTACT LIMIT
  60.         #google_url = "https://www.google.com/search?rlz=1C1CHWL_en&q=construction%20companies%20in%20elkhorn%20wisconsin&npsic=0&rflfq=1&rlha=0&rllag=42679380,-88548678,3032&tbm=lcl&ved=2ahUKEwjTr6e99P_fAhVJmK0KHQF4DZQQjGp6BAgAEEg&tbs=lrf:!2m1!1e3!2m4!1e2!5m2!2m1!2e4!3sIAE,lf:1,lf_ui:2&rldoc=1&fll=0,0&fspn=0,NaN&fz=0&sll=0,0&sspn=0,NaN&sz=0&rlfi=hd:;si:&qop=0&rlvp=clear#qop=0&rlfi=hd:;si:&rlvp=clear"
  61.         #google_url = str(raw_input("Google Maps URL: "))
  62.         print("")
  63.         print("----------------------")
  64.         j=0
  65.         for google_link in google_links:
  66.             def getLinks(url):
  67.                 req = urllib2.Request(url, headers=hdr)
  68.                 html_page = urllib2.urlopen(req)
  69.                 soup = BeautifulSoup(html_page, "html.parser")
  70.        
  71.                 for link in soup.findAll('a', attrs={'href': re.compile("^http")}):
  72.                     if 'google' not in link.get('href') and 'blogger' not in link.get('href') and 'youtube' not in link.get('href'):
  73.                         links.append(link.get('href'))
  74.        
  75.                 return links
  76.        
  77.             mail_list = []
  78.             url_list = []
  79.             temp = getLinks(google_link)
  80.        
  81.             for starting_url in temp:
  82.                 try:
  83.                     unprocessed_urls = deque([starting_url])
  84.                     processed_urls = set()
  85.                     i=0
  86.        
  87.                     while len(unprocessed_urls):
  88.                         i=i+1
  89.        
  90.                         url = unprocessed_urls.popleft()
  91.                         processed_urls.add(url)
  92.        
  93.                         parts = urlsplit(url)
  94.                         base_url = "{0.scheme}://{0.netloc}".format(parts)
  95.                         path = url[:url.rfind('/')+1] if '/' in parts.path else url
  96.        
  97.                         if i>20:
  98.                             break
  99.                         #if j>=contact_limit:
  100.                         #    print(str(j)+" URLs - limit reached. Exiting...")
  101.                         #    sys.exit(0)
  102.        
  103.                         print("Crawling URL: %s" % url)
  104.                         try:
  105.                             response = requests.get(url)
  106.                         except (requests.exceptions.MissingSchema, requests.exceptions.ConnectionError):
  107.                             continue
  108.                         print(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  109.                         if len(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)) !=0:
  110.                             #if j>=contact_limit:
  111.                             #    print(str(j)+" URLs - limit reached. Exiting...")
  112.                             #    sys.exit(0)
  113.                             print(8)
  114.                             #mail_list.append(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  115.                             #print(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  116.                             s=str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I))
  117.                             result0 = re.search("'(.*)'", s)
  118.                             q=str(result0.group(1))
  119.                             q=str(q.split("'")[0])
  120.                             mail_list.append(q)
  121.                             url_list.append(url)
  122.                             #mail_list.append(new_email)
  123.                             break
  124.                         soup = BeautifulSoup(response.text, 'lxml')
  125.        
  126.                         for anchor in soup.find_all("a"):
  127.                             link = anchor.attrs["href"] if "href" in anchor.attrs else ''
  128.                             if link.startswith('/'):
  129.                                 link = base_url + link
  130.                             elif not link.startswith('http'):
  131.                                 link = path + link
  132.                             if not link in unprocessed_urls and not link in processed_urls:
  133.                                 unprocessed_urls.append(link)
  134.                 except SystemExit:
  135.                         print("Successfully exited.")
  136.                 except:
  137.                     pass
  138.        
  139.        
  140.             def SendMail(to_email):
  141.                 #Authenticate to Gmail's SMTP Protocol
  142.                 #server = smtplib.SMTP('smtp.gmail.com', 587)
  143.                 #server.starttls()
  144.        
  145.                 #Encrypt (Encode) Password to avoid plain-text exposure
  146.                 #server.login(("racinecountyeyenews@gmail.com"), ("RacineCountyEye123!"))
  147.        
  148.                 # Create a text/plain message
  149.                 msg = MIMEMultipart('alternative')
  150.                 msg['Subject'] = 'Alex from Racine County Eye News, just reaching out to say hello'
  151.                 msg['From'] = from_email
  152.                 msg['To'] = to_email
  153.        
  154.                 # Attaching the html
  155.                 html = """\
  156. <html><head></head><body><p><span style="font-weight: 400;">Hey there,</span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;">Great website and I hope you guys are staying warm over there.</span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;">Just wanted to notify your location that we are currently offering advertising to local businesses that operate in the Racine County area. We average 1.7 million views annually and report high engagement from our viewers.</span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;">Do you have a marketing budget set aside for this quarter?</span></p></strong><br /><br /><br /><br /><i>Stay Warm!</i><br /><br /></p><p>Alex Ebinal, Customer Relations &amp; Business Development</p><p>Phone Number: (262) 770-5175</p><p>CEO Denise Lockwood: (262) 504-9570<br /><br /></p><p>Email: racinecountyeyenews@gmail.com</p><p>Website: <a href="https://racinecountyeye.com/">https://racinecountyeye.com</a></p><p><a href="https://jobs.racinecountyeye.com/">Southeast Wisconsin Job Board</a> | <a href="https://jobs.racinecountyeye.com/products">Post a Job</a><br /><br /></p><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" alt="RCE" width="102" height="87"></p></span></div></body></html>"""
  157.        
  158.                 # The main body is just another attachment
  159.                 #body = email.mime.Text.MIMEText("""Test body...""")
  160.                 part = MIMEText(html, 'html')
  161.                 msg.attach(part)
  162.        
  163.                 ###### [COMMENT THE FOLLOW TO NOT INCLUDE ATTACHMENT] ###
  164.                 #filename='More_Information.pdf'
  165.                 #fp=open(filename,'rb')
  166.                 #att = MIMEApplication(fp.read(),_subtype="pdf")
  167.                 #fp.close()
  168.                 #att.add_header('Content-Disposition','attachment',filename=filename)
  169.                 #msg.attach(att)
  170.                 ######
  171.        
  172.                 #Login
  173.                 s = smtplib.SMTP('smtp.gmail.com')
  174.                 s.starttls()
  175.                 s.login(from_email,'RacineCounty123!')
  176.                 s.sendmail(from_email,[to_email], msg.as_string())
  177.                 s.quit()
  178.        
  179.             mail_list = [str(x) for x in mail_list]
  180.             url_list = [str(y) for y in url_list]
  181.             print(mail_list)
  182.             print("\nEmail Addresses fetched:")
  183.             for address in mail_list:
  184.                 print(address)
  185.             print("")
  186.             #j=0
  187.             for address in mail_list:
  188.                 if str(address) not in open("Business3.txt").read() and j<contact_limit:
  189.                     print "Sending mail to "+str(address)+"...",
  190.                     SendMail(str(address))
  191.                     with open("Business3.txt", "a") as myfile:
  192.                         myfile.write(address+":"+url_list[(mail_list.index(address))]+'\n')
  193.                     print "Sent!\n"
  194.                     j=j+1
  195.                 elif str(address) in open("Business3.txt").read():
  196.                     print("Already sent to that email: "+str(address))
  197.                 elif j>=contact_limit:
  198.                     print(str(j)+" URLs - limit reached. Exiting...")
  199.                     sys.exit(0)
  200.             links=[]
  201.             mail_list=[]
  202.             url_list=[]
  203. print("----------------------")
  204. print("Script finished successfully.")
Add Comment
Please, Sign In to add comment