Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import base64
import os
import re
import smtplib
import string
import sys
from collections import deque
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

try:  # Python 2 module names, exactly as the script originally imported them
    import httplib
    import urllib2
    from urlparse import urljoin
    from urlparse import urlsplit
except ImportError:  # Python 3: same functionality, bound to the same names
    import http.client as httplib
    import urllib.request as urllib2
    from urllib.parse import urljoin
    from urllib.parse import urlsplit

import requests
#import argparse
from bs4 import BeautifulSoup
# Lead Gen Script - V1.0
#
# Flow:
#   1. Read seed URLs (one per line) from google_urls.txt.  Only pages whose
#      URL contains "rllag" are kept, so each seed is expected to be a Google
#      local-search results URL.
#   2. From every result page, collect outbound business links.
#   3. Crawl each business site (at most MAX_PAGES_PER_SITE pages) until a
#      page containing an email address is found.
#   4. Mail each newly found address once and record it in Business3.txt.
#
# NOTE(review): the pasted source had lost all indentation, so loop nesting
# was reconstructed from the statements themselves - confirm against the
# original file if it is still available.

# --- Configuration ----------------------------------------------------------

GOOGLE_BASE_URL = 'https://www.google.com'
GOOGLE_URLS_FILE = "google_urls.txt"
SENT_LOG_FILE = "Business3.txt"

from_email = "racinecountyeyenews@gmail.com"
contact_limit = 100  # CHANGE THIS TO THE DESIRED CONTACT LIMIT

SMTP_HOST = 'smtp.gmail.com'
# Port 587 is the mail-submission port that expects STARTTLS; smtplib's
# default (25) is not what the starttls()+login() sequence below needs.
SMTP_PORT = 587
# The account password must come from the environment - never from source.
SMTP_PASSWORD_ENV = "LEADGEN_SMTP_PASSWORD"

MAX_PAGES_PER_SITE = 20   # crawl budget per business site
REQUEST_TIMEOUT = 10      # seconds; without it one dead host stalls the run
EXCLUDED_LINK_MARKERS = ('google', 'blogger', 'youtube')
EMAIL_PATTERN = re.compile(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", re.I)

hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

EMAIL_SUBJECT = 'Alex from Racine County Eye News, just reaching out to say hello'
EMAIL_HTML = """\
<html><head></head><body><p><span style="font-weight: 400;">Hey there,</span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;">Great website and I hope you guys are staying warm over there.</span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;">Just wanted to notify your location that we are currently offering advertising to local businesses that operate in the Racine County area. We average 1.7 million views annually and report high engagement from our viewers.</span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;"><br /></span><span style="font-weight: 400;">Do you have a marketing budget set aside for this quarter?</span></p></strong><br /><br /><br /><br /><i>Stay Warm!</i><br /><br /></p><p>Alex Ebinal, Customer Relations & Business Development</p><p>Phone Number: (262) 770-5175</p><p>CEO Denise Lockwood: (262) 504-9570<br /><br /></p><p>Email: racinecountyeyenews@gmail.com</p><p>Website: <a href="https://racinecountyeye.com/">https://racinecountyeye.com</a></p><p><a href="https://jobs.racinecountyeye.com/">Southeast Wisconsin Job Board</a> | <a href="https://jobs.racinecountyeye.com/products">Post a Job</a><br /><br /></p><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" alt="RCE" width="102" height="87"></p></span></div></body></html>"""


# --- Fetching and link discovery --------------------------------------------

def fetch_soup(url):
    """Download *url* with the browser-like headers and parse it as HTML."""
    response = urllib2.urlopen(urllib2.Request(url, headers=hdr))
    try:
        return BeautifulSoup(response, "html.parser")
    finally:
        response.close()


def getGoogleLinks(xurl):
    """Return ``[xurl]`` followed by every ``/search...`` href on that page.

    The hrefs are returned exactly as they appear in the page (relative).
    """
    anchors = fetch_soup(xurl).findAll('a', attrs={'href': re.compile("^/search")})
    return [xurl] + [anchor.get('href') for anchor in anchors]


def result_page_urls(google_url):
    """Return absolute URLs of the result pages reachable from *google_url*.

    Only links containing ``rllag`` are kept.  Relative hrefs are resolved
    against the Google host; urljoin leaves already-absolute URLs untouched,
    so the outcome no longer depends on the seed being first in the list.
    """
    return [urljoin(GOOGLE_BASE_URL, link)
            for link in getGoogleLinks(google_url)
            if 'rllag' in link]


def getLinks(url):
    """Return the absolute ``http...`` hrefs on *url* that are not
    Google/Blogger/YouTube links.

    A fresh list is returned on every call.
    """
    found = []
    for anchor in fetch_soup(url).findAll('a', attrs={'href': re.compile("^http")}):
        href = anchor.get('href')
        if not any(marker in href for marker in EXCLUDED_LINK_MARKERS):
            found.append(href)
    return found


# --- Crawling a business site -----------------------------------------------

def first_email(text):
    """Return the first email-looking substring of *text*, or None."""
    match = EMAIL_PATTERN.search(text)
    return match.group(0) if match else None


def resolve_link(page_url, href):
    """Resolve *href* found on *page_url* to an absolute http(s) URL.

    Returns None for hrefs that are not crawlable pages (``mailto:``,
    ``javascript:``, malformed URLs).  Fragments are dropped so that
    ``page#a`` and ``page#b`` do not use up the crawl budget twice.
    """
    try:
        parts = urlsplit(urljoin(page_url, href))
    except ValueError:
        return None
    if parts.scheme not in ('http', 'https'):
        return None
    return parts._replace(fragment='').geturl()


def find_contact(starting_url):
    """Breadth-first crawl from *starting_url* until a page shows an email.

    Returns an ``(email, page_url)`` tuple for the first page containing an
    address, or None if the page budget runs out or the queue empties first.
    """
    queue = deque([starting_url])
    seen = set([starting_url])
    pages = 0
    while queue and pages < MAX_PAGES_PER_SITE:
        pages += 1
        url = queue.popleft()
        print("Crawling URL: %s" % url)
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            # Unreachable or invalid page: move on to the next queued URL.
            continue
        email = first_email(response.text)
        if email is not None:
            return email, url
        soup = BeautifulSoup(response.text, 'lxml')
        for anchor in soup.find_all("a"):
            link = resolve_link(url, anchor.get("href", ''))
            if link is not None and link not in seen:
                seen.add(link)
                queue.append(link)
    return None


# --- Mailing ----------------------------------------------------------------

def build_message(to_email):
    """Build the HTML outreach message addressed to *to_email*."""
    msg = MIMEMultipart('alternative')
    msg['Subject'] = EMAIL_SUBJECT
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(EMAIL_HTML, 'html'))
    return msg


def SendMail(to_email):
    """Send the outreach message to *to_email* over an authenticated session.

    Raises:
        RuntimeError: if the password environment variable is not set.
        smtplib.SMTPException: on any SMTP failure (not swallowed here).
    """
    password = os.environ.get(SMTP_PASSWORD_ENV)
    if not password:
        raise RuntimeError("Set the %s environment variable to the SMTP "
                           "password." % SMTP_PASSWORD_ENV)
    msg = build_message(to_email)
    server = smtplib.SMTP(SMTP_HOST, SMTP_PORT)
    try:
        server.starttls()
        server.login(from_email, password)
        server.sendmail(from_email, [to_email], msg.as_string())
    finally:
        try:
            server.quit()
        except smtplib.SMTPServerDisconnected:
            pass  # connection already gone; nothing left to release


def load_sent_addresses(path):
    """Return the set of addresses recorded in the ``address:url`` log.

    A missing log file means nothing has been sent yet.
    """
    if not os.path.isfile(path):
        return set()
    with open(path) as log:
        return set(line.split(":", 1)[0].strip() for line in log if line.strip())


def send_to_contacts(contacts, sent_addresses, sent_count):
    """Mail every not-yet-contacted address in *contacts*.

    Args:
        contacts: list of ``(email, page_url)`` tuples.
        sent_addresses: set of already-mailed addresses; updated in place.
        sent_count: number of mails sent so far against ``contact_limit``.

    Returns:
        The updated sent count.  Exits the process with status 0 when a new
        address is encountered after the limit has been reached.
    """
    for address, url in contacts:
        if address in sent_addresses:
            print("Already sent to that email: " + address)
            continue
        if sent_count >= contact_limit:
            print(str(sent_count) + " URLs - limit reached. Exiting...")
            sys.exit(0)
        sys.stdout.write("Sending mail to " + address + "... ")
        sys.stdout.flush()
        SendMail(address)
        # Record immediately so a later crash cannot cause a second mail.
        with open(SENT_LOG_FILE, "a") as log:
            log.write(address + ":" + url + '\n')
        sent_addresses.add(address)
        print("Sent!\n")
        sent_count += 1
    return sent_count


# --- Orchestration ----------------------------------------------------------

def process_google_url(google_url, sent_addresses):
    """Run the crawl-and-mail pipeline for a single seed URL."""
    google_links = result_page_urls(google_url)
    print("Result pages: %s" % google_links)
    print("")
    print("----------------------")
    # NOTE(review): the counter restarts for every seed URL because `j=0`
    # sat between the per-seed setup and the result-page loop - confirm.
    sent_count = 0
    for google_link in google_links:
        contacts = []
        for starting_url in getLinks(google_link):
            try:
                contact = find_contact(starting_url)
            except Exception as err:
                # Best-effort crawl: one broken site must not stop the run,
                # but say why it was skipped instead of hiding the error.
                print("Skipping %s: %s" % (starting_url, err))
                continue
            if contact is not None:
                contacts.append(contact)
        print("\nEmail Addresses fetched:")
        for address, _ in contacts:
            print(address)
        print("")
        sent_count = send_to_contacts(contacts, sent_addresses, sent_count)


def main():
    """Entry point: process every seed URL listed in google_urls.txt."""
    if not os.environ.get(SMTP_PASSWORD_ENV):
        # Fail before crawling rather than at the first send.
        sys.exit("Set the %s environment variable to the SMTP password."
                 % SMTP_PASSWORD_ENV)
    print("Lead Gen Script - V1.0")
    print("----------------------\n")
    with open(GOOGLE_URLS_FILE, "r") as ins:
        # strip() removes the trailing newline each line carries; blank
        # lines are skipped.
        seed_urls = [line.strip() for line in ins if line.strip()]
    sent_addresses = load_sent_addresses(SENT_LOG_FILE)
    for google_url in seed_urls:
        process_google_url(google_url, sent_addresses)
    print("----------------------")
    print("Script finished successfully.")


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment