Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import httplib
- import re
- import string
- import sys
- import smtplib
- from email.mime.multipart import MIMEMultipart
- from email.mime.text import MIMEText
- from email.mime.application import MIMEApplication
- import base64
- import urllib2
- import requests
- #import argparse
- from urlparse import urlsplit
- from collections import deque
- from bs4 import BeautifulSoup
# Command-line parsing (argparse with -l/--link and -c/--climit) is disabled;
# the start URL is read interactively instead.

# Request headers sent with every urllib2 fetch made by this script.
hdr = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 "
        "(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
    "Accept-Encoding": "none",
    "Accept-Language": "en-US,en;q=0.8",
    "Connection": "keep-alive",
}

# Module-level accumulator that getLinks() appends candidate site URLs to.
links = []

# The search-results URL typed by the operator is the first entry of
# google_links; getGoogleLinks() appends further result-page links to it.
google_url = str(raw_input("Load Thy Cannon: "))
google_links = [google_url]
def getGoogleLinks(xurl):
    """Collect the relative ``/search`` links found on a results page.

    Fetches ``xurl`` using the module-level ``hdr`` request headers, parses
    the HTML and appends the ``href`` of every anchor whose ``href`` starts
    with ``/search`` to the module-level ``google_links`` list.

    Args:
        xurl: URL of the page to fetch.

    Returns:
        The module-level ``google_links`` list itself (mutated in place), so
        callers that rebind the result keep seeing the same list.

    Raises:
        urllib2.URLError: propagated unchanged if the page cannot be fetched.
    """
    search_href = re.compile("^/search")
    req = urllib2.Request(xurl, headers=hdr)
    html_page = urllib2.urlopen(req)
    try:
        # BeautifulSoup consumes the response while constructing the tree.
        soup = BeautifulSoup(html_page, "html.parser")
    finally:
        # Close explicitly so the connection is released even when parsing
        # fails; the response was previously left open.
        html_page.close()
    for link in soup.findAll('a', attrs={'href': search_href}):
        google_links.append(link.get('href'))
    return google_links
# getGoogleLinks() extends and returns the module-level list; the rebind is
# kept so the name keeps pointing at that same list.
google_links = getGoogleLinks(google_url)

# Keep only links containing 'rllag' (presumably the local-results query
# parameter -- see the example URL below; verify), and make relative links
# absolute.
# The host prefix is decided per link rather than by list position: when the
# URL typed by the operator lacks 'rllag' it is filtered out, and a
# position-based rule would then leave the first relative link without a host.
google_links[:] = [
    ('https://www.google.com' + yurl) if yurl.startswith('/') else yurl
    for yurl in google_links
    if 'rllag' in yurl
]
print(111,google_links)

print("Lead Gen Script - V1.0")
print("----------------------\n")

from_email = "racinecountyeyenews@gmail.com"
contact_limit=100 #CHANGE THIS TO THE DESIRED CONTACT LIMIT

# Example start URL:
#google_url = "https://www.google.com/search?rlz=1C1CHWL_en&q=construction%20companies%20in%20elkhorn%20wisconsin&npsic=0&rflfq=1&rlha=0&rllag=42679380,-88548678,3032&tbm=lcl&ved=2ahUKEwjTr6e99P_fAhVJmK0KHQF4DZQQjGp6BAgAEEg&tbs=lrf:!2m1!1e3!2m4!1e2!5m2!2m1!2e4!3sIAE,lf:1,lf_ui:2&rldoc=1&fll=0,0&fspn=0,NaN&fz=0&sll=0,0&sspn=0,NaN&sz=0&rlfi=hd:;si:&qop=0&rlvp=clear#qop=0&rlfi=hd:;si:&rlvp=clear"

print("")
print("----------------------")

# Number of emails sent so far in this run, compared against contact_limit.
j=0
# Compiled once; the same pattern used to be re-evaluated several times per page.
_EMAIL_RE = re.compile(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", re.I)
# File recording "address:url" for every email already sent.
_SENT_LOG = "Business2.txt"
# Upper bound on pages fetched while looking for an address on one site.
_MAX_PAGES_PER_SITE = 20
# Seconds to wait for a site before giving up; requests has no default timeout.
_REQUEST_TIMEOUT = 30


def getLinks(url):
    """Collect external site links from a results page.

    Fetches ``url`` with the module-level ``hdr`` headers and appends every
    absolute (``http...``) anchor target that does not contain ``google``,
    ``blogger`` or ``youtube`` to the module-level ``links`` list.

    Args:
        url: URL of the results page to fetch.

    Returns:
        The module-level ``links`` list (mutated in place).
    """
    req = urllib2.Request(url, headers=hdr)
    html_page = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(html_page, "html.parser")
    finally:
        # Release the connection even if parsing fails.
        html_page.close()
    for link in soup.findAll('a', attrs={'href': re.compile("^http")}):
        href = link.get('href')
        if 'google' not in href and 'blogger' not in href and 'youtube' not in href:
            links.append(href)
    return links


def _find_first_email(starting_url):
    """Crawl breadth-first from ``starting_url`` until a page shows an email.

    Returns a ``(email, page_url)`` tuple for the first address found, or
    ``None`` when no page yields one within ``_MAX_PAGES_PER_SITE`` fetches.
    """
    unprocessed_urls = deque([starting_url])
    # Everything ever queued; replaces the linear scans of the deque and the
    # processed set with one O(1) membership test.
    seen = set([starting_url])
    attempts = 0
    while unprocessed_urls:
        attempts += 1
        url = unprocessed_urls.popleft()
        if attempts > _MAX_PAGES_PER_SITE:
            break
        parts = urlsplit(url)
        base_url = "{0.scheme}://{0.netloc}".format(parts)
        path = url[:url.rfind('/')+1] if '/' in parts.path else url
        print("Crawling URL: %s" % url)
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            # Unreachable or malformed link: try the next queued page rather
            # than abandoning the whole site.
            continue
        emails = _EMAIL_RE.findall(response.text)
        print(str(emails))
        if emails:
            print(8)
            return str(emails[0]), url
        soup = BeautifulSoup(response.text, 'lxml')
        for anchor in soup.find_all("a"):
            link = anchor.attrs.get("href", '')
            if link.startswith('/'):
                link = base_url + link
            elif not link.startswith('http'):
                link = path + link
            if link not in seen:
                seen.add(link)
                unprocessed_urls.append(link)
    return None


def _load_sent_addresses(log_path):
    """Return the set of addresses already recorded in the sent log."""
    sent = set()
    try:
        with open(log_path) as log:
            for line in log:
                # Records are "address:url"; the address pattern cannot match
                # ':' so the first field is always the whole address.
                sent.add(line.split(":", 1)[0].strip())
    except IOError:
        # No readable log yet (e.g. first run): treat as nothing sent.
        pass
    return sent


def SendMail(to_email):
    """Send the HTML outreach message to ``to_email`` through Gmail SMTP.

    The message is sent from the module-level ``from_email``. The account
    password is read from the ``LEADGEN_SMTP_PASSWORD`` environment variable
    instead of being embedded in the source; a password that has been
    committed in source should be treated as exposed and rotated.

    Args:
        to_email: recipient address.

    Raises:
        RuntimeError: if ``LEADGEN_SMTP_PASSWORD`` is not set.
        smtplib.SMTPException: propagated from the SMTP session.
    """
    import os  # local import: only needed here for the credential lookup

    password = os.environ.get("LEADGEN_SMTP_PASSWORD")
    if not password:
        raise RuntimeError(
            "Set the LEADGEN_SMTP_PASSWORD environment variable to the SMTP "
            "password for " + from_email
        )

    msg = MIMEMultipart('alternative')
    msg['Subject'] = 'Just reaching out to say hello, Alex from Racine County Eye News'
    msg['From'] = from_email
    msg['To'] = to_email

    # NOTE(review): the body below contains non-ASCII punctuation; under
    # Python 2 the file needs a source-encoding declaration for it to parse
    # -- confirm one exists at the top of the real file.
    html = """\
<html><head></head><body><p>Hey there!<br /><br />Great website and I hope you guys are staying warm over there. This weather has truly been something fierce. I’d just like to introduce myself and offer to be at service to you in any way I can.<br /><br />My name is Alex Ebinal and I work with Racine County Eye news. We are well known in the southeast wisconsin area and have been working with construction employers for over 20 years to solve the biggest problems they face as pain free as possible.<br /><br />I know you must get a lot of emails like this. That’s why the last thing I want to do is be one more person that wastes your time. I would like to learn about your company and your biggest needs right now. If we have something that could be of use to you, I’d love to be the one to solve that problem for you.<br /><br /><br /><strong><br /></strong>If you have a minute, which of the following problems has been most relevant right now in your business? If you’re busy, please feel free to simply respond with a number. I will get back to you as soon as I can. 
Day or Night.<strong><br /></strong><strong><br /></strong><strong>1) Difficulty finding Skilled Workers</strong><strong><br /></strong><strong>2) High Turnover Costs</strong><strong><br /></strong><strong>3) Finding Effective Advertising</strong><br /><br /><br /><br /><i>Stay Warm!</i><br /><br /></p><p>Alex Ebinal, Customer Relations & Business Development</p><p>Phone Number: (262) 770-5175</p><p>CEO Denise Lockwood: (262) 504-9570<br /><br /></p><p>Email: racinecountyeyenews@gmail.com</p><p>Website: <a href="https://racinecountyeye.com/">https://racinecountyeye.com</a></p><p><a href="https://jobs.racinecountyeye.com/">Southeast Wisconsin Job Board</a> | <a href="https://jobs.racinecountyeye.com/products">Post a Job</a><br /><br /></p><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" alt="RCE" width="102" height="87"></p></span></div></body></html>"""
    # Declare the charset explicitly so the non-ASCII characters in the body
    # are not labelled as us-ascii.
    part = MIMEText(html, 'html', 'utf-8')
    msg.attach(part)

    s = smtplib.SMTP('smtp.gmail.com')
    try:
        s.starttls()
        s.login(from_email, password)
        s.sendmail(from_email, [to_email], msg.as_string())
    finally:
        # Always end the session, even when login or delivery fails.
        try:
            s.quit()
        except smtplib.SMTPServerDisconnected:
            pass  # connection already gone; nothing left to close


# For every results page: harvest one address per listed site, then mail each
# address that has not been contacted before, up to contact_limit sends.
for google_link in google_links:
    mail_list = []
    url_list = []
    temp = getLinks(google_link)
    for starting_url in temp:
        try:
            found = _find_first_email(starting_url)
        except Exception as exc:
            # Best effort per site, but report the failure instead of hiding
            # it; KeyboardInterrupt and SystemExit are no longer swallowed.
            print("Skipping %s: %s" % (starting_url, exc))
            continue
        if found is not None:
            mail_list.append(found[0])
            url_list.append(found[1])

    mail_list = [str(x) for x in mail_list]
    url_list = [str(y) for y in url_list]
    print(mail_list)
    print("\nEmail Addresses fetched:")
    for address in mail_list:
        print(address)
    print("")

    # Read the log once per batch and compare whole addresses; a substring
    # search over the raw file reports false "already sent" hits.
    sent_addresses = _load_sent_addresses(_SENT_LOG)
    # Pair each address with the page it came from; looking the URL up by
    # list.index() returns the wrong page for repeated addresses.
    for address, source_url in zip(mail_list, url_list):
        if address in sent_addresses:
            print("Already sent to that email: "+str(address))
        elif j < contact_limit:
            # Written without a newline so "Sent!" lands on the same line.
            sys.stdout.write("Sending mail to "+str(address)+"... ")
            sys.stdout.flush()
            SendMail(str(address))
            with open(_SENT_LOG, "a") as myfile:
                myfile.write(address+":"+source_url+'\n')
            sent_addresses.add(address)
            print("Sent!\n")
            j=j+1
        else:
            print(str(j)+" URLs - limit reached. Exiting...")
            sys.exit(0)

    # Start the next results page with empty accumulators; getLinks() appends
    # to the module-level ``links`` list.
    links=[]
    mail_list=[]
    url_list=[]
    print("----------------------")
Add Comment
Please, Sign In to add comment