Guest User

Untitled

a guest
Jan 25th, 2019
641
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import httplib
  2. import re
  3. import string
  4. import sys
  5. import smtplib
  6. from email.mime.multipart import MIMEMultipart
  7. from email.mime.text import MIMEText
  8. from email.mime.application import MIMEApplication
  9. import base64
  10. import urllib2
  11. import requests
  12. import argparse
  13. from urlparse import urlsplit
  14. from collections import deque
  15. from bs4 import BeautifulSoup
  16.  
  17. parser = argparse.ArgumentParser(description='Lead Gen Script - V1.0')
  18. parser.add_argument('-l','--link', help='Google URL', required=True)
  19. parser.add_argument('-c','--climit', help='Contact limit', required=True)
  20. args = vars(parser.parse_args())
  21.  
  22. google_url = str(args['link'])
  23.  
  24. google_links = []
  25. links = []
  26.  
  27. def getGoogleLinks(xurl):
  28. req = urllib2.Request(xurl, headers=hdr)
  29. html_page = urllib2.urlopen(req)
  30. soup = BeautifulSoup(html_page, "html.parser")
  31.  
  32. for link in soup.findAll('a', attrs={'href': re.compile("^https://www.google.com/search")}):
  33. google_links.append(link.get('href'))
  34.  
  35. return google_links
  36.  
  37. google_links = getGoogleLinks(google_url)
  38.  
  39. print("Lead Gen Script - V1.0")
  40. print("----------------------\n")
  41.  
  42. contact_limit=int(args['climit']) #CHANGE THIS TO THE DESIRED CONTACT LIMIT
  43. #google_url = "https://www.google.com/search?rlz=1C1CHWL_en&q=construction%20companies%20in%20elkhorn%20wisconsin&npsic=0&rflfq=1&rlha=0&rllag=42679380,-88548678,3032&tbm=lcl&ved=2ahUKEwjTr6e99P_fAhVJmK0KHQF4DZQQjGp6BAgAEEg&tbs=lrf:!2m1!1e3!2m4!1e2!5m2!2m1!2e4!3sIAE,lf:1,lf_ui:2&rldoc=1&fll=0,0&fspn=0,NaN&fz=0&sll=0,0&sspn=0,NaN&sz=0&rlfi=hd:;si:&qop=0&rlvp=clear#qop=0&rlfi=hd:;si:&rlvp=clear"
  44. #google_url = str(raw_input("Google Maps URL: "))
  45. print("")
  46. print("----------------------")
  47. j=0
  48. for google_link in google_links:
  49. hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
  50. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  51. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
  52. 'Accept-Encoding': 'none',
  53. 'Accept-Language': 'en-US,en;q=0.8',
  54. 'Connection': 'keep-alive'}
  55.  
  56. def getLinks(url):
  57. req = urllib2.Request(url, headers=hdr)
  58. html_page = urllib2.urlopen(req)
  59. soup = BeautifulSoup(html_page, "html.parser")
  60.  
  61. for link in soup.findAll('a', attrs={'href': re.compile("^http://")}):
  62. links.append(link.get('href'))
  63.  
  64. return links
  65.  
  66. mail_list = []
  67. url_list = []
  68. temp = getLinks(google_link)
  69.  
  70. for starting_url in temp:
  71. try:
  72. j=j+1
  73.  
  74. unprocessed_urls = deque([starting_url])
  75. processed_urls = set()
  76. i=0
  77.  
  78. while len(unprocessed_urls):
  79. i=i+1
  80.  
  81. url = unprocessed_urls.popleft()
  82. processed_urls.add(url)
  83.  
  84. parts = urlsplit(url)
  85. base_url = "{0.scheme}://{0.netloc}".format(parts)
  86. path = url[:url.rfind('/')+1] if '/' in parts.path else url
  87.  
  88. if i>20:
  89. break
  90. if j>=contact_limit:
  91. print(str(j)+" URLs - limit reached. Exiting...")
  92. sys.exit(0)
  93.  
  94. print("Crawling URL: %s" % url)
  95. try:
  96. response = requests.get(url)
  97. except (requests.exceptions.MissingSchema, requests.exceptions.ConnectionError):
  98. continue
  99. print(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  100. if len(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)) !=0:
  101. print(8)
  102. #mail_list.append(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  103. #print(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  104. s=str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I))
  105. result0 = re.search("'(.*)'", s)
  106. q=str(result0.group(1))
  107. q=str(q.split("'")[0])
  108. mail_list.append(q)
  109. url_list.append(url)
  110. #mail_list.append(new_email)
  111. break
  112. soup = BeautifulSoup(response.text, 'lxml')
  113.  
  114. for anchor in soup.find_all("a"):
  115. link = anchor.attrs["href"] if "href" in anchor.attrs else ''
  116. if link.startswith('/'):
  117. link = base_url + link
  118. elif not link.startswith('http'):
  119. link = path + link
  120. if not link in unprocessed_urls and not link in processed_urls:
  121. unprocessed_urls.append(link)
  122. except:
  123. pass
  124.  
  125.  
  126. def SendMail(to_email):
  127. #Authenticate to Gmail's SMTP Protocol
  128. #server = smtplib.SMTP('smtp.gmail.com', 587)
  129. #server.starttls()
  130.  
  131. #Encrypt (Encode) Password to avoid plain-text exposure
  132. #server.login(("racinecountyeyenews@gmail.com"), ("RacineCountyEye123!"))
  133.  
  134. # Create a text/plain message
  135. msg = MIMEMultipart('alternative')
  136. msg['Subject'] = 'Alex from Racine County Eye News, just reaching out to say hello'
  137. msg['From'] = 'racinecountyeyenews@gmail.com'
  138. msg['To'] = to_email
  139.  
  140. # Attaching the html
  141. html = """\
  142. <html><head></head><body><div dir=3D"ltr"><span id=3D"gmail-docs-internal-guid-1bb3430c-7fff-13c1-95f1-f4eecfdbbf95"><p dir=3D"ltr" style=3D"line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Hey there!</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Great website. I can tell you and your team haveput a lot of work to get to where you are today. I'll be as briefas possible because I don't want to waste your time.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">We're reaching out to local employers in the construction industry because we know many are suffering losses due to the trouble of finding and retaining skilled workers. It's not a mystery that high turnover canbe a huge cost to business, and we've been working hard on this new solution to fix that problem.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;fontvariant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Starting on February 1st Racine County Eye News is sending out a newsletter directly to 50,000 job seekers in the Southeast Wisconsin Area. No other news organization is doing this and we have just obtained the right to do so.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"> This tool can be customized to fitthe exact type of job seeker that you're looking to hire through location and income.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">This means we can even help you recruit in areas close to competitors. Because we're launching it in February, we're looking to advertise one or two local construction companies in these newsletters. </span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);backgroundcolor:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">To get straight to it, we are offering to place a 250 x 300 sized bannerpromoting your company in the newsletter sent to 50,000 job seekers for $250. Any number of months purchased in advance will be at the same price, even as that price increases in the future. Purchasing in advance will also ensure you retain your spot on the newsletter as space is so limited. </span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Please feel free to ignore this message if it isn't of interest, and thank you for taking the time to read it. We just want to help thosewho are facing this problem to consider taking advantage of this easy solution. If this is of interest to you, feel free to send an email back or give me a call on my personal line. The sign up process is very easy and can be done either through the phone or directly on our website </span><a href="https://jobs.racinecountyeye.com/payments/new?p=3D78c8cfa5-bf72-4075-a62f-29fb0d55fa06" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:Arial;background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">here</span></a><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Thank you,</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Alex, Customer Relations &amp; Marketing Representative</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;verticalalign:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Phone Number: </span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">(262) 770-5175</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">CEO Denise Lockwood: </span><spanstyle=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">(262) 504-9570</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Email: </span><a href="mailto:Racinecountyeyenews@gmail.com" style=3D"text-decoration-line:none"><span style=3D"fontsize:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;text-decoration-line:underline;vertical-align:baseline;white-space:prewrap">racinecountyeyenews@gmail.com</span></a></p><p dir=3D"ltr" style=3D"line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Website: </span><a href="https://racinecountyeye.com" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">https://racinecountyeye.com</span></a></p><p dir=3D"ltr" style=3D"line-height:1.38;margin-top:0pt;margin-bottom:0pt"><a href="https://jobs.racinecountyeye.com/" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;text-decoration-line:underline;vertical-align:baseline;white-space:pre-wrap">Southeast Wisconsin Job Board</span></a><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"> | </span><span style=3D"font-size:11pt;font-family:&quot;TimesNew Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"> </span><a href="https://jobs.racinecountyeye.com/products" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;text-decoration-line:underline;vertical-align:baseline;white-space:pre-wrap">Post a Job</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br><br></span></a><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" width="102" height="87" style="border: none; transform: rotate(0rad);"></p></span></div></body></html>"""
  143.  
  144. # The main body is just another attachment
  145. #body = email.mime.Text.MIMEText("""Test body...""")
  146. part = MIMEText(html, 'html')
  147. msg.attach(part)
  148.  
  149. ###### [COMMENT THE FOLLOW TO NOT INCLUDE ATTACHMENT] ###
  150. filename='More_Information.pdf'
  151. fp=open(filename,'rb')
  152. att = MIMEApplication(fp.read(),_subtype="pdf")
  153. fp.close()
  154. att.add_header('Content-Disposition','attachment',filename=filename)
  155. msg.attach(att)
  156. ######
  157.  
  158. #Login
  159. s = smtplib.SMTP('smtp.gmail.com')
  160. s.starttls()
  161. s.login('racinecountyeyenews@gmail.com','RacineCountyEye123!')
  162. s.sendmail('racinecountyeyenews@gmail.com',[to_email], msg.as_string())
  163. s.quit()
  164.  
  165. mail_list = [str(x) for x in mail_list]
  166. url_list = [str(y) for y in url_list]
  167. print(mail_list)
  168. print("\nEmail Addresses fetched:")
  169. for address in mail_list:
  170. print(address)
  171. print("")
  172. for address in mail_list:
  173. if str(address) not in open("already_sent.txt").read():
  174. print "Sending mail to "+str(address)+"...",
  175. SendMail(str(address))
  176. with open("already_sent.txt", "a") as myfile:
  177. myfile.write(address+":"+url_list[(mail_list.index(address))]+'\n')
  178. print "Sent!\n"
  179. print("----------------------")
  180. print("Script finished successfully.")
Add Comment
Please, Sign In to add comment