Guest User

Untitled

a guest
Feb 2nd, 2019
346
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.81 KB | None | 0 0
  1. import httplib
  2. import re
  3. import string
  4. import sys
  5. import smtplib
  6. from email.mime.multipart import MIMEMultipart
  7. from email.mime.text import MIMEText
  8. from email.mime.application import MIMEApplication
  9. import base64
  10. import urllib2
  11. import requests
  12. #import argparse
  13. from urlparse import urlsplit
  14. from collections import deque
  15. from bs4 import BeautifulSoup
  16.  
  17. #parser = argparse.ArgumentParser(description='Lead Gen Script - V1.0')
  18. #parser.add_argument('-l','--link', help='Google URL', required=True)
  19. #parser.add_argument('-c','--climit', help='Contact limit', required=True)
  20. #args = vars(parser.parse_args())
  21.  
  22. #google_url = str(args['link'])
  23. google_url = str(raw_input("Load Thy Cannon: "))
  24. google_links = [google_url]
  25. links = []
  26.  
  27. #google_url = str(raw_input("Google Maps URL: "))
  28.  
  29. hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
  30. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  31. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
  32. 'Accept-Encoding': 'none',
  33. 'Accept-Language': 'en-US,en;q=0.8',
  34. 'Connection': 'keep-alive'}
  35.  
  36. def getGoogleLinks(xurl):
  37. req = urllib2.Request(xurl, headers=hdr)
  38. html_page = urllib2.urlopen(req)
  39. soup = BeautifulSoup(html_page, "html.parser")
  40.  
  41. for link in soup.findAll('a', attrs={'href': re.compile("^/search")}):
  42. google_links.append(link.get('href'))
  43.  
  44. return google_links
  45.  
  46. google_links = getGoogleLinks(google_url)
  47. google_links[:] = [yurl for yurl in google_links if 'rllag' in yurl]
  48. nn = len(google_links)
  49. for b in range(nn):
  50. if b != 0:
  51. google_links[b] = 'https://www.google.com'+google_links[b]
  52. print(111,google_links)
  53. print("Lead Gen Script - V1.0")
  54. print("----------------------\n")
  55.  
  56. from_email = "racinecountyeyenews@gmail.com"
  57. contact_limit=100 #CHANGE THIS TO THE DESIRED CONTACT LIMIT
  58. #google_url = "https://www.google.com/search?rlz=1C1CHWL_en&q=construction%20companies%20in%20elkhorn%20wisconsin&npsic=0&rflfq=1&rlha=0&rllag=42679380,-88548678,3032&tbm=lcl&ved=2ahUKEwjTr6e99P_fAhVJmK0KHQF4DZQQjGp6BAgAEEg&tbs=lrf:!2m1!1e3!2m4!1e2!5m2!2m1!2e4!3sIAE,lf:1,lf_ui:2&rldoc=1&fll=0,0&fspn=0,NaN&fz=0&sll=0,0&sspn=0,NaN&sz=0&rlfi=hd:;si:&qop=0&rlvp=clear#qop=0&rlfi=hd:;si:&rlvp=clear"
  59. #google_url = str(raw_input("Google Maps URL: "))
  60. print("")
  61. print("----------------------")
  62. j=0
  63. for google_link in google_links:
  64. def getLinks(url):
  65. req = urllib2.Request(url, headers=hdr)
  66. html_page = urllib2.urlopen(req)
  67. soup = BeautifulSoup(html_page, "html.parser")
  68.  
  69. for link in soup.findAll('a', attrs={'href': re.compile("^http")}):
  70. if 'google' not in link.get('href') and 'blogger' not in link.get('href') and 'youtube' not in link.get('href'):
  71. links.append(link.get('href'))
  72.  
  73. return links
  74.  
  75. mail_list = []
  76. url_list = []
  77. temp = getLinks(google_link)
  78.  
  79. for starting_url in temp:
  80. try:
  81. unprocessed_urls = deque([starting_url])
  82. processed_urls = set()
  83. i=0
  84.  
  85. while len(unprocessed_urls):
  86. i=i+1
  87.  
  88. url = unprocessed_urls.popleft()
  89. processed_urls.add(url)
  90.  
  91. parts = urlsplit(url)
  92. base_url = "{0.scheme}://{0.netloc}".format(parts)
  93. path = url[:url.rfind('/')+1] if '/' in parts.path else url
  94.  
  95. if i>20:
  96. break
  97. #if j>=contact_limit:
  98. # print(str(j)+" URLs - limit reached. Exiting...")
  99. # sys.exit(0)
  100.  
  101. print("Crawling URL: %s" % url)
  102. try:
  103. response = requests.get(url)
  104. except (requests.exceptions.MissingSchema, requests.exceptions.ConnectionError):
  105. continue
  106. print(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  107. if len(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)) !=0:
  108. #if j>=contact_limit:
  109. # print(str(j)+" URLs - limit reached. Exiting...")
  110. # sys.exit(0)
  111. print(8)
  112. #mail_list.append(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  113. #print(str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I)))
  114. s=str(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I))
  115. result0 = re.search("'(.*)'", s)
  116. q=str(result0.group(1))
  117. q=str(q.split("'")[0])
  118. mail_list.append(q)
  119. url_list.append(url)
  120. #mail_list.append(new_email)
  121. break
  122. soup = BeautifulSoup(response.text, 'lxml')
  123.  
  124. for anchor in soup.find_all("a"):
  125. link = anchor.attrs["href"] if "href" in anchor.attrs else ''
  126. if link.startswith('/'):
  127. link = base_url + link
  128. elif not link.startswith('http'):
  129. link = path + link
  130. if not link in unprocessed_urls and not link in processed_urls:
  131. unprocessed_urls.append(link)
  132. except SystemExit:
  133. print("Successfully exited.")
  134. except:
  135. pass
  136.  
  137.  
  138. def SendMail(to_email):
  139. #Authenticate to Gmail's SMTP Protocol
  140. #server = smtplib.SMTP('smtp.gmail.com', 587)
  141. #server.starttls()
  142.  
  143. #Encrypt (Encode) Password to avoid plain-text exposure
  144. #server.login(("racinecountyeyenews@gmail.com"), ("RacineCountyEye123!"))
  145.  
  146. # Create a text/plain message
  147. msg = MIMEMultipart('alternative')
  148. msg['Subject'] = 'Just reaching out to say hello, Alex from Racine County Eye News'
  149. msg['From'] = from_email
  150. msg['To'] = to_email
  151.  
  152. # Attaching the html
  153. html = """\
  154. <html><head></head><body><p>Hey there!<br /><br />Great website and I hope you guys are staying warm over there. This weather has truly been something fierce. I&rsquo;d just like to introduce myself and offer to be at service to you in any way I can.<br /><br />My name is Alex Ebinal and I work with Racine County Eye news. We are well known in the southeast wisconsin area and have been working with construction employers for over 20 years to solve the biggest problems they face as pain free as possible.<br /><br />I know you must get a lot of emails like this. That&rsquo;s why the last thing I want to do is be one more person that wastes your time. I would like to learn about your company and your biggest needs right now. If we have something that could be of use to you, I&rsquo;d love to be the one to solve that problem for you.<br /><br /><br /><strong><br /></strong>If you have a minute, which of the following problems has been most relevant right now in your business? If you&rsquo;re busy, please feel free to simply respond with a number. I will get back to you as soon as I can. Day or Night.<strong><br /></strong><strong><br /></strong><strong>1) Difficulty finding Skilled Workers</strong><strong><br /></strong><strong>2) High Turnover Costs</strong><strong><br /></strong><strong>3) Finding Effective Advertising</strong><br /><br /><br /><br /><i>Stay Warm!</i><br /><br /></p><p>Alex Ebinal, Customer Relations &amp; Business Development</p><p>Phone Number: (262) 770-5175</p><p>CEO Denise Lockwood: (262) 504-9570<br /><br /></p><p>Email: racinecountyeyenews@gmail.com</p><p>Website: <a href="https://racinecountyeye.com/">https://racinecountyeye.com</a></p><p><a href="https://jobs.racinecountyeye.com/">Southeast Wisconsin Job Board</a> | <a href="https://jobs.racinecountyeye.com/products">Post a Job</a><br /><br /></p><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" alt="RCE" width="102" height="87"></p></span></div></body></html>"""
  155.  
  156. # The main body is just another attachment
  157. #body = email.mime.Text.MIMEText("""Test body...""")
  158. part = MIMEText(html, 'html')
  159. msg.attach(part)
  160.  
  161. ###### [COMMENT THE FOLLOW TO NOT INCLUDE ATTACHMENT] ###
  162. #filename='More_Information.pdf'
  163. #fp=open(filename,'rb')
  164. #att = MIMEApplication(fp.read(),_subtype="pdf")
  165. #fp.close()
  166. #att.add_header('Content-Disposition','attachment',filename=filename)
  167. #msg.attach(att)
  168. ######
  169.  
  170. #Login
  171. s = smtplib.SMTP('smtp.gmail.com')
  172. s.starttls()
  173. s.login(from_email,'RacineCountyEye123!')
  174. s.sendmail(from_email,[to_email], msg.as_string())
  175. s.quit()
  176.  
  177. mail_list = [str(x) for x in mail_list]
  178. url_list = [str(y) for y in url_list]
  179. print(mail_list)
  180. print("\nEmail Addresses fetched:")
  181. for address in mail_list:
  182. print(address)
  183. print("")
  184. #j=0
  185. for address in mail_list:
  186. if str(address) not in open("Business2.txt").read() and j<contact_limit:
  187. print "Sending mail to "+str(address)+"...",
  188. SendMail(str(address))
  189. with open("Business2.txt", "a") as myfile:
  190. myfile.write(address+":"+url_list[(mail_list.index(address))]+'\n')
  191. print "Sent!\n"
  192. j=j+1
  193. elif str(address) in open("Business2.txt").read():
  194. print("Already sent to that email: "+str(address))
  195. elif j>=contact_limit:
  196. print(str(j)+" URLs - limit reached. Exiting...")
  197. sys.exit(0)
  198. links=[]
  199. mail_list=[]
  200. url_list=[]
  201. print("----------------------")
Add Comment
Please, Sign In to add comment