# Untitled

import httplib
import re
import string
import sys
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
import base64
import urllib2
import requests
#import argparse
from urlparse import urlsplit
from collections import deque
from bs4 import BeautifulSoup

# Scratch list filled by getLinks() with the outbound links of the results
# page that is currently being processed.
links = []

# Browser-like headers attached to every urllib2 request.
hdr = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

# Command-line parsing with argparse (-l/--link for the Google URL,
# -c/--climit for the contact limit) is switched off; the URL is typed in at
# the prompt instead.
google_url = str(raw_input("Load Thy Cannon: "))

# Results pages to work through, seeded with the URL entered above.
google_links = [google_url]

def getGoogleLinks(xurl):
    """Collect the site-relative ``/search`` links found on a results page.

    Fetches ``xurl`` with the module-level ``hdr`` headers, parses the
    response with BeautifulSoup and appends the ``href`` of every ``<a>`` tag
    whose ``href`` starts with ``/search`` to the module-level
    ``google_links`` list.

    Args:
        xurl: URL of the page to fetch.

    Returns:
        The module-level ``google_links`` list itself (mutated in place), so
        entries that were already in it stay ahead of the new links.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` when the page
            cannot be fetched.
    """
    search_href = re.compile("^/search")
    req = urllib2.Request(xurl, headers=hdr)
    html_page = urllib2.urlopen(req)
    try:
        # BeautifulSoup reads the whole file-like object while constructing
        # the tree, so the response can be closed right afterwards.
        soup = BeautifulSoup(html_page, "html.parser")
    finally:
        # Release the connection even when parsing fails.
        html_page.close()

    for link in soup.findAll('a', attrs={'href': search_href}):
        google_links.append(link.get('href'))

    return google_links

google_links = getGoogleLinks(google_url)
# Keep only the links that contain 'rllag' (see the example URL below).
google_links[:] = [yurl for yurl in google_links if 'rllag' in yurl]
# Links scraped from the page are site-relative ('/search...') and need the
# Google origin in front; the URL typed by the user is already absolute.
# Deciding by the leading '/' instead of by list position keeps this correct
# when the user's URL was dropped by the filter above and a scraped link ends
# up at index 0.
google_links[:] = [
    'https://www.google.com' + yurl if yurl.startswith('/') else yurl
    for yurl in google_links
]
nn = len(google_links)
# Debug dump of the pages that will be processed.
print(111,google_links)
print("Lead Gen Script - V1.0")
print("----------------------\n")

# Sender address used for the From header and the SMTP login.
from_email = "racinecountyeyenews@gmail.com"
contact_limit=100 #CHANGE THIS TO THE DESIRED CONTACT LIMIT
# Example of a results URL:
#google_url = "https://www.google.com/search?rlz=1C1CHWL_en&q=construction%20companies%20in%20elkhorn%20wisconsin&npsic=0&rflfq=1&rlha=0&rllag=42679380,-88548678,3032&tbm=lcl&ved=2ahUKEwjTr6e99P_fAhVJmK0KHQF4DZQQjGp6BAgAEEg&tbs=lrf:!2m1!1e3!2m4!1e2!5m2!2m1!2e4!3sIAE,lf:1,lf_ui:2&rldoc=1&fll=0,0&fspn=0,NaN&fz=0&sll=0,0&sspn=0,NaN&sz=0&rlfi=hd:;si:&qop=0&rlvp=clear#qop=0&rlfi=hd:;si:&rlvp=clear"
print("")
print("----------------------")
# Number of e-mails sent so far, counted across all results pages and
# compared against contact_limit before each send.
j=0
for google_link in google_links:
    def getLinks(url):
        """Collect the external links found on one results page.

        Fetches ``url`` with the module-level ``hdr`` headers and appends to
        the module-level ``links`` list the ``href`` of every ``<a>`` tag
        whose ``href`` starts with ``http`` and does not contain ``google``,
        ``blogger`` or ``youtube``.

        Args:
            url: URL of the page to fetch.

        Returns:
            The module-level ``links`` list itself (mutated in place).

        Raises:
            urllib2.URLError: propagated from ``urllib2.urlopen`` when the
                page cannot be fetched.
        """
        req = urllib2.Request(url, headers=hdr)
        html_page = urllib2.urlopen(req)
        try:
            # The document is read completely while the tree is built, so the
            # response can be closed right afterwards.
            soup = BeautifulSoup(html_page, "html.parser")
        finally:
            # Release the connection even when parsing fails.
            html_page.close()

        excluded = ('google', 'blogger', 'youtube')
        for link in soup.findAll('a', attrs={'href': re.compile("^http")}):
            href = link.get('href')
            if not any(word in href for word in excluded):
                links.append(href)

        return links

    # Addresses found for this results page and, index-aligned with them, the
    # page each address was found on.
    mail_list = []
    url_list = []
    temp = getLinks(google_link)

    # Compiled once; the same pattern used to be searched three times per page.
    email_pattern = re.compile(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", re.I)
    # Upper bound on the pages fetched per site before giving up on it.
    max_pages_per_site = 20
    # Seconds to wait for a site before skipping the request; without a
    # timeout a single stalled server blocks the whole run.
    request_timeout = 30

    for starting_url in temp:
        try:
            unprocessed_urls = deque([starting_url])
            # Every URL ever queued for this site (pending or already
            # fetched); gives O(1) duplicate checks instead of scanning the
            # deque for each anchor.
            seen_urls = set([starting_url])
            i=0

            while unprocessed_urls:
                i=i+1

                url = unprocessed_urls.popleft()

                parts = urlsplit(url)
                base_url = "{0.scheme}://{0.netloc}".format(parts)
                # Directory of the current page, used to resolve relative
                # links. It is derived from the path component only, so a '/'
                # inside the query string cannot shift it, and a URL without
                # any path resolves against the site root instead of gluing
                # the link straight onto the host name.
                directory = parts.path[:parts.path.rfind('/') + 1] or '/'
                path = base_url + directory

                if i > max_pages_per_site:
                    break

                print("Crawling URL: %s" % url)
                try:
                    response = requests.get(url, timeout=request_timeout)
                except (requests.exceptions.MissingSchema,
                        requests.exceptions.ConnectionError,
                        requests.exceptions.Timeout):
                    continue

                emails = email_pattern.findall(response.text)
                print(str(emails))
                if emails:
                    # Debug marker kept so the console output is unchanged.
                    print(8)
                    # Only the first address on the page is recorded, then
                    # the crawl of this site stops.
                    mail_list.append(str(emails[0]))
                    url_list.append(url)
                    break

                # NOTE: the 'lxml' parser needs the lxml package installed.
                soup = BeautifulSoup(response.text, 'lxml')

                for anchor in soup.find_all("a"):
                    link = anchor.attrs.get("href", '')
                    if link.startswith('//'):
                        # Protocol-relative link: reuse the page's scheme.
                        link = parts.scheme + ':' + link
                    elif link.startswith('/'):
                        link = base_url + link
                    elif not link.startswith('http'):
                        link = path + link
                    if link not in seen_urls:
                        seen_urls.add(link)
                        unprocessed_urls.append(link)
        except SystemExit:
            # No visible code in the try body calls sys.exit (the limit
            # checks there are commented out); handler kept unchanged.
            print("Successfully exited.")
        except Exception as exc:
            # Best effort: a failure on one site must not stop the run, but
            # it is reported instead of being swallowed silently. Unlike a
            # bare ``except`` this lets Ctrl-C interrupt the script. repr()
            # keeps the message printable whatever the URL contains.
            print("Skipping %r after error: %r" % (starting_url, exc))


    def SendMail(to_email):
        #Authenticate to Gmail's SMTP Protocol
        #server = smtplib.SMTP('smtp.gmail.com', 587)
        #server.starttls()

        #Encrypt (Encode) Password to avoid plain-text exposure
        #server.login(("racinecountyeyenews@gmail.com"), ("RacineCountyEye123!"))

        # Create a text/plain message
        msg = MIMEMultipart('alternative')
        msg['Subject'] = 'Just reaching out to say hello, Alex from Racine County Eye News'
        msg['From'] = from_email
        msg['To'] = to_email

        # Attaching the html
        html = """\
<html><head></head><body><p>Hey there!<br /><br />Great website and I hope you guys are staying warm over there. This weather has truly been something fierce. I&rsquo;d just like to introduce myself and offer to be at service to you in any way I can.<br /><br />My name is Alex Ebinal and I work with Racine County Eye news. We are well known in the southeast wisconsin area and have been working with construction employers for over 20 years to solve the biggest problems they face as pain free as possible.<br /><br />I know you must get a lot of emails like this. That&rsquo;s why the last thing I want to do is be one more person that wastes your time. I would like to learn about your company and your biggest needs right now. If we have something that could be of use to you, I&rsquo;d love to be the one to solve that problem for you.<br /><br /><br /><strong><br /></strong>If you have a minute, which of the following problems has been most relevant right now in your business? If you&rsquo;re busy, please feel free to simply respond with a number. I will get back to you as soon as I can. 
Day or Night.<strong><br /></strong><strong><br /></strong><strong>1) Difficulty finding Skilled Workers</strong><strong><br /></strong><strong>2) High Turnover Costs</strong><strong><br /></strong><strong>3) Finding Effective Advertising</strong><br /><br /><br /><br /><i>Stay Warm!</i><br /><br /></p><p>Alex Ebinal, Customer Relations &amp; Business Development</p><p>Phone Number: (262) 770-5175</p><p>CEO Denise Lockwood: (262) 504-9570<br /><br /></p><p>Email: racinecountyeyenews@gmail.com</p><p>Website: <a href="https://racinecountyeye.com/">https://racinecountyeye.com</a></p><p><a href="https://jobs.racinecountyeye.com/">Southeast Wisconsin Job Board</a> | <a href="https://jobs.racinecountyeye.com/products">Post a Job</a><br /><br /></p><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" alt="RCE" width="102" height="87"></p></span></div></body></html>"""

        # The main body is just another attachment
        #body = email.mime.Text.MIMEText("""Test body...""")
        part = MIMEText(html, 'html')
        msg.attach(part)

        ###### [COMMENT THE FOLLOW TO NOT INCLUDE ATTACHMENT] ###
        #filename='More_Information.pdf'
        #fp=open(filename,'rb')
        #att = MIMEApplication(fp.read(),_subtype="pdf")
        #fp.close()
        #att.add_header('Content-Disposition','attachment',filename=filename)
        #msg.attach(att)
        ######

        #Login
        s = smtplib.SMTP('smtp.gmail.com')
        s.starttls()
        s.login(from_email,'RacineCountyEye123!')
        s.sendmail(from_email,[to_email], msg.as_string())
        s.quit()

    mail_list = [str(x) for x in mail_list]
    url_list = [str(y) for y in url_list]
    print(mail_list)
    print("\nEmail Addresses fetched:")
    for address in mail_list:
        print(address)
    print("")

    # Business2.txt logs every contacted address as "address:url" lines.
    # It is read once per results page instead of twice per address, the
    # handle is closed again, and a missing file (first run) counts as an
    # empty log instead of crashing the script.
    try:
        with open("Business2.txt") as sent_file:
            sent_log = sent_file.read()
    except IOError:
        sent_log = ""

    # mail_list and url_list are index-aligned, so walk them together rather
    # than looking every address up again with list.index().
    for address, source_url in zip(mail_list, url_list):
        if address in sent_log:
            # Substring match against the whole log, as before.
            print("Already sent to that email: "+str(address))
        elif j<contact_limit:
            # Written without a newline so "Sent!" lands on the same line;
            # flushed so the progress shows before the SMTP round trip.
            sys.stdout.write("Sending mail to "+address+"... ")
            sys.stdout.flush()
            SendMail(address)
            entry = address+":"+source_url+'\n'
            with open("Business2.txt", "a") as myfile:
                myfile.write(entry)
            # Keep the in-memory copy in step with the file so a repeated
            # address later in this batch is recognised.
            sent_log += entry
            sys.stdout.write("Sent!\n\n")
            j=j+1
        else:
            print(str(j)+" URLs - limit reached. Exiting...")
            sys.exit(0)
    # Reset the per-page state; getLinks() appends to the module-level
    # `links`, so it has to be emptied before the next results page.
    links=[]
    mail_list=[]
    url_list=[]
# Closing separator, printed once the loop over the results pages finishes.
print("----------------------")