View difference between Paste ID: D37yiUzA and mKYS35rn
SHOW: | | - or go back to the newest paste.
1
import httplib
2
import re
3
import string
4
import sys
5
import smtplib
6
from email.mime.multipart import MIMEMultipart
7
from email.mime.text import MIMEText
8
from email.mime.application import MIMEApplication
9
import base64
10
import urllib2
11
import requests
12
import argparse
13
from urlparse import urlsplit
14
from collections import deque
15
from bs4 import BeautifulSoup
16
17
# Command-line interface: both options are mandatory.
parser = argparse.ArgumentParser(description='Lead Gen Script - V1.0')
parser.add_argument('-l', '--link', help='Google URL', required=True)
# type=int makes argparse reject a non-numeric limit immediately with a usage
# error, instead of failing with a ValueError traceback after the first page
# has already been fetched.  The later int(args['climit']) still works on an int.
parser.add_argument('-c', '--climit', help='Contact limit', required=True, type=int)
args = vars(parser.parse_args())

# Results page to start from.
google_url = str(args['link'])

# Module-level lists kept for the rest of the script.
google_links = []
links = []
26
27
def getGoogleLinks(xurl, headers=None):
    """Return the Google search hyperlinks found on the page at ``xurl``.

    Args:
        xurl: URL of the results page to download.
        headers: Optional dict of request headers.  When omitted, a
            browser-like default is used.

    Returns:
        A new list with the ``href`` of every ``<a>`` whose target starts with
        ``https://www.google.com/search``, in document order.

    Raises:
        urllib2.URLError: if the page cannot be fetched.
    """
    if headers is None:
        # The original read the global ``hdr`` here, but that name is first
        # assigned inside the main loop, after this function has already been
        # called, so the call failed with NameError.
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive',
        }

    req = urllib2.Request(xurl, headers=headers)
    html_page = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(html_page, "html.parser")
    finally:
        # Release the connection even if parsing fails.
        html_page.close()

    # Dots escaped so they match literally rather than any character.
    search_href = re.compile(r"^https://www\.google\.com/search")

    # Build a fresh list instead of appending to the module-level one, so a
    # second call cannot return the previous call's links as well.
    return [anchor.get('href')
            for anchor in soup.findAll('a', attrs={'href': search_href})]
36
37
# Fetch the result-page links first; nothing is printed until this returns.
google_links = getGoogleLinks(google_url)

banner_rule = "----------------------"
print("Lead Gen Script - V1.0")
print(banner_rule + "\n")

# Limit taken from the --climit option.
contact_limit = int(args['climit'])

print("")
print(banner_rule)

# Running count of start URLs visited, shared across all result pages.
j = 0
48
for google_link in google_links:
    # Browser-like request headers; getLinks() sends these with every fetch.
    hdr = {}
    hdr['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
    hdr['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    hdr['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
    hdr['Accept-Encoding'] = 'none'
    hdr['Accept-Language'] = 'en-US,en;q=0.8'
    hdr['Connection'] = 'keep-alive'
55
56
    def getLinks(url):
        """Return the ``http://`` hyperlinks found on the page at ``url``.

        The request is sent with the ``hdr`` headers defined just above.

        Args:
            url: Address of the page to download and scan.

        Returns:
            A new list with the ``href`` of every ``<a>`` whose target starts
            with ``http://``, in document order.

        Raises:
            urllib2.URLError: if the page cannot be fetched.
        """
        req = urllib2.Request(url, headers=hdr)
        html_page = urllib2.urlopen(req)
        try:
            soup = BeautifulSoup(html_page, "html.parser")
        finally:
            # Release the connection even if parsing fails.
            html_page.close()

        # NOTE(review): only plain-http targets match; https links are skipped.
        http_href = re.compile("^http://")

        # Return a fresh list.  The original appended to the module-level
        # ``links`` list, which was never cleared, so every results page
        # handed back (and re-crawled) all links from the pages before it.
        return [anchor.get('href')
                for anchor in soup.findAll('a', attrs={'href': http_href})]
65
66
    # Addresses found for this results page and the page each one was found
    # on.  The two lists are index-aligned.
    mail_list = []
    url_list = []
    temp = getLinks(google_link)

    # Compiled once; the original re-ran this pattern up to three times per page.
    email_pattern = re.compile(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", re.I)

    for starting_url in temp:
        j = j + 1

        if j >= contact_limit:
            # The original called sys.exit(0) here from inside a bare
            # ``except:``, which swallowed the SystemExit, so the script never
            # exited and went on to the mailing step.  That effective
            # behaviour is kept, but explicitly: stop crawling and fall
            # through.  The message is now printed once per results page
            # rather than once for every remaining start URL.
            print(str(j)+" URLs - limit reached. Exiting...")
            break

        try:
            # Breadth-first crawl of one site, at most 20 pages deep into the
            # queue, stopping at the first page that contains an address.
            unprocessed_urls = deque([starting_url])
            processed_urls = set()
            pages_visited = 0

            while unprocessed_urls:
                pages_visited += 1
                if pages_visited > 20:
                    break

                url = unprocessed_urls.popleft()
                processed_urls.add(url)

                # base_url resolves root-relative links ("/about"); path
                # resolves everything that is neither root-relative nor absolute.
                parts = urlsplit(url)
                base_url = "{0.scheme}://{0.netloc}".format(parts)
                path = url[:url.rfind('/')+1] if '/' in parts.path else url

                print("Crawling URL: %s" % url)
                try:
                    response = requests.get(url)
                except (requests.exceptions.MissingSchema, requests.exceptions.ConnectionError):
                    continue

                found = email_pattern.findall(response.text)
                print(str(found))
                if found:
                    # Keep only the first address on the page.  The original
                    # reached the same value by regex-parsing str(found).
                    mail_list.append(str(found[0]))
                    url_list.append(url)
                    break

                soup = BeautifulSoup(response.text, 'lxml')
                for anchor in soup.find_all("a"):
                    link = anchor.attrs.get("href", '')
                    if link.startswith('/'):
                        link = base_url + link
                    elif not link.startswith('http'):
                        link = path + link
                    if link not in unprocessed_urls and link not in processed_urls:
                        unprocessed_urls.append(link)
        except Exception as crawl_error:
            # Best effort: one bad site must not stop the run.  Catching
            # Exception rather than everything lets Ctrl-C and SystemExit
            # through, and the failure is reported instead of hidden.
            print("Skipping %s: %s" % (starting_url, crawl_error))
124
125
126
    def SendMail(to_email):
127
        #Authenticate to Gmail's SMTP Protocol
128
        #server = smtplib.SMTP('smtp.gmail.com', 587)
129
        #server.starttls()
130
131
        #Encrypt (Encode) Password to avoid plain-text exposure
132
        #server.login(("racinecountyeyenews@gmail.com"), ("RacineCountyEye123!"))
133
134
        # Create a text/plain message
135
        msg = MIMEMultipart('alternative')
136
        msg['Subject'] = 'Alex from Racine County Eye News, just reaching out to say hello'
137
        msg['From'] = 'racinecountyeyenews@gmail.com'
138-
        msg['To'] = '0xcoto@protonmail.com'
138+
        msg['To'] = to_email
139
140
        # Attaching the html
141
        html = """\
142
<html><head></head><body><div dir=3D"ltr"><span id=3D"gmail-docs-internal-guid-1bb3430c-7fff-13c1-95f1-f4eecfdbbf95"><p dir=3D"ltr" style=3D"line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Hey there!</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Great website. I can tell you and your team haveput a lot of work to get to where you are today. 
I'll be as briefas possible because I don't want to waste your time.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">We're reaching out to local employers in the construction industry because we know many are suffering losses due to the trouble of finding and retaining skilled workers. 
It's not a mystery that high turnover canbe a huge cost to business, and we've been working hard on this new solution to fix that problem.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;fontvariant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Starting on February 1st Racine County Eye News is sending out a newsletter directly to 50,000 job seekers in the Southeast Wisconsin Area. No other news organization is doing this and we have just obtained the right to do so.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"> This tool can be customized to fitthe exact type of job seeker that you're looking to hire through location and income.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">This means we can even help you recruit in areas close to competitors. Because we're launching it in February, we're looking to advertise one or two local construction companies in these newsletters. 
</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);backgroundcolor:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">To get straight to it, we are offering to place a 250 x 300 sized bannerpromoting your company in the newsletter sent to 50,000 job seekers for $250. Any number of months purchased in advance will be at the same price, even as that price increases in the future. Purchasing in advance will also ensure you retain your spot on the newsletter as space is so limited. 
</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Please feel free to ignore this message if it isn't of interest, and thank you for taking the time to read it. We just want to help thosewho are facing this problem to consider taking advantage of this easy solution. If this is of interest to you, feel free to send an email back or give me a call on my personal line. 
The sign up process is very easy and can be done either through the phone or directly on our website </span><a href="https://jobs.racinecountyeye.com/payments/new?p=3D78c8cfa5-bf72-4075-a62f-29fb0d55fa06" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:Arial;background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">here</span></a><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">.</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Thank you,</span><span style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span 
style=3D"font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Alex, Customer Relations &amp; Marketing Representative</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;verticalalign:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Phone Number: </span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">(262) 770-5175</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">CEO Denise Lockwood: </span><spanstyle=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">(262) 504-9570</span><span 
style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Email: </span><a href="mailto:Racinecountyeyenews@gmail.com" style=3D"text-decoration-line:none"><span style=3D"fontsize:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;text-decoration-line:underline;vertical-align:baseline;white-space:prewrap">racinecountyeyenews@gmail.com</span></a></p><p dir=3D"ltr" style=3D"line-height:1.38;margin-top:0pt;margin-bottom:0pt"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">Website: </span><a href="https://racinecountyeye.com" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap">https://racinecountyeye.com</span></a></p><p dir=3D"ltr" style=3D"line-height:1.38;margin-top:0pt;margin-bottom:0pt"><a href="https://jobs.racinecountyeye.com/" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;text-decoration-line:underline;vertical-align:baseline;white-space:pre-wrap">Southeast Wisconsin Job Board</span></a><span 
style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"> | </span><span style=3D"font-size:11pt;font-family:&quot;TimesNew Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"> </span><a href="https://jobs.racinecountyeye.com/products" style=3D"text-decoration-line:none"><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;background-color:transparent;font-weight:700;font-variant-numeric:normal;font-variant-east-asian:normal;text-decoration-line:underline;vertical-align:baseline;white-space:pre-wrap">Post a Job</span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br></span><span style=3D"font-size:11pt;font-family:&quot;Times New Roman&quot;;color:rgb(0,0,0);background-color:transparent;font-variant-numeric:normal;font-variant-east-asian:normal;vertical-align:baseline;white-space:pre-wrap"><br><br></span></a><img src="https://lh4.googleusercontent.com/cMe1UvyguVslPKkrgYBraniFqOc-gKu5MHjZG_7VMMPJuVk82drGPjIWtnl2ruJDTwiA1zESoJZZFZu_MINoso7VRZLhMieVxwBbIyH_HOhC5h_nmGbsNcgTt1XwcKNAjhkNp0k" width="102" height="87" style="border: none; transform: rotate(0rad);"></p></span></div></body></html>"""
143
144
        # The main body is just another attachment
145
        #body = email.mime.Text.MIMEText("""Test body...""")
146
        part = MIMEText(html, 'html')
147
        msg.attach(part)
148
149
        ###### [COMMENT THE FOLLOW TO NOT INCLUDE ATTACHMENT] ###
150
        filename='More_Information.pdf'
151
        fp=open(filename,'rb')
152
        att = MIMEApplication(fp.read(),_subtype="pdf")
153
        fp.close()
154
        att.add_header('Content-Disposition','attachment',filename=filename)
155
        msg.attach(att)
156
        ######
157
158
        #Login
159
        s = smtplib.SMTP('smtp.gmail.com')
160
        s.starttls()
161
        s.login('racinecountyeyenews@gmail.com','RacineCountyEye123!')
162-
        s.sendmail('racinecountyeyenews@gmail.com',['0xcoto@protonmail.com'], msg.as_string())
162+
        s.sendmail('racinecountyeyenews@gmail.com',[to_email], msg.as_string())
163
        s.quit()
164
165
    # Normalise both lists to plain strings before printing and logging.
    mail_list = [str(x) for x in mail_list]
    url_list = [str(y) for y in url_list]
    print(mail_list)
    print("\nEmail Addresses fetched:")
    for address in mail_list:
        print(address)
    print("")

    # Load the addresses already mailed once, instead of re-reading the log
    # for every address.  Each log line is "address:url", so the address is
    # everything before the first colon.  Comparing whole addresses avoids
    # the original's substring match against the entire file, which also hit
    # longer addresses and URLs that merely contained the text.
    already_sent = set()
    try:
        with open("already_sent.txt") as sent_log:
            for entry in sent_log:
                already_sent.add(entry.split(":", 1)[0].strip())
    except IOError:
        # No log yet (first run): nothing has been sent.
        pass

    # zip pairs each address with the page it was found on.  The original
    # used mail_list.index(address), which returns the first occurrence and
    # so logged the wrong URL for a repeated address.
    for address, found_at in zip(mail_list, url_list):
        if address in already_sent:
            continue
        # Written without a newline so "Sent!" lands on the same line.
        sys.stdout.write("Sending mail to " + address + "... ")
        sys.stdout.flush()
        SendMail(address)
        with open("already_sent.txt", "a") as myfile:
            myfile.write(address + ":" + found_at + '\n')
        already_sent.add(address)
        # Printed only after a real send; the original also printed it for
        # addresses it had skipped.
        print("Sent!\n")
179
# Closing banner, printed once every results page has been processed.
for closing_line in ("----------------------", "Script finished successfully."):
    print(closing_line)