Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # encoding=utf8
- import smtplib
- import sys
- from pageimports import page, page1
# Python 2 only: make UTF-8 the interpreter-wide default encoding.
# sys.setdefaultencoding is removed from the sys module at startup, so the
# module has to be reloaded before it can be called.
reload(sys)
sys.setdefaultencoding('utf8')

# Shared state handed to compare():
#   data          - [text, link] pairs scraped on the first (baseline) pass
#   comparedata   - [text, link] pairs scraped on the comparison pass
#   differentdata - entries of comparedata whose text is missing from data
data, comparedata, differentdata = [], [], []
def get_links(textmatter):
    """Extract the first "deadmark" link from an HTML fragment.

    The fragment is searched for ``class="deadmark"></span><a href="``; the
    URL is the text from the following ``http`` up to the closing double
    quote, and the link text is whatever sits between the next ``>`` and the
    next ``<``.

    Args:
        textmatter: HTML as a byte string or a text string. Text input is
            encoded to UTF-8 first, so every offset is a byte offset.

    Returns:
        A ``(text, link, end)`` tuple of UTF-8 byte strings (plain ``str`` on
        Python 2) plus the byte offset of the ``<`` that ends the link text.
        Slicing the encoded input at ``end`` gives the unscanned remainder.
        When no complete link is present the result is
        ``('', '', len(encoded_input))``, so the remainder is empty.
    """
    # Encode only when needed: calling .encode() on a Python 2 byte string
    # triggers an implicit decode that depends on the default encoding.
    if not isinstance(textmatter, bytes):
        textmatter = textmatter.encode('utf-8')

    not_found = (b'', b'', len(textmatter))

    # Each find() returns -1 on failure; bail out instead of slicing with it.
    start_deadmark = textmatter.find(b'class="deadmark"></span><a href="')
    if start_deadmark == -1:
        return not_found

    startlink = textmatter.find(b'http', start_deadmark)
    if startlink == -1:
        return not_found

    endlink = textmatter.find(b'"', startlink)
    if endlink == -1:
        return not_found

    # The link text lives between the '>' closing the <a ...> tag and the
    # '<' that opens the next tag.
    starttext = textmatter.find(b'>', endlink)
    if starttext == -1:
        return not_found

    endtext = textmatter.find(b'<', starttext)
    if endtext == -1:
        return not_found

    textoutput = textmatter[starttext + 1:endtext]
    linkoutput = textmatter[startlink:endlink]
    return textoutput, linkoutput, endtext
def scrape_all_data(textmatter, datalist, max_links=31):
    """Append every "deadmark" link found in ``textmatter`` to ``datalist``.

    Links are pulled one at a time with ``get_links``; after each one the
    text is cut at the returned offset so the next call sees only the
    unscanned remainder.

    Args:
        textmatter: HTML to scan, as a byte string or a text string.
        datalist: list that receives one ``[text, link]`` pair per link.
            It is modified in place.
        max_links: upper bound on the number of links collected. The default
            of 31 matches the original fixed loop count.

    Returns:
        None.
    """
    # get_links reports byte offsets into the UTF-8 encoded text, so encode
    # once here to keep the slicing below consistent with those offsets.
    if not isinstance(textmatter, bytes):
        textmatter = textmatter.encode('utf-8')

    for _ in range(max_links):
        if not textmatter:
            break
        texto, linko, endtext = get_links(textmatter)
        # An empty link or a negative offset means nothing more was found;
        # stop here so no junk entries are appended.
        if endtext < 0 or not linko:
            break
        datalist.append([texto, linko])
        textmatter = textmatter[endtext:]
def _send_update_notification():
    """Send the fixed "website updated" notice through Gmail's SMTP server."""
    # SECURITY: the account password is hard-coded in the source. Anyone who
    # can read this file can use the account; move it to configuration or an
    # environment variable and rotate it.
    fromaddr = 'usefulemail70@gmail.com'
    toaddrs = 'me@aminmemon.com'
    msg = 'The website has been updated'
    username = 'usefulemail70@gmail.com'
    password = 'myeasypassword'

    server = smtplib.SMTP('smtp.gmail.com:587')
    try:
        server.starttls()
        server.login(username, password)
        server.sendmail(fromaddr, toaddrs, msg)
        server.quit()
    finally:
        # Always release the socket, even when one of the steps above
        # raised; close() is harmless after a successful quit().
        server.close()


def compare(textmatter, pagetest, data, comparedata, differentdata):
    """Record a baseline scrape, or compare a new scrape against it.

    When ``data`` is empty this call only scrapes ``textmatter`` into
    ``data``. Otherwise ``pagetest`` is scraped into ``comparedata`` and every
    entry whose link text does not appear in any entry of ``data`` is appended
    to ``differentdata``. If there is at least one such entry, a notification
    email is sent and ``differentdata`` is printed.

    Args:
        textmatter: HTML used for the baseline scrape.
        pagetest: HTML scraped when a baseline already exists.
        data: list of baseline ``[text, link]`` pairs; filled in place on the
            baseline call and left unchanged by comparisons.
        comparedata: list reset and refilled in place on each comparison.
        differentdata: list reset in place on each comparison and then filled
            with the entries that are new; left populated for the caller.

    Returns:
        None.

    Raises:
        smtplib.SMTPException or socket errors if the email cannot be sent.
    """
    if not data:
        # No baseline yet: this call only records one.
        scrape_all_data(textmatter, data)
        return

    # Empty the caller's lists in place so each comparison starts clean.
    # Rebinding the parameter names would leave the caller's lists untouched
    # and let entries pile up across calls.
    del comparedata[:]
    del differentdata[:]

    scrape_all_data(pagetest, comparedata)

    for each in comparedata:
        # each[0] is the link text; it counts as known when it equals any
        # element of any baseline entry.
        if not any(each[0] in every for every in data):
            differentdata.append(each)

    if differentdata:
        _send_update_notification()
        print(differentdata)
# Two passes with identical arguments: the first finds `data` empty and
# records the baseline from `page`; the second scrapes `page1` and compares
# it with that baseline.
for _ in range(2):
    compare(page, page1, data, comparedata, differentdata)

print('-------------------')
print(differentdata)
Add Comment
Please, Sign In to add comment