Guest User

Untitled

a guest
Mar 8th, 2016
39
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.83 KB | None | 0 0
  1. #!/usr/bin/python
  2. # encoding=utf8
  3. import smtplib
  4. import time
  5. import sys
  6. from pageimports import page, page1, page3
  7.  
  8. reload(sys)
  9. sys.setdefaultencoding('utf8')
  10.  
  11.  
  12.  
  13.  
  14. data = []
  15. comparedata = []
  16. differentdata = []
  17.  
  18. def get_links(textmatter):
  19. textmatter = textmatter.encode('utf')
  20. start_deadmark = textmatter.find('class="deadmark"></span><a href="')
  21. startlink = textmatter.find('http',start_deadmark)
  22. endlink = textmatter.find('"', startlink)
  23.  
  24. #textwala
  25. starttext = textmatter.find('>', endlink)
  26. endtext = textmatter.find('<',starttext)
  27. textoutput = textmatter[starttext+1:endtext]
  28.  
  29. linkoutput = textmatter[startlink:endlink]
  30. return textoutput, linkoutput, endtext
  31.  
  32.  
  33. def scrape_all_data(textmatter,datalist):
  34. '''TEXTMATTER SHOULD BE STRING - DATALIST SHOULD BE LIST
  35. '''
  36. x = 0
  37. while x <=30:
  38. #if len(textmatter) >0:
  39. texto, linko, endtext = get_links(textmatter)
  40. textmatter = textmatter[endtext:]
  41. datalist.append([texto, linko])
  42. x +=1
  43.  
  44.  
  45.  
  46. #else:
  47. # print 'else break'
  48. # break
  49.  
  50.  
  51. def compare(textmatter, pagetest, data, comparedata, differentdata):
  52. if len(data) > 0:
  53. scrape_all_data(pagetest,comparedata)
  54. # comparisoin goes here
  55. # compare data with comparedate
  56. same = True
  57. for each in comparedata:
  58. milgaya = False
  59. for every in data:
  60. if each[0] in every:
  61. milgaya = True
  62. if milgaya == False:
  63. differentdata.append(each)
  64. same = False
  65. #print differentdata
  66. #print j # changed data from the comparedata for notification
  67. if same:
  68. comparedata = []
  69. print 'no new data on the website'
  70. # if everything is same then comparedata = []
  71. # if it is different
  72. else:
  73. a = ' '.join(differentdata[0])
  74. print a
  75. fromaddr = 'usefulemail70@gmail.com'
  76. toaddrs = 'abdussamad.m7@gmail.com' #enter your email address
  77. msg = """
  78. The website has been updated,
  79. Heading of news and link = %s
  80. """ % (''.join(a))
  81.  
  82. username = 'usefulemail70@gmail.com'
  83. password = 'myeasypassword'
  84.  
  85. server = smtplib.SMTP('smtp.gmail.com:587')
  86. server.starttls()
  87. server.login(username,password)
  88. server.sendmail(fromaddr,toaddrs,msg)
  89. server.quit()
  90. print 'new data on the website:'
  91. print '-----------------------'
  92. print differentdata
  93. data = comparedata
  94. differentdata = []
  95. comparedata = []
  96.  
  97. # then find the difference
  98. # then mail the difference
  99. # then data = comparedata
  100. # then comparedata = []
  101. else:
  102. scrape_all_data(textmatter,data)
  103. print 'data has been added for the first time'
  104.  
  105.  
  106. def timer(page,page1, data, comparedata, differentdata):
  107. min= 2
  108. while min>0:
  109. compare(page,page1, data, comparedata, differentdata)
  110. time.sleep(10) # every 10 secs just for testing later will make it 300 secs
  111. min-=1
  112.  
  113.  
  114. print timer(page,page1, data, comparedata, differentdata)
Add Comment
Please, Sign In to add comment