Guest User

Untitled

a guest
Mar 8th, 2016
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.07 KB | None | 0 0
  1. #!/usr/bin/python
  2. # encoding=utf8
  3. import smtplib
  4. import urllib2
  5. import time
  6. import sys
  7. #from pageimports import page, page1
  8.  
  9. reload(sys)
  10. sys.setdefaultencoding('utf8')
  11.  
  12.  
  13.  
  14. req = urllib2.Request('https://news.ycombinator.com/news')
  15. response = urllib2.urlopen(req)
  16. page = response.read()
  17.  
  18. OFFLINE = True
  19.  
  20.  
  21.  
  22. data = []
  23. comparedata = []
  24. differentdata = []
  25.  
  26. def get_links(textmatter):
  27. textmatter = textmatter.encode('utf')
  28. start_deadmark = textmatter.find('class="deadmark"></span><a href="')
  29. startlink = textmatter.find('http',start_deadmark)
  30. endlink = textmatter.find('"', startlink)
  31.  
  32. #textwala
  33. starttext = textmatter.find('>', endlink)
  34. endtext = textmatter.find('<',starttext)
  35. textoutput = textmatter[starttext+1:endtext]
  36.  
  37. linkoutput = textmatter[startlink:endlink]
  38. return textoutput, linkoutput, endtext
  39.  
  40.  
  41. def scrape_all_data(textmatter,datalist):
  42. '''TEXTMATTER SHOULD BE STRING - DATALIST SHOULD BE LIST
  43. '''
  44. x = 0
  45. while x <=30:
  46. #if len(textmatter) >0:
  47. texto, linko, endtext = get_links(textmatter)
  48. textmatter = textmatter[endtext:]
  49. datalist.append([texto, linko])
  50. x +=1
  51.  
  52.  
  53.  
  54. #else:
  55. # print 'else break'
  56. # break
  57.  
  58.  
  59. def compare(textmatter, data, comparedata, differentdata):
  60. if len(data) > 0:
  61. scrape_all_data(textmatter,comparedata)
  62. # comparisoin goes here
  63. # compare data with comparedate
  64. same = True
  65. for each in comparedata:
  66. milgaya = False
  67. for every in data:
  68. if each[0] in every:
  69. milgaya = True
  70. if milgaya == False:
  71. differentdata.append(each)
  72. same = False
  73. #print differentdata
  74. #print j # changed data from the comparedata for notification
  75. if same:
  76. comparedata = []
  77. print 'no new data on the website'
  78. # if everything is same then comparedata = []
  79. # if it is different
  80. else:
  81. a = ' '.join(differentdata[0])
  82. #print a
  83. fromaddr = 'usefulemail70@gmail.com'
  84. toaddrs = 'abdussamad.m7@gmail.com' #enter your email address
  85. msg = """
  86. The website has been updated,
  87. Heading of news and link = %s
  88. """ % (''.join(a))
  89.  
  90. username = 'usefulemail70@gmail.com'
  91. password = 'myeasypassword'
  92.  
  93. server = smtplib.SMTP('smtp.gmail.com:587')
  94. server.starttls()
  95. server.login(username,password)
  96. server.sendmail(fromaddr,toaddrs,msg)
  97. server.quit()
  98. print 'new data on the website:'
  99. print '-----------------------'
  100. print differentdata
  101. data = comparedata
  102. differentdata = []
  103. comparedata = []
  104.  
  105. # then find the difference
  106. # then mail the difference
  107. # then data = comparedata
  108. # then comparedata = []
  109. else:
  110. scrape_all_data(textmatter,data)
  111. print 'data has been added for the first time'
  112. print data
  113.  
  114.  
  115. def timer(page, data, comparedata, differentdata):
  116. # min= 2
  117. while True:
  118. compare(page, data, comparedata, differentdata)
  119. time.sleep(0.1) # every 10 secs just for testing later will make it 300 secs
  120. # min-=1
  121.  
  122.  
  123. # if OFFLINE:
  124.  
  125. # else:
  126. timer(page, data, comparedata, differentdata)
  127.  
  128.  
  129. # Clean the code
  130. # Comment the code
  131. # Better Variable Naming
  132. # Put the code on GitHub.com
Add Comment
Please, Sign In to add comment