"""Scrape the pastebin.com archive and save pastes that match a list of
search words (and none of the exclusion words) into store.html, with a
running index written to index.html."""

import urllib2                     # download web pages
import BeautifulSoup               # parse HTML (BeautifulSoup 3)
import re                          # regex to match paste URLs
import time                        # time.sleep() between downloads
from time import gmtime, strftime  # timestamps for the index entries
import sys                         # sys.stdout.write()

archive_url = "http://pastebin.com/archive"

# Maximum length of a saved post in characters.
# Huge posts tend to be irrelevant.
max_length = 100000

# Seconds to wait between each download. If you set this to 0, you
# will be banned by Pastebin. The default has been tested.
wait_secs = 2

# Define search words here!
search_list = ['@hotmail.fr',
               '@aol.com',
               '@yahoo.com',
               '@gmail.com',
               '@hotmail.com',
               '@hotmail.fr:',
               '@aol.com:',
               '@yahoo.com:',
               '@gmail.com:',
               '@hotmail.com:',
               '@outlook.com:',
               '@outlook.fr:',
               '@live.fr:',
               '@live.be:',
               'http://members.',
               'CharExtractMinHeight=',
               '|2021|',
               '|2020|',
               '|2019|',
               '|201',
               '|2017|',
               '|2016|',
               '|2015|',
               '| oneClick: ',
               'IBAN:',
               '[Wordlist]',
               '[Settings] SiteURL=',
               '[Wordlist] UserIndex=',
               'https://membres.',
               'http://www.starpass.fr',
               'https://www.paypal.com/fr/cgi-bin/webscr',
               'https://wifi.free.fr/',
               'https://store.playstation.com',
               'https://login.live.com',
               'https://www.amazon.',
               'https://signin.ea.com',
               'http://www.t411.ch',
               'https://www.paypal.com/fr/webapps/mpp/home',
               'https://dossier.admission-postbac.fr/Postbac/authentification']

# Define excluded words here!
excl_list = ['video',
             '.mkv',
             '.wmv',
             '.avi',
             '.mp4',
             'error report',
             'system information',
             'debug',
             'log',
             'FAQ',
             'using',       # filtering source code
             'import',
             'include',
             'static',
             'array',
             'function',
             'class',
             'define',
             'git',
             '<head>',      # filtering complete HTML files
             'script',
             'CloudFlare',  # some people post CloudFlare error pages
             'Technic']     # dude using Pastebin as an error log


class color:
    red = '\033[31m'
    green = '\033[92m'
    reset = '\033[0m'


def download_page(dl_url):
    """Return the body of dl_url, or 0 if the download failed."""
    try:
        response = urllib2.urlopen(dl_url)
        text = response.read()
    except urllib2.URLError:
        sys.stdout.write("Skipping %s\n" % dl_url)
        text = 0
    return text


def test_for_relevance(content):
    """Return the matched search words; empty on exclusion or overlength."""
    keyword_list = []

    if content != 0 and len(content) < max_length:
        for search_word in search_list:
            if search_word in content:
                keyword_list.append(search_word)

        # keyword_list is returned empty if an excl_word is seen
        for excl_word in excl_list:
            if excl_word in content:
                keyword_list = []
    return keyword_list


def save_as_file(name, content):
    """Append matching pastes to store.html and log them in index.html."""
    sys.stdout.write("Testing pastebin.com/%s:" % name)
    keyword_list = test_for_relevance(content)

    if len(keyword_list) > 0:
        sys.stdout.write(color.green + "\t match" + color.reset + "\n")

        cur_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())

        index = open("index.html", "a")
        index.write("{tm}&nbsp;-&nbsp;<A HREF='store.html#{nm}'>"
                    "{nm}</A>&nbsp;-&nbsp;matched: {kw}<br>"
                    .format(tm=cur_time, nm=name, kw=keyword_list))
        index.close()

        store = open("store.html", "a")
        store.write("<A NAME='{nm}'><br><pre>{ct}</pre><br><hr>"
                    .format(nm=name, ct=content))
        store.close()
    else:
        sys.stdout.write(color.red + "\t no match" + color.reset + "\n")
    time.sleep(wait_secs)


def download_all_urls(url_list):
    for url in url_list:
        page = download_page("http://pastebin.com/raw.php?i=%s" % url)
        save_as_file(url, page)


def extract_urls():
    """Return the stripped 8-char paste IDs linked from the archive page."""
    sys.stdout.write("Extracting archive-URLs:")

    request = urllib2.Request(archive_url)
    response = urllib2.urlopen(request)

    soup = BeautifulSoup.BeautifulSoup(response)
    links = soup.findAll('a', href=re.compile(r'^/([A-Za-z0-9]{8})'))

    result = []
    for link in links:
        if 'settings' not in link['href'] and 'languages' not in link['href']:
            # Drop the leading '/' to keep only the paste ID.
            result.append(link['href'].encode('ascii')[1:])
    sys.stdout.write(color.green + "\t success" + color.reset + "\n")
    return result


def main():
    url_list = extract_urls()
    download_all_urls(url_list)


if __name__ == "__main__":
    main()
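The script targets Python 2 with the old BeautifulSoup 3 module, both long end-of-life. As a rough sketch only, here is how the two network helpers could look on Python 3 with the beautifulsoup4 package (an assumption on my part: the script dates from 2016 and the Pastebin URLs copied below may no longer work; failure is signalled with None rather than 0):

# Hypothetical Python 3 port of download_page() and extract_urls().
# Assumes beautifulsoup4 is installed (pip install beautifulsoup4).
import re
import urllib.error
import urllib.request

from bs4 import BeautifulSoup

ARCHIVE_URL = "http://pastebin.com/archive"


def extract_urls():
    """Return the 8-char paste IDs linked from the archive page."""
    with urllib.request.urlopen(ARCHIVE_URL) as response:
        soup = BeautifulSoup(response.read(), "html.parser")

    result = []
    for link in soup.find_all("a", href=re.compile(r"^/([A-Za-z0-9]{8})")):
        href = link["href"]
        if "settings" not in href and "languages" not in href:
            result.append(href[1:])  # drop the leading '/'
    return result


def download_page(paste_id):
    """Fetch the raw text of one paste, or None on failure."""
    url = "http://pastebin.com/raw.php?i=%s" % paste_id
    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode("utf-8", errors="replace")
    except urllib.error.URLError:
        return None

The rest of the script (keyword matching, the index.html and store.html writers) is plain string handling and should carry over with little more than print/bytes adjustments, as long as callers check for None instead of 0.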