Advertisement
zymtom

Pastebin scraper

Dec 23rd, 2014
397
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.92 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """Pastebin Scraper
  4. Author: -zYMTOM'
  5. """
  6. import re
  7. import requests
  8. from bs4 import BeautifulSoup
  9. def in_array(array, compare0):
  10. exist = False
  11. for i in range(0, len(array)):
  12. if compare0[0] in array[i][0]:
  13. exist = True
  14. break
  15. return exist
  16. links = []
  17. r = requests.post("http://pastebin.com/archive")
  18. var = BeautifulSoup(r.text.encode('utf-8')).find("div", {"id": "content_left"})
  19. regex = re.compile('<a href="\/([A-Za-z]{1,9})">(.*?)<\/a>')
  20. reg = regex.findall(str(var))
  21. for captures in reg:
  22. if in_array(links, captures[0]) == False:
  23. links.insert(len(links)+1, [captures[0], captures[1]])
  24. print(captures[0])
  25.  
  26. for link in links:
  27. r = requests.post("http://pastebin.com/raw.php?i="+link[0])
  28. if not '<div class="content_title">This paste has been removed!</div>' in r.text:
  29. f = open('pastes/'+link[0]+'-'+link[1], 'w')
  30. f.write(r.text.encode('utf-8'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement