Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """Pastebin Scraper
- Author: -zYMTOM'
- """
- import re
- import requests
- from bs4 import BeautifulSoup
def in_array(array, compare0):
    """Return True if any entry of *array* has *compare0* as its first item.

    array    -- list of entries; each entry is indexable and entry[0] is
                the key compared against (the script stores
                [paste_id, title] pairs).
    compare0 -- the key to look for, compared for equality with entry[0].

    An empty *array* yields False.
    """
    # Compare the whole key for equality. Testing only its first character
    # with a substring check treated unrelated keys as duplicates whenever
    # that character appeared anywhere in an already-stored key.
    return any(entry[0] == compare0 for entry in array)
# Collected [paste_id, title] pairs, in the order they appear in the archive.
links = []

# Fetch the public archive listing. This is a plain page retrieval, so GET
# is the appropriate method.
archive_response = requests.get("http://pastebin.com/archive")

# Name the parser explicitly so bs4 does not pick one on its own (and warn).
content = BeautifulSoup(archive_response.text, "html.parser").find(
    "div", {"id": "content_left"})

# Raw string keeps the pattern free of invalid escape sequences. Digits are
# accepted in the id because paste ids are alphanumeric, not letters only.
paste_link = re.compile(r'<a href="/([A-Za-z0-9]{1,9})">(.*?)</a>')

# If the content div was not found, str(None) simply yields no matches.
for paste_id, title in paste_link.findall(str(content)):
    if not in_array(links, paste_id):
        links.append([paste_id, title])
        print(paste_id)

removed_marker = '<div class="content_title">This paste has been removed!</div>'

for paste_id, title in links:
    raw_response = requests.get("http://pastebin.com/raw.php?i=" + paste_id)
    if removed_marker in raw_response.text:
        continue
    # The title comes from the remote page; replace path separators so it
    # cannot point outside (or into a missing subdirectory of) 'pastes/'.
    safe_title = re.sub(r'[\\/]', '_', title)
    # Binary mode matches the encoded bytes being written, and the context
    # manager closes the file even if the write fails.
    # NOTE: the 'pastes/' directory must already exist.
    with open('pastes/' + paste_id + '-' + safe_title, 'wb') as out:
        out.write(raw_response.text.encode('utf-8'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement