Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# written by zeper56
# don't skid please >w<
# (idk why you would even skid, my code is DOGSHIT)
import json
import random
import time

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# Subreddit names (without the "r/" prefix) that the script scrapes;
# change this as you wish.
subs = ['webscraping']
def wait(ms):
    """Block the current thread for *ms* milliseconds.

    Args:
        ms: Delay in milliseconds (int or float). It is converted to
            seconds and handed to ``time.sleep``, so a negative value
            raises ``ValueError`` from ``time.sleep``.
    """
    time.sleep(ms / 1000.0)
def _text_or_none(node):
    """Return the text of a parsed element, or None when it is missing."""
    return node.get_text() if node is not None else None


def _parse_post(post):
    """Turn one ``.thing`` element into the dict stored in the output file."""
    title = post.select_one('.title')
    comments = post.select_one('.comments')
    comments_text = _text_or_none(comments)

    link = title.a if title is not None else None

    # The count is the first word of the comments label. When that word
    # does not start with a digit (e.g. a bare "comment" label) there is
    # no number to report, so fall back to '0' instead of storing the word.
    first_word = comments_text.split(' ')[0] if comments_text else ''
    comment_count = first_word if first_word[:1].isdigit() else '0'

    return {
        'title': _text_or_none(title),
        'url': link.get('href') if link is not None else None,
        'author': _text_or_none(post.select_one('.author')),
        'upvotes': _text_or_none(post.select_one('.score.unvoted')),
        'comments': comments_text,
        # Skip <img> tags without a src instead of raising KeyError.
        'imgLinks': [img['src'] for img in post.select('img') if img.get('src')],
        'commentCount': comment_count,
    }


def scrape(sub):
    """Scrape the listing pages of one subreddit into ``./{sub}.json``.

    Starts at ``https://old.reddit.com/r/{sub}/`` and keeps following the
    ``.next-button`` link until a page has none. After every page the
    output file is rewritten with all posts collected so far, so it always
    holds a single valid JSON array even if a later page fails.

    Any error (network, HTTP status, parsing, file write) is printed and
    ends the scrape of this subreddit; it is not re-raised, so a caller
    looping over several subreddits carries on with the next one.

    Args:
        sub: Subreddit name without the ``r/`` prefix.
    """
    url = f'https://old.reddit.com/r/{sub}/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1'
    }
    posts = []
    # The number of posts is unknown up front, so no total is given.
    pbar = tqdm(desc=f'Scraping {sub}', unit='post')
    try:
        while url:
            # A timeout keeps a stalled connection from hanging the script.
            res = requests.get(url, headers=headers, timeout=30)
            res.raise_for_status()
            soup = BeautifulSoup(res.text, 'html.parser')

            for post in soup.select('.thing'):
                posts.append(_parse_post(post))
                pbar.update(1)

            # Overwrite rather than append: `posts` already contains every
            # earlier page, so appending would duplicate entries and leave
            # several concatenated arrays (invalid JSON) in the file.
            with open(f'./{sub}.json', 'w', encoding='utf-8') as f:
                json.dump(posts, f, indent=2)
                f.write('\n')

            next_button = soup.select_one('.next-button a')
            url = next_button.get('href') if next_button is not None else None
            if url:
                # Randomized 5-8 s pause between page requests.
                wait(3000 + random.uniform(2000, 5000))
    except Exception as e:
        # Deliberate best-effort boundary: report and stop this subreddit.
        print(f"Error: {e}")
    finally:
        pbar.close()
def scrapesubs(sub_names=None):
    """Scrape several subreddits one after another.

    Args:
        sub_names: Iterable of subreddit names to pass to ``scrape``.
            When omitted (``None``), the module-level ``subs`` list is
            used, which is the original behavior.
    """
    if sub_names is None:
        sub_names = subs
    for sub in sub_names:
        scrape(sub)
# Start scraping only when the file is executed as a script, not on import.
if __name__ == "__main__":
    scrapesubs()
Advertisement
Add Comment
Please, Sign In to add comment