import requests
from bs4 import BeautifulSoup
import os
import time
import re
from urllib.parse import urljoin


class PICO8Scraper:
    def __init__(self, output_dir="pico8_games"):
        self.base_url = "https://www.lexaloffle.com"
        self.output_dir = output_dir
        self.session = requests.Session()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Create output directory if it doesn't exist
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    def sanitize_filename(self, filename):
        """Remove invalid characters from filename"""
        return re.sub(r'[<>:"/\\|?*]', '', filename)

    def get_game_info(self, game_card):
        """Extract game information from a card div"""
        try:
            title = game_card.find('h3').text.strip()
            game_link = game_card.find('a', class_='nerdyButton')['href']
            author = game_card.find_all('a', class_='nerdyButton')[1].text.strip().replace('By: ', '')
            stars = game_card.find('h4').text.strip()
            return {
                'title': title,
                'game_link': urljoin(self.base_url, game_link),
                'author': author,
                'stars': stars
            }
        except Exception as e:
            print(f"Error parsing game card: {e}")
            return None

    def find_cart_file(self, game_url):
        """Find the .p8.png file URL from the game page"""
        try:
            response = self.session.get(game_url, headers=self.headers)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Look for links ending in .p8.png
            cart_link = soup.find('a', href=lambda x: x and x.endswith('.p8.png'))
            if cart_link:
                return urljoin(self.base_url, cart_link['href'])
            return None
        except Exception as e:
            print(f"Error finding cart file: {e}")
            return None

    def download_game(self, cart_url, game_title):
        """Download the game cart file"""
        try:
            response = self.session.get(cart_url, headers=self.headers, stream=True)
            response.raise_for_status()
            # Sanitize the filename and add the .p8.png extension
            filename = self.sanitize_filename(game_title) + '.p8.png'
            filepath = os.path.join(self.output_dir, filename)
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            return filepath
        except Exception as e:
            print(f"Error downloading game: {e}")
            return None

    def scrape_games(self, url):
        """Main function to scrape games from the provided URL"""
        try:
            response = self.session.get(url, headers=self.headers)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Find all game cards
            game_cards = soup.find_all('div', class_='card')
            for card in game_cards:
                game_info = self.get_game_info(card)
                if not game_info:
                    continue
                print(f"\nProcessing: {game_info['title']}")
                print(f"By: {game_info['author']}")
                print(f"Stars: {game_info['stars']}")
                # Find the cart file URL on the game's page
                cart_url = self.find_cart_file(game_info['game_link'])
                if not cart_url:
                    print(f"Could not find cart file for {game_info['title']}")
                    continue
                # Download the game
                filepath = self.download_game(cart_url, game_info['title'])
                if filepath:
                    print(f"Successfully downloaded: {filepath}")
                # Be nice to the server
                time.sleep(2)
        except Exception as e:
            print(f"Error scraping games: {e}")


# Usage example
if __name__ == "__main__":
    scraper = PICO8Scraper(output_dir="pico8_games")
    scraper.scrape_games("https://nerdyteachers.com/PICO-8/Games/Top200/")
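A note on running it: the script depends on the requests and beautifulsoup4 packages (pip install requests beautifulsoup4), and the selectors assume the Top 200 page still uses the card/nerdyButton markup parsed above. As a minimal sketch, the same class could also grab a single cart when you already know its Lexaloffle BBS page; the thread URL and output names below are hypothetical placeholders, not values taken from the script.

# Minimal single-cart sketch (run after the class definition, in place of the usage example above).
# Assumption: the BBS page exposes a direct .p8.png link, as find_cart_file() expects.
scraper = PICO8Scraper(output_dir="single_cart")
bbs_page = "https://www.lexaloffle.com/bbs/?tid=12345"  # hypothetical thread id
cart_url = scraper.find_cart_file(bbs_page)
if cart_url:
    print(scraper.download_game(cart_url, "example_cart"))
else:
    print("No .p8.png link found on that page")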