import requests
from bs4 import BeautifulSoup
import os
import time
import re
from urllib.parse import urljoin


class PICO8Scraper:
    def __init__(self, output_dir="pico8_games"):
        self.base_url = "https://www.lexaloffle.com"
        self.output_dir = output_dir
        self.session = requests.Session()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Create output directory if it doesn't exist
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    def sanitize_filename(self, filename):
        """Remove invalid characters from filename"""
        return re.sub(r'[<>:"/\\|?*]', '', filename)

    def get_game_info(self, game_card):
        """Extract game information from a card div"""
        try:
            title = game_card.find('h3').text.strip()
            game_link = game_card.find('a', class_='nerdyButton')['href']
            author = game_card.find_all('a', class_='nerdyButton')[1].text.strip().replace('By: ', '')
            stars = game_card.find('h4').text.strip()
            return {
                'title': title,
                'game_link': urljoin(self.base_url, game_link),
                'author': author,
                'stars': stars
            }
        except Exception as e:
            print(f"Error parsing game card: {e}")
            return None

    def find_cart_file(self, game_url):
        """Find the .p8.png file URL from the game page"""
        try:
            response = self.session.get(game_url, headers=self.headers)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Look for links ending in .p8.png
            cart_link = soup.find('a', href=lambda x: x and x.endswith('.p8.png'))
            if cart_link:
                return urljoin(self.base_url, cart_link['href'])
            return None
        except Exception as e:
            print(f"Error finding cart file: {e}")
            return None

    def download_game(self, cart_url, game_title):
        """Download the game cart file"""
        try:
            response = self.session.get(cart_url, headers=self.headers, stream=True)
            response.raise_for_status()
            # Sanitize the filename and add the .p8.png extension
            filename = self.sanitize_filename(game_title) + '.p8.png'
            filepath = os.path.join(self.output_dir, filename)
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            return filepath
        except Exception as e:
            print(f"Error downloading game: {e}")
            return None

    def scrape_games(self, url):
        """Main function to scrape games from the provided URL"""
        try:
            response = self.session.get(url, headers=self.headers)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Find all game cards
            game_cards = soup.find_all('div', class_='card')
            for card in game_cards:
                game_info = self.get_game_info(card)
                if not game_info:
                    continue
                print(f"\nProcessing: {game_info['title']}")
                print(f"By: {game_info['author']}")
                print(f"Stars: {game_info['stars']}")
                # Find the cart file URL on the game's page
                cart_url = self.find_cart_file(game_info['game_link'])
                if not cart_url:
                    print(f"Could not find cart file for {game_info['title']}")
                    continue
                # Download the game
                filepath = self.download_game(cart_url, game_info['title'])
                if filepath:
                    print(f"Successfully downloaded: {filepath}")
                # Be nice to the server
                time.sleep(2)
        except Exception as e:
            print(f"Error scraping games: {e}")


# Usage example
if __name__ == "__main__":
    scraper = PICO8Scraper(output_dir="pico8_games")
    scraper.scrape_games("https://nerdyteachers.com/PICO-8/Games/Top200/")
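A note on running it: the script depends on the requests and beautifulsoup4 packages (pip install requests beautifulsoup4), and the selectors assume the Top 200 page still uses the card/nerdyButton markup parsed above. As a minimal sketch, the same class could also grab a single cart when you already know its Lexaloffle BBS page; the thread URL and output names below are hypothetical placeholders, not values taken from the script.

# Minimal single-cart sketch (run after the class definition, in place of the usage example above).
# Assumption: the BBS page exposes a direct .p8.png link, as find_cart_file() expects.
scraper = PICO8Scraper(output_dir="single_cart")
bbs_page = "https://www.lexaloffle.com/bbs/?tid=12345"  # hypothetical thread id
cart_url = scraper.find_cart_file(bbs_page)
if cart_url:
    print(scraper.download_game(cart_url, "example_cart"))
else:
    print("No .p8.png link found on that page")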