Advertisement
Guest User

Untitled

a guest
Nov 5th, 2024
315
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.31 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import os
  4. import time
  5. import re
  6. from urllib.parse import urljoin
  7.  
  class PICO8Scraper:
      """Download PICO-8 cartridges (``.p8.png``) linked from a game listing page.

      The scraper fetches a listing page, treats every ``div.card`` element as
      one game, follows the card's game link, looks for an ``<a>`` whose href
      ends in ``.p8.png`` on that page and saves the file into ``output_dir``.
      All methods report problems with ``print`` and signal failure by
      returning ``None`` instead of raising.
      """

      def __init__(self, output_dir="pico8_games", timeout=30, delay=2):
          """Create the HTTP session and make sure the output directory exists.

          Args:
              output_dir: Directory the downloaded cartridges are written to.
              timeout: Seconds to wait on each HTTP request. ``requests`` has
                  no default timeout, so without one a stalled server would
                  block the scraper forever.
              delay: Seconds to pause after each processed game.
          """
          self.base_url = "https://www.lexaloffle.com"
          self.output_dir = output_dir
          self.timeout = timeout
          self.delay = delay
          self.session = requests.Session()
          self.headers = {
              'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }

          # exist_ok avoids the race between checking for and creating the directory.
          os.makedirs(output_dir, exist_ok=True)

      def sanitize_filename(self, filename):
          """Return ``filename`` reduced to a safe, non-empty file name stem.

          Removes the characters ``< > : " / \\ | ? *`` and ASCII control
          characters, trims surrounding whitespace and trailing dots/spaces,
          and falls back to ``'untitled'`` when nothing is left (otherwise a
          title such as ``"???"`` would be saved as the bare extension).
          """
          cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', filename)
          cleaned = cleaned.strip().rstrip(' .')
          return cleaned or 'untitled'

      def get_game_info(self, game_card):
          """Extract title, link, author and star text from one card element.

          Args:
              game_card: A parsed element exposing ``find``/``find_all``
                  (a BeautifulSoup tag in ``scrape_games``).

          Returns:
              A dict with the keys ``title``, ``game_link``, ``author`` and
              ``stars``, or ``None`` when the card lacks an expected element.
              Relative game links are resolved against ``self.base_url``.
          """
          try:
              title = game_card.find('h3').text.strip()
              # First button carries the game link, second one the author.
              buttons = game_card.find_all('a', class_='nerdyButton')
              game_link = urljoin(self.base_url, buttons[0]['href'])
              author = buttons[1].text.strip().replace('By: ', '')
              stars = game_card.find('h4').text.strip()
          except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
              # Missing tag (None.text), missing button, or missing/odd href.
              print(f"Error parsing game card: {e}")
              return None

          return {
              'title': title,
              'game_link': game_link,
              'author': author,
              'stars': stars
          }

      def find_cart_file(self, game_url):
          """Return the absolute URL of the first ``.p8.png`` link on a game page.

          Args:
              game_url: URL of the game's page.

          Returns:
              The cartridge URL, or ``None`` when the page cannot be fetched or
              contains no link ending in ``.p8.png``.
          """
          try:
              response = self.session.get(
                  game_url, headers=self.headers, timeout=self.timeout
              )
              response.raise_for_status()
          except requests.RequestException as e:
              print(f"Error finding cart file: {e}")
              return None

          soup = BeautifulSoup(response.text, 'html.parser')

          # Look for links containing .p8.png
          cart_link = soup.find('a', href=lambda x: x and x.endswith('.p8.png'))
          if cart_link is None:
              return None
          # Resolve against the page that was actually served (after any
          # redirect) so relative hrefs work regardless of the host.
          return urljoin(response.url, cart_link['href'])

      def download_game(self, cart_url, game_title):
          """Download a cartridge and store it as ``<sanitized title>.p8.png``.

          The data is streamed into a ``.part`` file that is renamed only after
          the download completes, so an interrupted transfer never leaves a
          truncated cartridge under the final name.

          Args:
              cart_url: URL of the ``.p8.png`` file.
              game_title: Human-readable title used to build the file name.

          Returns:
              The path of the saved file, or ``None`` on a network or
              file-system error.
          """
          # Sanitize the filename and add .p8.png extension
          filename = self.sanitize_filename(game_title) + '.p8.png'
          filepath = os.path.join(self.output_dir, filename)
          partial_path = filepath + '.part'

          try:
              # The context manager releases the streamed connection even if
              # writing fails part-way through.
              with self.session.get(
                  cart_url, headers=self.headers, stream=True, timeout=self.timeout
              ) as response:
                  response.raise_for_status()
                  with open(partial_path, 'wb') as f:
                      for chunk in response.iter_content(chunk_size=8192):
                          if chunk:  # skip keep-alive chunks
                              f.write(chunk)
              os.replace(partial_path, filepath)
          except (requests.RequestException, OSError) as e:
              print(f"Error downloading game: {e}")
              try:
                  os.remove(partial_path)
              except OSError:
                  # Best-effort cleanup: the partial file may never have been created.
                  pass
              return None

          return filepath

      def _process_game(self, game_info):
          """Print a game's details, locate its cartridge and download it."""
          print(f"\nProcessing: {game_info['title']}")
          print(f"By: {game_info['author']}")
          print(f"Stars: {game_info['stars']}")

          # Find cart file URL
          cart_url = self.find_cart_file(game_info['game_link'])
          if not cart_url:
              print(f"Could not find cart file for {game_info['title']}")
              return

          # Download the game
          filepath = self.download_game(cart_url, game_info['title'])
          if filepath:
              print(f"Successfully downloaded: {filepath}")

      def scrape_games(self, url):
          """Scrape every game card on ``url`` and download its cartridge.

          Cards that cannot be parsed are skipped. Any unexpected error is
          printed and ends the run; this method does not raise.

          Args:
              url: URL of the listing page containing ``div.card`` elements.
          """
          try:
              response = self.session.get(
                  url, headers=self.headers, timeout=self.timeout
              )
              response.raise_for_status()
              soup = BeautifulSoup(response.text, 'html.parser')

              # Find all game cards
              for card in soup.find_all('div', class_='card'):
                  game_info = self.get_game_info(card)
                  if not game_info:
                      continue

                  self._process_game(game_info)

                  # Be nice to the server: pause after every game that cost a
                  # request, including those whose cartridge was not found.
                  time.sleep(self.delay)

          except Exception as e:
              # Top-level boundary: report and stop rather than crash the caller.
              print(f"Error scraping games: {e}")
  113.  
  # Usage example: download every cartridge linked from the Top 200 listing
  # into the local "pico8_games" directory.
  if __name__ == "__main__":
      listing_url = "https://nerdyteachers.com/PICO-8/Games/Top200/"
      scraper = PICO8Scraper(output_dir="pico8_games")
      scraper.scrape_games(listing_url)
  118.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement