Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import praw
- import re
- from collections import Counter, defaultdict
# Reddit API credentials. The values below are placeholders and must be
# replaced with the credentials of your own Reddit app before running.
REDDIT_CLIENT_ID = "CLIENT_ID"
REDDIT_CLIENT_SECRET = "SECRET_ID"
REDDIT_USER_AGENT = "GameRankScraperBot/0.1 by YOUR_USERNAME"

# Module-level PRAW client; scrape_reddit_thread() fetches submissions through it.
_REDDIT_SETTINGS = {
    "client_id": REDDIT_CLIENT_ID,
    "client_secret": REDDIT_CLIENT_SECRET,
    "user_agent": REDDIT_USER_AGENT,
}
reddit = praw.Reddit(**_REDDIT_SETTINGS)
# One numbered list item per line: optional indent, a 1-2 digit rank, a dot,
# then the item text. Only horizontal whitespace is allowed around the rank so
# that an empty item ("1." on its own line) cannot swallow the following line,
# and the text must start with a non-blank character so empty names are skipped.
_RANKED_LINE_RE = re.compile(r'^[ \t]*(\d{1,2})\.[ \t]*(\S.*)', re.MULTILINE)


def extract_ranked_games(text):
    """Extract the entries of a numbered list ("1. Foo", "2. Bar", ...) from text.

    Args:
        text: Free-form text, e.g. the body of a comment.

    Returns:
        The item texts in order of appearance, stripped of surrounding
        whitespace. The rank numbers themselves are discarded. Returns an
        empty list when fewer than two items are found, so that a lone
        "1. something" line is not treated as a ranking.
    """
    ranked_games = [
        game.strip() for _rank, game in _RANKED_LINE_RE.findall(text)
    ]
    # Ignore single-item "lists".
    return ranked_games if len(ranked_games) >= 2 else []
# Mapping of aliases (left) to canonical game names (right).
# Lookups go through _ALIAS_INDEX, which ignores case and treats the
# typographic apostrophe (U+2019) like "'", so the case variants listed here
# are redundant; they are kept so the table remains a superset of the
# original one.
NORMALIZATION_MAP = {
    "BB": "Bloodborne",
    "Bb": "Bloodborne",
    "BloodBorne": "Bloodborne",
    "BLOODBORNE": "Bloodborne",
    "Sekiro: Shadows Die Twice": "Sekiro",
    "DS3": "Dark Souls 3",
    "Ds3": "Dark Souls 3",
    "Dark souls 3": "Dark Souls 3",
    "Dark Souls III": "Dark Souls 3",
    "dark souls 3": "Dark Souls 3",
    "DS2": "Dark Souls 2",
    "Ds2": "Dark Souls 2",
    "Dark souls 2": "Dark Souls 2",
    "Dark souls II": "Dark Souls 2",
    "Dark souls": "Dark Souls",
    "Dark Souls 1": "Dark Souls",
    "DS1": "Dark Souls",
    "Ds1": "Dark Souls",
    "Dark souls 1": "Dark Souls",
    "dark souls": "Dark Souls",
    "DS": "Dark Souls",
    # Curly-apostrophe spelling; written as an escape so the key cannot be
    # corrupted again by a bad re-encoding of this file.
    "Demon\u2019s Souls": "Demon's Souls",
    "Demon Souls": "Demon's Souls",
    "DeS": "Demon's Souls",
    "Demons souls": "Demon's Souls",
    "Demon souls": "Demon's Souls",
    "Demons Souls": "Demon's Souls",
    "Demons souls remake": "Demon's Souls",
    "Elden ring": "Elden Ring",
    "ER": "Elden Ring",
    "Elden RIng": "Elden Ring",
    "Elden ring (just a masterpiece overall)": "Elden Ring",
}


def _alias_key(name):
    """Return the comparison key for a name: trimmed, apostrophes unified, case-folded."""
    return name.strip().replace("\u2019", "'").casefold()


# Comparison key -> canonical name. Canonical names are indexed too, so that
# e.g. "bloodborne" resolves to "Bloodborne"; alias entries are merged last.
_ALIAS_INDEX = {
    **{_alias_key(canonical): canonical for canonical in NORMALIZATION_MAP.values()},
    **{_alias_key(alias): canonical for alias, canonical in NORMALIZATION_MAP.items()},
}


def normalize_game_name(name):
    """Map a game name or alias to its canonical title.

    Matching ignores surrounding whitespace, letter case, and the difference
    between straight and curly apostrophes.

    Args:
        name: Game name as written by a user.

    Returns:
        The canonical title when the name is a known alias or a case variant
        of a canonical title; otherwise the input with surrounding whitespace
        removed and nothing else changed.
    """
    cleaned = name.strip()
    return _ALIAS_INDEX.get(_alias_key(cleaned), cleaned)
def scrape_reddit_thread(thread_url):
    """Tally the ranked game lists posted in the comments of a Reddit thread.

    Every comment is scanned with extract_ranked_games(); each listed game is
    normalized with normalize_game_name() and credited with a position score
    of 10 for the first entry, 9 for the second, and so on, never less than 1.
    The 15 games with the highest total score are printed.

    Args:
        thread_url: URL of the Reddit submission to process.

    Returns:
        A dict with three entries:
        "position_scores": game name -> summed position score,
        "appearance_counts": game name -> number of list entries naming it,
        "all_rankings": one list of normalized names per comment that
        contained a ranking, in the order the comments were visited.
    """
    submission = reddit.submission(url=thread_url)
    # limit=None asks PRAW to resolve every "load more comments" placeholder
    # so that the whole comment tree is available below.
    submission.comments.replace_more(limit=None)

    all_rankings = []
    position_scores = defaultdict(int)
    appearance_counts = Counter()

    for comment in submission.comments.list():
        ranking = [
            normalize_game_name(game)
            for game in extract_ranked_games(comment.body)
        ]
        if not ranking:
            continue
        all_rankings.append(ranking)
        for position, game in enumerate(ranking):
            # The score comes from the position within the comment's list,
            # not from the number the commenter typed.
            position_scores[game] += max(10 - position, 1)
            appearance_counts[game] += 1

    print("Weighted scores (rank-sensitive):")
    top_games = sorted(
        position_scores.items(), key=lambda item: item[1], reverse=True
    )[:15]
    for game, score in top_games:
        mentions = appearance_counts[game]
        avg_score = score / mentions
        print(f"{game}: Total Score = {score}, Avg Score = {avg_score:.2f}, Mentions = {mentions}")

    return {
        "position_scores": position_scores,
        "appearance_counts": appearance_counts,
        "all_rankings": all_rankings,
    }
# URL of the Reddit thread to analyse; placeholder, substitute a real
# submission URL before running.
REDDIT_THREAD_URL = "THREAD_URL"

# Scrape only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    scrape_reddit_thread(thread_url=REDDIT_THREAD_URL)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement