# Soulsborne ranking

import praw
import re
from collections import Counter, defaultdict

# Reddit API credentials. The values below are placeholders: replace them with
# the client ID, client secret and user agent of your own Reddit app.
REDDIT_CLIENT_ID = 'CLIENT_ID'
REDDIT_CLIENT_SECRET = 'SECRET_ID'
REDDIT_USER_AGENT = 'GameRankScraperBot/0.1 by YOUR_USERNAME'

# Module-level PRAW client built from the credentials above; it is the object
# scrape_reddit_thread() uses to fetch a submission.
# NOTE(review): no username/password is passed, so this presumably yields a
# read-only client -- confirm against the PRAW documentation.
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT
)

# One numbered list item per line: optional indentation, a 1-2 digit number,
# a dot, then the item text. Only horizontal whitespace is skipped around the
# number so that an empty item ("1.") cannot swallow the following line.
_RANKED_LINE_RE = re.compile(r'^[ \t]*(\d{1,2})\.[ \t]*(.+)', re.MULTILINE)


# Function to extract ranked game lists from text
def extract_ranked_games(text):
    """Return the item names of the numbered list found in *text*.

    A list item is a line of the form ``<number>. <name>`` where the number
    has one or two digits. Items are returned in the order they appear in
    the text; the number itself is not used for ordering.

    Args:
        text: Raw comment text, possibly spanning several lines.

    Returns:
        A list of stripped item names, or an empty list when fewer than two
        non-empty items are found (a single numbered line is not treated as
        a ranking).
    """
    names = (match.group(2).strip() for match in _RANKED_LINE_RE.finditer(text))
    # Items that consist of whitespace only carry no game name; drop them.
    ranked_games = [name for name in names if name]
    return ranked_games if len(ranked_games) >= 2 else []  # ignore single-item "lists"

# Mapping of aliases (keys) to the canonical game name (values) under which
# scores are aggregated.
NORMALIZATION_MAP = {
    "BB": "Bloodborne",
    "Bb": "Bloodborne",
    "BloodBorne": "Bloodborne",
    "BLOODBORNE": "Bloodborne",

    "Sekiro: Shadows Die Twice": "Sekiro",

    "DS3": "Dark Souls 3",
    "Ds3": "Dark Souls 3",
    "Dark souls 3": "Dark Souls 3",
    "Dark Souls III": "Dark Souls 3",
    "dark souls 3": "Dark Souls 3",

    "DS2": "Dark Souls 2",
    "Ds2": "Dark Souls 2",
    "Dark souls 2": "Dark Souls 2",
    "Dark souls II": "Dark Souls 2",

    "Dark souls": "Dark Souls",
    "Dark Souls 1": "Dark Souls",
    "DS1": "Dark Souls",
    "Ds1": "Dark Souls",
    "Dark souls 1": "Dark Souls",
    "dark souls": "Dark Souls",
    "DS": "Dark Souls",

    "Demon’s Souls": "Demon's Souls",
    "Demon Souls": "Demon's Souls",
    "DeS": "Demon's Souls",
    "Demons souls": "Demon's Souls",
    "Demon souls": "Demon's Souls",
    "Demons Souls": "Demon's Souls",
    "Demons souls remake": "Demon's Souls",

    "Elden ring": "Elden Ring",
    "ER": "Elden Ring",
    "Elden RIng": "Elden Ring",
    "Elden ring (just a masterpiece overall)": "Elden Ring",

}


def _alias_key(name):
    """Return the case- and spacing-insensitive lookup key for *name*."""
    return " ".join(name.split()).casefold()


# Case-insensitive view of NORMALIZATION_MAP. Canonical names are included so
# that e.g. "bloodborne" resolves to "Bloodborne"; explicit aliases are merged
# last and therefore win over a canonical name with the same key.
_CANONICAL_BY_KEY = {
    **{_alias_key(canonical): canonical for canonical in NORMALIZATION_MAP.values()},
    **{_alias_key(alias): canonical for alias, canonical in NORMALIZATION_MAP.items()},
}


def normalize_game_name(name):
    """Map a raw game name to its canonical form.

    The name is stripped and looked up in NORMALIZATION_MAP exactly; if that
    fails, a second lookup ignores letter case and runs of whitespace, so
    spellings such as "elden ring" or "DARK SOULS 3" need no entry of their
    own.

    Args:
        name: Game name as written in a comment.

    Returns:
        The canonical name when the input is a known alias or canonical
        name, otherwise the stripped input unchanged.
    """
    stripped = name.strip()
    # Exact matches keep their existing behavior regardless of the fallback.
    if stripped in NORMALIZATION_MAP:
        return NORMALIZATION_MAP[stripped]
    return _CANONICAL_BY_KEY.get(_alias_key(stripped), stripped)

# Main function to process the Reddit thread
def scrape_reddit_thread(thread_url):
    """Score every game that appears in numbered lists within a Reddit thread.

    Each comment of the thread is scanned with extract_ranked_games(); every
    game found is normalized with normalize_game_name() and awarded points
    according to its position in that comment's list. The 15 highest-scoring
    games are printed to stdout.

    Args:
        thread_url: URL of the Reddit submission to scrape.

    Returns:
        A dict with three entries:
            "position_scores": game name -> summed position score,
            "appearance_counts": game name -> number of list entries,
            "all_rankings": one list of normalized game names per comment
                that contained a ranking, in comment order.
    """
    submission = reddit.submission(url=thread_url)
    # Resolve all "more comments" placeholders so the full tree is scanned.
    submission.comments.replace_more(limit=None)

    all_rankings = []
    position_scores = defaultdict(int)
    appearance_counts = Counter()

    for comment in submission.comments.list():
        ranking = [normalize_game_name(game) for game in extract_ranked_games(comment.body)]
        if not ranking:
            continue
        all_rankings.append(ranking)
        for position, game in enumerate(ranking):
            # 10 points for first place, one fewer per place, never below 1.
            position_scores[game] += max(10 - position, 1)
            appearance_counts[game] += 1

    print("Weighted scores (rank-sensitive):")
    top_games = sorted(position_scores.items(), key=lambda item: item[1], reverse=True)[:15]
    for game, score in top_games:
        mentions = appearance_counts[game]
        avg_score = score / mentions
        print(f"{game}: Total Score = {score}, Avg Score = {avg_score:.2f}, Mentions = {mentions}")

    return {
        "position_scores": position_scores,
        "appearance_counts": appearance_counts,
        "all_rankings": all_rankings
    }

# URL of the Reddit thread to scrape. The value below is a placeholder:
# replace it with the URL of your Reddit thread before running.
REDDIT_THREAD_URL = 'THREAD_URL'

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_reddit_thread(REDDIT_THREAD_URL)