Guest User

Untitled

a guest
Aug 22nd, 2024
224
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.89 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. from collections import defaultdict
  4. import time
  5.  
  6. def fetch_reviews_html(app_id, cursor):
  7.     url = f"https://store.steampowered.com/app/{app_id}/reviews"
  8.     params = {
  9.         'p': cursor,
  10.         'filter': 'all',
  11.         'language': 'all',
  12.     }
  13.  
  14.     response = requests.get(url, params=params)
  15.     if response.status_code == 200:
  16.         return response.text
  17.     else:
  18.         print(f"Error fetching reviews: {response.status_code}")
  19.         return None
  20.  
  21. def scrape_reviews_by_language(app_id, review_limit=1000):
  22.     language_count = defaultdict(int)
  23.     total_reviews = 0
  24.     cursor = 1
  25.  
  26.     while total_reviews < review_limit:
  27.         html_content = fetch_reviews_html(app_id, cursor)
  28.         if not html_content:
  29.             break
  30.  
  31.         soup = BeautifulSoup(html_content, 'html.parser')
  32.         reviews = soup.find_all('div', class_='review_box')
  33.  
  34.         if not reviews:
  35.             break
  36.  
  37.         for review in reviews:
  38.             if total_reviews >= review_limit:
  39.                 break
  40.             language = review['data-language'] if 'data-language' in review.attrs else 'unknown'
  41.             language_count[language] += 1
  42.             total_reviews += 1
  43.  
  44.         cursor += 1
  45.         time.sleep(1)
  46.  
  47.     return language_count, total_reviews
  48.  
  49. def print_language_distribution(language_count, total_reviews):
  50.     print(f"Total Reviews Processed: {total_reviews}")
  51.     print("Language Distribution:")
  52.     for language, count in language_count.items():
  53.         percentage = (count / total_reviews) * 100
  54.         print(f"{language}: {count} ({percentage:.2f}%)")
  55.  
  56. if __name__ == "__main__":
  57.     APP_ID = '2358720'  # Any game ID
  58.     language_count, total_reviews = scrape_reviews_by_language(APP_ID, review_limit=1000) # without limit would take dozens of hours
  59.     print_language_distribution(language_count, total_reviews)
  60.  
Advertisement
Add Comment
Please, Sign In to add comment