Advertisement
speeqz

Subreddit Member Count Graph

Sep 22nd, 2024 (edited)
158
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.57 KB | Software | 0 0
  1. # Pip
  2.  
  3. pip install requests beautifulsoup4 matplotlib
  4.  
  5. ---
  6. # Distro List
  7.  
  8. Ubuntu, archlinux, chromeos, linuxmint, debian, Fedora, ManjaroLinux, redhat, elementaryos, pop_os, CentOS, Gentoo, openSUSE, SolusProject, NixOS, slackware, MXLinux, CrunchBang, suse, bedrocklinux, AlpineLinux, BudgieRemix, voidlinux, EndeavourOS, GarudaLinux, NobaraProject, Bazzite, cachyos, SteamOS, GUIX, artixlinux, vanillaos, Kalilinux, BlendOS, puppylinux, OpenMandriva, Mageia, RockyLinux, Kubuntu, tuxedocomputers, ClearLinux, kde, zorinos, AlmaLinux, Qubes, xubuntu, Lubuntu, chimeralinux, tails, kdeneon
  9.  
  10. ---
  11. # script.py
  12.  
  13. import requests
  14. from bs4 import BeautifulSoup
  15. import matplotlib
  16. matplotlib.use('Agg')  # Set the backend to Agg
  17. import matplotlib.pyplot as plt
  18. from datetime import datetime
  19. import sys
  20. import shutil
  21.  
  22. def get_subreddit_info(subreddit, current_index, total_subreddits):
  23.     url = f"https://www.reddit.com/r/{subreddit}/"
  24.     headers = {'User-Agent': 'Mozilla/5.0'}
  25.    
  26.     response = requests.get(url, headers=headers)
  27.     if response.status_code != 200:
  28.         print_progress(f"Failed to fetch data for r/{subreddit}", current_index, total_subreddits)
  29.         return None, None
  30.    
  31.     soup = BeautifulSoup(response.text, 'html.parser')
  32.    
  33.     subscriber_element = soup.select_one('faceplate-number[pretty][number]')
  34.     name_element = soup.select_one('h1.flex.items-center.font-bold.text-18.xs\:text-32.mb-0')
  35.    
  36.     if subscriber_element and name_element:
  37.         subscriber_count = int(subscriber_element.get('number'))
  38.         subreddit_name = name_element.text.strip().split()[0]  # Take only the first word
  39.         print_progress(f"Successfully fetched data for r/{subreddit}", current_index, total_subreddits)
  40.         return subreddit_name, subscriber_count
  41.     else:
  42.         print_progress(f"Could not find required information for r/{subreddit}", current_index, total_subreddits)
  43.         return None, None
  44.  
  45. def print_progress(message, current, total):
  46.     terminal_width = shutil.get_terminal_size().columns
  47.     progress = f"{message} - Progress: {current}/{total} subreddits processed"
  48.     sys.stdout.write('\r' + ' ' * terminal_width)  # Clear the entire line
  49.     sys.stdout.write('\r' + progress)
  50.     sys.stdout.flush()
  51.     if current == total:
  52.         print()  # Move to the next line after processing all subreddits
  53.  
  54. def create_bar_graph(subreddits, counts, filename, title):
  55.     # Calculate the maximum count to determine the figure width
  56.     max_count = max(counts)
  57.    
  58.     # Adjust figure width based on the maximum count
  59.     fig_width = 15 + (len(str(max_count)) - 6) * 0.5  # Base width + extra space for larger numbers
  60.    
  61.     fig, ax = plt.subplots(figsize=(fig_width, len(subreddits) * 0.4))
  62.     bars = ax.barh(range(len(subreddits)), counts)
  63.    
  64.     current_year = datetime.now().year
  65.    
  66.     ax.set_title(f"{title} ({current_year})")
  67.     ax.set_xlabel("Number of Subscribers")
  68.     ax.set_ylabel("Subreddit")
  69.     ax.set_yticks(range(len(subreddits)))
  70.     ax.set_yticklabels(subreddits)
  71.    
  72.     # Invert the y-axis to have the largest at the top
  73.     ax.invert_yaxis()
  74.    
  75.     # Add value labels to the end of each bar
  76.     for i, (bar, count) in enumerate(zip(bars, counts)):
  77.         ax.text(bar.get_width(), bar.get_y() + bar.get_height()/2, f' {count:,}',
  78.                 ha='left', va='center')
  79.    
  80.     # Adjust the layout and add extra space on the right
  81.     plt.tight_layout()
  82.     plt.subplots_adjust(right=0.80)  # Increased right margin
  83.    
  84.     # Extend the x-axis limit to ensure all labels are visible
  85.     ax.set_xlim(0, max_count * 1.15)  # Extend x-axis by 15% beyond the maximum count
  86.    
  87.     plt.savefig(filename, dpi=300, bbox_inches='tight')
  88.     print(f"\nGraph saved as '{filename}'")
  89.     plt.close()  # Close the figure to free up memory
  90.  
  91. def main():
  92.     subreddits = input("Enter subreddit names separated by commas: ").split(',')
  93.     subreddits = [s.strip() for s in subreddits]
  94.    
  95.     top_x = input("Enter the number of top distros to graph separately (or press Enter to skip): ").strip()
  96.    
  97.     valid_subreddits = []
  98.     subscriber_counts = []
  99.    
  100.     total_subreddits = len(subreddits)
  101.     for index, subreddit in enumerate(subreddits, 1):
  102.         name, count = get_subreddit_info(subreddit, index, total_subreddits)
  103.         if name and count is not None:
  104.             valid_subreddits.append(name)
  105.             subscriber_counts.append(count)
  106.    
  107.     if valid_subreddits:
  108.         # Sort the data
  109.         sorted_data = sorted(zip(valid_subreddits, subscriber_counts), key=lambda x: x[1], reverse=True)
  110.         sorted_subreddits, sorted_counts = zip(*sorted_data)
  111.        
  112.         # Create graph for all distros
  113.         create_bar_graph(sorted_subreddits, sorted_counts, 'all_distros_subscribers.png', "All Distro Subreddits by User Count")
  114.        
  115.         # Create graph for top X distros if specified
  116.         if top_x:
  117.             try:
  118.                 x = int(top_x)
  119.                 if 0 < x <= len(sorted_subreddits):
  120.                     top_x_subreddits = sorted_subreddits[:x]
  121.                     top_x_counts = sorted_counts[:x]
  122.                     create_bar_graph(top_x_subreddits, top_x_counts, f'top_{x}_distros_subscribers.png', f"Top {x} Distro Subreddits by User Count")
  123.                 else:
  124.                     print(f"Invalid number. Please enter a number between 1 and {len(sorted_subreddits)}.")
  125.             except ValueError:
  126.                 print("Invalid input. Please enter a valid number.")
  127.     else:
  128.         print("No valid data to display.")
  129.  
  130. if __name__ == "__main__":
  131.     main()
Advertisement
Comments
  • speeqz
    1 day
    # Bash 0.35 KB | 0 0
    1. # Example Input/Output:
    2.  
    3. Enter subreddit names separated by commas: Ubuntu, archlinux, chromeos
    4. Enter the number of top distros to graph separately (or press Enter to skip): 2
    5. Successfully fetched data for r/chromeos - Progress: 3/3 subreddits processed
    6.  
    7. Graph saved as 'all_distros_subscribers.png'
    8.  
    9. Graph saved as 'top_2_distros_subscribers.png'
Add Comment
Please, Sign In to add comment
Advertisement