Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Pip
- pip install requests beautifulsoup4 matplotlib
- ---
- # Desktop Distro List
- Ubuntu, archlinux, chromeos, linuxmint, debian, Fedora, ManjaroLinux, redhat, elementaryos, pop_os, CentOS, Gentoo, openSUSE, SolusProject, NixOS, slackware, MXLinux, CrunchBang, suse, bedrocklinux, AlpineLinux, BudgieRemix, voidlinux, EndeavourOS, GarudaLinux, NobaraProject, Bazzite, cachyos, SteamOS, GUIX, artixlinux, vanillaos, Kalilinux, BlendOS, puppylinux, OpenMandriva, Mageia, RockyLinux, Kubuntu, tuxedocomputers, ClearLinux, zorinos, AlmaLinux, Qubes, xubuntu, Lubuntu, chimeralinux, tails, kdeneon, SpiralLinux, ParrotSecurity, BigLinux, FydeOS
- ---
- # script.py
- import requests
- from bs4 import BeautifulSoup
- import matplotlib
- matplotlib.use('Agg') # Set the backend to Agg
- import matplotlib.pyplot as plt
- from datetime import datetime
- import sys
- import shutil
def get_subreddit_info(subreddit, current_index, total_subreddits):
    """Scrape a subreddit's displayed name and subscriber count from its page.

    Args:
        subreddit: Subreddit name as typed by the user (without the ``r/`` prefix).
        current_index: 1-based position of this subreddit in the run; only
            forwarded to ``print_progress``.
        total_subreddits: Total number of subreddits in the run; only
            forwarded to ``print_progress``.

    Returns:
        ``(name, subscriber_count)`` on success, where ``name`` is the first
        word of the page heading and ``subscriber_count`` is an ``int``.
        ``(None, None)`` if the request fails, the response status is not
        200, or the expected elements are missing or malformed. Every outcome
        is reported through ``print_progress``.
    """
    request_timeout = 10  # seconds; requests waits indefinitely without one

    url = f"https://www.reddit.com/r/{subreddit}/"
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=request_timeout)
    except requests.RequestException:
        # A connection error or timeout for one subreddit must not abort the
        # whole run; treat it like any other failed fetch.
        response = None

    if response is None or response.status_code != 200:
        print_progress(f"Failed to fetch data for r/{subreddit}", current_index, total_subreddits)
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')
    subscriber_element = soup.select_one('faceplate-number[pretty][number]')
    # Raw string: the backslash escapes the ':' inside the CSS class name and
    # must reach the selector engine untouched.
    name_element = soup.select_one(r'h1.flex.items-center.font-bold.text-18.xs\:text-32.mb-0')

    not_found_message = f"Could not find required information for r/{subreddit}"
    if not (subscriber_element and name_element):
        print_progress(not_found_message, current_index, total_subreddits)
        return None, None

    try:
        subscriber_count = int(subscriber_element.get('number'))
        subreddit_name = name_element.text.strip().split()[0]  # Take only the first word
    except (ValueError, IndexError):
        # Non-numeric "number" attribute or an empty heading: the elements
        # exist but do not carry usable data.
        print_progress(not_found_message, current_index, total_subreddits)
        return None, None

    print_progress(f"Successfully fetched data for r/{subreddit}", current_index, total_subreddits)
    return subreddit_name, subscriber_count
def print_progress(message, current, total):
    """Redraw the one-line progress indicator on standard output.

    The line is first blanked with as many spaces as the terminal has
    columns, then overwritten with ``message`` followed by the
    ``current``/``total`` counter. A newline is emitted only once
    ``current`` equals ``total``.
    """
    columns = shutil.get_terminal_size().columns
    status_line = f"{message} - Progress: {current}/{total} subreddits processed"
    blank_line = ' ' * columns

    out = sys.stdout
    out.write(f"\r{blank_line}")  # Wipe whatever the previous call left behind
    out.write(f"\r{status_line}")
    out.flush()

    if current != total:
        return
    print()  # Final item: leave the progress line and start a fresh one
def create_bar_graph(subreddits, counts, filename, title):
    """Save a horizontal bar chart of subscriber counts as an image file.

    Bars are drawn in the order given, first entry at the top, each labelled
    with its count using thousands separators. The current year is appended
    to the title.

    Args:
        subreddits: Labels for the bars, one per entry in ``counts``.
        counts: Subscriber counts, in the same order as ``subreddits``.
        filename: Path of the image file to write.
        title: Chart title (the year is appended in parentheses).

    Raises:
        ValueError: If there is nothing to plot or the two sequences differ
            in length.
    """
    subreddits = list(subreddits)
    counts = list(counts)
    if not counts:
        raise ValueError("create_bar_graph() needs at least one subreddit to plot")
    if len(subreddits) != len(counts):
        raise ValueError(
            f"Got {len(subreddits)} subreddit names but {len(counts)} counts"
        )

    max_count = max(counts)
    # Base width + extra space for larger numbers (value labels get wider).
    fig_width = 15 + (len(str(max_count)) - 6) * 0.5
    # 0.4 inch per bar, but never so short that a one- or two-bar chart has
    # no room left for the title and axis labels.
    fig_height = max(len(subreddits) * 0.4, 2.0)

    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    try:
        positions = range(len(subreddits))
        bars = ax.barh(positions, counts)

        current_year = datetime.now().year
        ax.set_title(f"{title} ({current_year})")
        ax.set_xlabel("Number of Subscribers")
        ax.set_ylabel("Subreddit")
        ax.set_yticks(positions)
        ax.set_yticklabels(subreddits)

        # Invert the y-axis so the first (largest) entry is at the top.
        ax.invert_yaxis()

        # Add value labels to the end of each bar.
        for bar, count in zip(bars, counts):
            ax.text(bar.get_width(), bar.get_y() + bar.get_height() / 2, f' {count:,}',
                    ha='left', va='center')

        # Adjust the layout and add extra space on the right for the labels.
        fig.tight_layout()
        fig.subplots_adjust(right=0.80)

        # Extend the x-axis 15% beyond the maximum so every label is visible;
        # fall back to 1 when all counts are zero to avoid identical limits.
        ax.set_xlim(0, max_count * 1.15 if max_count > 0 else 1)

        fig.savefig(filename, dpi=300, bbox_inches='tight')
    finally:
        # Close this specific figure even if plotting or saving failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"\nGraph saved as '{filename}'")
def main():
    """Prompt for subreddits, fetch their subscriber counts, and save graphs.

    Reads a comma-separated list of subreddit names and an optional "top X"
    number from standard input, looks up each subreddit with
    ``get_subreddit_info``, and writes ``all_distros_subscribers.png`` with
    every subreddit that could be fetched, sorted by subscriber count in
    descending order. If a valid "top X" was given, a second graph
    ``top_{x}_distros_subscribers.png`` with only the first X entries is
    written as well.
    """
    raw_names = input("Enter subreddit names separated by commas: ").split(',')
    # Drop blank entries (empty input, trailing or doubled commas) so they are
    # neither requested as ".../r//" nor counted in the progress total.
    subreddits = [name.strip() for name in raw_names if name.strip()]
    top_x = input("Enter the number of top distros to graph separately (or press Enter to skip): ").strip()

    results = []
    total_subreddits = len(subreddits)
    for index, subreddit in enumerate(subreddits, 1):
        name, count = get_subreddit_info(subreddit, index, total_subreddits)
        if name and count is not None:
            results.append((name, count))

    if not results:
        print("No valid data to display.")
        return

    # Largest subscriber count first; ties keep their input order.
    results.sort(key=lambda item: item[1], reverse=True)
    sorted_subreddits, sorted_counts = zip(*results)

    # Create graph for all distros
    create_bar_graph(sorted_subreddits, sorted_counts, 'all_distros_subscribers.png', "All Distro Subreddits by User Count")

    # Create graph for top X distros if specified
    if not top_x:
        return
    try:
        x = int(top_x)
    except ValueError:
        print("Invalid input. Please enter a valid number.")
        return
    if not 0 < x <= len(sorted_subreddits):
        print(f"Invalid number. Please enter a number between 1 and {len(sorted_subreddits)}.")
        return
    create_bar_graph(sorted_subreddits[:x], sorted_counts[:x], f'top_{x}_distros_subscribers.png', f"Top {x} Distro Subreddits by User Count")
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Comments
-
- # Example Input/Output:
- Enter subreddit names separated by commas: Ubuntu, archlinux, chromeos
- Enter the number of top distros to graph separately (or press Enter to skip): 2
- Successfully fetched data for r/chromeos - Progress: 3/3 subreddits processed
- Graph saved as 'all_distros_subscribers.png'
- Graph saved as 'top_2_distros_subscribers.png'
Add Comment
Please, Sign In to add comment
Advertisement