Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import json
- import requests
- import pandas as pd
- from bs4 import BeautifulSoup
- from io import StringIO
# Wikipedia page that is downloaded with get_soup() to obtain the population table.
URL = "https://en.wikipedia.org/wiki/List_of_European_Union_member_states_by_population"
# Path of the JSON file handed to load_saved_data() and save_data_to_file().
DATA_FILE = "eu_population_data.json"
def get_soup(url, timeout=10):
    """Download *url* and parse the response body with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection; a timeout surfaces as a ``RequestException`` and is
            handled like any other request failure.

    Returns:
        The ``BeautifulSoup`` tree built with the ``"lxml"`` parser, or
        ``None`` when the request fails (the error is printed).
    """
    # Only the network calls can raise RequestException, so keep the try
    # body limited to them.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        return None
    return BeautifulSoup(response.text, "lxml")
def countries_population(df):
    """Build a ``{country: {"country_population": int}}`` mapping from *df*.

    The population column is the first column whose label contains the
    text ``'Population'``; its values are stringified, stripped of
    thousands separators (``,``) and converted to integers.

    Args:
        df: DataFrame with a ``"Country"`` column and at least one column
            whose label contains ``'Population'``. It is not modified.

    Returns:
        A dict keyed by the values of the ``"Country"`` column. If a
        country appears more than once, the last row wins.

    Raises:
        IndexError: If no column label contains ``'Population'``.
        KeyError: If *df* has no ``"Country"`` column.
        ValueError: If a population value is not an integer after the
            commas are removed.
    """
    country_column = "Country"
    population_column = [col for col in df.columns if 'Population' in col][0]

    # Work on a derived Series instead of assigning back into df, so the
    # caller's frame (possibly a filtered slice) is left untouched.
    populations = (
        df[population_column]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(int)
    )

    # int() guarantees plain Python integers, which json can serialise
    # regardless of the dtype pandas hands back.
    return {
        country: {"country_population": int(population)}
        for country, population in zip(df[country_column], populations)
    }
def population_percentage(data):
    """Annotate every entry of *data* with its share of the total population.

    Each value of *data* is a dict holding ``"country_population"``. A
    ``"country_population_percentage"`` key (percent of the summed
    population, rounded to one decimal place) is added to each of them.

    Args:
        data: Mapping of country name to its per-country dict.

    Returns:
        The same *data* object, updated in place.
    """
    records = list(data.values())
    grand_total = sum(record["country_population"] for record in records)
    for record in records:
        record["country_population_percentage"] = round(
            record["country_population"] / grand_total * 100, 1
        )
    return data
def load_saved_data(file_path):
    """Load previously saved JSON data from *file_path*.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # Open directly rather than checking os.path.exists() first: the file
    # could disappear between the check and the open. The encoding is
    # pinned so reading does not depend on the platform default.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        return None
def save_data_to_file(data, file_path):
    """Write *data* to *file_path* as JSON indented by four spaces.

    Args:
        data: JSON-serialisable object to store.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If *data* contains a value json cannot serialise. The
            destination file is left untouched in that case.
    """
    # Serialise before opening the file: opening in "w" mode truncates it,
    # so a serialisation error after that point would destroy the previous
    # snapshot and leave partial JSON behind.
    payload = json.dumps(data, indent=4)
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(payload)
def is_data_updated(old_data, new_data):
    """Return whether *new_data* compares unequal to *old_data*."""
    changed = old_data != new_data
    return changed
# Script body: fetch the page, turn its first table into per-country
# population figures with percentages, and rewrite the JSON file only when
# the result differs from what was stored previously.
soup = get_soup(URL)
if soup is None:
    print("Failed to retrieve webpage data.")
else:
    table = soup.find("table")
    if table is None:
        raise ValueError("No table found on the webpage.")

    # read_html returns a list of frames; the single table passed in is
    # the first entry.
    df = pd.read_html(StringIO(str(table)))[0]
    # Drop the row labelled "European Union" so only the other rows are counted.
    df = df[df["Country"] != "European Union"]

    countries_population_dict = countries_population(df)
    countries_population_percentage = population_percentage(countries_population_dict)

    saved_data = load_saved_data(DATA_FILE)
    if saved_data is not None and not is_data_updated(saved_data, countries_population_percentage):
        print("No new data. File not updated.")
    else:
        print("New data found. Saving to file...")
        save_data_to_file(countries_population_percentage, DATA_FILE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement