# Untitled

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import os
import requests
import time


# # Path to the Chrome driver executable
# driver_path = ChromeDriverManager().install()

# # Check if the driver executable already exists
# if os.path.exists(driver_path):
#     # Delete the existing driver
#     os.remove(driver_path)


def download_file(url, filename_without_extension, timeout=30):
    """Download ``url`` into the local ``output`` folder.

    The file is saved as ``output/<filename_without_extension>.<ext>``, where
    ``<ext>`` is the subtype of the response's Content-Type header (for
    example ``image/jpeg`` gives ``jpeg``). Header parameters such as
    ``; charset=utf-8`` are ignored, and ``bin`` is used when the header is
    missing or has no usable subtype.

    This is a best-effort helper: every failure is reported on stdout and
    nothing is raised to the caller.

    Args:
        url: Address of the resource to fetch.
        filename_without_extension: Base name of the saved file.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole script.

    Returns:
        None.
    """
    try:
        # Without a timeout, requests waits indefinitely on a silent server.
        response = requests.get(url, timeout=timeout)

        # Only a plain 200 response is treated as a successful download.
        if response.status_code != 200:
            print("Failed to download file.")
            return

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        output_folder = "output"
        os.makedirs(output_folder, exist_ok=True)

        # Content-Type looks like "type/subtype; param=value": drop the
        # parameters first, then keep the subtype as the file extension.
        content_type = response.headers.get('content-type') or ''
        media_type = content_type.split(';', 1)[0].strip()
        file_extension = media_type.split('/')[-1] or 'bin'

        # Combine the filename and extension
        filename_with_extension = f"{filename_without_extension}.{file_extension}"

        # Save the file to the output folder
        output_path = os.path.join(output_folder, filename_with_extension)
        with open(output_path, 'wb') as file:
            file.write(response.content)
        print(f"File downloaded and saved as '{output_path}'")
    except Exception as e:
        # Deliberate catch-all: one failed download must not stop the scrape.
        print(f"Error while downloading the file: {str(e)}")


def create_and_save_text_file(filename, text_content):
    """Write ``text_content`` to a ``.txt`` file, replacing any existing file.

    ``.txt`` is appended to ``filename`` unless it already ends with it. The
    file is always written as UTF-8 so that text containing emoji or
    non-Latin characters is saved correctly regardless of the platform's
    default encoding.

    This is a best-effort helper: failures are reported on stdout and
    nothing is raised to the caller.

    Args:
        filename: Target path, with or without the ``.txt`` suffix.
        text_content: Text to store in the file.

    Returns:
        None.
    """
    try:
        # Append ".txt" extension if not already provided
        if not filename.endswith(".txt"):
            filename += ".txt"

        # An explicit encoding keeps the output independent of the locale;
        # the platform default (e.g. cp1252) cannot encode every character.
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(text_content)
        print(f"Text file '{filename}' created and text saved successfully.")
    except Exception as e:
        # Deliberate catch-all: a failed write is reported, not propagated.
        print(f"Error while creating and saving the text file: {str(e)}")


# Site to scrape
url = "https://www.9gag.com"

# Browser options; enable the line below to run Chrome without a window
options = webdriver.ChromeOptions()
# options.add_argument("--headless")  # Run Chrome in headless mode (without GUI)

# Install the pinned chromedriver build and wrap its path in a Service
# NOTE(review): the pinned driver version presumably has to match the locally
# installed Chrome - confirm before running.
driver_executable = ChromeDriverManager(version='114.0.5735.90').install()
service = Service(driver_executable)

# Start Chrome and open the target page
driver = webdriver.Chrome(options=options, service=service)
driver.get(url)


# Keep scrolling until enough articles are loaded, but give up after a fixed
# number of attempts so the script cannot spin forever when the page never
# shows that many (consent wall, layout change, blocked request, ...).
MIN_ARTICLES = 10
MAX_SCROLL_ATTEMPTS = 30

articles = []
for _ in range(MAX_SCROLL_ATTEMPTS):
    # Scrape elements using XPath
    articles = driver.find_elements(By.XPATH, "//article")
    # Scroll down to the bottom of the page to trigger loading of more content
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    if len(articles) >= MIN_ARTICLES:
        break
else:
    # Loop ended without reaching the target; continue with what was found.
    print(
        f"Only found {len(articles)} article(s) after "
        f"{MAX_SCROLL_ATTEMPTS} scroll attempts; continuing with those."
    )

# Let the page settle after the last scroll
time.sleep(2)

# For each collected article: save its media file and its title, keyed by the
# entry id. The browser is always closed, even if the loop is interrupted.
try:
    for i, article in enumerate(articles):
        try:
            # Scroll to the current article
            driver.execute_script("arguments[0].scrollIntoView();", article)

            # Add a short delay to let the page scroll and load any dynamic content (adjust the time as needed)
            time.sleep(1)

            # Selenium can only locate elements, not attribute nodes, so an
            # XPath ending in "/@attr" is rejected as an invalid selector.
            # Find the element and read the attribute from it instead.
            header_link = article.find_element(By.XPATH, './header/a')
            title = header_link.text
            # presumably the header link carries the entry id - verify
            # against the live page markup
            entry_id = header_link.get_attribute('data-entry-id')

            media = article.find_element(By.XPATH, './/video/source | .//picture/img')
            media_src = media.get_attribute('src')

            print("Article " + str(i) + ' : ' + title)

            # get_attribute returns None when the attribute is absent.
            if not entry_id or not media_src:
                print(f"Skipping article {i}: missing entry id or media source.")
                continue

            download_file(media_src, entry_id)
            create_and_save_text_file(entry_id, title)
        except Exception as e:
            # Best-effort per article: report the problem and move on rather
            # than silently dropping it.
            print(f"Skipping article {i}: {e}")
            continue
finally:
    # Quit the driver and close the browser
    driver.quit()