# Untitled

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.parse import urlparse, urlunparse

def get_video_urls(url, timeout=10):
    """Fetch a page and return the absolute URLs of its "/video-" links.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list of unique absolute URLs, in the order they first appear in
        the page. An empty list is returned (and the error is printed)
        when the page cannot be fetched or answers with an HTTP error
        status.
    """
    try:
        # Without a timeout requests can wait forever on a stalled server.
        response = requests.get(url, timeout=timeout)
        # Do not scrape links out of 4xx/5xx error pages.
        response.raise_for_status()
    except requests.RequestException as e:
        print("An error occurred:", e)
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all anchor tags with href containing "/video-"
    anchor_tags = soup.find_all('a', href=lambda href: href and '/video-' in href)

    # Dict keys drop duplicates while keeping first-seen order, so the
    # result is stable from run to run (a set would not be).
    video_urls = {}
    for tag in anchor_tags:
        try:
            # Resolve relative links against the URL the response reports.
            absolute_url = urljoin(response.url, tag['href'])
        except ValueError:
            # A malformed href should cost only that link, not the page.
            continue
        video_urls[absolute_url] = None

    return list(video_urls)

def write_urls_to_file(urls, output_file="output.txt"):
    """Append URLs to a text file, one per line, skipping repeated entries.

    Args:
        urls: Iterable of URLs to record. Repeats within this iterable are
            written once, keeping the position of the first occurrence.
            Lines already present in the file are not checked.
        output_file: Path of the file to append to; created if missing.

    Returns:
        None. A summary line is printed on success; on an I/O or encoding
        failure the error is printed instead of being raised.
    """
    # Deduplicate up front so the "unique" count printed below is accurate.
    unique_urls = list(dict.fromkeys(urls))
    try:
        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding.
        with open(output_file, 'a', encoding='utf-8') as file:
            file.writelines(f"{url}\n" for url in unique_urls)
    except (OSError, UnicodeError) as e:
        print("An error occurred while writing URLs to file:", e)
    else:
        print(f"Appended {len(unique_urls)} unique URLs to {output_file}")

def get_page_urls(url, num_pages):
    """Build the list of page URLs for a paginated listing.

    The first entry is ``url`` exactly as given. Pages 2 through
    ``num_pages`` are formed by appending ``/<page number>`` to the URL's
    path (after dropping any trailing slashes); every other URL component,
    such as the query string, is carried over.

    Args:
        url: URL of the first page.
        num_pages: Total number of pages, counting the first one.

    Returns:
        A list of page URLs starting with ``url``.
    """
    parts = urlparse(url)
    # Strip trailing slashes once so "/list/" and "/list" both give "/list/2".
    base_path = parts.path.rstrip('/')
    later_pages = [
        urlunparse(parts._replace(path=f'{base_path}/{page_no}'))
        for page_no in range(2, num_pages + 1)
    ]
    return [url, *later_pages]


# Script entry point: ask for a start page and a number of extra pages,
# collect the "/video-" links from each page, and append them to the
# default output file.
if __name__ == "__main__":
    webpage_url = input("Enter the URL of the webpage: ").strip()
    try:
        num_pages = int(input("How many additional pages would you like to check? "))
    except ValueError:
        # Exit with a readable message instead of a traceback.
        raise SystemExit("Please enter a whole number of pages.")

    # get_page_urls() takes the TOTAL number of pages (first page included),
    # while the prompt asks for ADDITIONAL pages, hence the +1.
    page_urls = get_page_urls(webpage_url, num_pages + 1)

    print("Fetching page URLs:")
    print(page_urls)

    all_video_urls = []
    for page_url in page_urls:
        print(f"Fetching video URLs from page: {page_url}")
        all_video_urls.extend(get_video_urls(page_url))

    # The same link can show up on several pages; keep the first occurrence
    # only so the URLs written out really are unique.
    all_video_urls = list(dict.fromkeys(all_video_urls))

    write_urls_to_file(all_video_urls)