Advertisement
Guest User

Untitled

a guest
Apr 13th, 2024
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.68 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import re
  4. from urllib.parse import urljoin
  5.  
  6. def get_video_urls(url):
  7.     try:
  8.         response = requests.get(url)
  9.         soup = BeautifulSoup(response.text, 'html.parser')
  10.         video_urls = []
  11.  
  12.         # Find all anchor tags
  13.         anchor_tags = soup.find_all('a')
  14.  
  15.         # Filter anchor tags with resolution format such as "1080p"
  16.         for tag in anchor_tags:
  17.             if re.search(r'\b\d{3,4}p\b', tag.text):
  18.                 video_urls.append(urljoin(response.url, tag['href']))
  19.  
  20.         return video_urls
  21.     except Exception as e:
  22.         print("An error occurred:", e)
  23.         return []
  24.  
  25. def write_urls_to_file(video_urls, output_file="output.txt"):
  26.     try:
  27.         with open(output_file, 'a') as file:
  28.             for url in video_urls:
  29.                 file.write(url + '\n')
  30.         print(f"Appended {len(video_urls)} URLs to {output_file}")
  31.     except Exception as e:
  32.         print("An error occurred while writing URLs to file:", e)
  33.  
  34. def get_page_urls(url, num_pages):
  35.     page_urls = [url]
  36.     for i in range(1, num_pages):
  37.         if i == 1:
  38.             page_urls.append(urljoin(url, f"&p={i}"))
  39.         else:
  40.             page_urls.append(urljoin(url, f"?p={i}"))
  41.     return page_urls
  42.  
  43. if __name__ == "__main__":
  44.     webpage_url = input("Enter the URL of the webpage: ")
  45.     num_pages = int(input("How many additional pages would you like to check? "))
  46.  
  47.     all_video_urls = []
  48.  
  49.     page_urls = get_page_urls(webpage_url, num_pages + 1)
  50.  
  51.     for page_url in page_urls:
  52.         video_urls = get_video_urls(page_url)
  53.         all_video_urls.extend(video_urls)
  54.  
  55.     write_urls_to_file(all_video_urls)
  56.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement