Suppenbiatch

Untitled

Sep 16th, 2020
144
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.70 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.common import exceptions
  3. from selenium.webdriver.chrome.options import Options
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.support.expected_conditions import presence_of_element_located
  6. from selenium.webdriver.support.ui import WebDriverWait
  7. import os
  8.  
  9.  
  10. def convert_time(formatted_time: str, shorten_by: int = 5):
  11.     time_array = formatted_time.split(':')
  12.     return (int(time_array[0]) * 60 + int(time_array[1])) - shorten_by
  13.  
  14.  
  15. def get_video(timer: WebDriverWait, int_driver: webdriver):
  16.     try:
  17.         video = timer.until(presence_of_element_located((By.CLASS_NAME, 'ytp-ce-covering-overlay')))
  18.         endcard_urls = int_driver.find_elements_by_class_name('ytp-ce-covering-overlay')
  19.         endcard_names = int_driver.find_elements(By.XPATH, '//div[@class="ytp-ce-video-title yt-ui-ellipsis yt-ui-ellipsis-2"]')
  20.         endcard_items = []
  21.         for i, url_element in enumerate(endcard_urls):
  22.             url = url_element.get_attribute("href")
  23.             name = endcard_names[i].get_attribute("innerHTML")
  24.             endcard_items.append((url, name))
  25.     except exceptions.TimeoutException:
  26.         return None
  27.     return endcard_items
  28.  
  29. ublock_dir = os.path.join(os.getcwd(), 'ublock')
  30. chrome_options = Options()
  31. chrome_options.add_argument('load-extension=' + ublock_dir)
  32. # chrome_options.add_argument("--headless")
  33. # chrome_options.add_argument('--disable-gpu')
  34. with open('service_url.txt', 'r') as f:
  35.     service_url = f.readline()
  36. print(service_url)
  37.  
  38. url = 'https://www.youtube.com/watch?v=pg6RVbqhfK4'
  39. max_level = 5
  40. choices = 2
  41. video_containers = [16, 17]
  42.  
  43.  
  44. num_urls = int(pow(choices, max_level+1)/(choices-1) - choices/(choices-1))
  45. urls = ['' for _ in range(num_urls + 1)]
  46. index_to_open = [i for i in range(num_urls - int(pow(choices, max_level)) + 1)]
  47. urls[0] = f'{url};base\n'
  48.  
  49. with open('videos.csv', 'w', encoding='utf-8') as f:
  50.     f.write('url;title\n')
  51.     f.write(urls[0])
  52.  
  53.  
  54. for index in index_to_open:
  55.     url = urls[index].split(';')[0]
  56.     with webdriver.Remote(service_url, options=chrome_options) as driver:
  57.         timer = WebDriverWait(driver, 3)
  58.         driver.get(url)
  59.         video_info = get_video(timer, driver)
  60.         for video_num, container in enumerate(video_info, start=1):
  61.             current_index = 2 * index + video_num
  62.             urls[current_index] = container[0]
  63.             with open('videos.csv', 'a', encoding='utf-8') as f:
  64.                 csv_line = f'{container[0]};{container[1]}'
  65.                 print(csv_line)
  66.                 f.write(f'{csv_line}\n')
  67.  
  68.         if video_info is None:
  69.             index_to_open.insert(index + 1, index)
  70.             print('retrying', index)
Add Comment
Please, Sign In to add comment