Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- from selenium.common import exceptions
- from selenium.webdriver.chrome.options import Options
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.expected_conditions import presence_of_element_located
- from selenium.webdriver.support.ui import WebDriverWait
- import os
def convert_time(formatted_time: str, shorten_by: int = 5):
    """Convert a colon-separated duration string to seconds, minus a margin.

    Accepts any number of colon-separated integer fields, each one worth
    60 times the field to its right: "SS", "MM:SS", "HH:MM:SS".

    Args:
        formatted_time: Duration such as "4:13" or "1:02:03".
        shorten_by: Number of seconds subtracted from the total.

    Returns:
        Total seconds represented by ``formatted_time`` minus ``shorten_by``.
        The result is not clamped and may be negative.

    Raises:
        ValueError: If any field is not a valid integer (including an
            empty string).
    """
    total_seconds = 0
    # Horner-style accumulation so hours (or a bare seconds value) work
    # the same way as the minutes:seconds case.
    for field in formatted_time.split(':'):
        total_seconds = total_seconds * 60 + int(field)
    return total_seconds - shorten_by
def get_video(timer: WebDriverWait, int_driver: webdriver):
    """Collect (url, title) pairs for the end-card videos on the loaded page.

    Waits until an element with class ``ytp-ce-covering-overlay`` is present,
    then pairs each such element's ``href`` with the ``innerHTML`` of the
    title element found at the same position.

    Args:
        timer: Wait object used to block until the first overlay appears.
        int_driver: Driver whose current page is searched for the elements.

    Returns:
        A list of ``(href, title_html)`` tuples, or ``None`` if no overlay
        appeared before ``timer`` timed out. If the number of overlays and
        titles differ, only the positions present in both are returned.
    """
    overlay_locator = (By.CLASS_NAME, 'ytp-ce-covering-overlay')
    title_xpath = '//div[@class="ytp-ce-video-title yt-ui-ellipsis yt-ui-ellipsis-2"]'

    # Only the wait is expected to time out; keep the try body minimal.
    try:
        timer.until(presence_of_element_located(overlay_locator))
    except exceptions.TimeoutException:
        return None

    # find_elements(By..., ...) replaces the find_elements_by_* helpers,
    # which no longer exist in current Selenium releases.
    endcard_urls = int_driver.find_elements(*overlay_locator)
    endcard_names = int_driver.find_elements(By.XPATH, title_xpath)

    # zip() stops at the shorter list, so a missing title cannot raise
    # IndexError the way positional indexing did.
    return [
        (url_element.get_attribute("href"), name_element.get_attribute("innerHTML"))
        for url_element, name_element in zip(endcard_urls, endcard_names)
    ]
# --- Browser configuration -------------------------------------------------
# Chrome is started with the unpacked extension found in ./ublock.
ublock_dir = os.path.join(os.getcwd(), 'ublock')
chrome_options = Options()
chrome_options.add_argument('load-extension=' + ublock_dir)
# chrome_options.add_argument("--headless")
# chrome_options.add_argument('--disable-gpu')

# The remote WebDriver endpoint is read from the first line of the file;
# strip the trailing newline so it is not sent as part of the URL.
with open('service_url.txt', 'r') as f:
    service_url = f.readline().strip()
print(service_url)

# --- Crawl parameters ------------------------------------------------------
url = 'https://www.youtube.com/watch?v=pg6RVbqhfK4'
max_level = 5      # depth of the recommendation tree below the start video
choices = 2        # number of end-card videos followed per page
max_retries = 3    # extra attempts per page when no end card shows up in time
video_containers = [16, 17]  # not referenced by the code visible here

# The tree is stored as an implicit array: the node at position i has its
# children at positions choices * i + 1 .. choices * i + choices.
# num_urls counts every node except the root (geometric series, exact ints).
num_urls = (choices ** (max_level + 1) - choices) // (choices - 1)
urls = [''] * (num_urls + 1)
# Every node except those on the deepest level gets opened.
index_to_open = list(range(num_urls - choices ** max_level + 1))
urls[0] = url

with open('videos.csv', 'w', encoding='utf-8') as f:
    f.write('url;title\n')
    f.write(f'{url};base\n')

for index in index_to_open:
    url = urls[index]
    if not url:
        # The parent page yielded no usable link for this slot.
        print('skipping', index)
        continue

    video_info = None
    for attempt in range(max_retries + 1):
        if attempt:
            print('retrying', index)
        # A fresh remote session is opened for every page load.
        with webdriver.Remote(service_url, options=chrome_options) as driver:
            timer = WebDriverWait(driver, 3)
            driver.get(url)
            video_info = get_video(timer, driver)
        if video_info is not None:
            break

    if video_info is None:
        # Bounded retries: give up instead of looping forever on a page
        # that never shows an end card.
        print('giving up on', index)
        continue

    # NOTE(review): titles are written raw; a ';' inside a title (e.g. from
    # an HTML entity in innerHTML) will add a column to that row.
    with open('videos.csv', 'a', encoding='utf-8') as f:
        # Keep at most `choices` results so child slots never spill into a
        # sibling's range or past the end of `urls`.
        for video_num, container in enumerate(video_info[:choices], start=1):
            current_index = choices * index + video_num
            urls[current_index] = container[0]
            csv_line = f'{container[0]};{container[1]}'
            print(csv_line)
            f.write(f'{csv_line}\n')
Add Comment
Please, Sign In to add comment