Advertisement
Guest User

Untitled

a guest
Jul 20th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.88 KB | None | 0 0
  1. '''
  2. Buu
  3. '''
import os
import sys
import time
from urllib.parse import quote_plus

import selenium.webdriver as webdriver
from selenium.common.exceptions import WebDriverException
  8.  
  9.  
  10. def getChannelName(channel_name):
  11.     print("Please enter the channel that you would like to scrape video titles...")
  12.     channelName = channel_name
  13.     googleSearch = "https://www.google.ca/search?q=%s+youtube&oq=%s+youtube&aqs=chrome..69i57j0l5.2898j0j4&sourceid=chrome&ie=UTF-8#q=%s+youtube&*" %(channelName, channelName, channelName)
  14.     print(googleSearch)
  15.     return googleSearch
  16.  
  17. def googleYoutubePage(channel_name):
  18.     driver = webdriver.Chrome("/Users/%location%/PycharmProjects/YoutubeChannelVideos/chromedriver")
  19.     driver.get(getChannelName(channel_name))
  20.     element = driver.find_element_by_class_name("s") #this is where the link to the proper youtube page lives
  21.     keys = element.text #this grabs the link to the youtube page + other information that will need to be cut
  22.     driver.close()
  23.  
  24.     splitKeys = keys.split(" ") #this needs to be split, because aside from the link it grabs the page description, which we need to truncate
  25.     linkToPage = splitKeys[0] #this is where the link lives
  26.  
  27.     for index, char in enumerate(linkToPage): #this loops over the link to find where the stuff beside the link begins (which is unecessary)
  28.         if char == "\n":
  29.             extraCrapStartsHere = index #it starts here, we know everything beyond here can be cut
  30.  
  31.  
  32.     link = ""
  33.     for i in range(extraCrapStartsHere): #the offical link will be everything in the linkToPage up to where we found suitable to cut
  34.         link = link + linkToPage[i]
  35.  
  36.     videosPage = link + "/videos"
  37.     print(videosPage)
  38.     return videosPage
  39.  
  40. def getVideoTitles(channel_name):
  41.     driver = webdriver.Chrome("/Users/%location%/PycharmProjects/YoutubeChannelVideos/chromedriver")
  42.     driver.get(googleYoutubePage(channel_name))
  43.  
  44.  
  45.     try:
  46.         while True:
  47.            clickLoadMore(driver)
  48.            time.sleep(7) #need to wait for everything to load... the new videos and the load more button
  49.     except BaseException as e: #the exception is catching the end of the page, where there are no more videos to load aka an error
  50.         print("Done Looping... Now Storing Video Titles to a Document...")
  51.         scrapeTitles(driver)
  52.  
  53.  
  54.  
  55. def clickLoadMore(driver):
  56.     clickButton = driver.find_element_by_class_name("load-more-button")
  57.     for i in range(12):  # Getting the element to be clicked needed to be done through bruteforce
  58.         clickButton.click()
  59.         driver.implicitly_wait(1)  # need to wait per each click
  60.  
  61.  
  62. def scrapeTitles(driver):
  63.     os.chdir("/Users/%location%/Desktop/")
  64.     listFile = open(driver.title, "a")
  65.     for title in driver.find_elements_by_class_name("yt-uix-tile-link"):
  66.         strTitle = str(title.text)
  67.         listFile.write(strTitle + '\n')
  68.     listFile.close()
  69.  
  70.     print("Done")
  71.  
  72. def main():
  73.     getVideoTitles(sys.argv[1])
  74.  
  75.  
  76. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement