Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Usage:
- youTube.py --search=Search [--update=Update time] [--typev=Type of video] [--duration=Duration of video] [--features=Video features] [--sort=Video sort by relevance]
- youTube.py --help
- youTube.py --version
- Options:
- -h --help Open help menu
- -v --version Show webcrawler version
- -s --search='SEARCH' your youtube search :)
- --update='Update' lastHour
- today
- thisWeek
- thisMonth
- thisYear
- --typev='Type of video' video
- channel
- playlist
- movie
- show
- --duration='Duration of video' short
- long
- --features='Video features' 4k
- hd
- subtitles/cc
- 3d
- live
- purchased
- 360
- --sort='Video sort by relevance' uploadDate
- viewCount
- rating
- """
- #!/usr/bin/env python3
import os
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
from docopt import docopt, DocoptExit
# Browser-like User-Agent header sent with the search request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64; rv:46.0) '
        'Gecko/20100101 Firefox/46.0'
    ),
}

# Search-results endpoint; the query text is appended directly after "q=".
mainurl = "https://www.youtube.com/results?q="
class myCrawler():
    """Print the video links found on a YouTube search-results page.

    The crawler downloads the results page for a query, looks for
    ``<h3 class="yt-lockup-title">`` headings and prints the absolute URL
    of every ``<a href=...>`` found inside them.

    NOTE(review): the selector targets the server-rendered markup this
    script was written against; confirm it still matches what YouTube
    returns today.
    """

    # Site root that hrefs taken from the results page are resolved against.
    BASE_URL = 'https://www.youtube.com/'

    # Seconds to wait for the server before giving up instead of hanging.
    REQUEST_TIMEOUT = 10

    @staticmethod
    def _absolute_url(href):
        """Return *href* resolved against the YouTube site root.

        ``urljoin`` avoids the doubled slash that plain concatenation
        produces for root-relative links such as ``/watch?v=...`` and
        leaves already-absolute links untouched.
        """
        return urljoin(myCrawler.BASE_URL, href)

    def crawlurls(self, query):
        """Fetch the results page for *query* and print each video URL.

        Args:
            query: Search text; it is URL-encoded before being appended
                to the results endpoint.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (including a timeout).
        """
        # Encode the query so spaces, '&', '#', etc. cannot corrupt the URL.
        url = mainurl + quote_plus(query)
        self.banner()
        print('Starting crawler..')
        req = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(req.content, "html.parser")
        for el in soup.find_all("h3", {"class": "yt-lockup-title"}):
            for a in el.find_all('a', href=True):
                print(self._absolute_url(a['href']))

    def banner(self):
        """Clear the terminal and print the ASCII-art program banner."""
        # 'clear' only exists on POSIX shells; Windows uses 'cls'.
        os.system('cls' if os.name == 'nt' else 'clear')
        # Raw strings keep the backslashes of the art literal without
        # relying on invalid escape sequences.
        print(" ___ ____ __")
        print(r" / _ \___ ___ ___ ___ ____ / __/_ _____ / /____ __ _ ___")
        print(r" / // / -_) -_) _ \/ -_) __/ _\ \/ // (_-</ __/ -_) ' \(_-<")
        print(r"/____/\__/\__/ .__/\__/_/ /___/\_, /___/\__/\__/_/_/_/___/")
        print(" /_/ /___/ ")
        print("\t\tYouTube WebCrawler v0.1 2016\n\n")
def main():
    """Parse the command line, validate the filter values and echo the search.

    The module docstring is handed to docopt as the usage specification.
    Each optional filter flag must carry one of its known values; on an
    invalid value, or on a command line docopt rejects, the banner and
    the help text are printed instead.
    """
    # Values accepted by each optional filter flag.
    allowed = {
        '--update': ('lastHour', 'today', 'thisWeek', 'thisMonth', 'thisYear'),
        '--typev': ('video', 'channel', 'playlist', 'movie', 'show'),
        '--duration': ('short', 'long'),
        '--features': ('4k', 'hd', 'subtitles/cc', '3d', 'live', 'purchased', '360'),
        '--sort': ('uploadDate', 'viewCount', 'rating'),
    }

    x = myCrawler()
    try:
        arguments = docopt(__doc__, version="YouTube WebCrawler v0.1 2016")

        # A flag that was not given is None and needs no validation.
        for flag, choices in allowed.items():
            value = arguments[flag]
            if value is not None and value not in choices:
                raise DocoptExit

        search = arguments['--search']
        print(search)
        # TODO: crawling is still disabled here; enable with x.crawlurls(search).
    except DocoptExit:
        x.banner()
        # Print the help text directly instead of spawning
        # "python3 youTube.py --help", which only worked when the script
        # had that exact name and lived in the current directory.
        print(__doc__.strip('\n'))
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement