Advertisement
Guest User

Untitled

a guest
May 6th, 2016
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.25 KB | None | 0 0
  1. """
  2. Usage:
  3. youTube.py --search=Search [--update=Update time] [--typev=Type of video] [--duration=Duration of video] [--features=Video features] [--sort=Video sort by relevance]
  4. youTube.py --help
  5. youTube.py --version
  6.  
  7. Options:
  8. -h --help Open help menu
  9. -v --version Show webcrawler version
  10. -s --search='SEARCH' your youtube search :)
  11.  
  12. --update='Update' lastHour
  13. today
  14. thisWeek
  15. thisMonth
  16. thisYear
  17.  
  18. --typev='Type of video' video
  19. channel
  20. playlist
  21. movie
  22. show
  23.  
  24. --duration='Duration of video' short
  25. long
  26.  
  27. --features='Video features' 4k
  28. hd
  29. subtitles/cc
  30. 3d
  31. live
  32. purchased
  33. 360
  34.  
  35. --sort='Video sort by relevance' uploadDate
  36. viewCount
  37. rating
  38. """
  39. #!/usr/bin/env python3
  40. import requests
  41. from bs4 import BeautifulSoup
  42. import os
  43. from docopt import docopt, DocoptExit
  44.  
  45.  
  46. headers = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0'}
  47. mainurl = "https://www.youtube.com/results?q="
  48.  
  49. class myCrawler():
  50. def crawlurls(self,query):
  51. url = mainurl+query
  52. self.banner()
  53. print ('Starting crawler..')
  54. req = requests.get(url,headers=headers)
  55. soup = BeautifulSoup(req.content,"html.parser")
  56. elements = soup.findAll("h3", {"class":"yt-lockup-title"})
  57. for el in elements:
  58. for a in el.find_all('a',href=True):
  59. print ('https://www.youtube.com/'+a['href'])
  60.  
  61.  
  62. def banner(self):
  63. os.system('clear')
  64. print(" ___ ____ __")
  65. print(" / _ \___ ___ ___ ___ ____ / __/_ _____ / /____ __ _ ___")
  66. print(" / // / -_) -_) _ \/ -_) __/ _\ \/ // (_-</ __/ -_) ' \(_-<")
  67. print("/____/\__/\__/ .__/\__/_/ /___/\_, /___/\__/\__/_/_/_/___/")
  68. print(" /_/ /___/ ")
  69. print("\t\tYouTube WebCrawler v0.1 2016\n\n")
  70.  
  71.  
  72.  
  73. def main():
  74. #search = 'teste123'
  75. #x = myCrawler()
  76. #x.crawlurls(search)
  77. x = myCrawler()
  78. try:
  79. arguments = docopt(__doc__, version="YouTube WebCrawler v0.1 2016")
  80. search = arguments['--search']
  81. update = arguments['--update']
  82. typev = arguments['--typev']
  83. duration = arguments['--duration']
  84. features = arguments['--features']
  85. sort = arguments['--sort']
  86.  
  87. # Possible option of each argument
  88. updateOptions = ['lastHour','today','thisWeek','thisMonth','thisYear']
  89. typevOptions = ['video','channel','playlist','movie','show']
  90. durationOptions = ['short','long']
  91. featuresOptions = ['4k','hd','subtitles/cc','3d','live','purchased','360']
  92. sortOptions = ['uploadDate','viewCount','rating']
  93.  
  94.  
  95. # Checking if arguments are the same expected on each option
  96. if (update != None) and update not in updateOptions:
  97. raise DocoptExit
  98. if (typev != None) and typev not in typevOptions:
  99. raise DocoptExit
  100. if (duration != None) and duration not in durationOptions:
  101. raise DocoptExit
  102. if (features != None) and features not in featuresOptions:
  103. raise DocoptExit
  104. if (sort != None) and sort not in sortOptions:
  105. raise DocoptExit
  106.  
  107.  
  108. print(search)
  109. #x.crawlurls(search)
  110. except DocoptExit as e:
  111. x.banner()
  112. os.system('python3 youTube.py --help')
  113.  
  114.  
  115.  
  116. if __name__ == '__main__':
  117. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement