Advertisement
Guest User

Untitled

a guest
Nov 28th, 2019
159
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.21 KB | None | 0 0
  1. import argparse
  2. import os
  3. import requests
  4. import sys
  5. from slugify import slugify
  6.  
  7. class KhanAcademyVideoDownloader:
  8.  
  9.     def __init__(self):
  10.         self._kindString = "kind"
  11.         self._topicString = "Topic"
  12.         self._videoString = "Video"
  13.  
  14.         self._topicUrl = "http://www.khanacademy.org/api/v1/topic/"
  15.         self._videoUrl = "http://www.khanacademy.org/api/v1/videos/"
  16.  
  17.         self._fileExtension = "mp4"
  18.         self._filePath = []
  19.  
  20.     def parseCommandLineArguments(self):
  21.         parser = argparse.ArgumentParser()
  22.         parser.add_argument("course_name", type=str, help="The name of the course you want to download the videos of.")
  23.         parser.add_argument("-e", type=str, help="The extension of the videos to be downloaded", metavar="video_extension")
  24.         args = parser.parse_args()
  25.         if args.e is not None:
  26.             if len(args.e) > 1:
  27.                 self._fileExtension = args.e
  28.             else:
  29.                 print("The provided extension is too short, by default using mp4.")
  30.         return args.course_name
  31.  
  32.     def visitCourse(self, topic):
  33.         self._filePath.append(topic)
  34.         self.visitElement(requests.get(self._topicUrl + topic).json())
  35.  
  36.     def visitElement(self, element):
  37.         elementKind = element[self._kindString]
  38.         if elementKind == self._topicString:
  39.             self.visitTopic(element)
  40.         elif elementKind == self._videoString:
  41.             self.visitVideo(element)
  42.  
  43.     def visitTopic(self, topicElement):
  44.         self._childrenString = "children"
  45.         self._idString = "id"
  46.  
  47.         childCounter = 0
  48.         for child in topicElement[self._childrenString]:
  49.             childId = child[self._idString]
  50.             childKind = child[self._kindString]
  51.  
  52.             if(childKind == self._topicString or childKind == self._videoString):
  53.                 if childKind == self._topicString:
  54.                     url =  self._topicUrl + childId
  55.                 elif childKind == self._videoString:
  56.                     url = self._videoUrl + childId
  57.                 childCounter += 1
  58.                 self._filePath.append(str(childCounter) + "-" + slugify(child["title"]))
  59.                 self.visitElement(requests.get(url).json())
  60.                 self._filePath.pop()
  61.  
  62.     def visitVideo(self, videoElement):
  63.         downloadUrls = videoElement["download_urls"]
  64.         if self._fileExtension in downloadUrls:
  65.             fileName = "/".join(self._filePath) + "." + self._fileExtension
  66.             if(not os.path.exists(fileName)):
  67.                 fileRequest = requests.get(downloadUrls[self._fileExtension], stream=True)
  68.                 print("Downloading " + fileName)
  69.                 self.saveFile(fileRequest, fileName)
  70.  
  71.     def saveFile(self, fileRequest, fileName):
  72.         if not os.path.exists(os.path.dirname(fileName)):
  73.             os.makedirs(os.path.dirname(fileName))
  74.  
  75.         chunkSize = 2 ** 16
  76.         with open(fileName, 'wb') as fileDescriptor:
  77.             for chunk in fileRequest.iter_content(chunkSize):
  78.                 fileDescriptor.write(chunk)
  79.  
  80. def main():
  81.     k = KhanAcademyVideoDownloader()
  82.     k.visitCourse(k.parseCommandLineArguments())
  83.  
  84. if __name__ == '__main__':
  85.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement