Advertisement
Abahbob

ayylmao

Aug 25th, 2016
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.44 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import urllib2
  3. import requests
  4. import os
  5.  
  # Number of lessons whose playlist could not be fetched; findPlaylist
  # increments it and the script prints it at the end.
  fails = 0
  # Base URL that every playlist and segment path is appended to.
  server = "http://206.130.99.2:1935/coursesaver/"
  8.  
  def downloadVideos(playlist, url, filename):
      """Save a media playlist and every segment it lists into a directory.

      Args:
          playlist: text of the media playlist; each non-blank line that does
              not start with ``#`` is treated as a segment name.
          url: stream path relative to the module-level ``server`` (the
              segment name is appended to it directly).
          filename: existing directory that receives ``playlist.m3u8`` and
              the segment files.
      """
      with open(filename + "/playlist.m3u8", 'wb') as handle:
          handle.write(playlist)
      for line in playlist.split('\n'):
          # strip() also drops the '\r' left behind by CRLF playlists, which
          # would otherwise end up in both the URL and the file name.
          entry = line.strip()
          if not entry or entry.startswith('#'):
              continue
          target = filename + "/" + entry
          print("downloading %s" % target)
          r = requests.get(server + url + entry, stream=True)
          try:
              if not r.ok:
                  # Do not store an HTTP error page as if it were a segment.
                  print("skipping %s (HTTP %s)" % (target, r.status_code))
                  continue
              with open(target, 'wb') as handle:
                  # Stream to disk in chunks; r.content would buffer the
                  # whole segment in memory despite stream=True.
                  for chunk in r.iter_content(chunk_size=64 * 1024):
                      handle.write(chunk)
          finally:
              # Streamed responses hold their connection until closed.
              r.close()
  18.  
  def findVideos(playlist, url, title):
      """Create the lesson directory and download the last stream in *playlist*.

      Args:
          playlist: text of the playlist fetched by findPlaylist; its last
              non-comment entry is fetched as the media playlist
              (presumably the last listed variant -- confirm against the
              server's output).
          url: stream path relative to the module-level ``server``.
          title: raw lesson title; its first five lines, with spaces and tabs
              removed, form the output directory name.

      Raises:
          OSError: if the directory cannot be created and does not already
              exist.
      """
      filename = "".join(title.split('\n')[0:5]).replace(" ", "").replace("\t", "")
      try:
          os.mkdir(filename)
      except OSError:
          # Only an existing directory is harmless; anything else (bad name,
          # permissions) would make every later write fail anyway.
          if not os.path.isdir(filename):
              raise
          print("Directory %s already exists" % filename)

      # Pick the last real entry instead of a fixed "second to last line",
      # which is wrong when the playlist has no trailing newline or uses CRLF.
      entries = [line.strip() for line in playlist.split('\n')]
      entries = [line for line in entries if line and not line.startswith('#')]
      if not entries:
          print("No stream listed in playlist for %s" % filename)
          return

      response = urllib2.urlopen(server + url + entries[-1])
      try:
          html = response.read()
      finally:
          response.close()
      downloadVideos(html, url, filename)
  31.  
  def _buildStreamName(url, id, title):
      """Derive the server-side stream name for one lesson.

      The name is ``<id>_<L><year>-<line0><line1>`` where ``L`` is the
      upper-cased first character of the 7th ``/``-separated part of *url*,
      ``year`` is the text before the first ``-`` in the 6th part with every
      ``o`` removed, and ``line0``/``line1`` are the first two lines of the
      stripped *title* (the second with its separators rewritten to ``-``).

      Raises:
          IndexError: if *url* has fewer than seven parts, the seventh part is
              empty, or *title* has fewer than two lines.
      """
      segments = url.split('/')
      lines = title.strip().split('\n')
      slug = (lines[1].strip()
              .replace(" - ", "-")
              .replace(", ", "")
              .replace(",", "")
              .replace(" ", "-"))
      return "".join([
          str(id),
          "_",
          segments[6].upper()[0],
          segments[5].split('-')[0].replace("o", ""),
          "-",
          lines[0].strip(),
          slug,
      ])


  def findPlaylist(url, id, title):
      """Fetch the playlist for one lesson and hand it to findVideos.

      Args:
          url: lesson page URL; parts 6 and 7 of its path feed the stream name.
          id: numeric prefix of the stream name.
          title: raw link text of the lesson (at least two lines after
              stripping).

      Every failure (malformed input or a urllib2 error anywhere below) is
      printed and counted in the module-level ``fails`` instead of being
      raised, so one bad lesson does not stop the crawl.
      """
      global fails
      print(url)
      try:
          newurl = _buildStreamName(url, id, title)
      except IndexError:
          # Short URL or one-line title: count it as missing and carry on.
          print("cannot derive stream name for %s" % url)
          fails += 1
          return
      try:
          response = urllib2.urlopen(server + newurl + '.smil/playlist.m3u8')
          try:
              html = response.read()
          finally:
              response.close()
          findVideos(html, newurl + '.smil/', title)
      except urllib2.URLError as e:
          # HTTPError carries a status code (still prints "404" for a missing
          # stream); a plain URLError only has a reason.
          print(getattr(e, "code", None) or e.reason)
          fails += 1
  51.  
  # Running id passed to findPlaylist as the stream-name prefix; findPages
  # advances it by one for every lesson link it processes.
  i = 774


  def findPages(url):
      """Crawl a listing page, downloading lessons and recursing into sub-pages.

      Each ``<a class="list-group-item">`` is classified by the 6th
      ``/``-separated part of its href:

      * prefix before the first ``-`` equals ``2012`` once every ``o`` is
        removed, and the 5th part is ``watch``: a lesson, passed to
        findPlaylist with the current value of the global ``i``;
      * otherwise, part starting with ``g`` or ``o``: a nested listing page,
        crawled recursively.

      Anchors without an href, or whose href is too short to classify, are
      skipped.
      """
      global i
      response = urllib2.urlopen(urllib2.Request(url))
      try:
          html_doc = response.read()
      finally:
          response.close()
      soup = BeautifulSoup(html_doc, 'html.parser')
      for a in soup.find_all("a", {"class": "list-group-item"}):
          href = a.get('href')
          parts = href.split('/') if href else []
          if len(parts) < 6 or not parts[5]:
              # Missing, relative or truncated link: nothing to classify.
              continue
          last = parts[5]
          prefix = last.split("-")[0]
          if prefix.replace("o", "") == "2012" and parts[4] == 'watch':
              # The first 'o'-prefixed lesson moves the counter to 1006
              # (apparently where that series starts -- TODO confirm).
              if prefix[0] == 'o' and i < 1000:
                  i = 1006
              findPlaylist(href, i, a.get_text())
              i += 1
          elif last[0] in ('g', 'o'):
              findPages(href)
  # Manual single-lesson run, kept for debugging. The title argument is the
  # link text: a lesson-number line followed by a lesson-name line.
  #
  # findPlaylist("https://www.coursesaver.com/video/watch/2012-11-atomic-structure-atomic-mass-periodic-table/g1-atomic-and-nuclear-structure-1", 774, '''
  #     1.
  #     1 - Atomic Structure, Atomic Mass, Periodic Table
  # ''')

  # Guarded so that importing the module does not start a full crawl.
  if __name__ == "__main__":
      findPages("https://www.coursesaver.com/video/category/dat-videos")
      print("Missing videos %s" % fails)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement