Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import errno
import os
import urllib2

import requests
from bs4 import BeautifulSoup
# Number of videos whose playlist could not be fetched; reported at exit.
fails = 0
# Base URL every playlist and segment path is appended to (note the
# trailing slash: paths are joined by plain string concatenation).
server = "http://206.130.99.2:1935/coursesaver/"
def downloadVideos(playlist, url, filename):
    """Save a playlist and every media entry it lists into a directory.

    playlist -- text of an .m3u8 playlist; written verbatim to
                ``<filename>/playlist.m3u8``
    url      -- path, relative to the module-level ``server``, that each
                playlist entry is appended to
    filename -- existing directory that receives the playlist and files

    Entries answered with an HTTP error status are reported and skipped
    rather than saved, so an error page never masquerades as a segment.
    """
    with open(filename + "/playlist.m3u8", 'wb') as handle:
        handle.write(playlist)

    # splitlines() + strip() also copes with CRLF-terminated playlists.
    for entry in playlist.splitlines():
        entry = entry.strip()
        # Lines starting with '#' are playlist tags/comments, not files.
        if not entry or entry.startswith('#'):
            continue

        target = filename + "/" + entry
        print("downloading " + target)
        response = requests.get(server + url + entry, stream=True)
        try:
            if response.ok:
                with open(target, 'wb') as handle:
                    # Stream to disk in chunks instead of buffering the
                    # whole segment in memory.
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        if chunk:
                            handle.write(chunk)
            else:
                print("skipping " + target
                      + " (HTTP " + str(response.status_code) + ")")
        finally:
            response.close()
def findVideos(playlist, url, title):
    """Create the output directory for one video and download its files.

    playlist -- text of the video's top-level playlist.m3u8
    url      -- path of the video's directory relative to ``server``
                (the caller passes ``<name>.smil/``)
    title    -- link text of the video; its first five lines, with
                spaces, newlines and tabs removed, name the directory

    Raises urllib2.URLError when the playlist lists no entry or the
    nested playlist cannot be fetched, and OSError when the directory
    cannot be created for any reason other than already existing.
    """
    title = "".join(title.split('\n')[0:5])
    filename = title.replace(" ", "").replace("\n", "").replace("\t", "")
    try:
        os.mkdir(filename)
    except OSError as err:
        # Only "already exists" is benign; anything else (permissions,
        # empty name, ...) would make the later file writes fail anyway.
        if err.errno != errno.EEXIST:
            raise
        print("Directory " + filename + " already exists")

    # Follow the last non-comment entry of the playlist.  This equals the
    # second-to-last raw line when the text ends with a single newline,
    # but also works without a trailing newline or with CRLF endings.
    entries = [line.strip() for line in playlist.splitlines()]
    entries = [line for line in entries if line and not line.startswith('#')]
    if not entries:
        raise urllib2.URLError("playlist for " + filename + " lists no stream")

    response = urllib2.urlopen(server + url + entries[-1])
    try:
        html = response.read()
    finally:
        response.close()
    downloadVideos(html, url, filename)
def findPlaylist(url, id, title):
    """Build the stream name for one video page and download the video.

    url   -- address of the video page; path segments 5 and 6 (counting
             from ``scheme:`` as 0) feed the stream name
    id    -- numeric id placed at the front of the stream name
    title -- link text of the video; after stripping, its first two
             lines are appended to the stream name

    For the URL ``.../video/watch/2012-11-.../g1-...``, id 774 and the
    title lines ``1.`` / ``1 - Atomic Structure, Atomic Mass``, the
    derived name is ``774_G2012-1.1-Atomic-StructureAtomic-Mass``.

    Any failure (unusable URL/title, or a urllib2.URLError while
    fetching) is reported and counted in the module-level ``fails``.
    """
    global fails
    print(url)

    parts = url.split('/')
    lines = title.strip().split('\n')
    if len(parts) < 7 or not parts[6] or len(lines) < 2:
        # One malformed link should not abort the whole crawl.
        print("skipping " + url + ": unexpected URL or title layout")
        fails += 1
        return

    initial = parts[6].upper()[0]
    prefix = parts[5].split('-')[0].replace("o", "")
    slug = lines[1].strip()
    slug = slug.replace(" - ", "-").replace(", ", "").replace(",", "")
    slug = slug.replace(" ", "-")
    newurl = str(id) + "_" + initial + prefix + '-' + lines[0].strip() + slug

    try:
        response = urllib2.urlopen(server + newurl + '.smil/playlist.m3u8')
        try:
            html = response.read()
        finally:
            response.close()
        findVideos(html, newurl + '.smil/', title)
    except urllib2.URLError:
        # HTTPError is a URLError subclass, so any fetch failure lands here.
        print("404")
        fails += 1
# Running id handed to findPlaylist; advanced by one per video link, in
# crawl order.
i = 774


def findPages(url):
    """Crawl a listing page, downloading videos and recursing into sub-pages.

    url -- address of a page whose ``<a class="list-group-item">`` links
           are inspected

    A link whose path segment 4 is ``watch`` and whose segment 5 starts
    with ``2012`` (optionally preceded by ``o``) is passed to
    findPlaylist with the current id.  Any other link whose segment 5
    starts with ``g`` or ``o`` is crawled recursively.  Links without an
    href, or with fewer than six ``/``-separated parts, are ignored.
    """
    global i
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        html_doc = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(html_doc, 'html.parser')
    for a in soup.find_all("a", {"class": "list-group-item"}):
        href = a.get('href')
        parts = href.split('/') if href else []
        if len(parts) < 6:
            continue

        kind, last = parts[4], parts[5]
        prefix = last.split("-")[0]
        if prefix.replace("o", "") == "2012" and kind == 'watch':
            # The first 'o'-prefixed video moves the counter to 1006.
            # NOTE(review): presumably a second id range on the server;
            # confirm before changing 1000/1006.
            if prefix[0] == 'o' and i < 1000:
                i = 1006
            findPlaylist(href, i, a.get_text())
            i += 1
        elif last.startswith(('g', 'o')):
            findPages(href)
# Manual single-video run, useful when debugging the stream-name derivation:
# findPlaylist("https://www.coursesaver.com/video/watch/2012-11-atomic-structure-atomic-mass-periodic-table/g1-atomic-and-nuclear-structure-1", 774, '''
# 1.
# 1 - Atomic Structure, Atomic Mass, Periodic Table
# ''')

# Crawl from the top-level category page, then report how many videos
# could not be fetched.
findPages("https://www.coursesaver.com/video/category/dat-videos")
print("Missing videos " + str(fails))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement