# ayylmao

from bs4 import BeautifulSoup
import urllib2
import requests
import os

# Running count of lessons whose fetch raised urllib2.URLError; it is
# incremented in findPlaylist and printed once the crawl has finished.
fails = 0
# Base URL that every playlist and media-segment path is appended to.
server = "http://206.130.99.2:1935/coursesaver/"

def downloadVideos(playlist, url, filename):
    with open(filename + "/playlist.m3u8", 'wb') as handle:
        handle.write(playlist)
    for x in playlist.split('\n'):
        if x and x[0] != '#':
            print "downloading", filename + "/" + x
            r = requests.get(server + url + x, stream=True)
            with open(filename + "/" + x, 'wb') as handle:
                handle.write(r.content)

def findVideos(playlist, url, title):
    title = "".join(title.split('\n')[0:5])
    filename = title.replace(" ", "").replace("\n", "").replace("\t", "")
    try:
        os.mkdir(filename)
    except:
        print "Directory", filename, "already exists"
    lines = playlist.split('\n')
    x = lines[len(lines) - 2]
    response = urllib2.urlopen(server + url + x)
    html = response.read()
    downloadVideos(html, url, filename)

def findPlaylist(url, id, title):
    global fails, server
    try:
        print url
        lines = title.strip().split('\n')
        newurl = str(id)
        newurl += "_"
        newurl += url.split('/')[6].upper()[0]
        newurl += url.split('/')[5].split('-')[0].replace("o","")
        newurl += '-'
        newurl += lines[0].strip()
        newurl += lines[1].strip().replace(" - ", "-").replace(", ", "").replace(",", "").replace(" ","-")
        # print newurl
        response = urllib2.urlopen(server + newurl + '.smil/playlist.m3u8')
        html = response.read()
        findVideos(html, newurl + '.smil/', title)
    except urllib2.URLError, e:
        print "404"
        fails += 1

# Numeric id handed to findPlaylist for the next lesson.  findPages adds one
# after every lesson and jumps it to 1006 when it meets a lesson whose path
# token starts with 'o' while the counter is still below 1000.
i = 774
def findPages(url):
    """Crawl a listing page: download its lessons, recurse into sub-listings.

    Every ``<a class="list-group-item">`` on the page is classified by its
    href split on '/':
      * If the leading '-'-separated token of piece 5 equals "2012" once all
        'o' characters are removed, and piece 4 is 'watch', the link is a
        lesson.  It is passed to findPlaylist with the current global ``i``,
        which is then incremented.  Before the call, ``i`` is set to 1006 if
        that token starts with 'o' and ``i`` is still below 1000.
      * Otherwise, if piece 5 starts with 'g' or 'o', the href is crawled
        recursively.

    Anchors with no href, or with fewer than six '/'-separated pieces, are
    skipped instead of aborting the crawl.

    Args:
        url: Absolute URL of the listing page to fetch.
    """
    global i
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        html_doc = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html_doc, 'html.parser')
    for a in soup.find_all("a", {"class": "list-group-item"}):
        href = a.get('href')
        if not href:
            continue
        parts = href.split('/')
        if len(parts) < 6:
            continue
        last = parts[5]
        prefix = last.split("-")[0]
        if prefix.replace("o", "") == "2012" and parts[4] == 'watch':
            # prefix cannot be empty here, so indexing it is safe.
            if prefix[0] == 'o' and i < 1000:
                i = 1006
            findPlaylist(href, i, a.get_text())
            i += 1
        elif last.startswith(('g', 'o')):
            # startswith() tolerates an empty piece, unlike last[0].
            findPages(href)
# findPlaylist("https://www.coursesaver.com/video/watch/2012-11-atomic-structure-atomic-mass-periodic-table/g1-atomic-and-nuclear-structure-1", 774, '''

                                                                        #   1.
                                                                        # 1 - Atomic Structure, Atomic Mass, Periodic Table

                                                                    # ''')
# Start the crawl at the category page below, then print how many lessons
# were counted as failed in `fails`.
findPages("https://www.coursesaver.com/video/category/dat-videos")
print "Missing videos", fails