Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
"""Download the preview lecture videos of a Coursera course as MP4 files.

Python 2 script (it relies on ``urllib.urlopen`` / ``urllib.urlretrieve``).
It fetches the course's lecture preview index, collects each lecture's
preview page URL and title, then saves the MP4 referenced by each page
under the relative name "NN - <title>.mp4".
"""
import urllib
import re

# Course identifier as it appears in the class.coursera.org URL path.
course = "nlp"

# <source> tag on a lecture preview page that carries the MP4 address.
VIDEO_SOURCE_RE = re.compile(r'<source type="video/mp4" src="([^\"]+)"')


def find_lectures(index_html, course_id):
    """Return ``[(url, title), ...]`` for the lecture links in the index page.

    The page is split on "<li" and each piece is searched once, so at most
    one lecture link is taken per list item. Titles are whitespace-stripped.
    """
    # Compiled once instead of per list item; the course id is escaped so
    # it is always matched literally inside the pattern.
    link_re = re.compile(
        r'a href="(https://class\.coursera\.org/%s/lecture/preview_view'
        r'\?lecture_id=\d+)[^>]+>([^<]+)<' % re.escape(course_id)
    )
    lectures = []
    for item in index_html.split("<li"):
        found = link_re.search(item)
        if found:
            lectures.append((found.group(1), found.group(2).strip()))
    return lectures


def find_video_url(page_html):
    """Return the MP4 URL found in a lecture page, or None if there is none."""
    found = VIDEO_SOURCE_RE.search(page_html)
    return found.group(1) if found else None


def video_filename(index, title):
    """Build the output name "NN - <title>.mp4" for lecture number *index*."""
    # "/" in a title would otherwise be read as a path separator.
    return "%.2d - %s.mp4" % (index, title.replace("/", "-"))


def main():
    """Fetch the lecture index of ``course`` and download each video."""
    index_html = urllib.urlopen(
        "https://class.coursera.org/%s/lecture/preview" % course).read()
    # Numbering follows the lecture's position in the index, so a skipped
    # lecture leaves a gap rather than shifting the later file names.
    for cnt, (url, title) in enumerate(find_lectures(index_html, course), 1):
        filename = video_filename(cnt, title)
        video_url = find_video_url(urllib.urlopen(url).read())
        if video_url is None:
            # Previously this case crashed with AttributeError on None.group
            # and aborted all remaining downloads.
            print("skipping %s: no MP4 source found at %s" % (filename, url))
            continue
        print("downloading %s" % filename)
        urllib.urlretrieve(video_url, filename)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment