SHOW:
|
|
- or go back to the newest paste.
| 1 | #!/usr/bin/python | |
| 2 | import urllib | |
| 3 | import re | |
| 4 | ||
| 5 | - | course = "algo" |
| 5 | + | course = "nlp" |
| 6 | maindoc = urllib.urlopen("https://class.coursera.org/%s/lecture/preview" % course).read() | |
| 7 | ||
| 8 | files = [] | |
| 9 | for line in maindoc.split("<li"): | |
| 10 | pattern = r'a href="(https://class\.coursera\.org/%s/lecture/preview_view\?lecture_id=\d+)[^>]+>([^<]+)<' % course | |
| 11 | m = re.search(pattern, line) | |
| 12 | if m: | |
| 13 | (url, title) = m.group(1), m.group(2).strip() | |
| 14 | files.append((url, title)) | |
| 15 | ||
| 16 | cnt = 1 | |
| 17 | for url, title in files: | |
| 18 | doc = urllib.urlopen(url).read() | |
| 19 | pattern = r'<source type="video/mp4" src="([^\"]+)"' | |
| 20 | m = re.search(pattern, doc) | |
| 21 | - | print "downloading %.2d - %s.mp4" % (cnt, title) |
| 21 | + | print "downloading %.2d - %s.mp4" % (cnt, re.sub("/","-",title)) |
| 22 | - | urllib.urlretrieve (m.group(1), "%.2d - %s.mp4" % (cnt, title)) |
| 22 | + | urllib.urlretrieve (m.group(1), "%.2d - %s.mp4" % (cnt, re.sub("/","-",title))) |
| 23 | cnt += 1 |