SHOW:
|
|
- or go back to the newest paste.
#!/usr/bin/python
"""Download the preview lecture videos of a Coursera course.

The script fetches the course's lecture preview index, collects every
lecture link on it, and saves each lecture's mp4 stream into the current
directory as "NN - <title>.mp4", numbered in page order.

Usage: pass the course slug as the first command-line argument, or run
with no arguments to use the default in ``course``.
"""
import re
import sys
import urllib

try:  # Python 3 keeps these helpers in urllib.request
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2 exposes them on urllib itself
    urlopen, urlretrieve = urllib.urlopen, urllib.urlretrieve

course = "nlp"

# The mp4 stream is advertised in a <source> tag on each lecture page.
VIDEO_PATTERN = re.compile(r'<source type="video/mp4" src="([^\"]+)"')


def fetch(url):
    """Return the body of *url* as a native string."""
    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        response.close()
    # Python 3 hands back bytes; Python 2 already returns a native str,
    # which is left untouched so its behaviour does not change.
    if not isinstance(raw, str):
        raw = raw.decode("utf-8", "replace")
    return raw


def find_lectures(html, course_name):
    """Return the ``(url, title)`` pairs of the lecture links in *html*.

    The page is cut at every ``<li`` and at most one link is taken from
    each piece, so the result follows the order of the list items.
    """
    # Escape the slug so regex metacharacters in it are matched literally.
    link = re.compile(
        r'a href="(https://class\.coursera\.org/%s/lecture/preview_view'
        r'\?lecture_id=\d+)[^>]+>([^<]+)<' % re.escape(course_name)
    )
    lectures = []
    for item in html.split("<li"):
        m = link.search(item)
        if m:
            lectures.append((m.group(1), m.group(2).strip()))
    return lectures


def find_video_url(html):
    """Return the mp4 source URL found in *html*, or None if there is none."""
    m = VIDEO_PATTERN.search(html)
    return m.group(1) if m else None


def lecture_filename(index, title):
    """Return the output file name for lecture number *index*.

    Slashes in the title are replaced with dashes so the title cannot be
    interpreted as a directory path.
    """
    return "%.2d - %s.mp4" % (index, title.replace("/", "-"))


def main(course_name=course):
    """Download every preview lecture of *course_name*."""
    index_url = "https://class.coursera.org/%s/lecture/preview" % course_name
    lectures = find_lectures(fetch(index_url), course_name)

    for cnt, (url, title) in enumerate(lectures, 1):
        filename = lecture_filename(cnt, title)
        video_url = find_video_url(fetch(url))
        if video_url is None:
            # A lecture page without an mp4 source is skipped, but it still
            # consumes its number so the remaining files keep page order.
            print("skipping %s (no mp4 source found)" % filename)
            continue
        print("downloading %s" % filename)
        urlretrieve(video_url, filename)


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else course)