# Paste-site chrome from the original capture (not part of the script):
#   SHOW:
#   - or go back to the newest paste.
#!/usr/bin/env python

'''
2chDown.py
2ch.so 2-ch.so 2ch.ec downloader script.

Based on fourdown.py
'''

# Standard library (Python 2: urllib2/urllib are the HTTP layer here).
import os.path as op
from os import getcwd as cwd, makedirs, system
import re
import sys
import time
import urllib
import urllib2

# Third party: Qt bindings for the QML front end.
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtDeclarative import QDeclarativeView


# Browser User-Agent sent with every request (some boards reject the
# default urllib2 agent string).
USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0'

# Default regex used to pull image links out of a thread page.
# NOTE(review): the original paste referenced REGEX_IMAGE (FourDown.__init__)
# without ever defining it, which is a NameError at runtime. This pattern
# matches the board's /src/ image paths -- confirm against live page markup.
REGEX_IMAGE = r'src/\d+/\d+\.(?:jpg|jpeg|png|gif)'

# Absolute path of the wget binary used when USE_WGET is enabled.
WGET_PATH = '/usr/bin/wget'
26 | ||
class FourDown(QObject):
    '''
    Watch a single 2ch thread URL and repeatedly download every image it
    links, until the thread 404s.

    Keyword options (all optional):
        regex          -- pattern for extracting image links (default REGEX_IMAGE)
        user_agent     -- User-Agent header value (default USER_AGENT)
        retry_delay    -- seconds to wait after a failed page fetch (default 60)
        download_delay -- seconds to sleep between image downloads (default 5)
        page_delay     -- seconds to wait between thread re-scans (default 60)
        save_to        -- directory for images (default: current directory)
        USE_WGET       -- shell out to wget instead of urllib (default False)
        wget_path      -- wget binary location (default WGET_PATH)
    '''

    # NOTE(review): the original had a stray `@Slot(str)` decorating
    # __init__ -- a Qt slot on a constructor is meaningless, so it was
    # removed. If QML needs to invoke a method on this object, decorate
    # that specific method instead.
    def __init__(self, url, *args, **kwargs):
        # QObject must be initialised before the instance is used with Qt.
        super(FourDown, self).__init__()
        self.url_regex = re.compile(kwargs.get('regex', REGEX_IMAGE))
        # BUG FIX: the original wrapped the user agent in re.compile();
        # get_page() sends it as a plain header string.
        self.user_agent = kwargs.get('user_agent', USER_AGENT)
        self.retry_delay = kwargs.get('retry_delay', 60)
        self.download_delay = kwargs.get('download_delay', 5)
        self.page_delay = kwargs.get('page_delay', 60)
        self.url = url
        self.save_to = kwargs.get('save_to', None)
        self.USE_WGET = kwargs.get('USE_WGET', False)
        self.wget_path = kwargs.get('wget_path', WGET_PATH)
        if self.save_to is None:
            self.save_to = cwd()
        else:
            self.save_to = op.abspath(self.save_to)
        self.page = ''
        # BUG FIX: was a bare `start_loop()` (NameError) -- it is a method.
        # NOTE(review): this blocks the constructor forever; consider
        # moving the call out of __init__ so the Qt event loop can run.
        self.start_loop()

    def get_page(self):
        '''Fetch the thread page, cache the HTML in self.page, return it.

        Raises urllib2.HTTPError / urllib2.URLError on failure.
        '''
        request = urllib2.Request(self.url, None,
                                  {'User-agent': self.user_agent})
        response = urllib2.urlopen(request)
        self.page = response.read()
        return self.page

    def _remove_dupes(self, items):
        '''Return items with duplicates removed, preserving first-seen order.'''
        # from here: http://code.activestate.com/recipes/52560/#c3
        # (renamed `set` -> `seen`: the original shadowed the builtin)
        seen = {}
        return [seen.setdefault(e, e) for e in items if e not in seen]

    def _query_images(self):
        '''Return the unique image links found on the cached page.'''
        return self._remove_dupes(self.url_regex.findall(self.page))

    def _make_path(self):
        '''Create the download directory; tolerate it already existing.'''
        try:
            makedirs(self.save_to)
        except OSError:
            # Directory already exists (a真 permission problem surfaces
            # later when the first file write fails).
            pass

    def _get_url(self, image, save_file):
        '''Download one full image URL to save_file (wget or urllib).

        BUG FIX: the wget branch prepended 'http://2ch.hk/' to `image`,
        but every caller already passes a complete URL -- the prefix was
        doubled (and used the wrong host). Both branches now take the
        URL as-is.
        '''
        if self.USE_WGET:
            system('%s %s -O %s' % (self.wget_path, image, save_file))
        else:
            urllib.urlretrieve(image, save_file)

    def get_images(self):
        '''Download every image in the thread not already on disk.'''
        self._make_path()
        images = self._query_images()
        total = len(images)
        counter = 0
        print('%d images in thread' % total)
        for image in images:
            counter += 1
            # BUG FIX: `progress` and `save_file` were used without ever
            # being assigned (NameError on the first image).
            progress = '[%d/%d]' % (counter, total)
            filename = ''.join(image.split('/')[-1:])
            save_file = op.join(self.save_to, filename)
            if not op.isfile(save_file):
                try:
                    print('%s Getting %s...' % (progress, filename))
                    self._get_url('http://2ch.ec' + '/' + image, save_file)
                except Exception:
                    # Best effort: the outer loop re-scans the thread, so a
                    # failed image is retried on the next pass.
                    print('%s Failed getting %s, we will get it next time'
                          % (progress, 'http://2ch.ec' + '/' + image))
                time.sleep(self.download_delay)

    def start_loop(self):
        '''Poll the thread forever: fetch page, download images, sleep.

        Stops only when the server answers 404 (thread gone).
        '''
        print('Using %s to store images' % self.save_to)
        while True:
            try:
                print('Getting page...')
                self.get_page()
            except urllib2.HTTPError as error:
                if error.code == 404:
                    # Thread deleted or archived: nothing more will appear.
                    print('404: Stopping...')
                    break
                else:
                    print('Error getting page, will retry in %s seconds'
                          % self.retry_delay)
                    time.sleep(self.retry_delay)
                    continue
            except urllib2.URLError:
                print('Error getting page, will retry in %s seconds'
                      % self.retry_delay)
                time.sleep(self.retry_delay)
                continue
            self.get_images()
            # BUG FIX: page_delay was configured but never used -- without
            # this sleep the loop re-fetches the page back-to-back.
            time.sleep(self.page_delay)
if __name__ == '__main__':
    # Restore the command-line handling that the paste's diff deleted:
    # argv[1] is the thread url (required), argv[2] an optional save dir.
    try:
        thread_url = sys.argv[1]
    except IndexError:
        print('You must provide a url')
        sys.exit(1)
    save_to = sys.argv[2] if len(sys.argv) > 2 else None
    use_wget = len(sys.argv) > 3 and sys.argv[3] == 'wget'

    # Create Qt application and the QDeclarative view
    app = QApplication(sys.argv)
    view = QDeclarativeView()
    view.setResizeMode(QDeclarativeView.SizeRootObjectToView)
    # NOTE(review): the paste called both showFullScreen() here and
    # show() below; the later show() overrides full-screen, so only the
    # final show() is kept.

    # Expose the downloader to QML as the "download" context property.
    # BUG FIX: FourDown() was called with no arguments, but __init__
    # requires the thread url.
    download = FourDown(thread_url, save_to=save_to, USE_WGET=use_wget)
    context = view.rootContext()
    context.setContextProperty("download", download)

    # Create an URL to the QML file
    url = QUrl('main.qml')
    # Set the QML file and show
    view.setSource(url)
    view.show()

    # Enter Qt main loop
    sys.exit(app.exec_())