View difference between Paste ID: 9QCdKkbD and MFcMf9YF
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/env python
2
3
'''
2chDown.py
2ch.so 2-ch.so 2ch.ec (formerly 2ch.hk) downloader script.

Based on fourdown.py
'''
9
10
import os.path as op
11
from os import getcwd as cwd, makedirs, system
12
import urllib2, urllib
13
import re
14
import time
15
import sys
16
from PySide.QtCore import *
17
from PySide.QtGui import *
18
from PySide.QtDeclarative import QDeclarativeView
19
20
21
22
# Value sent in the User-agent header when the thread page is requested.
USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0'

# Default path of the wget executable, used when wget downloads are enabled.
WGET_PATH = '/usr/bin/wget'
25
26
27
class FourDown(QObject):
    '''
    Image downloader for a single imageboard thread.

    The thread page is fetched repeatedly; every image path matched by the
    image regex is saved into the target directory unless a file with the
    same name is already there.  Polling stops once the page answers 404.

    ``start_loop`` is registered as a Qt slot taking the thread URL, so an
    instance exposed to QML can be started from there.
    '''

    # Site root prepended to the image paths matched on the page
    # (presumably the regex yields site-relative paths -- verify).
    BASE_URL = 'http://2ch.ec'

    def __init__(self, url=None, *args, **kwargs):
        '''
        Store the download settings.

        url -- address of the thread page; may be omitted and supplied later
               through ``start_loop``.

        Recognised keyword arguments:
          regex          -- pattern used to find image paths in the page
          user_agent     -- User-agent header value for page requests
          retry_delay    -- seconds to wait after a failed page request
          download_delay -- seconds to wait after each image download attempt
          page_delay     -- seconds to wait between two polls of the page
          save_to        -- target directory (default: current directory)
          USE_WGET       -- download with wget instead of urllib
          wget_path      -- path of the wget executable
          base_url       -- site root prepended to matched image paths

        When ``url`` is given the polling loop is started right away, which
        blocks until the thread returns 404.
        '''
        # PySide requires the QObject initialiser to run before the
        # instance is used as a QObject.
        QObject.__init__(self)
        # NOTE(review): REGEX_IMAGE is not defined in the visible part of
        # this file; it must exist at module level -- confirm.
        self.url_regex = re.compile(kwargs.get('regex', REGEX_IMAGE))
        # The user agent is a plain header value, not a pattern.
        self.user_agent = kwargs.get('user_agent', USER_AGENT)
        self.retry_delay = kwargs.get('retry_delay', 60)
        self.download_delay = kwargs.get('download_delay', 5)
        self.page_delay = kwargs.get('page_delay', 60)
        self.url = url
        self.USE_WGET = kwargs.get('USE_WGET', False)
        self.wget_path = kwargs.get('wget_path', WGET_PATH)
        self.base_url = kwargs.get('base_url', self.BASE_URL)
        save_to = kwargs.get('save_to', None)
        if save_to is None:
            self.save_to = cwd()
        else:
            self.save_to = op.abspath(save_to)
        self.page = ''
        if self.url is not None:
            self.start_loop()

    def get_page(self):
        '''Fetch the thread page, keep it in ``self.page`` and return it.'''
        request = urllib2.Request(self.url, None,
                                  {'User-agent': self.user_agent})
        response = urllib2.urlopen(request)
        try:
            self.page = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        return self.page

    def _remove_dupes(self, items):
        '''Return ``items`` as a list without duplicates, first-seen order.'''
        seen = set()
        unique = []
        for item in items:
            if item not in seen:
                seen.add(item)
                unique.append(item)
        return unique

    def _query_images(self):
        '''Return the distinct image regex matches found in ``self.page``.'''
        return self._remove_dupes(self.url_regex.findall(self.page))

    def _make_path(self):
        '''Create the target directory if it does not exist yet.'''
        try:
            makedirs(self.save_to)
        except OSError:
            # An already existing directory is fine; anything else
            # (permissions, a file in the way, ...) is a real error.
            if not op.isdir(self.save_to):
                raise

    def _get_url(self, image, save_file):
        '''
        Download the absolute URL ``image`` into the file ``save_file``.

        Raises IOError when wget exits with a non-zero status.
        '''
        if self.USE_WGET:
            import subprocess
            from os import remove
            # Argument list instead of a shell string: no quoting problems
            # with unusual characters in the URL or the file name.
            status = subprocess.call(
                [self.wget_path, image, '-O', save_file])
            if status != 0:
                # Remove whatever wget left behind so that the file is
                # not mistaken for a finished download on the next pass.
                if op.isfile(save_file):
                    remove(save_file)
                raise IOError('wget exited with status %s' % status)
        else:
            urllib.urlretrieve(image, save_file)

    def get_images(self):
        '''
        Download every image of the current page that is not stored yet.

        A failed download is reported and skipped; it is attempted again
        on the next call because its file does not exist.
        '''
        self._make_path()
        images = self._query_images()
        total = len(images)
        print('%d images in thread' % total)
        for counter, image in enumerate(images, 1):
            progress = '[%d/%d]' % (counter, total)
            filename = image.split('/')[-1]
            save_file = op.join(self.save_to, filename)
            if op.isfile(save_file):
                continue
            image_url = self.base_url + '/' + image
            try:
                print('%s Getting %s...' % (progress, filename))
                self._get_url(image_url, save_file)
            except Exception:
                # Deliberately best-effort: one bad image must not stop
                # the rest of the thread from being downloaded.
                print('%s Failed getting %s, we will get it next time'
                      % (progress, image_url))
            time.sleep(self.download_delay)

    @Slot(str)
    def start_loop(self, url=None):
        '''
        Poll the thread and download its images until the page returns 404.

        url -- optional thread address replacing the one stored on the
               instance (this is the argument passed by the Qt slot).

        Raises ValueError when no URL is available.

        NOTE(review): the loop blocks the calling thread; when invoked from
        the GUI thread the interface will not update while it runs.
        '''
        if url:
            self.url = url
        if not self.url:
            raise ValueError('No thread URL given')
        print('Using %s to store images' % self.save_to)

        while True:
            try:
                print('Getting page...')
                self.get_page()
            except urllib2.HTTPError as error:
                if error.code == 404:
                    print('404: Stopping...')
                    break
                print('Error getting page, will retry in %s seconds'
                      % self.retry_delay)
                time.sleep(self.retry_delay)
                continue
            except urllib2.URLError:
                print('Error getting page, will retry in %s seconds'
                      % self.retry_delay)
                time.sleep(self.retry_delay)
                continue

            self.get_images()
            # Pause between polls instead of re-requesting immediately.
            print('Waiting %s seconds before checking the page again'
                  % self.page_delay)
            time.sleep(self.page_delay)
# Create Qt application and the QDeclarative view
app = QApplication(sys.argv)
view = QDeclarativeView()
# Resize the QML root object together with the view
view.setResizeMode(QDeclarativeView.SizeRootObjectToView)
view.showFullScreen()

# Make the downloader available to QML under the name "download"
download = FourDown()
context = view.rootContext()
context.setContextProperty("download", download)

# Create an URL to the QML file
url = QUrl('main.qml')
# Set the QML file and show
view.setSource(url)
view.show()

# Enter Qt main loop
sys.exit(app.exec_())