Advertisement
ffsinit

Untitled

Oct 20th, 2012
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2.  
  3. '''
  4. 2chDown.py
  5. 2ch.so 2-ch.so 2ch.ec downloader script.
  6.  
  7. Based on fourdown.py
  8. '''
  9.  
  10. import os.path as op
  11. from os import getcwd as cwd, makedirs, system
  12. import urllib2, urllib
  13. import re
  14. import time
  15. import sys
  16. from PySide.QtCore import *
  17. from PySide.QtGui import *
  18. from PySide.QtDeclarative import QDeclarativeView
  19.  
  20.  
  21.  
  22. USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0'
  23. REGEX_IMAGE = '\w+/src/\d+\.(?:png|jpeg|jpg|gif)'
  24. WGET_PATH = '/usr/bin/wget'
  25.  
  26.  
  27. class FourDown(QObject):
  28.     @Slot(str)
  29.  
  30.     def __init__(self, url, *args, **kwargs):
  31.         self.url_regex = re.compile(kwargs.get('regex', REGEX_IMAGE))
  32.         self.user_agent = re.compile(kwargs.get('user_agent', USER_AGENT))
  33.         self.retry_delay = kwargs.get('retry_delay', 60)
  34.         self.download_delay = kwargs.get('download_delay', 5)
  35.         self.page_delay = kwargs.get('page_delay', 60)
  36.         self.url = url
  37.         self.save_to = kwargs.get('save_to', None)
  38.         self.USE_WGET = kwargs.get('USE_WGET', False)
  39.         self.wget_path = kwargs.get('wget_path', WGET_PATH)
  40.         if self.save_to is None:
  41.             self.save_to = cwd()
  42.         else:
  43.             self.save_to = op.abspath(self.save_to)
  44.         self.page = ''
  45.     start_loop()
  46.  
  47.     def get_page(self):
  48.         request = urllib2.Request(self.url, None, {'User-agent': self.user_agent})
  49.  
  50.         response = urllib2.urlopen(request)
  51.  
  52.         self.page = response.read()
  53.  
  54.         return self.page
  55.  
  56.     def _remove_dupes(self, items):
  57.         # from here: http://code.activestate.com/recipes/52560/#c3
  58.         set = {}
  59.         return [set.setdefault(e,e) for e in items if e not in set]
  60.  
  61.     def _query_images(self):
  62.         return self._remove_dupes(self.url_regex.findall(self.page))
  63.  
  64.     def _make_path(self):
  65.         try:
  66.             makedirs(self.save_to)
  67.         except OSError:
  68.             pass
  69.  
  70.     def _get_url(self, image, save_file):
  71.         if self.USE_WGET:
  72.             system('%s %s -O %s' % (self.wget_path, 'http://2ch.hk' + '/' + image, save_file))
  73.         else:
  74.             urllib.urlretrieve(image, save_file)
  75.  
  76.     def get_images(self):
  77.         self._make_path()
  78.         images = self._query_images()
  79.         total = len(images)
  80.         counter = 0
  81.         print '%d images in thread' % total
  82.         for image in images:
  83.             counter += 1
  84.             progress = '[%d/%d]' % (counter, total)
  85.             filename = ''.join(image.split('/')[-1:])
  86.             save_file = op.join(self.save_to, filename)
  87.             if not op.isfile(save_file):
  88.                 try:
  89.                     print '%s Getting %s...' % (progress, filename)
  90.                     self._get_url('http://2ch.ec' + '/' + image, save_file)
  91.                 except Exception as error:
  92.                     print '%s Failed getting %s, we will get it next time' % (progress, 'http://2ch.ec' + '/' + image)
  93.                 time.sleep(self.download_delay)
  94.  
  95.     def start_loop(self):
  96.         print 'Using %s to store images' % self.save_to
  97.  
  98.         while True:
  99.             try:
  100.                 print 'Getting page...'
  101.                 self.get_page()
  102.             except urllib2.HTTPError as error:
  103.                 if error.code == 404:
  104.                     print '404: Stopping...'
  105.                     break
  106.                 else:
  107.                     print 'Error getting page will retry in %s seconds' % self.retry_delay
  108.                     time.sleep(self.retry_delay)
  109.                     continue
  110.             except urllib2.URLError:
  111.                 print 'Error getting page, will retry in %s seconds' % self.retry_delay
  112.                 time.sleep(self.retry_delay)
  113.                 continue
  114.  
  115.             print 'Downloading images...'
  116.             self.get_images()
  117.             print 'Done for now, will check again in %s seconds' % self.page_delay
  118.             time.sleep(self.page_delay)
  119.  
  120.  
  121.  
  122. # Create Qt application and the QDeclarative view
  123. app = QApplication(sys.argv)
  124. view = QDeclarativeView()
  125. view.setResizeMode(QDeclarativeView.SizeRootObjectToView)
  126. view.showFullScreen()
  127.  
  128. download = FourDown()
  129. context = view.rootContext()
  130. context.setContextProperty("download", download)
  131.  
  132. # Create an URL to the QML file
  133. url = QUrl('main.qml')
  134. # Set the QML file and show
  135. view.setSource(url)
  136. view.show()
  137.  
  138. # Enter Qt main loop
  139. sys.exit(app.exec_())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement