Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re
import sys
import urllib

from Tkinter import TclError, Tk
# Directory that holds one sub-directory per tracked thread.  It ends with a
# path separator so the templates below can be built by plain concatenation.
BASE_DIRECTORY = os.getcwd() + os.sep

# Scratch file an image is downloaded to before it is moved into its thread
# directory; the "%s" placeholder takes the image's file extension.
TEMPORARY_IMAGE_PATH = BASE_DIRECTORY + "temp.%s"
# Path of the scratch file currently in use.  Image.download() rebinds this
# global so an interrupted download can be cleaned up by the caller.
TEMPORARY_IMAGE = TEMPORARY_IMAGE_PATH

# Thread URL built from (board, thread number).
THREAD_URL_TEMPLATE = "http://boards.4chan.org/%s/res/%s"
# Groups: (full URL, board, thread number).  Dots are escaped so that only
# the literal host name matches.
THREAD_URL_PATTERN = r"^(https?://boards\.4chan\.org/([a-z]+)/res/([0-9]+))$"

# Per-thread directory name built from (board, thread number), with a
# trailing separator.
THREAD_DIRECTORY_TEMPLATE = "%s-%s" + os.sep
# Name a thread directory is renamed to once the thread is considered gone.
THREAD_DIRECTORY_404_TEMPLATE = "(404) " + THREAD_DIRECTORY_TEMPLATE
# Groups: (board, thread number); matched against bare directory names.
THREAD_DIRECTORY_PATTERN = r"^([a-z]+)-([0-9]+)$"

# Groups: (source URL without scheme, post number, file extension).
# An older pattern for images.4chan.org links used to be assigned here first
# and was immediately overwritten; only the pattern in effect is kept.
IMAGE_URL_PATTERN = (
    r'<a class="fileThumb" '
    r'href="//(i\.4cdn\.org/[a-z]+/src/([0-9]+)\.([a-z]+))" '
    r'target="_blank">'
)

# Name and contents of the shortcut file stored inside each thread directory.
# Besides the URL it records the number of the latest downloaded image.
THREAD_LINK_PATH = "_thread.url"
THREAD_LINK_TEMPLATE = "[InternetShortcut]\nURL=%s\nLATEST=%d"
LATEST_POST_PATTERN = r"LATEST=(-?[0-9]+)"

# Sentinel returned by Thread.get_images() when a page has no image links.
ERROR_THREAD_404 = -1
class Image:
    """One image linked from a thread page.

    Holds the pieces captured by IMAGE_URL_PATTERN and knows where the image
    lives remotely and where it is stored inside its thread's directory.
    """

    def __init__(self, thread, match):
        """Create an image description.

        thread -- the owning thread object; it must provide get_directory()
                  and update_latest().
        match  -- a (source, number, extension) triple, where source is the
                  image URL without its scheme.
        """
        self.__thread = thread
        self.__src, number, self.__ext = match
        self.__number = int(number)
        # The size is looked up on first use rather than here, so merely
        # describing an image does not cost a network request.
        self.__filesize = None

    def get_thread(self):
        """Return the thread this image belongs to."""
        return self.__thread

    def get_filename(self):
        """Return the local file name, '<number>.<extension>'."""
        return '%s.%s' % (self.get_number(), self.get_extension())

    def get_source(self):
        """Return the remote URL of the image."""
        return "http://" + self.__src

    def get_destination(self):
        """Return the path the image is saved to inside its thread directory."""
        return self.get_thread().get_directory() + self.get_filename()

    def get_number(self):
        """Return the image number as an int."""
        return self.__number

    def get_extension(self):
        """Return the file extension without a leading dot."""
        return self.__ext

    def get_filesize(self):
        """Return the Content-Length reported for the image, in bytes.

        The value is fetched once and cached.  If the response carries no
        Content-Length header, 0 is returned instead of raising.
        """
        if self.__filesize is None:
            conn = urllib.urlopen(self.get_source())
            try:
                self.__filesize = int(conn.headers.get("Content-Length", 0))
            finally:
                # Close the connection even if the header cannot be parsed.
                conn.close()
        return self.__filesize

    def download(self):
        """Fetch the image and record it as the thread's latest download.

        The data is first written to a scratch file whose path is published
        in the module-level TEMPORARY_IMAGE, then moved to its destination.
        """
        global TEMPORARY_IMAGE
        TEMPORARY_IMAGE = TEMPORARY_IMAGE_PATH % (self.get_extension())
        urllib.urlretrieve(self.get_source(), TEMPORARY_IMAGE)
        destination = self.get_destination()
        # os.rename refuses to overwrite an existing file on Windows; drop a
        # stale copy left by an earlier run so the move cannot fail on it.
        if os.path.exists(destination):
            os.remove(destination)
        os.rename(TEMPORARY_IMAGE, destination)
        self.get_thread().update_latest(self.get_number())

    def __str__(self):
        return '%s -> %s' % (self.get_source(), self.get_destination())
class Thread:
    """A tracked thread: its page source plus its local download directory.

    Each thread owns a directory below BASE_DIRECTORY.  A shortcut file in
    that directory stores the thread URL and the number of the latest image
    that has been downloaded.
    """

    def __init__(self, URL):
        """Fetch the thread page and set up (or load) its local directory.

        URL must match THREAD_URL_PATTERN; otherwise IndexError is raised.
        """
        self.__URL, self.__board, self.__number = \
            re.findall(THREAD_URL_PATTERN, URL)[0]
        # Fetch the validated URL rather than the raw argument, which may
        # carry a trailing newline that the pattern's "$" tolerates.
        conn = urllib.urlopen(self.__URL)
        try:
            self.__src = conn.read()
        finally:
            conn.close()
        if os.path.isdir(self.get_directory()):
            self.load_latest()
        else:
            self.__latest = -1
            self.create_directory()

    def get_URL(self):
        """Return the thread URL."""
        return self.__URL

    def get_board(self):
        """Return the board name taken from the URL."""
        return self.__board

    def get_number(self):
        """Return the thread number as the string taken from the URL."""
        return self.__number

    def get_source(self):
        """Return the page source downloaded at construction time."""
        return self.__src

    def get_directory(self):
        """Return the thread's directory path, including a trailing separator."""
        return BASE_DIRECTORY + THREAD_DIRECTORY_TEMPLATE \
            % (self.get_board(), self.get_number())

    def get_404_directory(self):
        """Return the path the directory is renamed to by kill()."""
        return BASE_DIRECTORY + THREAD_DIRECTORY_404_TEMPLATE \
            % (self.get_board(), self.get_number())

    def get_link_path(self):
        """Return the path of the shortcut file inside the thread directory."""
        return self.get_directory() + THREAD_LINK_PATH

    def get_latest(self):
        """Return the number of the latest downloaded image (-1 if none)."""
        return self.__latest

    def set_latest(self, latest):
        """Set the latest image number in memory only."""
        self.__latest = latest

    def load_latest(self):
        """Read the latest image number back from the shortcut file.

        A missing or malformed shortcut file is treated as "nothing
        downloaded yet" (-1) instead of aborting.
        """
        try:
            with open(self.get_link_path(), 'r') as f:
                contents = f.read()
        except IOError:
            contents = ""
        found = re.findall(LATEST_POST_PATTERN, contents)
        self.__latest = int(found[0]) if found else -1

    def update_latest(self, new_latest):
        """Record a new latest image number and persist it."""
        self.set_latest(new_latest)
        self.create_shortcut()

    def create_directory(self):
        """Create the thread directory and its shortcut file."""
        os.makedirs(self.get_directory())
        self.create_shortcut()

    def create_shortcut(self):
        """(Re)write the shortcut file with the URL and latest image number."""
        contents = THREAD_LINK_TEMPLATE % (self.get_URL(), self.get_latest())
        with open(self.get_link_path(), 'w') as f:
            f.write(contents)

    def get_images(self):
        """Return the images on the page that are newer than the latest one.

        Returns ERROR_THREAD_404 when the page contains no image links at
        all.  NOTE: a page that simply has no images yields the same value.
        """
        matches = re.findall(IMAGE_URL_PATTERN, self.get_source())
        if not matches:
            return ERROR_THREAD_404
        latest = self.get_latest()
        # Compare on the captured number first so that no Image object is
        # built for images that were already downloaded.
        return [Image(self, match) for match in matches
                if int(match[1]) > latest]

    def kill(self):
        """Rename the thread directory to its "(404)" name."""
        os.rename(self.get_directory(), self.get_404_directory())
def _display_status(message, index, maximum):
    """Redraw the one-line progress indicator "<message>: i/max (pct%)".

    The line starts with a carriage return so each call overwrites the
    previous one; a newline is emitted once index reaches maximum.
    """
    width = num_digits(maximum)
    line = "\r%s: %" + str(width) + "d/%" + str(width) + "d (%3d%%)"
    sys.stdout.write(line % (message, index, maximum, 100 * index // maximum))
    if index == maximum:
        sys.stdout.write("\n")
    # Without a flush the partial line may sit in the buffer unseen.
    sys.stdout.flush()


def download_new_images():
    """Scan every tracked thread and download the images not yet saved.

    Threads are discovered from the directories in BASE_DIRECTORY whose
    names match THREAD_DIRECTORY_PATTERN.  Threads whose page yields
    ERROR_THREAD_404 are renamed via Thread.kill().  Ctrl-C cancels the run
    and removes the partially downloaded scratch file.
    """
    try:
        print("Loading threads")
        # Load threads
        threads = []
        for filename in os.listdir(BASE_DIRECTORY):
            # Test the full path so the check does not depend on the
            # current working directory.
            if not os.path.isdir(BASE_DIRECTORY + filename):
                continue
            match = re.findall(THREAD_DIRECTORY_PATTERN, filename)
            if match:
                threads.append(Thread(THREAD_URL_TEMPLATE % match[0]))
        num_threads = len(threads)
        if num_threads == 0:
            print('No threads were found!')
            return
        print('%d threads found!' % num_threads)

        # Scan threads for new images
        message = "Scanning threads"
        images = []
        for index, thread in enumerate(threads):
            _display_status(message, index, num_threads)
            new_images = thread.get_images()
            if new_images == ERROR_THREAD_404:
                thread.kill()
            else:
                images.extend(new_images)
        _display_status(message, num_threads, num_threads)
        num_images = len(images)
        if num_images == 0:
            print('No new images were found!')
            return
        print('%d new images found!' % num_images)

        # Calculate total filesize
        total_size = sum(image.get_filesize() for image in images)
        print("Total file size: %s" % format_file_size(total_size))

        # Download images
        message = "Downloading images"
        for index, image in enumerate(images):
            _display_status(message, index, num_images)
            image.download()
        _display_status(message, num_images, num_images)
    except KeyboardInterrupt:
        # Best-effort cleanup of the scratch file; it may not exist yet.
        try:
            os.remove(TEMPORARY_IMAGE)
        except OSError:
            pass
        print("\nDownload cancelled")
def add_thread():
    """Start tracking the thread whose URL is currently on the clipboard.

    The clipboard text is validated against THREAD_URL_PATTERN; a valid URL
    is handed to Thread(), whose constructor creates the thread directory.
    An empty or non-text clipboard is reported as an invalid URL.
    """
    tk = Tk()
    try:
        # Hide the root window; it is only needed for clipboard access.
        tk.withdraw()
        try:
            # Copied URLs often carry surrounding whitespace or a newline.
            URL = tk.clipboard_get().strip()
        except TclError:
            # Raised when the clipboard is empty or holds no text.
            URL = ""
    finally:
        tk.destroy()
    if re.findall(THREAD_URL_PATTERN, URL):
        Thread(URL)
        print("Successfully added: %s" % URL)
    else:
        print("Invalid URL: %s" % URL)
def num_digits(number):
    """Return the length of str(number).

    Used to size the columns of the progress display.  For a negative
    number the minus sign is counted as well.
    """
    return len(str(number))
def format_file_size(bytes):
    """Return a byte count as a string with one decimal and a unit.

    The value is divided by 1024 until it drops below 1024 and is labelled
    B, KB, MB or GB accordingly; anything larger is reported in TB.
    """
    # Work on a local copy rather than rebinding the parameter, whose name
    # (kept for compatibility) shadows the builtin.
    size = float(bytes)
    for unit in ('B', 'KB', 'MB', 'GB'):
        if size < 1024.0:
            return "%.1f%s" % (size, unit)
        size /= 1024.0
    return "%.1f%s" % (size, 'TB')
# Menu commands: single-letter key -> [description, handler taking no args].
actions = {
    "a": ["Add thread from Clipboard", add_thread],
    "d": ["Download new images", download_new_images],
    # sys.exit instead of the interactive `exit` helper, which is only
    # present when the site module has been loaded.
    "q": ["Quit", sys.exit],
}

# Prompt listing every command in key order; "\a" sounds the terminal bell.
PROMPT = "\aPlease choose a command\n" + \
    '\n'.join("%s : %s" % (key, actions[key][0])
              for key in sorted(actions)) + '\n'
def main():
    """Run the interactive menu loop until the user picks the quit command.

    Only the first non-blank character of the input is significant, and it
    is matched case-insensitively against the keys of `actions`.
    """
    while True:
        # Slicing instead of indexing keeps an empty line from raising
        # IndexError; it simply falls through to "Invalid command".
        user_input = raw_input(PROMPT).strip().lower()[:1]
        print("")
        if user_input in actions:
            actions[user_input][1]()
            print("")
        else:
            print("Invalid command\n")


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment