# get.py

#functions

import help
import syn

from urllib.request import Request, urlopen
from urllib.error import URLError
import os
import re
import sys

#----------------------------------------
def top(folder):
    """Interactively download a selection of listing pages into *folder*.

    Prints the archive size estimate, asks for confirmation, then asks
    which pages to fetch ('all', 'last', 'first' or 'range').  The chosen
    pages are processed with page() from the highest page number down to
    the lowest, after which ``syn.write_db_to(folder)`` is called and a
    summary line is printed.

    Returns None.  Any answer other than the listed ones, or a page count
    that is not a whole number, aborts without downloading anything.
    """
    count = estimated_archive_size()
    end = 1

    print("Do you still want to continue downloading? Answers: 'yes', 'no'")
    answer = input("").lower()
    if answer != "yes":
        return

    print("Download all, last x pages, first x pages or a certain page range?")
    print("Answers: 'all', 'last', 'first', 'range'")
    answer = input("").lower()
    try:
        if answer == "all":
            pass  # last page to first page
        elif answer == "last":
            print("How many?")
            end = count - int(input("")) + 1
        elif answer == "first":
            print("How many?")
            count = int(input(""))
        elif answer == "range":
            print("From?")
            count = int(input(""))
            print("To?")
            end = int(input(""))
            # Pages are always walked downwards, so make count the upper bound.
            if count < end:
                count, end = end, count
        else:
            return
    except ValueError:
        # int() rejected the typed answer; abort instead of dying with a traceback.
        print("Please enter a whole number.")
        return

    # Page numbers start at 1, so never walk below it.  Taking the total from
    # the pages actually visited keeps the summary correct when the requested
    # span reaches past page 1 (e.g. "last 10" when only 5 pages exist).
    pages = range(count, max(end, 1) - 1, -1)
    for number in pages:
        page(number, folder)

    syn.write_db_to(folder)
    print("Congratulations. All " + str(len(pages)) + " Pages were downloaded.")
    return

def page(page, folder):
    """Process one listing page: find its image hashes and fetch each one.

    page   -- page number (anything int() accepts); page 1 maps to
              offset 0, each further page adds 25 to the offset.
    folder -- handed through unchanged to hash() for every hash found.

    Prints how many hashes were found, how many images hash() reported as
    downloaded and how many it did not, then a "done" line.  Returns None.
    """
    offset = int(page) * 25 - 25
    # The response object is a context manager, so it is closed even when
    # read() raises.
    with req('http://ffffound.com/?offset=' + str(offset) + '&') as response:
        html = response.read()

    # Raw bytes literal: "\w" is not a valid escape sequence in a plain
    # literal.  The capture group is the 40-character hash itself, which
    # replaces slicing the prefix and the closing quote off every match.
    hashes = re.findall(rb'<blockquote id="asseti(\w{40})"', html)
    print(str(len(hashes)) + " Hashes found.")

    existances = 0
    successes = 0
    for found in hashes:
        # hash() returns 1 after a download; every other status is counted
        # as "already existed".
        if hash(found.decode(), folder) == 1:
            successes += 1
        else:
            existances += 1
    print(str(existances) + " files already existed, " + str(successes) +
          " were downloaded.")
    print("Page " + str(page) + " is done.")
    return

def hash(hash, folder):
    """Download the image identified by *hash* unless it is already known.

    Returns 0 immediately when ``syn.has(hash)`` is true.  Otherwise the
    image's detail page is fetched, the first matching image URL on it is
    passed to image() together with *folder*, ``syn.add(hash)`` is called
    and 1 is returned.

    Raises IndexError when the detail page contains no matching image URL.
    """
    if syn.has(hash):
        return 0

    # The response object is a context manager, so it is closed even when
    # read() raises.
    with req('http://ffffound.com/image/' + hash) as response:
        html = response.read()

    # Raw bytes literal: "\d" and "\w" are not valid escapes in a plain
    # literal.  The dots of the host name and the file extension are escaped
    # so they match a literal "." only, and the capture group yields the URL
    # without the surrounding src="...".
    pattern = re.compile(
        rb'src="(http://img\.ffffound\.com/static-data/assets/+\d/\w{40}_m\.[a-z]{3})"')
    matches = re.findall(pattern, html)
    URI = matches[0].decode()

    image(URI, folder)
    syn.add(hash)
    return 1

def image(URI, folder):
    """Download *URI* and save it in *folder*.

    The file name is the last '/'-separated segment of *URI*.  *folder* is
    created (with any missing parents) when it does not exist yet.
    Returns None.
    """
    # Read the whole download before touching the disk, so a failed transfer
    # does not leave an empty file behind; the response is closed either way.
    with req(URI) as response:
        content = response.read()

    if not os.path.exists(folder):
        print("Folder " + folder + " doesn't exist and will be created now.")
        os.makedirs(folder)

    name = URI.split('/')[-1]
    # os.path.join inserts a separator only when folder lacks a trailing one,
    # so both "pics" and "pics/" store the file inside the folder.
    with open(os.path.join(folder, name), 'wb') as f:
        f.write(content)
    return

#----------------------------------------
def estimated_archive_size():
    """Print size estimates for the whole archive and return the last page number.

    The image count is taken as 25 per page, based on last_page_number().
    One line is printed for each assumed average file size.
    """
    last_page = last_page_number()
    images = last_page*25
    print("Up to now about " + str(last_page) + " pages and "
          + str(images) + " images exist.")

    # (message prefix, assumed bytes per file)
    scenarios = (
        ("If each file has a size of 1 MiB, the archive would contain: ",
         1024**2),
        ("If each file has a size of 500 KiB, the archive would contain: ",
         1024*500),
        ("If each file has a size of 250 KiB, the archive would contain: ",
         1024*250),
        ("If each file has a size of 125 KiB, the archive would contain: ",
         1024*125),
    )
    for message, bytes_per_file in scenarios:
        print(message + convert(images * bytes_per_file))
    return last_page

def last_page_number():
    """Return the page number derived from the last offset link on the site.

    Requests the listing with a deliberately huge offset (1000000000) and
    takes the last ``?offset=N&"`` link found in the returned HTML;
    presumably the site then links back to its real last page, which is
    not verifiable from this file.  The offset is converted with 25 items
    per page, so offset 0 is page 1.

    Raises IndexError when the HTML contains no such link.
    """
    # The response object is a context manager, so it is closed even when
    # read() raises.
    with req('http://ffffound.com/?offset=' + str(1000000000) + '&') as response:
        html = response.read()

    # Raw bytes literal: "\?" and "\d" are not valid escapes in a plain
    # literal.  The capture group yields just the digits of each offset.
    offsets = re.findall(rb'./\?offset=(\d+)&"', html)
    offset = int(offsets[-1].decode())  # only the last one is important
    # Floor division keeps the arithmetic exact; going through a float is
    # unnecessary and loses precision for very large values.
    return (offset + 25) // 25

def version():
    """Print the ``version`` attribute of the imported help module."""
    version_text = help.version
    print(version_text)

def author():
    """Print the ``author`` attribute of the imported help module."""
    author_text = help.author
    print(author_text)
#----------------------------------------
def req(URI):
    """Open *URI* and return the response object, or None on failure.

    A URLError (which includes HTTPError) is not propagated: its details
    are printed and None is returned, so callers must be prepared for a
    None result.
    """
    try:
        return urlopen(Request(URI))
    except URLError as e:
        # HTTPError is a URLError subclass carrying both `code` and
        # `reason`, so `code` has to be checked first; testing for `reason`
        # first would never reach the status-code branch.
        code = getattr(e, 'code', None)
        if code is not None:
            print(str(code) + " " + str(e.reason))
        else:
            print(e.reason)
        return None

def convert(number):
    """Format a byte count using the largest fitting binary unit.

    Returns a string such as "1.5KiB" or "3.0GiB": the value divided by
    the unit size, immediately followed by the unit name.  Values below
    1024 are returned unscaled with the unit "B", so the function always
    returns a string.
    """
    units = ((5, "PiB"), (4, "TiB"), (3, "GiB"), (2, "MiB"), (1, "KiB"))
    for exponent, unit in units:
        size = 1024**exponent
        # ">=" so that an exact power is shown in its own unit
        # (1024**2 -> "1.0MiB" rather than "1024.0KiB").
        if number >= size:
            return str(number / size) + unit
    # Too small for any unit above; without this fallback the function
    # would return None and break callers that concatenate the result.
    return str(number) + "B"