Advertisement
Guest User

get.py

a guest
Jun 19th, 2011
336
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.67 KB | None | 0 0
  1. #functions
  2.  
  3. import help
  4. import syn
  5.  
  6. from urllib.request import Request, urlopen
  7. from urllib.error import URLError
  8. import os
  9. import re
  10. import sys
  11.  
  12. #----------------------------------------
  13. def top(folder):
  14.     count = estimated_archive_size()
  15.     end = 1
  16.    
  17.     print("Do you still want to continue downloading? Answers: 'yes', 'no'")
  18.     answer = input("").lower()
  19.     if answer != "yes":
  20.         return
  21.    
  22.     print("Download all, last x pages, first x pages or a certain page range?")
  23.     print("Answers: 'all', 'last', 'first', 'range'")
  24.     answer = input("").lower()
  25.     if answer == "all":
  26.         pass # last page to first page
  27.     elif answer == "last":
  28.         print("How many?")
  29.         end = count - int(input("")) + 1
  30.     elif answer == "first":
  31.         print("How many?")
  32.         count = int(input(""))
  33.     elif answer == "range":
  34.         print("From?")
  35.         count = int(input(""))
  36.         print("To?")
  37.         end = int(input(""))
  38.         if count < end:
  39.             count, end = end, count
  40.     else:
  41.         return
  42.    
  43.     range = count - end + 1
  44.     while (count >= end) and (count > 0):
  45.         page(count, folder)
  46.         count = count - 1
  47.    
  48.     syn.write_db_to(folder)
  49.     print("Congratulations. All " + str(range) + " Pages were downloaded.")
  50.     return
  51.  
  52. def page(page, folder):
  53.     offset = int(page) * 25 - 25
  54.     u = req('http://ffffound.com/?offset=' + str(offset) + '&')
  55.    
  56.     s = u.read()
  57.     u.close()
  58.        
  59.     t = b'<blockquote id="asseti\w{40}"'
  60.     pattern = re.compile(t)
  61.     match = re.findall(pattern, s)
  62.     count = len(match)
  63.     print(str(count) + " Hashes found.")
  64.  
  65.     existances = 0
  66.     successes = 0
  67.     for i in range(count):
  68.         h = match[i][22:-1].decode()
  69.         status = hash(h, folder)
  70.         if status == 1:
  71.             successes = successes + 1
  72.         else:
  73.             existances = existances + 1
  74.     print(str(existances) + " files already existed, " + str(successes) +
  75.           " were downloaded.")
  76.     print("Page " + str(page) + " is done.")
  77.     return
  78.  
  79. def hash(hash, folder):
  80.     if syn.has(hash):
  81.         return 0
  82.    
  83.     u = req('http://ffffound.com/image/' + hash)
  84.    
  85.     s = u.read()
  86.     u.close()
  87.        
  88.     t = b'src="http://img.ffffound.com/static-data/assets/+\d/\w{40}_m.[a-z]{3}"'
  89.     pattern = re.compile(t)
  90.     match = re.findall(pattern, s)
  91.     URI = match[0][5:-1].decode()
  92.    
  93.     image(URI, folder)
  94.     syn.add(hash)
  95.     return 1
  96.  
  97. def image(URI, folder):
  98.    
  99.     u = req(URI)
  100.        
  101.     n = URI.split('/')[-1]
  102.    
  103.     if not os.path.exists(folder):
  104.         print("Folder " + folder + " doesn't exist and will be created now.")
  105.         os.makedirs(folder)
  106.    
  107.     f = open(folder+n, 'wb')
  108.     content = u.read()
  109.     u.close()
  110.     f.write(content)
  111.     f.close()
  112.     return
  113.  
  114. #----------------------------------------
  115. def estimated_archive_size():
  116.     last_page = last_page_number()
  117.     images = last_page*25
  118.     print("Up to now about " + str(last_page) + " pages and "
  119.           + str(images) + " images exist.")
  120.     print("If each file has a size of 1 MiB, the archive would contain: "
  121.           + convert(images*1024**2))
  122.     print("If each file has a size of 500 KiB, the archive would contain: "
  123.           + convert(images*1024*500))
  124.     print("If each file has a size of 250 KiB, the archive would contain: "
  125.           + convert(images*1024*250))
  126.     print("If each file has a size of 125 KiB, the archive would contain: "
  127.           + convert(images*1024*125))
  128.     return last_page
  129.    
  130. def last_page_number():
  131.     u = req('http://ffffound.com/?offset=' + str(1000000000) + '&')
  132.    
  133.     s = u.read()
  134.     u.close()
  135.  
  136.     t = b'./\?offset=\d+&"'
  137.     pattern = re.compile(t)
  138.     match = re.findall(pattern, s)
  139.     offset = int(match[-1][10:-2].decode()) # only the last one is important
  140.     page = int((offset + 25) / 25)
  141.     return page
  142.  
  143. def version():
  144.     print(help.version)
  145.     return
  146.    
  147. def author():
  148.     print(help.author)
  149.     return
  150. #----------------------------------------
  151. def req(URI):
  152.     try:
  153.         return urlopen(Request(URI))
  154.     except URLError as e:
  155.         if hasattr(e, 'reason'):
  156.             print(e.reason)
  157.         elif hasattr(e, 'code'):
  158.             print(e.code)
  159.         return
  160.  
  161. def convert(number):
  162.     if number > 1024**5:
  163.         return (str(number / 1024**5) + "PiB")
  164.     elif number > 1024**4:
  165.         return (str(number / 1024**4) + "TiB")
  166.     elif number > 1024**3:
  167.         return (str(number / 1024**3) + "GiB")
  168.     elif number > 1024**2:
  169.         return (str(number / 1024**2) + "MiB")
  170.     elif number > 1024:
  171.         return (str(number / 1024) + "KiB")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement