Guest User

Untitled

a guest
Aug 2nd, 2020
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.83 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: Anon
  4.  
  5. Changelog:
  6. 2020/08/02: Added ?_=RANDOM to the file URL to get the non-recoded file version / Added a timeout and retries to the download function
  7.  
  8. """
  9. import requests, urllib.request, time, socket, os
  10.  
  11.    
  12. def GetFile(Thread, myPath):
  13.     socket.setdefaulttimeout(15)
  14.     t0 = time.time()
  15.     Filepath = ("https://media.8kun.top/file_store/" + str(Thread["tim"]) + str(Thread['ext']+ "?_=RANDOM"))
  16.     LocalFileName = str(Thread['filename']) + str(Thread['ext'])
  17.     fullfilename = os.path.join(myPath , LocalFileName)
  18.     for attempt in range(3):
  19.         try:
  20.              urllib.request.urlretrieve(Filepath, fullfilename)
  21.         except Exception as x:
  22.             print('It failed :(', x.__class__.__name__)
  23.             print('Failed url: ' + Filepath)
  24.         else:
  25.             print('Downloaded: ' + LocalFileName)
  26.             break
  27.         finally:
  28.             t1 = time.time()
  29.             print('Took', t1 - t0, 'seconds')    
  30.    
  31. def main(myPath):
  32.     t0 = time.time()
  33.     r = requests.get('https://8kun.top/vichan/catalog.json');
  34.     Pages = r.json()
  35.     i = 0
  36.    
  37.     with open(myPath + "Data.txt", 'a', encoding='utf-8') as outfile:
  38.         print("-----------------------Vichan-----------------------" + str(i), file=outfile)
  39.         for Page in Pages:
  40.             Threads = Page['threads']
  41.             for Thread in Threads:
  42.                 print ("PostID:" + str(Thread['no']) + " | Time: " + str(Thread['time']) + ' | Replies: ' + str(Thread['replies']) + ' | com: ' + str(Thread['com']) + '\n', file=outfile)
  43.                 if 'tim' in Thread:
  44.                     GetFile(Thread, myPath)
  45.                     i += 1
  46.     t1 = time.time()
  47.     print("TotalFiles collected: " + str(i))
  48.     print("Total time: ", t1 - t0, 'seconds')
  49.        
  50.    
  51. myPath = r"C:/Tmp/ChanArchiveOutput/"
  52. main(myPath)
Add Comment
Please, Sign In to add comment