xah

acQQmanhua v0.0.0

xah
Mar 3rd, 2017 (edited)
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.42 KB | None | 0 0
  1. #This is xah's crappy code for extracting images off ac.qq.com
  2. #You'll need to install the requests and Pillow packages. Written for PNG image extraction.
  3.  
  4. import requests, re, os, base64
  5. from PIL import Image
  6.  
  7. def base64ToString(b):
  8.     return base64.b64decode(b).decode('utf-8')
  9.  
  10. def name(loop):
  11.     loop = int(loop) + 1
  12.     if  loop < 10:
  13.         loop = '00' + str(loop)
  14.  
  15.     elif loop < 100:
  16.         loop = '0' + str(loop)
  17.  
  18.     else:
  19.         loop = str(loop)
  20.  
  21.     return str(loop)
  22.  
  23. prefix = input('Please input a prefix: ')
  24. folder = input('Please input a folder name: ')
  25. url = input('Please input the url of the ac.qq.com chapter: ')
  26.  
  27. res = requests.get(url)
  28. soup = res.text
  29.  
  30. codesearcher = re.compile(r"var DATA        = '(.*?)',")
  31. c = codesearcher.search(str(soup))
  32.  
  33. code = base64ToString(c.group(1)[1:])
  34.  
  35. picsearcher = re.compile(r'"url":"(.*?)"}')
  36. p = picsearcher.findall(code)
  37.  
  38. pageNum = '001'
  39. os.makedirs(folder, exist_ok=True)
  40.  
  41. for i in p:
  42.     regex = re.compile(r'\\')
  43.     other = regex.sub('', i)
  44.    
  45.     if other.startswith('http://comic.qq.com/'):
  46.         break
  47.  
  48.     print('Downloading page %s...' % other)
  49.     fileName = os.path.join(folder, prefix + pageNum + '.png')
  50.     imageFile = open(fileName, 'wb')
  51.     imageFile.write(requests.get(other, stream=True).content)
  52.     imageFile.close()
  53.  
  54.     img = Image.open(fileName)
  55.     img.save(fileName, 'png')
  56.  
  57.     pageNum = name(pageNum)
  58.    
  59. print('Done')
Add Comment
Please, Sign In to add comment