KKmanhua v0.0.1

xah
Feb 26th, 2017 (edited)
# Newest update to xah's crappy code for extracting images off KuaiKanManhua.com.
# It's less crappy than the earlier version lol: it's quicker & it automatically downloads
# all of the latest chapters. (See the example run at the bottom of this paste.)
# Requires the requests and beautifulsoup4 (bs4) packages.

import requests, os, bs4, re

def name(loop):
    # Increment a page/chapter counter and zero-pad it to three digits.
    loop = int(loop) + 1
    if loop < 10:
        loop = '00' + str(loop)

    elif loop < 100:
        loop = '0' + str(loop)

    else:
        loop = str(loop)

    return str(loop)

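# A few sample outputs from name(), for reference:
#   name('001') -> '002'
#   name('009') -> '010'
#   name('099') -> '100'
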
def modifier(file):
    # Bump the chapter number embedded in a folder name or prefix.
    # Assumes the name is a run of non-digits followed by a number (e.g. 'chapter001');
    # anything after the number is dropped.
    findWor = re.compile(r'\D+')
    words = findWor.search(file)

    findNum = re.compile(r'\d+')
    number1 = findNum.search(file)
    number2 = number1.group()

    file = words.group() + name(number2)
    return file

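# A couple of sample transformations from modifier(), for reference:
#   modifier('chapter001') -> 'chapter002'
#   modifier('ch_010')     -> 'ch_011'
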
# The prefix and folder name should end with the chapter number (e.g. 'chapter001'),
# so the script can bump them when it moves on to the next chapter.
prefix = input('Please input a prefix: ')
folder = input('Please input a folder name: ')
url = input('Please input the url of the KuaiKanManhua chapter: ')

while url:
    os.makedirs(folder, exist_ok=True)

    res = requests.get(url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # The comic images are referenced by data-kksrc attributes on the chapter page.
    imgsearcher = re.compile(r'data-kksrc="(.*?)"')
    comicUrl = imgsearcher.findall(str(soup))

    pageNum = '001'

    for i in comicUrl:
        # Stop at the first repeated image URL (the raw HTML may list the same image
        # more than once); comicUrl[:int(pageNum) - 1] is everything downloaded so far.
        if i in comicUrl[:int(pageNum) - 1]:
            break

        print('Downloading page %s from %s...' % (pageNum, i))
        with open(os.path.join(folder, os.path.basename(prefix + pageNum + '.png')), 'wb') as imageFile:
            imageFile.write(requests.get(i).content)

        pageNum = name(pageNum)

    print('\n\nDone with: ' + folder)

    # Look for the link to the next chapter ("下一话" means "next chapter").
    asearcher = re.compile(r'href="(.*?)" title="下一话"')
    newUrl = asearcher.search(str(soup))

    if newUrl:
        print('Now beginning next chapter...\n\n')
        url = 'http://www.kuaikanmanhua.com' + newUrl.group(1)
        folder = modifier(folder)
        prefix = modifier(prefix) + '_'

    else:
        break

print('\n\nNo more new chapters.\nDone.')
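
# A minimal example of what a run might look like. The folder, prefix, and chapter URL
# below are made up for illustration; use whatever chapter page you want to start from.
#
#   Please input a prefix: spirit001
#   Please input a folder name: SpiritBlade001
#   Please input the url of the KuaiKanManhua chapter: http://www.kuaikanmanhua.com/web/comic/1234/
#
# The images land in SpiritBlade001/ as spirit001001.png, spirit001002.png, and so on;
# when the chapter is done, the script follows the "下一话" (next chapter) link and keeps
# going with SpiritBlade002/ and the prefix spirit002_.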