Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Newest update to xah's script for extracting images from KuaiKanManhua.com.
# Less crude than the earlier version: it is quicker and automatically downloads all of the latest chapters.
- import requests, os, bs4, re
def name(loop):
    """Return *loop* + 1 as a string, zero-padded to at least 3 digits.

    Accepts an int or a numeric string: '001' -> '002', 9 -> '010',
    99 -> '100'.  Values of 999 and above come back with 4+ digits
    unpadded ('1000'), matching the original branch-per-width logic.
    """
    # str.zfill(3) reproduces the old if/elif padding chain in one call.
    return str(int(loop) + 1).zfill(3)
def modifier(file):
    """Bump the chapter number embedded in *file*.

    Takes the leading non-digit part of *file* and its first run of
    digits, and rebuilds the string with that number incremented and
    zero-padded via name() — e.g. 'chapter001' -> 'chapter002'.
    """
    text_part = re.search(r'\D+', file).group()
    digit_part = re.search(r'\d+', file).group()
    return text_part + name(digit_part)
# --- Main download loop --------------------------------------------------
# Starting from a user-supplied chapter URL, scrape the image URLs out of
# each chapter page, save them as zero-padded numbered PNGs into `folder`,
# then follow the page's "next chapter" link until there is none.
prefix = input('Please input a prefix: ')
folder = input('Please input a folder name: ')
url = input('Please input the url of the KuaiKanManhua chapter: ')
while url:
    os.makedirs(folder, exist_ok=True)
    res = requests.get(url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    # Image URLs live in the data-kksrc attributes of the chapter page.
    imgsearcher = re.compile(r'data-kksrc="(.*?)"')
    comicUrl = imgsearcher.findall(str(soup))
    # The page repeats its image list; stop at the first URL already seen.
    # (A set replaces the original O(n^2) list-slice membership test.)
    seen = set()
    for index, image_url in enumerate(comicUrl):
        if image_url in seen:
            break
        seen.add(image_url)
        print('Downloading page %s...' % image_url)
        # Pages are numbered from '001'; zfill matches name()'s padding.
        pageNum = str(index + 1).zfill(3)
        # `with` guarantees the handle is closed even if the write raises
        # (the original open()/close() pair leaked the handle on error).
        with open(os.path.join(folder, os.path.basename(prefix + pageNum + '.png')), 'wb') as imageFile:
            imageFile.write(requests.get(image_url).content)
    print('\n\nDone with: ' + folder)
    # The "next chapter" link is an <a> whose title is Chinese for "next chapter".
    asearcher = re.compile(r'href="(.*?)" title="下一话"')
    newUrl = asearcher.search(str(soup))
    if newUrl:
        print('Now beginning next chapter...\n\n')
        url = 'http://www.kuaikanmanhua.com' + newUrl.group(1)
        # Advance the chapter number embedded in the folder/prefix names.
        folder = modifier(folder)
        prefix = modifier(prefix) + '_'
    else:
        break
print('\n\nNo more new chapters. \nDone.')
Add Comment
Please, Sign In to add comment