Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# This is xah's crappy code for extracting images off KuaiKanManhua.com
# It's the crappiest thing ever, I know, but it (sort of) works!!
import os
import re

import bs4
import requests
# File extension given to every saved page.  The downloaded bytes are written
# unchanged, so this only affects the file name.
PAGE_EXTENSION = '.png'

# Seconds to wait on the server before giving up on a request, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Intro text; the single %s is filled with PAGE_EXTENSION so the message can
# never disagree with the extension actually used.
BANNER = """This code extracts the images off KuaiKanManhua.com.
Note, this program saves files as:
prefix + number + %s
The prefix can be defined by you.
The folder name can be defined by you.
(The folder will be where this program is saved at.) \n\n"""


def page_filename(prefix, number):
    """Return the file name for page *number*.

    The name is ``prefix`` + the number zero-padded to at least three digits
    (``1`` -> ``001``) + ``PAGE_EXTENSION``.  Only the final path component is
    kept, so a prefix containing directory separators cannot place the file
    outside the chosen folder.
    """
    return os.path.basename(prefix + str(number).zfill(3) + PAGE_EXTENSION)


def pages_before_repeat(urls):
    """Return the leading items of *urls*, stopping at the first repeated one.

    The original author noted that the page list on the site loops back on
    itself, so everything from the first already-seen URL onwards is dropped.
    """
    seen = set()
    pages = []
    for page_url in urls:
        if page_url in seen:
            break
        seen.add(page_url)
        pages.append(page_url)
    return pages


def extract_page_urls(html):
    """Return the non-empty ``data-kksrc`` values of all ``<img>`` tags in *html*.

    Values are returned in document order.
    """
    soup = bs4.BeautifulSoup(html, 'html.parser')
    # Read the attribute from the parsed tag instead of regex-scanning the
    # re-serialised markup: serialising escapes characters such as '&' to
    # '&amp;', which would corrupt URLs that carry a query string.
    values = (img['data-kksrc'] for img in soup.select('img[data-kksrc]'))
    return [value for value in values if value]


def download_page(page_url, destination):
    """Download *page_url* and write the response body to *destination*.

    Raises ``requests.HTTPError`` on a 4xx/5xx response.  The file is only
    created after a successful download, so a failure leaves nothing behind.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(destination, 'wb') as image_file:
        image_file.write(response.content)


def main():
    """Prompt for a prefix, folder and chapter URL, then save every page image."""
    print(BANNER % PAGE_EXTENSION)
    prefix = input('Please input a prefix: ')
    # An empty answer means "the current directory"; os.makedirs('') would fail.
    folder = input('Please input a folder name: ') or '.'
    url = input('Please input the url of the KuaiKanManhua chapter: ')
    os.makedirs(folder, exist_ok=True)

    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    page_urls = pages_before_repeat(extract_page_urls(res.text))
    if not page_urls:
        print('No images found on that page.')

    for number, page_url in enumerate(page_urls, start=1):
        print('Downloading page %s...' % page_url)
        download_page(page_url, os.path.join(folder, page_filename(prefix, number)))

    # TODO: the original script had an unfinished "next chapter" lookup whose
    # result was never used; following on to the next chapter is not implemented.
    print('Done')


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement