Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# usage: python3 readmanga.me_downloader.py <link>
import ast
import json
import os
import re
import sys
import time
from multiprocessing import Pool

import bs4
import requests
def download_manga(manga):
    """Download every page of *manga* into ``./<title>/volume_X/chapter_Y/``.

    ``manga`` is the dict built by ``start()``: ``title`` (str),
    ``volumes`` ({volume: {chapter: [page URLs]}}) and ``count`` (total
    number of pages, used for the progress display).  Per-picture failure
    messages are appended to ``download_info.txt`` inside the title folder.
    """
    print('\ndownloading...')
    path = './%s/' % manga['title']
    # makedirs(exist_ok=True) tolerates re-runs and avoids the
    # check-then-create race the original exists()/mkdir pair had
    os.makedirs(path, exist_ok=True)
    # 'with' guarantees the log file is flushed/closed and the worker
    # pool is shut down even if a download raises
    with open(path + 'download_info.txt', 'a') as info_file, Pool(40) as pool:
        count = 0
        downloaded_pages = 0
        for volume, chapters in manga['volumes'].items():
            v_path = '%s/volume_%s' % (path, volume)
            os.makedirs(v_path, exist_ok=True)
            for chapter_num, pages in chapters.items():
                ch_path = '%s/chapter_%s' % (v_path, chapter_num)
                os.makedirs(ch_path, exist_ok=True)
                # (target file name, source URL) pairs for the workers;
                # the extension is kept from the URL
                pics = []
                for k, v in enumerate(pages):
                    ext = v[v.rindex('.') + 1:]
                    pic_name = '%s/%d.%s' % (ch_path, k, ext)
                    pics.append((pic_name, v))
                # the actual download, fanned out over the pool
                result = pool.map(download_pic, pics)
                count += len(pages)
                fails = [r for r in result if r != 'ok']
                downloaded_pages += len(result) - len(fails)
                info_file.write(''.join(fails))
                sys.stdout.write('\rdownload: chapter %s. total progress: %.2f%%.' %
                                 (chapter_num, float(count) / manga['count'] * 100))
                sys.stdout.flush()
        info_file.write('%d of %d pictures total were downloaded\n' %
                        (downloaded_pages, manga['count']))
def download_pic(i):
    """Pool worker: fetch one picture.

    ``i`` is a ``(file name, URL)`` tuple.  Returns ``'ok'`` on success or
    when the file already exists; otherwise an error line destined for the
    download log.
    """
    name, address = i
    if os.path.exists(name):  # resume support: never re-download
        return 'ok'
    r = requests.get(address)
    if not r.ok:
        # bug fix: the original fed a bare status code to a two-slot format
        # string, which raised TypeError instead of producing the message
        return ('pic %-60s was NOT downloaded. response status: %s\n'
                % (name, r.status_code))
    # open only after a successful response, so a failed fetch does not
    # leave an empty file behind that the resume check would skip as done
    with open(name, 'wb') as f:
        f.write(r.content)
    return 'ok'
# scrape the page-image URLs out of the inline JS on a chapter page
def get_all_pages(chapter):
    """Return the full URL of every page image of *chapter*.

    *chapter* is a site-relative link like ``/wa/vol1/1?mature=1``.  The
    reader page embeds the page list in an ``rm_h.init([...])`` JS call;
    we slice that array literal out of the HTML and parse it.
    """
    r = requests.get('http://readmanga.me' + chapter)
    # crude extraction: cut the rm_h.init(...) argument out of the script tag
    txt = r.text[r.text.index('rm_h.init('):]
    txt = txt[:txt.index('</script>')]
    txt = txt[txt.index('[') + 1:txt.rindex(']')]
    # each entry looks like:
    # ['auto/03/21','http://e3.postfact.ru/',"/00/Wa__v01_c001_009.png_res.jpg",1100,1600]
    # security fix: literal_eval parses the literal without executing
    # arbitrary code served by the remote site, unlike the original eval()
    return [p[1] + p[0] + p[2] for p in ast.literal_eval(txt)]
def start(link):
    """Crawl the manga at *link* (a readmanga.me title URL), collect every
    page URL into ``<title>.json``, then download everything.

    If ``<title>.json`` already exists, the cached crawl is reused so an
    interrupted download resumes without hitting the site again.
    """
    link += '/' if link[-1] != '/' else ''
    link += 'vol1/1?mature=1'  # first reader page; its <select> lists all chapters
    r = requests.get(link, headers={'User-Agent': "don't block me pls"})
    if not r.ok:
        raise Exception(repr(r))
    print('getting manga info...')
    s = bs4.BeautifulSoup(r.text, 'html.parser')
    title = str(s.find('a', {'class': 'manga-link'}).string)
    if os.path.exists(title + '.json'):
        # resume from the cached crawl
        with open(title + '.json', 'r') as f:
            download_manga(json.loads(f.read()))
        return
    # links to every chapter, reversed into oldest-first order
    data = [v.attrs['value']
            for v in s.find('select', {'id': 'chapterSelectorSelect'}).contents
            if isinstance(v, bs4.Tag)]
    data.reverse()
    print('chapters: %d' % len(data))
    # pull volume/chapter numbers out of links like '/wa/vol1/3?mature=1';
    # raw strings + capture groups replace the original non-raw patterns
    # (invalid '\/' escapes) and the fragile lstrip/rstrip char-set tricks
    vol_regex = re.compile(r'/vol([0-9]+)/')
    chapter_regex = re.compile(r'/([0-9]+)\?')
    volumes = {}
    count = 0
    for c in data:
        volume_num = int(vol_regex.search(c).group(1))
        chapter_num = int(chapter_regex.search(c).group(1))
        sys.stdout.write('\rget: volume: %3d. chapter: %4d' % (volume_num, chapter_num))
        sys.stdout.flush()
        # fetch the page-image URLs for this chapter
        pages = get_all_pages(c)
        count += len(pages)
        volumes.setdefault(volume_num, {})[chapter_num] = pages
    manga_info = {
        'title': title,
        'volumes': volumes,
        'count': count,
    }
    # cache the crawl so a re-run can skip straight to downloading
    with open(title + '.json', 'w') as f:
        f.write(json.dumps(manga_info))
    download_manga(manga_info)
if __name__ == '__main__':
    # CLI entry point: one argument, the manga's title-page URL
    try:
        link = sys.argv[1]
    except IndexError:
        # narrow except: the bare 'except:' also swallowed KeyboardInterrupt
        # and real bugs; only a missing argument should print usage
        print('use: python3 readmanga.me_downloader.py <link>\n'
              'example: python3 readmanga.me_downloader.py http://readmanga.me/wa')
        sys.exit(1)
    t = time.time()
    start(link)
    print('\n%f seconds' % (time.time() - t))
Add Comment
Please, Sign In to add comment