Guest User

readmanga.me_downloader.py

a guest
Jul 13th, 2017
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.58 KB | None | 0 0
  1. # usage: python3 readmanga.me_downloader.py <link>
  2.  
  3. import json
  4. import requests
  5. import os
  6. import bs4
  7. import re
  8. import sys
  9. from multiprocessing import Pool
  10.  
  11. import time
  12.  
  13.  
  14. def download_manga(manga):
  15.     print('\ndownloading...')
  16.     path = './%s/' % manga['title']
  17.  
  18.     if not os.path.exists(path):
  19.         os.mkdir(path)
  20.  
  21.     info_file = open(path + 'download_info.txt', 'a')
  22.  
  23.     pool = Pool(40)
  24.     count = 0
  25.     downloaded_pages = 0
  26.     for volume, chapters in manga['volumes'].items():
  27.         v_path = '%s/volume_%s' % (path, volume)
  28.  
  29.         if not os.path.exists(v_path):
  30.             os.mkdir(v_path)
  31.  
  32.         for chapter_num, pages in chapters.items():
  33.             ch_path = '%s/chapter_%s' % (v_path, chapter_num)
  34.             if not os.path.exists(ch_path):
  35.                 os.mkdir(ch_path)
  36.  
  37.             pics = []
  38.             for k, v in enumerate(pages):
  39.                 ext = v[v.rindex('.') + 1:]
  40.                 pic_name = '%s/%d.%s' % (ch_path, k, ext)
  41.                 pics.append((pic_name, v))
  42.  
  43.             # непосредственно скачивание
  44.             result = pool.map(download_pic, pics)
  45.             count += len(pages)
  46.  
  47.             fails = [r for r in result if r != 'ok']
  48.             downloaded_pages += len(result) - len(fails)
  49.  
  50.             info_file.write(''.join(fails))
  51.             sys.stdout.write('\rdownload: chapter %s. total progress: %.2f%%.' %
  52.                              (chapter_num, float(count) / manga['count'] * 100))
  53.            
  54.             sys.stdout.flush()
  55.  
  56.     info_file.write('%d of %d pictures total were downloaded\n' % (downloaded_pages, manga['count']))
  57.     info_file.close()
  58.  
  59.  
  60. def download_pic(i):
  61.     name = i[0]
  62.     address = i[1]
  63.  
  64.     if os.path.exists(name):
  65.         return 'ok'
  66.  
  67.     with open(name, 'wb') as f:
  68.         r = requests.get(address)
  69.         if r.ok:
  70.             f.write(r.content)
  71.             return 'ok'
  72.  
  73.         return 'pic %-60s was NOT downloaded. response status: %s\n' % r.status_code
  74.  
  75.  
  76. # что-то хитровыебанное
  77. def get_all_pages(chapter):
  78.     r = requests.get('http://readmanga.me' + chapter)
  79.  
  80.     # тупо вырезаем ссылки из js скрипта
  81.     txt = r.text[r.text.index('rm_h.init('):]
  82.     txt = txt[:txt.index('</script>')]
  83.  
  84.     txt = txt[txt.index('[') + 1:txt.rindex(']')]
  85.  
  86.     # там такая херня вот
  87.     # ['auto/03/21','http://e3.postfact.ru/',"/00/Wa__v01_c001_009.png_res.jpg",1100,1600]
  88.     return [p[1] + p[0] + p[2] for p in eval(txt)]
  89.  
  90.  
  91. def start(link):
  92.     link += '/' if link[-1] != '/' else ''
  93.     link += 'vol1/1?mature=1'
  94.     r = requests.get(link, headers={'User-Agent': "don't block me pls"})
  95.  
  96.     if not r.ok:
  97.         raise Exception(r.__repr__())
  98.  
  99.     print('getting manga info...')
  100.     s = bs4.BeautifulSoup(r.text, 'html.parser')
  101.     title = str(s.find('a', {'class': 'manga-link'}).string)
  102.  
  103.     if os.path.exists(title + '.json'):
  104.         with open(title + '.json', 'r') as f:
  105.             download_manga(json.loads(f.read()))
  106.             return
  107.  
  108.     # ссылки на все главы
  109.     data = [v.attrs['value']
  110.             for v in s.find('select', {'id': 'chapterSelectorSelect'}).contents
  111.             if type(v) is bs4.Tag]
  112.  
  113.     data.reverse()
  114.     print('chapters: %d' % len(data))
  115.  
  116.     # regex 4lulz
  117.     vol_regex = re.compile('\/vol[0-9]+\/')
  118.     chapter_regex = re.compile('\/[0-9]+\?')
  119.  
  120.     volumes = {}
  121.  
  122.     count = 0
  123.     for c in data:
  124.         volume_num = int(vol_regex.findall(c)[0].lstrip('/vol').rstrip('/'))
  125.         chapter_num = int(chapter_regex.findall(c)[0].lstrip('/').rstrip('?'))
  126.  
  127.         sys.stdout.write('\rget: volume: %3d. chapter: %4d' % (volume_num, chapter_num))
  128.         sys.stdout.flush()
  129.  
  130.         # тянем ссылки на пикчи
  131.         pages = get_all_pages(c)
  132.         d = {chapter_num: pages}
  133.         count += len(pages)
  134.  
  135.         if volume_num in volumes.keys():
  136.             volumes[volume_num].update(d)
  137.             continue
  138.         volumes.update({volume_num: d})
  139.  
  140.     manga_info = {
  141.         'title': title,
  142.         'volumes': volumes,
  143.         'count': count
  144.     }
  145.  
  146.     with open(title + '.json', 'w') as f:
  147.         f.write(json.dumps(manga_info))
  148.  
  149.     download_manga(manga_info)
  150.  
  151.  
  152. if __name__ == '__main__':
  153.     try:
  154.         link = sys.argv[1]
  155.     except:
  156.         print('use: python3 readmanga.me_downloader.py <link>\n'
  157.               'example: python3 readmanga.me_downloader.py http://readmanga.me/wa')
  158.         exit(1)
  159.  
  160.     t = time.time()
  161.     start(link)
  162.     print('\n%f seconds' % (time.time() - t))
Add Comment
Please, Sign In to add comment