Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import requests
- from lxml import html
- import sys
- import os
- import glob
- import zipfile
class MgkaDL:
    """Download the chapters of a series as one ``.cbz`` archive per chapter.

    An instance is created with either a series page URL or the literal
    string ``'update'``.  URLs seen for the first time are appended to the
    save file so that a later ``update`` run can revisit them.  Archives are
    written to ``mgkadl/<title>/``, where ``<title>`` is the last path
    segment of the series URL.
    """

    # Text file holding one previously requested series URL per line.
    SAVE_FILE = "mgkadl_save.txt"

    def __init__(self, url):
        """Load the saved URL list and, unless updating, select *url*.

        Args:
            url: A series page URL, or ``'update'`` to make
                :meth:`download` process every URL in the save file.
        """
        self.session = requests.Session()
        self.saved_urls = []
        self.url = url
        self.load_known()
        if self.url == 'update':
            print("Updating all ...")
            return
        if url not in self.saved_urls:
            print("Downloading new url")
            with open(self.SAVE_FILE, "a") as save_file:
                save_file.write("%s\n" % url)
        self.set_url(url)

    @staticmethod
    def _last_segment(url):
        """Return the last path segment of *url*, ignoring trailing slashes."""
        return url.rstrip('/').split('/')[-1]

    def _fetch(self, url, label='page'):
        """GET *url* with the shared session and return the response.

        Prints an error naming *label* and terminates the program with exit
        status 1 when the server does not answer with HTTP 200.
        """
        response = self.session.get(url)
        if response.status_code != 200:
            print('Error retrieving %s: %d' % (label, response.status_code))
            print(url)
            sys.exit(1)
        return response

    def set_url(self, url):
        """Make *url* the current series and ensure its download directory exists."""
        self.url = url
        self.title = self._last_segment(url)
        print('Downloading %s' % self.title)
        self.dlpath = 'mgkadl/%s' % self.title
        os.makedirs(self.dlpath, exist_ok=True)

    def load_known(self):
        """Append every URL recorded in the save file to ``self.saved_urls``.

        A missing save file is not an error; a notice is printed instead.
        """
        try:
            with open(self.SAVE_FILE, "r") as save_file:
                for line in save_file:
                    # strip() rather than line[:-1]: the final line may lack
                    # a newline, and blank lines must not become URLs.
                    entry = line.strip()
                    if entry:
                        self.saved_urls.append(entry)
        except FileNotFoundError:
            print("No saved urls found")
            return
        print("Loaded %d urls" % len(self.saved_urls))

    def get_chapters(self):
        """Fetch the series page and store its chapter URLs in ``self.urls``.

        The links are read from the first element with class
        ``chapter-list`` and stored in the reverse of page order.

        Raises:
            IndexError: If the page has no ``chapter-list`` element.
        """
        response = self._fetch(self.url)
        tree = html.fromstring(response.content)
        chapter_list = tree.find_class('chapter-list')[0]
        hrefs = [row[0][0].get('href') for row in chapter_list]
        hrefs.reverse()
        self.urls = hrefs

    def get_image(self, url, zfile):
        """Download the image at *url* and add it to the open archive *zfile*.

        The archive member is named after the last path segment of *url*.
        """
        filename = self._last_segment(url)
        # Flush so the in-place progress line is visible while downloading.
        print(filename, end='', flush=True)
        response = self._fetch(url, filename)
        # .content undoes any Content-Encoding; the raw stream would not.
        zfile.writestr(filename, response.content)

    def get_images(self, url):
        """Download every image of the chapter page at *url* into a ``.cbz``.

        Images are the ``<img>`` children of the element with id
        ``vungdoc``.  The archive is built under a temporary ``.part`` name
        and renamed only after all images were stored, so an aborted run
        never leaves a truncated ``.cbz`` that would be counted as complete.

        Raises:
            KeyError: If the page has no element with id ``vungdoc``.
        """
        response = self._fetch(url)
        tree = html.fromstring(response.content)
        container = tree.get_element_by_id('vungdoc')
        images = [node for node in container if node.tag == 'img']
        total = len(images)
        chapter = self._last_segment(url)
        final_path = '%s/%s.cbz' % (self.dlpath, chapter)
        part_path = final_path + '.part'
        try:
            with zipfile.ZipFile(part_path, mode='w') as zfile:
                for index, image in enumerate(images, start=1):
                    print("\r%3d / %3d : " % (index, total), end='')
                    self.get_image(image.get('src'), zfile)
        except BaseException:
            # Also covers SystemExit from a failed fetch and Ctrl-C.
            if os.path.exists(part_path):
                os.remove(part_path)
            raise
        os.replace(part_path, final_path)
        print()

    def download(self):
        """Download missing chapters for the current URL, or for all saved URLs."""
        if self.url == 'update':
            for url in self.saved_urls:
                print("Updating from %s" % url)
                self.set_url(url)
                self.__download()
        else:
            self.__download()

    def __download(self):
        """Download the chapters of the current series that are not on disk yet.

        Resuming is count based: the number of ``.cbz`` files already in the
        download directory is taken as the number of leading chapters to skip.
        """
        self.get_chapters()
        print("Available chapters: %d" % len(self.urls))
        # Escape the directory so glob metacharacters in a title are literal.
        pattern = '%s/*.cbz' % glob.escape(self.dlpath)
        downloaded = len(glob.glob(pattern))
        print("Counted in downloaded: %d" % downloaded)
        pending = self.urls[downloaded:]
        for number, url in enumerate(pending, start=downloaded + 1):
            print("Chapter: %d" % number)
            self.get_images(url)
def main(argv=None):
    """Run the downloader from the command line.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        1 after printing the usage line when the argument count is wrong,
        otherwise 0 once the download has finished.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print("Usage: %s [url|update]" % argv[0])
        return 1
    dl = MgkaDL(argv[1])
    dl.download()
    return 0


# Guarded so that importing this module does not start a download or exit.
if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement