Advertisement
Guest User

Untitled

a guest
Aug 21st, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.29 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. import requests
  3. from lxml import html
  4. import sys
  5. import os
  6. import glob
  7. import zipfile
  8.  
  9. class MgkaDL:
  10. def __init__(self, url):
  11. self.session = requests.Session()
  12. self.saved_urls = []
  13. self.url = url
  14. self.load_known()
  15. if self.url == 'update':
  16. print("Updating all ...")
  17. else:
  18. if url not in self.saved_urls:
  19. print("Downloading new url")
  20. f = open("mgkadl_save.txt", "a")
  21. f.write("%s\n" % url)
  22. f.close()
  23. self.set_url(url)
  24.  
  25.  
  26. def set_url(self, url):
  27. self.url = url
  28. self.title = url.split('/')[-1]
  29. print('Downloading %s' % self.title)
  30. self.dlpath = 'mgkadl/%s' % self.title
  31. if not os.path.exists(self.dlpath):
  32. os.makedirs(self.dlpath)
  33.  
  34.  
  35. def load_known(self):
  36. try:
  37. f = open("mgkadl_save.txt", "r")
  38. for line in f:
  39. self.saved_urls.append(line[:-1])
  40. print("Loaded %d urls" % len(self.saved_urls))
  41. except FileNotFoundError:
  42. print("No saved urls found")
  43.  
  44.  
  45. def get_chapters(self):
  46. r = self.session.get(self.url)
  47. if r.status_code is not 200:
  48. print('Error retrieving page: %d' % r.status_code)
  49. print(self.url)
  50. sys.exit(1)
  51. tree = html.fromstring(r.content)
  52. self.urls = []
  53. l = tree.find_class('chapter-list')[0]
  54. for x in l:
  55. self.urls.append(x[0][0].get('href'))
  56. self.urls = self.urls[::-1]
  57.  
  58. def get_image(self, url, zfile):
  59. r = self.session.get(url, stream=True)
  60. filename = url.split('/')[-1]
  61. print(filename, end='')
  62. if r.status_code is not 200:
  63. print('Error retrieving %s: %d' % (filename, r.status_code))
  64. print(url)
  65. sys.exit(1)
  66. zfile.writestr(filename, r.raw.read())
  67.  
  68. def get_images(self, url):
  69. r = self.session.get(url)
  70. if r.status_code is not 200:
  71. print('Error retrieving page: %d' % r.status_code)
  72. print(url)
  73. sys.exit(1)
  74. tree = html.fromstring(r.content)
  75. l = tree.get_element_by_id('vungdoc')
  76. i = 1
  77. c = len(l)
  78. chapter = url.split('/')[-1]
  79. zfile = zipfile.ZipFile('%s/%s.cbz' % (self.dlpath, chapter), mode='w')
  80. for x in l:
  81. if x.tag == 'img':
  82. print("\r%3d / %3d : " % (i, c), end='')
  83. self.get_image(x.get('src'), zfile)
  84. i += 1
  85. print()
  86.  
  87. def download(self):
  88. if self.url == 'update':
  89. for url in self.saved_urls:
  90. print("Updating from %s" % url)
  91. self.set_url(url)
  92. self.__download()
  93. else:
  94. self.__download()
  95.  
  96. def __download(self):
  97. self.get_chapters()
  98. print("Available chapters: %d" % len(self.urls))
  99. downloaded = len(glob.glob('%s/*.cbz' % self.dlpath))
  100. print("Counted in downloaded: %d" % downloaded)
  101. for url in self.urls[downloaded:]:
  102. print("Chapter: %d" % (downloaded + 1))
  103. self.get_images(url)
  104. downloaded += 1
  105.  
  106. if len(sys.argv) != 2:
  107. print("Usage: %s [url|update]" % sys.argv[0])
  108. sys.exit(1)
  109.  
  110. dl = MgkaDL(sys.argv[1])
  111. dl.download()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement