Advertisement
anonymousvntk

Untitled

Jul 22nd, 2021
23
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.32 KB | None | 0 0
  1. const axios = require("axios");
  2. const cheerio = require('cheerio');
  3. const Promise = require('bluebird')
  4. const _ = require('lodash');
  5. const fs = require('fs')
  6. const download = require('../../utils/imagedownload/index');
  7.  
  /** Site origin used for the chapter-list API and to make API chapter URLs absolute. */
  const NETTRUYENTOP_ORIGIN = "http://www.nettruyentop.com";

  /**
   * Build a fresh header object for one image request.
   * A new object is returned on every call so requests never share mutable state.
   *
   * @returns {Object<string, string>} Browser-like request headers, including the site Referer.
   */
  function buildImageRequestHeaders() {
    return {
      'Connection': 'keep-alive',
      'Cache-Control': 'max-age=0',
      'Upgrade-Insecure-Requests': '1',
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      'Accept-Language': 'en-GB,en;q=0.9,en-US;q=0.8,vi;q=0.7',
      "Referer": "http://www.nettruyentop.com/"
    };
  }

  /**
   * Turn an image source into a URL starting with "http".
   * Sources that do not start with "http" have their first two characters replaced by
   * "http://" (this matches protocol-relative "//host/path" sources).
   *
   * @param {string} src - Image source as stored in the chapter index.
   * @returns {string} The URL to request.
   */
  function toAbsoluteImageUrl(src) {
    if (src.startsWith("http")) {
      return src;
    }
    return "http://" + src.slice(2);
  }

  /**
   * Read the chapter index file and return its parsed content.
   * An empty file means "nothing indexed yet" and yields an empty array instead of
   * letting JSON.parse throw on an empty string.
   *
   * @param {string} indexFile - Path of the JSON index file.
   * @returns {Array<Object>} Indexed chapters (each with `chapter` and `images`).
   * @throws {SyntaxError} If the file is non-empty but not valid JSON.
   */
  function readChapterIndex(indexFile) {
    const rawIndex = fs.readFileSync(indexFile).toString();
    return rawIndex.length !== 0 ? JSON.parse(rawIndex) : [];
  }

  /**
   * Fetch the chapter list of a manga, in the order the site returns it.
   * The JSON chapter API is tried first; when it does not report success the manga
   * page HTML is scraped instead.
   *
   * @param {string} link - URL of the manga page (used by the HTML fallback).
   * @param {string} mangaId - Numeric id taken from the end of the manga slug.
   * @returns {Promise<Array<Object>>} Chapters with at least `url`, `name` and a numeric `chapter`.
   */
  async function fetchChapterLinks(link, mangaId) {
    const apiResponse = await axios.get(`${NETTRUYENTOP_ORIGIN}/Comic/Services/ComicService.asmx/ProcessChapterPreLoad?comicId=${mangaId}&commentId=-1`);
    const payload = apiResponse.data;
    if (payload.success) {
      return payload.chapters.map((entry) => {
        // The chapter number is read from the second-to-last URL segment ("<word>-<number>").
        const urlParts = entry.url.split("/");
        const chapterNumber = parseFloat(urlParts[urlParts.length - 2].split("-")[1]);
        return Object.assign({}, entry, {
          chapter: chapterNumber,
          url: NETTRUYENTOP_ORIGIN + entry.url
        });
      });
    }

    console.log("Không lấy được danh sách chap qua API, tiến hành lấy HTML")
    const pageResponse = await axios.get(link);
    const page = cheerio.load(pageResponse.data);
    const rowCount = page('#nt_listchapter > nav > ul > li').length;
    const chapters = [];
    // nth-child starts at 2: presumably the first <li> is a heading row — TODO confirm.
    // Rows without a link are skipped by the href check.
    for (let row = 1; row <= rowCount; row++) {
      const anchor = page(`#nt_listchapter > nav > ul > li:nth-child(${row + 1}) > div.col-xs-5.chapter > a`);
      const href = anchor.attr('href');
      if (href) {
        const title = anchor.text();
        chapters.push({
          url: href,
          name: title,
          chapter: parseFloat(title.split(" ")[1])
        });
      }
    }
    return chapters;
  }

  /**
   * Collect the `src` of every page image found in a chapter's HTML.
   * Entries may be undefined when an expected `#page_N > img` has no `src`; positions
   * are kept so that image file names stay aligned with page numbers.
   *
   * @param {string} chapterHtml - HTML of the chapter reading page.
   * @returns {Array<string|undefined>} Image sources in page order.
   */
  function extractChapterImages(chapterHtml) {
    const page = cheerio.load(chapterHtml);
    const totalImage = page('.reading-detail > div').length;
    const images = [];
    for (let pageNo = 1; pageNo <= totalImage; pageNo++) {
      images.push(page(`#page_${pageNo} > img`).attr('src'));
    }
    return images;
  }

  /**
   * Download every not-yet-downloaded image of one indexed chapter into its folder.
   * Images are requested in sequential groups of five; files already on disk are skipped.
   *
   * @param {Object} eachChapter - Index entry (`chapter`, `images`); `images` entries that
   *   get queued are rewritten in place to their absolute URL.
   * @param {string} outDir - Manga output folder, ending with "/".
   * @returns {Promise<void>}
   */
  async function downloadChapterImages(eachChapter, outDir) {
    console.log("Bắt đầu tải ảnh chapter: ", eachChapter.chapter)
    const chapterDir = outDir + eachChapter.chapter;
    fs.mkdirSync(chapterDir, { recursive: true });

    const pendingDownloads = [];
    for (let i = 0; i < eachChapter.images.length; i++) {
      const fileDest = `${chapterDir}/${i}.jpg`;
      if (fs.existsSync(fileDest)) {
        continue;
      }
      const rawUrl = eachChapter.images[i];
      // A page without a usable src is stored as null in the index; skip it instead of crashing.
      if (typeof rawUrl !== "string" || rawUrl.length === 0) {
        console.warn(`Skipping image ${i} of chapter ${eachChapter.chapter}: no source URL was found`);
        continue;
      }
      eachChapter.images[i] = toAbsoluteImageUrl(rawUrl);
      pendingDownloads.push({
        url: eachChapter.images[i],
        dest: fileDest,
        headers: buildImageRequestHeaders()
      });
    }

    const downloadChunks = _.chunk(pendingDownloads, 5);
    for (let index = 0; index < downloadChunks.length; index++) {
      console.log(`Đang tải ảnh có tốc độ chậm chunk ${index+1}/${downloadChunks.length} - Chapter ${eachChapter.chapter}`);
      await Promise.all(downloadChunks[index].map((request) => download.image(request)));
    }
  }

  /**
   * Download all chapter images of a manga from nettruyentop.com.
   *
   * Steps:
   *  1. Derive the manga slug and id from the last path segment of `link`.
   *  2. Fetch the chapter list (API first, HTML fallback) and reverse it.
   *  3. Resolve the image URLs of every chapter not yet present in
   *     `output/nettruyentop-<slug>/images.txt`, two chapters at a time, rewriting the
   *     index file after each chapter so an interrupted run can resume.
   *  4. Download the images of every indexed chapter into
   *     `output/nettruyentop-<slug>/<chapter>/<n>.jpg`, skipping files that already exist.
   *
   * @param {string} link - URL of the manga page; its last path segment must end in "-<id>".
   * @returns {Promise<Array<Object>>} The chapter index (`chapterId`, `name`, `url`, `chapter`, `images`).
   * @throws Rejects when a request fails, the index file holds invalid JSON, or a
   *   filesystem operation fails.
   */
  async function DownloadNettruyentop(link) {
    // Ignore trailing slashes so ".../slug-123/" does not produce an empty slug.
    const linkSegments = link.replace(/\/+$/, "").split("/");
    const mangaName = linkSegments[linkSegments.length - 1];
    const slugParts = mangaName.split("-");
    const mangaId = slugParts[slugParts.length - 1];
    console.log({mangaName, mangaId});

    const allChapterLinks = await fetchChapterLinks(link, mangaId);
    // Processed in reverse of the site's order (presumably oldest first — TODO confirm).
    allChapterLinks.reverse();
    console.log(`Có tổng cộng ${allChapterLinks.length} chapter`)

    const outDir = `output/nettruyentop-${mangaName}/`;
    console.log("Tên folder chứa ảnh: ", outDir)
    // recursive: also creates the parent "output" folder and tolerates an existing folder.
    fs.mkdirSync(outDir, { recursive: true });

    const chapterImageLinkFile = outDir + "images.txt";
    if (!fs.existsSync(chapterImageLinkFile)) {
      fs.writeFileSync(chapterImageLinkFile, "")
    }

    let allImageInEachChapter = readChapterIndex(chapterImageLinkFile);
    const indexedChapters = new Set(allImageInEachChapter.map((entry) => entry.chapter));
    const chapterToGetImages = allChapterLinks.filter((entry) => !indexedChapters.has(entry.chapter));
    console.log(`Còn ${chapterToGetImages.length} chapter chưa lấy ảnh`)

    const allChapterChunk = _.chunk(chapterToGetImages, 2);
    let chapterIndex = 1;
    for (let index = 0; index < allChapterChunk.length; index++) {
      const eachChapterChunk = allChapterChunk[index];
      console.log(`Current chunk: ${index + 1}/${allChapterChunk.length} - ${eachChapterChunk.length} items`)
      const chapterResponses = await Promise.all(eachChapterChunk.map((each) => axios.get(each.url)));
      console.log(`Downloaded chunk: ${index + 1}/${allChapterChunk.length}`)

      chapterResponses.forEach((response, cIndex) => {
        console.log(`Analyzing Chapter ${chapterIndex}/${chapterToGetImages.length}`)
        chapterIndex++;
        const sourceChapter = eachChapterChunk[cIndex];
        allImageInEachChapter.push({
          chapterId: sourceChapter.chapterId,
          name: sourceChapter.name,
          url: sourceChapter.url,
          chapter: sourceChapter.chapter,
          images: extractChapterImages(response.data)
        });
        // Persist after every chapter so progress survives an interrupted run.
        fs.writeFileSync(chapterImageLinkFile, JSON.stringify(allImageInEachChapter))
      });
    }

    // Reload from disk so the download phase works on exactly what was persisted.
    allImageInEachChapter = readChapterIndex(chapterImageLinkFile);
    console.log("Đã lấy xong link ảnh")

    // Chapters are downloaded one after another; only images within a chapter run in parallel.
    for (const eachChapter of allImageInEachChapter) {
      await downloadChapterImages(eachChapter, outDir);
    }

    console.log("Đã tải xong toàn bộ ảnh")
    return allImageInEachChapter
  }
  163.  
  164. module.exports = {
  165. DownloadNettruyentop
  166. }
  167.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement