Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- const axios = require("axios");
- const cheerio = require('cheerio');
- const Promise = require('bluebird')
- const _ = require('lodash');
- const fs = require('fs')
- const download = require('../../utils/imagedownload/index');
/**
 * Resolves an image `src` scraped from a chapter page to an absolute URL.
 *
 * Absolute `http…` URLs are returned untouched and protocol-relative
 * `//host/path` URLs get an `http:` scheme. Anything else is resolved
 * against the site root as a best effort.
 *
 * @param {string} src - Raw `src` attribute value.
 * @returns {string} Absolute URL.
 */
function toAbsoluteImageUrl(src) {
    if (src.startsWith("http")) {
        return src;
    }
    if (src.startsWith("//")) {
        return "http:" + src;
    }
    return new URL(src, "http://www.nettruyentop.com/").href;
}

/**
 * Builds a fresh header object for one image request, so the downloader is
 * free to mutate it without affecting other requests.
 *
 * @returns {Object<string, string>} Request headers.
 */
function buildImageRequestHeaders() {
    return {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-GB,en;q=0.9,en-US;q=0.8,vi;q=0.7',
        "Referer": "http://www.nettruyentop.com/"
    };
}

/**
 * Reads the chapter records persisted in `images.txt`, creating an empty
 * file when it does not exist yet.
 *
 * @param {string} filePath - Path of the JSON state file.
 * @returns {Array<Object>} Parsed records; empty when the file is missing or blank.
 * @throws {SyntaxError} When the file has content that is not valid JSON.
 */
function readSavedChapters(filePath) {
    if (!fs.existsSync(filePath)) {
        fs.writeFileSync(filePath, "");
        return [];
    }
    const raw = fs.readFileSync(filePath).toString();
    // A blank file means "nothing saved yet"; JSON.parse("") would throw.
    if (raw.trim().length === 0) {
        return [];
    }
    return JSON.parse(raw);
}

/**
 * Fetches the chapter list of a manga, first through the JSON service and,
 * when that reports no usable result, by scraping the manga page HTML.
 *
 * @param {string} link - URL of the manga page.
 * @param {string} mangaId - Numeric id taken from the end of the manga slug.
 * @returns {Promise<Array<Object>>} Chapter records with at least `url` and
 *   `chapter`, in the order the site lists them.
 */
async function fetchChapterLinks(link, mangaId) {
    const apiResponse = await axios.get(`http://www.nettruyentop.com/Comic/Services/ComicService.asmx/ProcessChapterPreLoad?comicId=${mangaId}&commentId=-1`);
    const apiData = apiResponse.data;
    if (apiData && apiData.success && Array.isArray(apiData.chapters)) {
        return apiData.chapters.map(entry => {
            // The chapter number is the second dash-separated token of the
            // second-to-last path segment of the chapter URL.
            const segments = entry.url.split("/");
            const chapterNumber = parseFloat(segments[segments.length - 2].split("-")[1]);
            return Object.assign({}, entry, {
                chapter: chapterNumber,
                url: "http://www.nettruyentop.com" + entry.url
            });
        });
    }

    console.log("Không lấy được danh sách chap qua API, tiến hành lấy HTML");
    const pageResponse = await axios.get(link);
    const $ = cheerio.load(pageResponse.data);
    const rowCount = $('#nt_listchapter > nav > ul > li').length;
    const chapterLinks = [];
    // nth-child is 1-based; starting at 2 skips the first <li>
    // (presumably a heading row — TODO confirm against the live markup).
    for (let nth = 2; nth <= rowCount + 1; nth++) {
        const anchor = $(`#nt_listchapter > nav > ul > li:nth-child(${nth}) > div.col-xs-5.chapter > a`);
        const href = anchor.attr('href');
        if (href) {
            const label = anchor.text();
            chapterLinks.push({
                url: href,
                name: label,
                chapter: parseFloat(label.split(" ")[1])
            });
        }
    }
    return chapterLinks;
}

/**
 * Extracts the image URLs of one chapter page.
 *
 * @param {string} html - HTML of the chapter page.
 * @returns {Array<string>} `src` values of `#page_N > img`, in page order;
 *   pages whose image has no `src` are skipped.
 */
function extractChapterImages(html) {
    const $ = cheerio.load(html);
    const pageCount = $('.reading-detail > div').length;
    const sources = [];
    for (let page = 1; page <= pageCount; page++) {
        const src = $(`#page_${page} > img`).attr('src');
        if (typeof src === "string" && src.length > 0) {
            sources.push(src);
        }
    }
    return sources;
}

/**
 * Downloads all chapter images of a manga from nettruyentop.com.
 *
 * The work is resumable: image links per chapter are persisted to
 * `output/nettruyentop-<slug>/images.txt` (JSON), chapters already listed
 * there are not scraped again, and image files that already exist on disk
 * are not downloaded again.
 *
 * @param {string} link - URL of the manga page; its last path segment is the
 *   slug and the last dash-separated token of the slug is the manga id.
 * @returns {Promise<Array<Object>>} The chapter records as stored in
 *   `images.txt` (`chapterId`, `name`, `url`, `chapter`, `images`).
 */
async function DownloadNettruyentop(link) {
    // Ignore trailing slashes so ".../manga-123/" still yields the slug.
    const linkParts = link.replace(/\/+$/, "").split("/");
    const mangaName = linkParts[linkParts.length - 1];
    const slugParts = mangaName.split("-");
    const mangaId = slugParts[slugParts.length - 1];
    console.log({mangaName, mangaId});

    const allChapterLinks = await fetchChapterLinks(link, mangaId);
    allChapterLinks.reverse();
    console.log(`Có tổng cộng ${allChapterLinks.length} chapter`);

    const outDir = `output/nettruyentop-${mangaName}/`;
    console.log("Tên folder chứa ảnh: ", outDir);
    if (!fs.existsSync(outDir)) {
        // recursive: the parent "output" directory may not exist yet.
        fs.mkdirSync(outDir, { recursive: true });
    }

    const chapterImageLinkFile = outDir + "images.txt";
    let allImageInEachChapter = readSavedChapters(chapterImageLinkFile);

    // Only chapters whose number is not in the state file still need scraping.
    const savedChapterNumbers = new Set(allImageInEachChapter.map(saved => saved.chapter));
    const chapterToGetImages = allChapterLinks.filter(each => !savedChapterNumbers.has(each.chapter));
    console.log(`Còn ${chapterToGetImages.length} chapter chưa lấy ảnh`);

    // Chapter pages are fetched two at a time.
    const allChapterChunk = _.chunk(chapterToGetImages, 2);
    let chapterIndex = 1;
    for (let index = 0; index < allChapterChunk.length; index++) {
        const eachChapterChunk = allChapterChunk[index];
        console.log(`Current chunk: ${index + 1}/${allChapterChunk.length} - ${eachChapterChunk.length} items`);
        const responses = await Promise.all(eachChapterChunk.map(each => axios.get(each.url)));
        console.log(`Downloaded chunk: ${index + 1}/${allChapterChunk.length}`);

        responses.forEach((response, cIndex) => {
            console.log(`Analyzing Chapter ${chapterIndex}/${chapterToGetImages.length}`);
            chapterIndex++;
            const source = eachChapterChunk[cIndex];
            allImageInEachChapter.push({
                chapterId: source.chapterId,
                name: source.name,
                url: source.url,
                chapter: source.chapter,
                images: extractChapterImages(response.data)
            });
        });
        // Persist after every chunk so an interrupted run can resume.
        fs.writeFileSync(chapterImageLinkFile, JSON.stringify(allImageInEachChapter));
    }

    // Re-read so the records match their persisted (JSON-normalised) form.
    allImageInEachChapter = readSavedChapters(chapterImageLinkFile);
    console.log("Đã lấy xong link ảnh");

    for (const eachChapter of allImageInEachChapter) {
        console.log("Bắt đầu tải ảnh chapter: ", eachChapter.chapter);
        const tempOutDir = outDir + eachChapter.chapter;
        if (!fs.existsSync(tempOutDir)) {
            fs.mkdirSync(tempOutDir, { recursive: true });
        }

        const images = Array.isArray(eachChapter.images) ? eachChapter.images : [];
        const pendingDownloads = [];
        for (let i = 0; i < images.length; i++) {
            const fileDest = `${tempOutDir}/${i}.jpg`;
            if (fs.existsSync(fileDest)) {
                continue;
            }
            // State files written by earlier runs may contain null entries.
            if (typeof images[i] !== "string" || images[i].length === 0) {
                continue;
            }
            images[i] = toAbsoluteImageUrl(images[i]);
            pendingDownloads.push({
                url: images[i],
                dest: fileDest,
                headers: buildImageRequestHeaders()
            });
        }

        // Images are downloaded five at a time.
        const downloadUrls = _.chunk(pendingDownloads, 5);
        for (let index = 0; index < downloadUrls.length; index++) {
            console.log(`Đang tải ảnh có tốc độ chậm chunk ${index+1}/${downloadUrls.length} - Chapter ${eachChapter.chapter}`);
            await Promise.all(downloadUrls[index].map(request => download.image(request)));
        }
    }

    console.log("Đã tải xong toàn bộ ảnh");
    return allImageInEachChapter;
}
// Public API of this module: the nettruyentop downloader entry point.
module.exports = { DownloadNettruyentop: DownloadNettruyentop };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement