Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- const axios = require("axios");
- const cheerio = require('cheerio');
- const Promise = require('bluebird')
- const _ = require('lodash');
- var CryptoJS = require("crypto-js");
- const fs = require('fs')
- const download = require('../../utils/imagedownload/index');
/** CSS selector matching every row of the chapter table on a manga overview page. */
const CHAPTER_ROW_SELECTOR = 'body > section > div.info > div > div > div.m-b-0 > div.table-wrapper.chapter-table > table > tbody > tr';

/**
 * Reads the chapter table of an overview page.
 *
 * @param {Function} $ - cheerio instance loaded with the overview page HTML.
 * @returns {Array<{currentChapterName: string, currentChapterNumber: number, currentChapterHref: string}>}
 *   One entry per table row that has a chapter label, in reverse table order.
 */
function collectChapterLinks($) {
    const rowCount = $(CHAPTER_ROW_SELECTOR).length;
    const chapters = [];
    for (let row = 1; row <= rowCount; row++) {
        const anchorSelector = `${CHAPTER_ROW_SELECTOR}:nth-child(${row}) > td:nth-child(1) > a`;
        const label = $(`${anchorSelector} > span.hidden-sm.hidden-xs`).html();
        if (!label) {
            continue;
        }
        // The chapter identifier is the last space-separated token of the label.
        const labelParts = label.split(" ");
        const currentChapterName = labelParts[labelParts.length - 1];
        // NOTE(review): a non-numeric token yields NaN here, which JSON.stringify saves as null,
        // so such a chapter is never recognised as already saved on a later run.
        const currentChapterNumber = parseFloat(currentChapterName);
        const currentChapterHref = $(anchorSelector).attr('href');
        chapters.push({ currentChapterName, currentChapterNumber, currentChapterHref });
    }
    chapters.reverse();
    return chapters;
}

/**
 * Loads the chapter/image list saved by a previous run, creating an empty file when none exists.
 *
 * @param {string} filePath - Path of the images.txt state file.
 * @returns {Array<{chapter: number, images: Array}>} Saved entries, or [] when the file is empty.
 * @throws {Error} When the file is not empty but does not contain valid JSON.
 */
function loadSavedImageLinks(filePath) {
    if (!fs.existsSync(filePath)) {
        fs.writeFileSync(filePath, "");
    }
    const text = fs.readFileSync(filePath).toString();
    if (text.length === 0) {
        return [];
    }
    try {
        return JSON.parse(text);
    } catch (err) {
        throw new Error(`Cannot parse saved image links in ${filePath}: ${err.message}`);
    }
}

/**
 * Extracts the image links of one chapter from the HTML of its reader page.
 *
 * The page embeds a double JSON-encoded object (`ciphertext`, `salt`, `iv`) in a
 * `var htmlContent=...;` statement inside `#view-chapter`. The ciphertext is AES-decrypted with a
 * PBKDF2 (SHA-512, 999 iterations) key, and the resulting markup has the three passphrase
 * fragments substituted back to '.', ':' and '/' before the <img> tags are read.
 *
 * @param {string} pageHtml - HTML of the chapter reader page.
 * @returns {string[]} Values of the `data-9jriuq` attribute of every <img> that has one.
 * @throws {Error} When the encrypted payload cannot be located on the page.
 */
function extractChapterImageLinks(pageHtml) {
    const $page = cheerio.load(pageHtml);
    const viewerHtml = $page('#view-chapter').html();
    if (!viewerHtml) {
        throw new Error('element #view-chapter not found or empty');
    }
    const payloadMatch = viewerHtml.match(/var htmlContent=(.*);/);
    if (!payloadMatch) {
        throw new Error('"var htmlContent=" payload not found');
    }
    // The payload is a JSON string that itself contains JSON, hence the double parse.
    const payload = JSON.parse(JSON.parse(payloadMatch[1]));

    const passphrase = '@9jriuQ^@' + '~4ZoLils*' + '^u$UxZ!Qzy<ytt_Z2}';
    const salt = CryptoJS.enc.Hex.parse(payload.salt);
    const iv = CryptoJS.enc.Hex.parse(payload.iv);
    const key = CryptoJS.PBKDF2(passphrase, salt, {
        hasher: CryptoJS.algo.SHA512,
        keySize: 64 / 8,
        iterations: 999
    });
    const decrypted = CryptoJS.AES.decrypt(payload.ciphertext, key, { iv: iv });

    const markup = decrypted.toString(CryptoJS.enc.Utf8)
        .replace(/@9jriuQ\^@/g, '.')
        .replace(/~4ZoLils\*/g, ':')
        .replace(/\^u\$UxZ!Qzy<ytt_Z2}/g, '/');

    const $chapter = cheerio.load(markup);
    const imageLinks = [];
    $chapter('img').each((_index, el) => {
        const imageLink = $chapter(el).attr('data-9jriuq');
        // An <img> without the attribute has no link to download; storing undefined would be
        // saved as null and break the download step.
        if (typeof imageLink === 'string') {
            imageLinks.push(imageLink);
        }
    });
    return imageLinks;
}

/**
 * Builds the download jobs for the images of one chapter that are not on disk yet.
 *
 * Links that go through "proxy/view.php?url=" are replaced, in `chapter.images` itself, by the
 * part after that marker; all other links get a referer header. Jobs whose URL contains
 * "blogspot.com" go to the fast list, everything else to the slow list.
 *
 * @param {{chapter: number, images: Array}} chapter - Saved chapter entry (its `images` are updated in place).
 * @param {string} chapterDir - Directory that receives `<index>.jpg` files.
 * @returns {{fast: Object[], slow: Object[]}} Option objects for `download.image`.
 */
function planChapterDownloads(chapter, chapterDir) {
    const fast = [];
    const slow = [];
    for (let i = 0; i < chapter.images.length; i++) {
        const dest = `${chapterDir}/${i}.jpg`;
        if (fs.existsSync(dest)) {
            continue;
        }
        if (typeof chapter.images[i] !== 'string') {
            // State files may hold null for images whose link was missing; skip instead of crashing.
            console.log(`Skipping image ${i} of chapter ${chapter.chapter}: no link saved`);
            continue;
        }
        const headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language': 'en-GB,en;q=0.9,en-US;q=0.8,vi;q=0.7'
        };
        if (chapter.images[i].includes("proxy/view.php?url=")) {
            chapter.images[i] = chapter.images[i].split("proxy/view.php?url=")[1];
        } else {
            headers["referer"] = "https://www.ocumeo.com/";
        }
        const job = { url: chapter.images[i], dest, headers };
        if (job.url.includes("blogspot.com")) {
            fast.push(job);
        } else {
            slow.push(job);
        }
    }
    return { fast, slow };
}

/**
 * Downloads jobs in consecutive chunks; the jobs inside one chunk run in parallel.
 *
 * @param {Object[]} jobs - Option objects for `download.image`.
 * @param {number} chunkSize - Number of parallel downloads per chunk.
 * @param {function(number, number): string} describeChunk - Builds the log line from (chunk index, chunk count).
 * @returns {Promise<void>}
 */
async function downloadInChunks(jobs, chunkSize, describeChunk) {
    const chunks = _.chunk(jobs, chunkSize);
    for (const [index, chunk] of chunks.entries()) {
        console.log(describeChunk(index, chunks.length));
        await Promise.all(chunk.map(job => download.image(job)));
    }
}

/**
 * Downloads every chapter image of a manga from its overview page.
 *
 * Steps: read the chapter table, collect the image links of every chapter not yet listed in
 * `output/ngonphong-<name>/images.txt` (the file is rewritten after each chapter so an
 * interrupted run can resume), then download every image that is not on disk yet into
 * `output/ngonphong-<name>/<chapter>/<index>.jpg`.
 *
 * @param {string} link - URL of the manga overview page; the second-to-last "/"-separated
 *   segment is used as the manga name.
 * @returns {Promise<Array<{chapter: number, images: Array}>>} All known chapters with their image links.
 * @throws {Error} When a page request fails or a chapter page cannot be decoded.
 */
async function DownloadNgonPhong(link) {
    const overview = await axios.get(link);
    console.log("Get data manga done");
    const allChapterLink = collectChapterLinks(cheerio.load(overview.data));
    console.log(`Có tổng cộng ${allChapterLink.length} chapter`);

    const linkParts = link.split("/");
    const mangaName = linkParts[linkParts.length - 2];
    const outDir = `output/ngonphong-${mangaName}/`;
    console.log("Tên folder chứa ảnh: ", outDir);
    // recursive: also creates the parent "output" directory and tolerates an existing folder.
    fs.mkdirSync(outDir, { recursive: true });

    const chapterImageLinkFile = outDir + "images.txt";
    const allImageInEachChapter = loadSavedImageLinks(chapterImageLinkFile);

    // Only fetch image links for chapters that are not in the state file yet.
    const savedChapterNumbers = new Set(allImageInEachChapter.map(saved => saved.chapter));
    const chapterToGetImages = allChapterLink.filter(
        chapter => !savedChapterNumbers.has(chapter.currentChapterNumber)
    );
    console.log(`Có ${chapterToGetImages.length} chapter chưa lấy ảnh`);

    const allChapterChunk = _.chunk(chapterToGetImages, 3);
    let chapterIndex = 1;
    for (const [index, eachChapterChunk] of allChapterChunk.entries()) {
        console.log(`Current chunk: ${index}/${allChapterChunk.length} - ${eachChapterChunk.length} items`);
        const responses = await Promise.all(
            eachChapterChunk.map(chapter => axios.get(chapter.currentChapterHref))
        );
        console.log(`Downloaded chunk: ${index}/${allChapterChunk.length}`);

        // Decoding is synchronous, so a failure surfaces here instead of in a detached promise.
        responses.forEach((response, cIndex) => {
            console.log(`Downloading Chapter ${chapterIndex}/${chapterToGetImages.length}`);
            const chapter = eachChapterChunk[cIndex];
            let images;
            try {
                images = extractChapterImageLinks(response.data);
            } catch (err) {
                throw new Error(
                    `Cannot read image links of chapter ${chapter.currentChapterName} (${chapter.currentChapterHref}): ${err.message}`
                );
            }
            allImageInEachChapter.push({ chapter: chapter.currentChapterNumber, images });
            // Persist after every chapter so progress survives an interruption.
            fs.writeFileSync(chapterImageLinkFile, JSON.stringify(allImageInEachChapter));
            chapterIndex++;
        });
    }
    console.log("Đã lấy xong link ảnh");

    for (const eachChapter of allImageInEachChapter) {
        console.log("Bắt đầu tải ảnh chapter ", eachChapter.chapter);
        const tempOutDir = outDir + eachChapter.chapter;
        fs.mkdirSync(tempOutDir, { recursive: true });

        const { fast, slow } = planChapterDownloads(eachChapter, tempOutDir);
        if (fast.length > 0) {
            // Fast hosts: 10 images at a time.
            console.log("Đang tải ảnh có tốc độ cao");
            await downloadInChunks(fast, 10, (chunkIndex, chunkCount) =>
                `Đang tải ảnh tốc độ cao chunk ${chunkIndex}/${chunkCount}`);
        }
        if (slow.length > 0) {
            // Slow hosts: only 2 images at a time.
            console.log("Đang tải những ảnh có tốc độ chậm");
            await downloadInChunks(slow, 2, (chunkIndex, chunkCount) =>
                `Đang tải ảnh có tốc độ chậm chunk ${chunkIndex}/${chunkCount} - Chapter ${eachChapter.chapter}`);
        }
    }
    console.log("Đã tải xong toàn bộ ảnh");
    return allImageInEachChapter;
}
// Public API of this module: the NgonPhong manga downloader entry point.
module.exports = { DownloadNgonPhong: DownloadNgonPhong };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement