Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Third-party dependencies.
const request = require('request');
const iconv = require('iconv-lite');
const cheerio = require('cheerio');
const charset = require('charset');
// Two spreadsheet libraries are loaded: `node-xlsx` (bound to `xlsx`, used in
// this file to parse the input workbook) and `xlsx` (bound to `XLSX`, used to
// write the result workbook).
// NOTE(review): the original author was unhappy with both and suggested trying
// "xlsx-style" or "js-xlsx" next time.
const xlsx = require('node-xlsx').default;
const XLSX = require('xlsx');

// Non-empty rows of the input sheet.
const xl_lines = [];
// Column 0 of every input row: the page address to check.
const urls = [];
// Columns 1, 2 and 3 of every input row: the values the page is compared with.
const xl_tits = [];
const xl_descs = [];
const xl_h1s = [];
// Declared for completeness; not referenced anywhere else in this file.
const site_urls = [];
// Values scraped from the pages: <title> text, meta description, <h1> text.
const site_tits = [];
const site_descs = [];
const site_h1s = [];
// Temporary debugging helpers.

/**
 * Prints a single value to stdout.
 * @param {*} val - Value to print.
 */
function log(val) {
  console.log(val);
}

/**
 * Prints the `typeof` name of a value.
 * @param {*} val - Value to inspect.
 */
function type(val) {
  const kind = typeof val;
  log(kind);
}
// Parse the input workbook; only its first worksheet is used.
const xlsxFile = xlsx.parse('xl/table2.xlsx');

// Keep the rows that are not empty. The loose `!=` is deliberate: it compares
// the row's string form (its cells joined with commas) against '', so a row
// whose joined text is empty is skipped.
const filledRows = xlsxFile[0].data.filter((row) => row != '');
for (const row of filledRows) {
  xl_lines.push(row);
}

// The first kept row holds the column titles, not data.
xl_lines.shift();

// Split every data row into the four per-column arrays.
for (const row of xl_lines) {
  urls.push(row[0]);
  xl_tits.push(row[1]);
  xl_descs.push(row[2]);
  xl_h1s.push(row[3]);
}
/**
 * Fetches one page and records its <title> text, <h1> text and meta
 * description in the module-level `site_tits`, `site_h1s` and `site_descs`
 * arrays.
 *
 * A slot is reserved in all three arrays before the request is sent and the
 * response is written into that slot. Results therefore stay in call order
 * even when responses arrive out of order, and a failed request leaves empty
 * strings in its slot instead of shifting every later row.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {void} Results are delivered through the arrays named above.
 */
const get = (url) => {
  // Reserve this request's position synchronously, while call order is known.
  const slot = site_tits.length;
  site_tits.push('');
  site_h1s.push('');
  site_descs.push('');

  const opt = {
    url,
    // Ask for the undecoded body so iconv-lite can decode it with the page's
    // own charset (per the `request` docs, `encoding: null` yields a Buffer).
    encoding: null,
    simple: false,
    // NOTE(review): placeholder basic-auth credentials; supply real ones via
    // configuration rather than committing them.
    auth: {
      user: 'user',
      password: 'pass'
    }
  };

  // Removes a single trailing line break, as the comparison values expect.
  const stripTrailingNewline = (text) => text.replace(/\r?\n$/, '');

  request(opt, (err, res, body) => {
    if (err || !res) {
      // Without this guard a network error crashes on `res.headers`.
      console.error(`request failed for ${url}: ${err ? err.message : 'no response'}`);
      return;
    }

    // `charset` yields nothing when no encoding is declared; fall back to UTF-8
    // rather than handing iconv-lite an unusable encoding name.
    const encoding = charset(res.headers, body) || 'utf8';
    const $ = cheerio.load(iconv.decode(body, encoding));

    site_tits[slot] = stripTrailingNewline($('title').text());
    site_h1s[slot] = stripTrailingNewline($('h1').text());
    // `attr` returns undefined when the page has no meta description.
    site_descs[slot] = stripTrailingNewline($('meta[name=description]').attr('content') || '');
  });
};
// Position of the next URL to request; advanced by getAll.
let indexGet = 0;
// Pause between two consecutive requests, in milliseconds.
const interval = 400;
// Delay before the report is written. The first request goes out immediately
// and the last one at (urls.length - 1) * interval, so this leaves about one
// interval for the final response to arrive.
const finishTime = urls.length * interval;
// TODO: replace the timer-based pacing with an async/await loop; waiting a
// fixed 400 ms per URL is slow.
/**
 * Requests the given URLs one at a time, `interval` milliseconds apart.
 *
 * Each call handles the URL at the module-level `indexGet`, schedules the next
 * call, then advances `indexGet` and logs the running count. Once `indexGet`
 * has reached the number of URLs the call does nothing.
 *
 * @param {...string} urls - Addresses to request, in order.
 */
const getAll = (...urls) => {
  if (indexGet >= urls.length) {
    return;
  }

  get(urls[indexGet]);
  // Arguments after the delay are handed to the callback, so the next call
  // receives the same URL list.
  setTimeout(getAll, interval, ...urls);
  indexGet += 1;
  log(`got ${indexGet} values`);
};
// Start the paced requests for every URL read from the spreadsheet.
getAll(...urls);

// Column layout of the report. Empty titles are spacer columns between the
// URL, the spreadsheet values, the site values and the verdicts.
// Declared with `const`: the original assigned an undeclared name, which
// creates an implicit global and throws in strict mode or ES modules.
const xl_headers = ['url', '', 'XL-T', 'XL-D', 'XL-H', '', 'SITE-T', 'SITE-D', 'SITE-H', '', 'TITS', 'DESCS', 'H1'];

// One placeholder row per URL plus two spare rows (row count kept from the
// original layout). Each row is as wide as the header, so the width can no
// longer drift from the column list.
const xl_data = Array.from(
  { length: urls.length + 2 },
  () => Array(xl_headers.length).fill(' ')
);
xl_data.unshift(xl_headers);

// Fill the URL and the spreadsheet values; row 0 is the header, so data for
// urls[i] lives in row i + 1.
for (let i = 0; i < urls.length; i++) {
  // Missing spreadsheet values get a visible marker. The column arrays are
  // updated in place because the later comparison step reads them again.
  if (!xl_tits[i]) {
    xl_tits[i] = 'EMPTY-t';
  }
  if (!xl_descs[i]) {
    xl_descs[i] = 'EMPTY-d';
  }
  if (!xl_h1s[i]) {
    xl_h1s[i] = 'EMPTY-h';
  }

  const row = xl_data[i + 1];
  row[0] = urls[i];
  row[2] = xl_tits[i];
  row[3] = xl_descs[i];
  row[4] = xl_h1s[i];
}
// Per-URL verdicts for title, description and h1 ('ok' or a CHECK-* marker).
const tits = [];
const descs = [];
const h1s = [];

/**
 * Compares a spreadsheet value with the value scraped from the site.
 *
 * @param {*} expected - Value from the spreadsheet; non-strings (for example
 *   numeric cells) are compared by their string form.
 * @param {*} actual - Value scraped from the page; anything that is not a
 *   string (such as a missing result) counts as a mismatch.
 * @param {string} mismatchLabel - Marker to return when the values differ.
 * @returns {string} 'ok' when both values compare equal, else `mismatchLabel`.
 */
const compareCell = (expected, actual, mismatchLabel) => {
  if (expected == null || expected === '' || typeof actual !== 'string') {
    return mismatchLabel;
  }
  return String(expected).localeCompare(actual) === 0 ? 'ok' : mismatchLabel;
};

/**
 * Copies the scraped values and the verdicts into `xl_data` and writes the
 * workbook to "Result.xlsx".
 */
const writeReport = () => {
  // Row 0 is the header, so data for urls[i] lives in row i + 1.
  for (let i = 0; i < urls.length; i++) {
    const row = xl_data[i + 1];

    row[6] = site_tits[i];
    row[7] = site_descs[i];
    row[8] = site_h1s[i];

    tits[i] = compareCell(xl_tits[i], site_tits[i], 'CHECK-t');
    descs[i] = compareCell(xl_descs[i], site_descs[i], 'CHECK-d');
    h1s[i] = compareCell(xl_h1s[i], site_h1s[i], 'CHECK-h');

    row[10] = tits[i];
    row[11] = descs[i];
    row[12] = h1s[i];
  }

  // `xl_data` is an array of arrays, so use aoa_to_sheet. json_to_sheet treats
  // each row as an object and prepends a header row made of the indices 0..12.
  const ws = XLSX.utils.aoa_to_sheet(xl_data);
  const wb = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(wb, ws, "Checked");
  XLSX.writeFile(wb, "Result.xlsx");
  log('DONE!');
};

// Runs once after `finishTime` ms; responses that arrive later are not included.
setTimeout(writeReport, finishTime);
Add Comment
Please, Sign In to add comment