Guest User

Untitled

a guest
Jun 6th, 2018
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.64 KB | None | 0 0
  1. let request = require('request'),
  2. iconv = require('iconv-lite'),
  3. cheerio = require('cheerio'),
  4. charset = require('charset'),
  5. xlsx = require('node-xlsx').default,
  6. XLSX = require('xlsx'),
  7. //Both modules SUCKS, next time use "xlsx-style" or "js-xlsx"
  8. xl_lines = [],
  9.  
  10. urls = [],
  11. xl_tits = [],
  12. xl_descs = [],
  13. xl_h1s = [],
  14.  
  15. site_urls = [],
  16. site_tits = [],
  17. site_descs = [],
  18. site_h1s = []
  19.  
  20. const log = val => console.log(val)
  21. const type = val => log(typeof (val))
  22. //debug module, temporary stuff.
  23.  
  24. const xlsxFile = xlsx.parse(`${'xl'}/table2.xlsx`)
  25.  
  26. xlsxFile[0].data.forEach(item => item != '' ? xl_lines.push(item) : 0) //skip empty rows
  27.  
  28. xl_lines.splice(0, 1) //skip table titles
  29.  
  30. xl_lines.forEach(item => urls.push(item[0]))
  31.  
  32. xl_lines.forEach(item => xl_tits.push(item[1]))
  33.  
  34. xl_lines.forEach(item => xl_descs.push(item[2]))
  35.  
  36. xl_lines.forEach(item => xl_h1s.push(item[3]))
  37. //rewrite with recursive self-invoking ( () => () ) ? nah, whatever
  38. const get = url => {
  39. let opt = {
  40. url: url,
  41. encoding: null,
  42. simple: false,
  43. auth: {
  44. user: 'user',
  45. password: 'pass'
  46. }
  47. }
  48. let decoder
  49. request(opt, (err, res, body) => {
  50. decoder = charset(res.headers, body);
  51. res.body = iconv.decode(body, decoder)
  52. let $ = cheerio.load(res.body)
  53.  
  54. site_tits.push($('title').text().replace(/\r?\n$/, ''))
  55. site_h1s.push(($('h1').text()).replace(/\r?\n$/, ''))
  56. site_descs.push($('meta[name=description]').attr("content").replace(/\r?\n$/, ''))
  57.  
  58. })
  59. }
  60.  
  61. let indexGet = 0,
  62. interval = 400,
  63. finishTime = urls.length * interval
  64. //I need to learn async , await loops defenitly.400ms per url sucks.
  65. const getAll = (...urls) => {
  66. if (indexGet < urls.length) {
  67. get(urls[indexGet])
  68. setTimeout(() => getAll(...urls), interval)
  69. indexGet++
  70. log('got ' + indexGet + " values")
  71. }
  72.  
  73. }
  74.  
// Start the staggered fetch chain: the first URL is requested immediately and
// getAll re-schedules itself every `interval` ms for the remaining ones.
getAll(...urls)
  76.  
  77.  
  78. let xl_data = Array(urls.length + 2)
  79.  
  80. //hardcoded shit starts here
  81. //fill array with some values
  82. for (let i = 0; i < urls.length + 2; i++) {
  83. xl_data[i] = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '] //11 cells in row
  84. }
  85. //fill headers
  86. xl_headers = ['url', '', 'XL-T', 'XL-D', 'XL-H', '', 'SITE-T', 'SITE-D', 'SITE-H', '', 'TITS', 'DESCS', 'H1']
  87. xl_data.unshift(xl_headers)
  88. //fill values from xl file
  89. for (let i = 0; i < urls.length; i++) {
  90. xl_data[i + 1][0] = urls[i]
  91.  
  92. xl_tits[i] ? 0 : xl_tits[i] = 'EMPTY-t'
  93. xl_descs[i] ? 0 : xl_descs[i] = 'EMPTY-d'
  94. xl_h1s[i] ? 0 : xl_h1s[i] = 'EMPTY-h'
  95.  
  96. xl_data[i + 1][2] = xl_tits[i]
  97. xl_data[i + 1][3] = xl_descs[i]
  98. xl_data[i + 1][4] = xl_h1s[i]
  99. }
  100.  
  101. let tits = [],
  102. descs = [],
  103. h1s = []
  104.  
  105. setTimeout(() => {
  106. //fill values from site
  107. for (let i = 0; i < urls.length; i++) {
  108. xl_data[i + 1][6] = site_tits[i]
  109. xl_data[i + 1][7] = site_descs[i]
  110. xl_data[i + 1][8] = site_h1s[i]
  111.  
  112. xl_tits[i] ? tits.push(xl_tits[i].localeCompare(site_tits[i])) : 0
  113. xl_descs[i] ? descs.push(xl_descs[i].localeCompare(site_descs[i])) : 0
  114. xl_h1s[i] ? h1s.push(xl_h1s[i].localeCompare(site_h1s[i])) : 0
  115.  
  116. tits[i] === 0 ? tits[i] = 'ok' : tits[i] = 'CHECK-t'
  117. descs[i] === 0 ? descs[i] = 'ok' : descs[i] = 'CHECK-d'
  118. h1s[i] === 0 ? h1s[i] = 'ok' : h1s[i] = 'CHECK-h'
  119.  
  120. xl_data[i + 1][10] = tits[i]
  121. xl_data[i + 1][11] = descs[i]
  122. xl_data[i + 1][12] = h1s[i]
  123. }
  124. //writing to xlsx file
  125. let ws = XLSX.utils.json_to_sheet(xl_data)
  126. let wb = XLSX.utils.book_new();
  127. XLSX.utils.book_append_sheet(wb, ws, "Checked")
  128. XLSX.writeFile(wb, "Result.xlsx")
  129. log('DONE!')
  130. }, finishTime)
Add Comment
Please, Sign In to add comment