Advertisement
Guest User

new_scan

a guest
May 29th, 2018
259
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. const TIMEOUT_FOR_RECONNECTION = 2;
  2.  
  3. var fs = require('fs');
  4.  
  /**
   * Cancels every pending timer recorded in `timeoutIndex` and removes the
   * session cookie from `global_headers`.
   *
   * Every key of `timeoutIndex` is cleared (not only `vi` / `cz`), and each
   * handle is forgotten afterwards so a finished timer id is never kept around.
   * Removing the cookie makes `scanForNewPost` skip its request until
   * `getCookie` stores a new one.
   */
  var clearAllTimeouts = () => {
    Object.keys(timeoutIndex).forEach((key) => {
      clearTimeout(timeoutIndex[key]);
      delete timeoutIndex[key];
    });
    delete global_headers.cookie;
  };
  // HTTP client; redirects are followed for every request made through it.
  var request = require('request');
  request = request.defaults({
    followAllRedirects: true
  });
  var moment = require('moment');

  // Logging: a single file appender, "info" level and above.
  var log4js = require('log4js');
  var log4js_appenders = {};
  log4js_appenders.log = {
    type: 'file',
    filename: 'logs/email.log'
  };
  log4js.configure({
    appenders: log4js_appenders,
    categories: {
      default: {
        appenders: ["log"],
        level: 'info'
      }
    }
  });
  // `profile` is only assigned further down the file, so passing it here
  // handed getLogger `undefined`. Request the default category explicitly.
  const logger = log4js.getLogger();
  const dateSearchRange = 2; // 2 days
  // Codes that are already known; findCodeInString reports them as "OLDCODE".
  // (The original list had an accidental empty slot between the two entries.)
  const previousCode = [
    "xxxxxx",
    "xxxxx"
  ];
  // Retry timers, keyed by language; cleared by clearAllTimeouts().
  var timeoutIndex = {};
  // Flips to true as soon as a notification mail has been handed to the transporter.
  var email_sending_status = false;
  var from_mail = "haithanha@seznam.cz";
  var host_mail = "smtp.seznam.cz";
  // FIXME: a real credential should not live in source control. MAIL_PASSWORD
  // takes precedence when set; the literal remains only as a fallback so
  // existing deployments keep working.
  var mail_password = process.env.MAIL_PASSWORD || "Online22**";
  var destination_mail = "toilanam93@gmail.com";
  var nodemailer = require('nodemailer');
  // Reference date for the scan, taken from the first command-line argument.
  // moment() does not throw on unparsable input - it returns an invalid
  // moment - so a try/catch never reached its fallback. Validity is checked
  // explicitly and the current time is used when the argument is missing or bad.
  var argv = process.argv[2];
  var search_date = argv === undefined ? moment() : moment(argv);
  if (!search_date.isValid()) {
    search_date = moment();
  }
  // Listing page to poll, per language key.
  var home_urls = {
    vi: 'https://onmyojiguide.com/guide/bounty-list/',
    cz: 'https://onmyojiguide.com/guide/bounty-list/'
  };
  // Headers sent with every request; getCookie() adds/removes `cookie` and
  // rewrites `referer`.
  var global_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'referer': 'https://onmyojiguide.com/'
  };
  // One article entry of the listing page. Capture groups:
  //   1 = article href, 2 = publish date, 3 = "updated" time,
  //   4 = optional <img> tag, 5 = teaser text.
  // Written once so the per-language patterns cannot drift apart.
  var article_pattern = /<h2 class="article_title"><a href="([^\"]*)">[^<]*<\/a><\/h2>\s*<span class="hidden">, <\/span>\s*<p class="articleDate">([0-9\.\s\/:]*)[^<]*<span class="updated">\s*Aktualizováno:\s*<span class="time">([^<]*)<\/span>[^<]*<\/span>[^<]*<\/p>[^<]*(<img [^>]*>)?[^<]*<p class="article_perex">([^<]*)/;
  // Each language gets its own global RegExp instance because a /g regex
  // carries `lastIndex` state between calls.
  var regex_pattern = {
    vi: new RegExp(article_pattern.source, 'g'),
    cz: new RegExp(article_pattern.source, 'g')
  };
  // moment format of the dates captured by groups 2 and 3.
  var date_format = "DD.MM.YYYY / HH:mm";
  // SMTP settings for the sender mailbox.
  let poolConfig = {
    host: host_mail,
    port: 465,
    secure: true, // use TLS
    auth: {
      user: from_mail,
      pass: mail_password
    }
  };
  let transporter = nodemailer.createTransport(poolConfig);
  // Applicant data; the fields read below are fn, ln, Similar, Type, Ship, My,
  // app, an optional `dependant` with the same fields, and `files`.
  var profile = require('./profile.json');
  // Mail template; `subject` is filled in with the discovered code right
  // before sending.
  var message = {
    from: `${profile.fn} ${profile.ln} <${from_mail}>`,
    to: `ONM <${destination_mail}>`,
    subject: "",
    attachments: []
  };
  // The body lists the main profile and, when a dependant with an `fn` is
  // present, the dependant's fields as a second paragraph.
  if(profile.dependant && profile.dependant.fn) {
    message.text = `
    Word: ${profile.fn}
    meaning: ${profile.ln}
    Similar: ${profile.Similar}
    Type: ${profile.Type}
    Ship: ${profile.Ship}
    My: ${profile.My}
    ${profile.app}

    Word: ${profile.dependant.fn}
    meaning: ${profile.dependant.ln}
    Similar: ${profile.dependant.Similar}
    Type: ${profile.dependant.Type}
    Ship: ${profile.dependant.Ship}
    My: ${profile.dependant.My}
    ${profile.dependant.app}
  `;
  }
  else {
    message.text = `
    Word: ${profile.fn}
    meaning: ${profile.ln}
    Similar: ${profile.Similar}
    Type: ${profile.Type}
    Ship: ${profile.Ship}
    My: ${profile.My}
    ${profile.app}
  `;
  }
  // A profile without a `files` list simply sends no attachments.
  (profile.files || []).forEach(function(filename) {
    message.attachments.push({
      path: filename,
      // nodemailer reads `contentType`; the previous `type` key is not an
      // attachment option and was ignored.
      contentType: 'application/pdf',
      filename: filename
    });
  });
  121.  
  /**
   * Requests the listing page for `lang` and stores the cookie that the page
   * sets through an inline `document.cookie="...;` assignment.
   *
   * On success `global_headers.cookie` and `global_headers.referer` are
   * updated. On a request error, or when no cookie assignment is found, the
   * cookie header is removed. The function re-arms itself every 2.5 seconds.
   *
   * @param {string} [lang='cz'] key into `home_urls`.
   */
  var getCookie = (lang = 'cz') => {
    // Forward `lang`: without it every refresh silently fell back to 'cz'.
    setTimeout(getCookie, 2500, lang);
    request({
      method: 'get',
      uri: home_urls[lang],
      headers: global_headers
    }, function(e, r, b) {
      if(e) {
        logger.error("Can't get cookie. Probably blocked?");
        delete global_headers.cookie;
        return;
      }
      // The dot is escaped so only a literal `document.cookie` matches; a
      // missing/non-string body is treated as "no cookie" instead of throwing.
      var matches = /document\.cookie=\"([^;]*);/.exec(typeof b === 'string' ? b : '');
      if(matches && matches[1]) {
        global_headers.referer = home_urls[lang];
        global_headers.cookie = matches[1];
        logger.info("Successfully get cookie");
      }
      else {
        logger.error("No cookie found.");
        delete global_headers.cookie;
      }
    });
  };
  // NOTE: `startWorking` is assigned a second time near the end of the file,
  // and that later assignment runs before the first call, so this version is
  // never the one that executes.
  //
  // Starts a scan right away when a cookie is already stored; otherwise asks
  // for a cookie and schedules a scan after the reconnection delay.
  var startWorking = () => {
    logger.info("Start scanning");
    if(global_headers.cookie) {
      run();
      return;
    }
    getCookie();
    setTimeout(scanForNewPost, TIMEOUT_FOR_RECONNECTION*1000, 'vi');
  };
  156.  
  157. //var run = () => {
  158. //  scanPost("vi");
  159. //  scanPost("cz");
  160. //}
  161.  
  /**
   * Fetches the listing page and looks for a new code in every article teaser.
   *
   * The `language` argument is the language of the PREVIOUS pass: the function
   * switches to the other one ('vi' <-> 'cz') before requesting the page.
   * For each matched article the teaser is checked with findCodeInString and
   * the article itself is followed up with scanForNewCode. The first fresh
   * code triggers exactly one notification mail (guarded by
   * `email_sending_status`). While no mail has been sent the function calls
   * itself again immediately after each successful page fetch.
   *
   * Failures (request error, unreadable body, no article matched) clear the
   * session via clearAllTimeouts() and schedule a retry after
   * TIMEOUT_FOR_RECONNECTION seconds. Retry handles are stored in
   * `timeoutIndex` so clearAllTimeouts() can actually cancel them.
   *
   * @param {string} [language='vi'] language of the previous pass.
   * @param {*} [date=search_date] currently unused; the recency filter that
   *   consumed it is disabled.
   */
  var scanForNewPost = (language = 'vi', date = search_date) => {
    // Schedules the next pass and records the handle under the language key,
    // replacing any retry that is still pending for that key.
    var retryLater = (lang) => {
      clearTimeout(timeoutIndex[lang]);
      timeoutIndex[lang] = setTimeout(scanForNewPost, TIMEOUT_FOR_RECONNECTION*1000, lang);
    };

    if(!global_headers.cookie) {
      // The error paths below drop the cookie right before scheduling a retry.
      // Returning silently here ended the scan loop for good whenever the
      // retry fired before getCookie() had stored a new cookie, so wait and
      // try again instead.
      retryLater(language);
      return;
    }
    if (language == 'vi') {
        language = 'cz';
    } else {
        language = 'vi';
    }
    var uri = home_urls[language];
    console.log('Scanning for new post in url', uri);
    request({
      method: 'get',
      uri: uri,
      headers: global_headers
    }, function(e, r, b) {
      // A missing or non-string body is handled like a failed request rather
      // than crashing on `b.match`.
      if(e || typeof b !== 'string') {
        logger.error(language + " Can't fetch page for new posts. Probably banned?");
        clearAllTimeouts();
        retryLater(language);
        return;
      }
      var pattern = regex_pattern[language];
      // The pattern is global, so its cursor must be rewound before each use.
      pattern.lastIndex = 0;
      var articles = b.match(pattern);
      if (articles == null) {
        logger.error(language + " No new articles, scan again!");
        clearAllTimeouts();
        retryLater(language);
        return;
      }
      articles.forEach(function(article) {
        pattern.lastIndex = 0;
        var elements = pattern.exec(article);
        // Group 5 (the teaser) is read below, so all six slots must exist.
        if(elements && elements.length > 5) {
          var url = elements[1];
          var content = elements[5];
          var code = findCodeInString(content);
          if(code && code != "OLDCODE" && !email_sending_status) {
            email_sending_status = true;
            message.subject = code;
            transporter.sendMail(message, function(err, info) {
              console.log(err || info);
            });
          }
          // Every matched article is followed; no recency filter is applied.
          scanForNewCode(url, language);
        }
      });
      logger.info(language + " No new post, retry");
      if(!email_sending_status) {
        scanForNewPost(language);
      }
    });
  };
  /**
   * Fetches a single article and looks for a new code in its content area
   * (everything from `<div class="article_content">` up to the
   * `menu_container` div).
   *
   * A fresh code triggers the notification mail unless one is already being
   * sent. When the request fails, the body is unreadable, or the content area
   * is not found, the session is cleared via clearAllTimeouts() and a listing
   * scan is scheduled after TIMEOUT_FOR_RECONNECTION seconds; the handle is
   * stored in `timeoutIndex` so clearAllTimeouts() can cancel it.
   *
   * @param {string} url article path, appended to the site root.
   * @param {string} [language='vi'] language key used for logging and for
   *   the retry.
   */
  var scanForNewCode = (url, language = 'vi') => {
    var uri = 'https://onmyojiguide.com/' + url;
    console.log('Scanning for new code in url', uri);
    request({
      method: 'get',
      uri: uri,
      headers: global_headers
    }, function(e, r, b) {
      // `[^]` matches any character including newlines.
      var content = (!e && typeof b === 'string')
        ? b.match(/<div class="article_content">[^]*<div id="menu_container" class="noprint">/g)
        : null;

      // Request error and "content area not found" share one recovery path
      // (previously two identical branches behind a duplicated condition).
      if(!content || !content[0]) {
        logger.error(language + " Can't fetch article. Probably blocked?");
        clearAllTimeouts();
        clearTimeout(timeoutIndex[language]);
        timeoutIndex[language] = setTimeout(scanForNewPost, TIMEOUT_FOR_RECONNECTION*1000, language);
        return;
      }

      var code = findCodeInString(content[0]);
      if(code && code != "OLDCODE" && !email_sending_status) {
        email_sending_status = true;
        message.subject = code;
        transporter.sendMail(message, function(err, info) {
          console.log(err || info);
        });
      }
      else {
        logger.info(language + " No code in post");
      }
    });
  };
  /**
   * Picks a candidate code out of a piece of page text.
   *
   * Only the first run of 12 characters from [a-z0-9] is considered.
   *
   * @param {string} s text to search; anything that is not a string yields `false`.
   * @returns {string|false} the candidate when it is not listed in
   *   `previousCode`; the string "OLDCODE" when it is; `false` when there is
   *   no candidate or the candidate consists of digits only.
   */
  var findCodeInString = (s) => {
    if(typeof s !== 'string') {
      return false;
    }
    var code = s.match(/[a-z0-9]{12}/g);
    if(!code || !code[0]) {
      return false;
    }
    var candidate = code[0];
    // Digits-only is tested with a pattern rather than isNaN(): numeric
    // coercion also accepts strings such as "0xabcdef1234" or "1e1234567890",
    // which would wrongly discard codes of that shape.
    if(/^[0-9]+$/.test(candidate)) {
      return false;
    }
    console.log(candidate);
    return previousCode.indexOf(candidate) === -1 ? candidate : "OLDCODE";
  };
  // Polls until a cookie is present in `global_headers`, then starts the scan.
  var prepare = () => {
    if(global_headers.cookie) {
      run();
      return;
    }
    // No cookie yet: check again in 1.5 seconds.
    setTimeout(prepare, 1500);
  };
  // Starts the listing scan, passing 'vi' as the language argument.
  var run = function () {
    scanForNewPost('vi');
  };
  290.  
  291.  
  292.  
  293.  
  294. console.log('~~~~~ Started the working.. ~~~~~');
  295.  
  296.  
  // Reports a failed attempt on the console and in the log file, then calls
  // startWorking() again after TIMEOUT_FOR_RECONNECTION seconds.
  var retryGettingCookie = () => {
    const notice = `Can not get the result successfully!! Will try again after ${TIMEOUT_FOR_RECONNECTION} seconds`;
    console.log(notice);
    logger.error(notice);
    const delayMs = 1000 * TIMEOUT_FOR_RECONNECTION;
    setTimeout(function () {
      startWorking();
    }, delayMs);
  };
  304.  
  // Bootstraps a session: request a cookie, then let prepare() poll until it
  // is available and start the scan.
  var startWorking = function () {
    getCookie();
    prepare();
  };
  309.  
  310.  
  311.  
  312. startWorking();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement