Advertisement
nagrizolich

Untitled

Oct 3rd, 2017
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. "use strict";
  2.  
  3. require("babel-core/register");
  4.  
  5. import fetch from "node-fetch";
  6. import striptags from "striptags";
  7. import co from "co";
  8. import _ from "underscore";
  9. import chalk from "chalk";
  10. import mongodb from "mongodb";
  11. import crypto from 'crypto';
  12. import VKApi from 'node-vkapi';
  13.  
  // MongoDB connection string passed to MongoClient.connect.
  const mongoUrl = "mongodb://localhost:27017/bugurt";

  // Lower-case keywords looked for in a thread subject by detectButthurt.
  const keyWords = [
      "butthurt",
      "бугурт",
      "буггурт",
      "баттхерт",
      "батхерт",
  ];

  // Base address of the imageboard and the suffix appended to JSON endpoints.
  const host = "http://2ch.hk";
  const ext = ".json";

  // NOTE(review): mainLoopIntervalMin is not referenced anywhere in this file.
  const mainLoopIntervalMin = 10;
  // Period, in seconds, between two poolLoop runs.
  const poolLoopIntervalSec = 5;
  20.  
  // VK API client used by postToVk. The app secret, login and password are
  // empty strings in this file (presumably to be filled in before use).
  const VK = new VKApi({
      app: { id: 1, secret: "" },
      auth: { login: "", pass: "" },
  });
  31.  
  // Threads currently being watched: appended to by addThreadsToPool and
  // rebuilt by refreshThreadsPool.
  let threadsPool = [];

  // Database handle; stays null until the MongoDB connection callback assigns it.
  let bugurtDb = null;
  34.  
  /**
   * Tells whether the visible text of a comment is written entirely in upper case.
   *
   * @param {string} str - Raw comment markup; tags are removed with striptags.
   * @returns {boolean} true when the stripped text contains at least one cased
   *     letter and no lower-case letters.
   */
  let checkCaps = (str) => {
      const text = striptags(str);
      // Text without any cased letter (digits, punctuation, empty string) equals
      // both its upper- and lower-cased forms; it must not be reported as caps.
      return text === text.toUpperCase() && text !== text.toLowerCase();
  };
  /**
   * Counts the "@" characters in a string.
   *
   * @param {string} str - Text to scan.
   * @returns {number} Number of "@" occurrences.
   */
  let countDogs = (str) => {
      // Splitting on "@" yields one more piece than there are separators.
      const pieces = str.split("@");
      return pieces.length - 1;
  };
  /**
   * Shared rejection handler for the polling chains: prints the error with
   * console.log.
   *
   * @param {*} error - Whatever the failed step rejected or threw with.
   */
  let handleError = (error) => {
      console.log(error);
  };
  /**
   * Prepares a comment for publishing: strips tags, puts every "@" on its own
   * line and removes ">>digits" sequences.
   *
   * @param {string} str - Raw comment markup.
   * @returns {string} Cleaned text.
   */
  let cleanString = (str) => {
      const plainText = striptags(str);
      const dogsOnOwnLines = plainText.replace(/@/g, "\n@\n");
      return dogsOnOwnLines.replace(/>>[0-9]*/g, "");
  };
  39.  
  /**
   * Cheap first-pass filter applied to a thread taken from the thread list.
   *
   * A thread is suspicious when its comment is non-empty and at least one of
   * the following holds: the lower-cased subject matches a keyword, the
   * comment is all caps, or the comment contains an "@".
   *
   * @param {{subject: string, comment: string}} thread
   * @returns {boolean}
   */
  let detectButthurt = (thread) => {
      const { subject, comment } = thread;
      const keywordPattern = new RegExp(keyWords.join("|"));

      const atSigns = countDogs(comment);
      const allCaps = checkCaps(comment);
      const keywordInSubject = keywordPattern.test(subject.toLowerCase());

      if (!(comment.length > 0)) {
          return false;
      }
      return keywordInSubject || allCaps || atSigns > 0;
  };
  49.  
  /**
   * Generator (driven by co) that downloads a JSON document and returns its
   * `threads` property.
   *
   * @param {string} url - Address of the JSON document.
   * @returns {*} The `threads` value of the parsed body.
   */
  let fetchThreadsLight = function* (url) {
      const httpResponse = yield fetch(url);
      const { threads } = yield httpResponse.json();
      return threads;
  };
  55.  
  /**
   * Keeps only the threads flagged by detectButthurt, preserving their order.
   *
   * @param {Iterable<Object>} threads
   * @returns {Object[]} New array with the suspicious threads.
   */
  let findSuspiciousThreads = (threads) =>
      Array.from(threads).filter((thread) => detectButthurt(thread));
  67.  
  /**
   * Generator (driven by co) that loads the /b/ thread list, which carries
   * threads without their posts, and returns the suspicious ones.
   *
   * @returns {Object[]} Result of findSuspiciousThreads on the loaded list.
   */
  let getSuspiciousThreads = function* () {
      const threadListUrl = host + "/b/threads" + ext;
      const lightThreads = yield* fetchThreadsLight(threadListUrl);
      return findSuspiciousThreads(lightThreads);
  };
  76.  
  /**
   * Requests the full JSON of every given thread in parallel.
   *
   * The thread id is taken from `num`, falling back to `current_thread`.
   *
   * @param {Iterable<Object>} threads
   * @returns {Promise<Array>} Resolves with the fetch responses, in input order.
   */
  let getFullThreads = (threads) => {
      const requests = Array.from(threads, (thread) => {
          const threadId = thread.num || thread.current_thread;
          return fetch(host + "/b/res/" + threadId + ext);
      });
      return Promise.all(requests);
  };
  86.  
  /**
   * Parses the bodies of the responses whose status is 200; every other
   * response is dropped.
   *
   * @param {Iterable<{status: (number|string), json: Function}>} threads
   * @returns {Promise<Array>} Resolves with the parsed bodies.
   */
  let parseLoadedThreads = (threads) => {
      const pendingBodies = [];
      Array.from(threads).forEach((response) => {
          const isOk = Number(response.status) === 200;
          if (isOk) {
              pendingBodies.push(response.json());
          }
      });
      return Promise.all(pendingBodies);
  };
  97.  
  /**
   * Second-pass check on a fully loaded thread: sums the "@" count over every
   * post of `thread.threads[0]` (the first post is the OP post) and reports
   * whether the total exceeds 3.
   *
   * @param {{threads: Array<{posts: Array<{comment: string}>}>}} thread
   * @returns {boolean}
   */
  let improvedButthurtDetector = (thread) => {
      const { posts } = thread.threads[0];
      const totalDogs = Array.from(posts).reduce(
          (sum, post) => sum + countDogs(post.comment),
          0
      );
      return totalDogs > 3;
  };
  107.  
  /**
   * Appends to threadsPool every flagged thread (`butthurtDetected`) whose
   * `current_thread` id is not in the pool yet.
   *
   * @param {Iterable<Object>} threads
   * @returns {Iterable<Object>} The same `threads` value that was passed in.
   */
  let addThreadsToPool = (threads) => {
      for (const candidate of threads) {
          const alreadyPooled = findItemById(threadsPool, candidate.current_thread, 'current_thread');
          if (alreadyPooled || !candidate.butthurtDetected) {
              continue;
          }
          console.log('not in pool');
          threadsPool.push(candidate);
      }
      return threads;
  };
  118.  
  /**
   * Stores the improvedButthurtDetector verdict on every thread as
   * `butthurtDetected` and returns the threads for which it is true.
   *
   * @param {Object[]} parsedThreads - Fully loaded threads; mutated in place.
   * @returns {Object[]} New array holding only the flagged threads.
   */
  let detectButthurtThreads = (parsedThreads) => {
      const flagged = [];
      for (const thread of parsedThreads) {
          thread.butthurtDetected = improvedButthurtDetector(thread);
          if (thread.butthurtDetected) {
              flagged.push(thread);
          }
      }
      return flagged;
  };
  126.  
  /**
   * Selects the posts whose comment contains at least one "@".
   *
   * @param {Array<{comment: string}>} posts
   * @returns {Array<{comment: string}>}
   */
  let findButthurtPosts = (posts) => {
      const hasDog = (post) => countDogs(post.comment) > 0;
      return _.filter(posts, hasDog);
  };
  130.  
  /**
   * Finds the first item whose `sourceIdName` property equals `id`, comparing
   * both sides as numbers.
   *
   * @param {Object[]} items - Collection to search.
   * @param {(number|string)} id - Identifier to look for.
   * @param {string} sourceIdName - Name of the property holding the identifier.
   * @returns {(Object|undefined)} The matching item, if any.
   */
  let findItemById = (items, id, sourceIdName) => {
      const matchesId = (item) => +item[sourceIdName] === +id;
      return _.find(items, matchesId);
  };
  134.  
  /**
   * Looks up a post in the `docs` collection by its hash (`butthurt.bid`).
   *
   * On a successful lookup the post is marked with `dbCheckDone = true` and
   * the promise resolves with the stored document when one exists, otherwise
   * with the post itself.
   *
   * @param {{bid: string}} butthurt - Post to check; mutated on success.
   * @returns {Promise<Object>} Rejects with the driver error when the lookup
   *     fails.
   */
  let checkUniqButthurt = (butthurt) => {
      return new Promise((resolve, reject) => {
          bugurtDb.collection('docs').findOne({ _id: butthurt.bid }, (err, res) => {
              if (err) {
                  // A failed lookup says nothing about uniqueness: do not report
                  // the post as new, and leave dbCheckDone unset so that code
                  // testing that flag queries again.
                  reject(err);
                  return;
              }
              butthurt.dbCheckDone = true;
              console.log('db check reporting: butthurt ' + butthurt.bid + ' exists? -' + !!(res));
              resolve(res || butthurt);
          });
      });
  };
  144.  
  /**
   * Collects the "@" posts of every pooled thread, hashes the ones that have
   * not been checked yet and asks the database which of them are new.
   *
   * Each unchecked post gets `bid` set to the MD5 hex digest of its comment.
   * Posts with the same hash inside one batch are queried only once, so the
   * result never contains two posts with the same `bid`; the skipped copies
   * keep `dbCheckDone` unset.
   *
   * @param {Array<{threads: Array<{posts: Object[]}>}>} freshPool
   * @returns {Promise<Object[]>} Resolves with the posts not found in the
   *     database (entries that have a `bid` and no `_id`).
   */
  let removeDuplicateButthurts = (freshPool) => {
      console.log('fresh pool length ' + freshPool.length);

      const checkQueries = [];
      // Hashes already queued in this batch; identical comments share a hash and
      // would otherwise all be reported as new and inserted under the same _id.
      const queuedIds = new Set();

      for (const thread of freshPool) {
          for (const butthurt of findButthurtPosts(thread.threads[0].posts)) {
              if (butthurt.dbCheckDone) {
                  continue;
              }
              butthurt.bid = crypto.createHash('md5').update(butthurt.comment).digest('hex');
              if (queuedIds.has(butthurt.bid)) {
                  continue;
              }
              queuedIds.add(butthurt.bid);
              console.log('db check new butthurt ' + butthurt.bid);
              checkQueries.push(checkUniqButthurt(butthurt));
          }
      }

      return Promise.all(checkQueries)
          .then((res) => res.filter((butthurt) => butthurt.bid && !butthurt._id));
  };
  166.  
  /**
   * Authorizes against VK with the `wall` scope and publishes the cleaned
   * comment through `wall.post`.
   *
   * Failures of either step are logged with console.log and not rethrown.
   *
   * @param {{comment: string}} butthurt - Post to publish.
   * @returns {Promise<*>} Settles once the attempt has finished; resolves with
   *     the `wall.post` result, or with undefined after a logged failure.
   */
  let postToVk = (butthurt) => {
      return VK.auth.user({ scope: ['wall'] })
          .then(() => VK.call('wall.post', {
              owner_id: -133063215,
              friends_only: 0,
              from_group: 1,
              message: cleanString(butthurt.comment)
          }))
          // One handler at the end of the chain also covers a rejected
          // authorization, which previously surfaced as an unhandled rejection.
          .catch((error) => console.log(error));
  };
  177.  
  /**
   * Inserts every new post into the `docs` collection as
   * `{ _id: bid, comment, published_at }`, where `published_at` is the current
   * time in milliseconds.
   *
   * Publishing to VK via postToVk is currently disabled in this function.
   *
   * @param {Array<{bid: string, comment: string}>} uniqButthurts
   * @returns {Promise<Array>} Resolves when every insert succeeded; rejects
   *     with the first driver error otherwise.
   */
  let storeUniqButthurts = (uniqButthurts) => {
      console.log('new butthurts ' + uniqButthurts.length);

      const queries = uniqButthurts.map((uniqButthurt) => {
          const bt = {
              _id: uniqButthurt.bid,
              comment: uniqButthurt.comment,
              published_at: Date.now()
          };
          return new Promise((resolve, reject) => {
              bugurtDb.collection('docs').insertOne(bt, (err) => {
                  if (err) {
                      // Surface failed writes instead of reporting them as stored.
                      reject(err);
                      return;
                  }
                  resolve();
              });
          });
      });

      return Promise.all(queries);
  };
  201.  
  /**
   * Appends to the pooled thread every post of the freshly loaded thread whose
   * `num` is not present among the pooled posts yet.
   *
   * @param {{threads: Array<{posts: Object[]}>}} freshThread - Newly loaded copy.
   * @param {{threads: Array<{posts: Object[]}>}} threadInPool - Pooled copy;
   *     its post list is mutated.
   */
  let pushNewPosts = (freshThread, threadInPool) => {
      const knownPosts = threadInPool.threads[0].posts;
      for (const incoming of freshThread.threads[0].posts) {
          const alreadyKnown = findItemById(knownPosts, incoming.num, 'num');
          if (alreadyKnown) {
              continue;
          }
          knownPosts.push(incoming);
      }
  };
  211.  
  /**
   * Synchronizes threadsPool with a batch of freshly loaded threads.
   *
   * Threads missing from the pool are added, pooled ones receive their new
   * posts through pushNewPosts, and afterwards every pooled thread that is
   * absent from the batch is dropped.
   *
   * @param {Object[]} parsedThreads - Freshly loaded threads.
   * @returns {Object[]} The new value of threadsPool.
   */
  let refreshThreadsPool = (parsedThreads) => {
      console.log('pool before refresh: ' + threadsPool.length);

      for (const loaded of parsedThreads) {
          const pooled = findItemById(threadsPool, loaded.current_thread, 'current_thread');
          if (pooled) {
              pushNewPosts(loaded, pooled);
          } else {
              threadsPool.push(loaded);
          }
      }

      const stillLoaded = (pooled) =>
          findItemById(parsedThreads, pooled.current_thread, 'current_thread');
      threadsPool = _.filter(threadsPool, stillLoaded);
      return threadsPool;
  };
  227.  
  /**
   * One tick of the pool watcher: reloads every pooled thread, refreshes the
   * pool, filters out posts already stored and saves the new ones. Does
   * nothing beyond logging when the pool is empty; chain failures go to
   * handleError.
   */
  let poolLoop = () => {
      console.log("\nnew pool tick");
      console.log("pool length: " + threadsPool.length);

      if (threadsPool.length === 0) {
          return;
      }

      threadsPool.forEach((thread) => {
          console.log(host + "/b/res/" + thread.current_thread + ".html");
      });

      getFullThreads(threadsPool)
          .then(parseLoadedThreads)
          .then(refreshThreadsPool)
          .then(removeDuplicateButthurts)
          .then(storeUniqButthurts)
          .catch(handleError);
  };
  245.  
  /**
   * One tick of the discovery loop: finds suspicious threads in the thread
   * list, loads them in full, applies the second-pass detector and adds the
   * flagged ones to the pool. Chain failures go to handleError.
   *
   * @param {*} db - Not used by the function body.
   */
  let mainLoop = (db) => {
      console.log("\nnew main tick");

      co(getSuspiciousThreads)
          .then(getFullThreads)
          .then(parseLoadedThreads)
          .then(detectButthurtThreads)
          .then(addThreadsToPool)
          .catch(handleError);
  };
  254.  
  // Entry point: connect to MongoDB, then run the discovery loop once and
  // schedule both loops.
  mongodb.MongoClient.connect(mongoUrl, (err, db) => {
      if (err) {
          // Without a database handle every pool tick would fail on
          // bugurtDb.collection, so report the failure and do not start the loops.
          handleError(err);
          return;
      }

      bugurtDb = db;
      mainLoop();
      // NOTE(review): the discovery loop runs every 15 seconds here, while the
      // declared mainLoopIntervalMin (10) is unused — confirm which is intended.
      setInterval(mainLoop, 15 * 1000);
      setInterval(poolLoop, poolLoopIntervalSec * 1000);
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement