Advertisement
Guest User

Untitled

a guest
Nov 13th, 2016
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. "use strict";
  2.  
  3. require("babel-core/register");
  4.  
  5. import fetch from "node-fetch";
  6. import striptags from "striptags";
  7. import co from "co";
  8. import _ from "underscore";
  9. import chalk from "chalk";
  10. import mongodb from "mongodb";
  11. import crypto from 'crypto';
  12.  
  // ---- Storage ----
  // Connection string handed to MongoClient.connect at start-up.
  const mongoUrl = "mongodb://localhost:27017/bugurt";

  // ---- Detection ----
  // Subject keywords; joined with "|" into a single regular expression.
  const keyWords = ["butthurt", "бугурт", "буггурт", "баттхерт", "батхерт"];

  // ---- Remote API ----
  // Board host and the suffix appended to every API path.
  const host = "http://2ch.hk";
  const ext = ".json";

  // ---- Scheduling ----
  const mainLoopIntervalMin = 10;
  const poolLoopIntervalSec = 5;

  // ---- Mutable module state ----
  // Threads currently being watched for new posts.
  let threadsPool = [];
  let shownButthurts = [];
  // Database handle; stays null until the connection callback assigns it.
  let bugurtDb = null;
  23.  
  // Reports whether the tag-stripped text is unchanged by upper-casing.
  // Text without any letters (digits, punctuation, empty) also passes.
  let checkCaps = (str) => {
      const plainText = striptags(str);
      return plainText === plainText.toUpperCase();
  };
  // Counts the "@" characters in a piece of text.
  // A missing value (null / undefined) counts as zero instead of throwing,
  // so a post without a comment field cannot abort a whole detection pass.
  let countDogs = (str) => {
      if (str == null) {
          return 0;
      }
      return (String(str).match(/@/g) || []).length;
  };
  // Shared rejection handler for the promise chains: prints the error to
  // standard output and passes back whatever console.log returns.
  let handleError = (error) => {
      return console.log(error);
  };
  // Prepares a comment for console output: strips tags, puts every "@" on
  // a line of its own, then removes ">>" together with any digits that
  // follow it (these look like reply markers — TODO confirm).
  let cleanString = (str) => {
      const withoutTags = striptags(str);
      const dogsOnOwnLines = withoutTags.replace(/@/g, "\n@\n");
      return dogsOnOwnLines.replace(/>>[0-9]*/g, "");
  };
  28.  
  // First-pass check on a thread summary. A thread is flagged when its
  // comment is non-empty and at least one signal fires: the lower-cased
  // subject matches a keyword, the comment passes checkCaps, or the comment
  // contains at least one "@".
  let detectButthurt = (thread) => {
      const keywordPattern = new RegExp(keyWords.join("|"));
      const comment = thread.comment;

      // Every signal is computed up front, before the final decision.
      const signals = [
          countDogs(comment) > 0,
          checkCaps(comment),
          keywordPattern.test(thread.subject.toLowerCase()),
      ];

      return comment.length > 0 && signals.some(Boolean);
  };
  38.  
  // Generator: yields the fetch for `url`, then yields the JSON parse of the
  // response it is resumed with, and finally returns the `threads` property
  // of the parsed body.
  let fetchThreadsLight = function*(url) {
      const httpResponse = yield fetch(url);
      const { threads } = yield httpResponse.json();
      return threads;
  };
  44.  
  // Returns, in their original order, the threads that detectButthurt flags.
  let findSuspiciousThreads = (threads) =>
      [...threads].filter((thread) => detectButthurt(thread));
  56.  
  // Generator: loads the /b/ thread index (threads without their posts) and
  // returns the entries that findSuspiciousThreads keeps.
  let getSuspiciousThreads = function*() {
      const boardIndexUrl = host + "/b/threads" + ext;
      const boardThreads = yield * fetchThreadsLight(boardIndexUrl);
      return findSuspiciousThreads(boardThreads);
  };
  65.  
  // Starts one request per thread against /b/res/<id>.json, where <id> is
  // `num` if truthy and `current_thread` otherwise, and resolves with the
  // responses once the yielded array settles.
  let getFullThreads = co.wrap(function*(threads) {
      const pendingResponses = [...threads].map((thread) => {
          const threadId = thread.num || thread.current_thread;
          return fetch(host + "/b/res/" + threadId + ext);
      });
      // The whole array of pending requests is handed to co in one yield.
      return yield pendingResponses;
  });
  75.  
  // Keeps only the responses whose status coerces to 200 and resolves with
  // their parsed JSON bodies; every other response is dropped.
  let parseLoadedThreads = co.wrap(function*(threads) {
      const pendingBodies = [...threads]
          .filter((response) => +response.status === 200)
          .map((response) => response.json());
      return yield pendingBodies;
  });
  86.  
  // Second-pass check on a fully loaded thread: adds up the "@" count of
  // every post in thread.threads[0].posts (per the original note, post 0 is
  // the OP post) and reports whether the total exceeds three.
  let improvedButthurtDetector = (thread) => {
      const posts = thread.threads[0].posts;
      const totalDogs = [...posts].reduce(
          (sum, post) => sum + countDogs(post.comment),
          0
      );
      return totalDogs > 3;
  };
  96.  
  // Adds to threadsPool every given thread that is flagged
  // (butthurtDetected) and has no pool entry with the same current_thread.
  // Returns the input list untouched.
  let addThreadsToPool = (threads) => {
      for (const candidate of threads) {
          const alreadyPooled = findItemByPropMatch(threadsPool, candidate.current_thread, 'current_thread');
          if (alreadyPooled || !candidate.butthurtDetected) {
              continue;
          }
          console.log('not in pool');
          threadsPool.push(candidate);
      }
      return threads;
  };
  107.  
  // Stamps every parsed thread with a butthurtDetected flag taken from
  // improvedButthurtDetector and returns only the flagged ones.
  let detectButthurtThreads = (parsedThreads) => {
      const flagged = [];
      for (const parsedThread of parsedThreads) {
          parsedThread.butthurtDetected = improvedButthurtDetector(parsedThread);
          if (parsedThread.butthurtDetected) {
              flagged.push(parsedThread);
          }
      }
      return flagged;
  };
  115.  
  // Selects the posts whose comment contains at least one "@".
  let findButthurtPosts = (posts) => {
      const hasDog = (post) => countDogs(post.comment) > 0;
      return _.filter(posts, hasDog);
  };
  121.  
  // Returns the first item whose `propName` value, coerced to a number,
  // strictly equals the numerically coerced `sourcePropVal`. Values that
  // coerce to NaN never match.
  let findItemByPropMatch = (items, sourcePropVal, propName) => {
      const matchesWanted = (item) => +item[propName] === +sourcePropVal;
      return _.find(items, matchesWanted);
  };
  125.  
  // Looks the post up in the 'docs' collection by its hash (butthurt.bid).
  // Resolves with the stored document when one exists, otherwise with the
  // post itself. Rejects when the lookup reports an error.
  let checkUniqButthurt = (butthurt) => {
      return new Promise((resolve, reject) => {
          bugurtDb.collection('docs').findOne({ _id: butthurt.bid }, (err, res) => {
              if (err) {
                  // A failed lookup says nothing about uniqueness, so the
                  // post is not marked as checked and the error is surfaced
                  // instead of being treated as "not found".
                  reject(err);
                  return;
              }
              butthurt.dbCheckDone = true;
              console.log('db check reporting: butthurt ' + butthurt.bid + ' exists? -' + !!(res));
              resolve(res || butthurt);
          });
      });
  };
  135.  
  // Collects the "@" posts from every pooled thread, hashes each one that
  // has not been checked yet (md5 of the comment, stored as `bid`) and asks
  // the database whether that hash is already known. Resolves with the
  // posts that are not stored yet.
  let removeDuplicateButthurts = (freshPool) => {
      const checkQueries = [];
      const butthurts = [];
      // Hashes already queued in this batch. Two posts with identical text
      // would otherwise both pass the database check and both be reported
      // as new.
      const queuedIds = new Set();

      for (const thread of freshPool) {
          butthurts.push(...findButthurtPosts(thread.threads[0].posts));
      }

      for (const butthurt of butthurts) {
          if (butthurt.dbCheckDone) {
              continue;
          }
          butthurt.bid = crypto.createHash('md5').update(butthurt.comment).digest('hex');
          if (queuedIds.has(butthurt.bid)) {
              // Same text as a post queued earlier in this batch; skip it.
              continue;
          }
          queuedIds.add(butthurt.bid);
          console.log('db check new butthurt ' + butthurt.bid);
          checkQueries.push(checkUniqButthurt(butthurt));
      }

      // Stored documents carry `_id` and no `bid`; new posts carry `bid` only.
      return Promise.all(checkQueries).then((res) => _.filter(res, (butthurt) => butthurt.bid && !butthurt._id));
  };
  155.  
  // Prints every new post to the console and inserts it into the 'docs'
  // collection keyed by its hash. Resolves once all inserts have finished;
  // rejects when an insert fails for any reason other than the document
  // already being stored.
  let storeUniqButthurts = (uniqButthurts) => {
      // MongoDB's error code for a duplicate _id.
      const duplicateKeyCode = 11000;
      const queries = [];

      console.log('new butthurts ' + uniqButthurts.length);

      for (const uniqButthurt of uniqButthurts) {
          const bt = { _id: uniqButthurt.bid, comment: uniqButthurt.comment };
          console.log("-----------------------------------");
          console.log(chalk.yellow.bold(cleanString(bt.comment)));
          console.log("-----------------------------------");
          queries.push(new Promise((resolve, reject) => {
              bugurtDb.collection('docs').insertOne(bt, (err) => {
                  // A duplicate key means the document is already stored,
                  // which is the desired end state; anything else is a real
                  // failure and must not be swallowed.
                  if (err && err.code !== duplicateKeyCode) {
                      reject(err);
                      return;
                  }
                  resolve();
              });
          }));
      }

      return Promise.all(queries);
  };
  173.  
  // Appends to the pooled thread every post of the freshly loaded thread
  // whose `num` is not already present among the pooled posts.
  let pushNewPosts = (freshThread, threadInPool) => {
      for (const incomingPost of freshThread.threads[0].posts) {
          const { posts: pooledPosts } = threadInPool.threads[0];
          if (!findItemByPropMatch(pooledPosts, incomingPost.num, 'num')) {
              pooledPosts.push(incomingPost);
          }
      }
  };
  183.  
  // Merges freshly parsed threads into threadsPool: a thread already pooled
  // (same current_thread) receives its new posts, any other thread is added
  // as a new entry. Returns the pool.
  let refreshThreadsPool = (parsedThreads) => {
      for (const incoming of parsedThreads) {
          const pooled = findItemByPropMatch(threadsPool, incoming.current_thread, 'current_thread');
          if (pooled) {
              pushNewPosts(incoming, pooled);
              continue;
          }
          threadsPool.push(incoming);
      }
      return threadsPool;
  };
  195.  
  // One pool tick: logs the pool, then (when it is not empty) reloads every
  // pooled thread, merges the new posts, filters out posts that are already
  // stored and saves the rest. Any failure in the chain goes to handleError.
  let poolLoop = () => {
      console.log("\nnew pool tick");
      console.log("pool length: " + threadsPool.length);
      if (threadsPool.length === 0) {
          return;
      }

      for (const pooledThread of threadsPool) {
          console.log(host + "/b/res/" + pooledThread.current_thread + ".html");
      }

      getFullThreads(threadsPool)
          .then(parseLoadedThreads)
          .then(refreshThreadsPool)
          .then(removeDuplicateButthurts)
          .then(storeUniqButthurts)
          .catch(handleError);
  };
  213.  
  // One main tick: finds suspicious threads on the board index, loads them
  // in full, re-checks them with the stricter detector and adds the flagged
  // ones to the pool. Any failure in the chain goes to handleError.
  // The `db` parameter is accepted but not used.
  let mainLoop = (db) => {
      console.log("\nnew main tick");
      co(getSuspiciousThreads)
          .then(getFullThreads)
          .then(parseLoadedThreads)
          .then(detectButthurtThreads)
          .then(addThreadsToPool)
          .catch(handleError);
  };
  222.  
  // Entry point: connects to MongoDB, keeps the handle in bugurtDb, runs the
  // main loop once and then schedules both loops.
  mongodb.MongoClient.connect(mongoUrl, (err, db) => {
      if (err) {
          // Without a database handle every pool tick would fail on
          // bugurtDb, so report the failure and do not start the timers.
          handleError(err);
          return;
      }
      bugurtDb = db;
      mainLoop();
      // NOTE(review): mainLoopIntervalMin (10) is declared at the top of the
      // file but is not used; this timer fires every 15 seconds. Confirm
      // which interval is intended.
      setInterval(mainLoop, 15 * 1000);
      setInterval(poolLoop, poolLoopIntervalSec * 1000);
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement