Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- "use strict";
- require("babel-core/register");
- import fetch from "node-fetch";
- import striptags from "striptags";
- import co from "co";
- import _ from "underscore";
- import chalk from "chalk";
- import mongodb from "mongodb";
- import crypto from 'crypto';
- import VKApi from 'node-vkapi';
// Connection string handed to MongoClient.connect.
const mongoUrl = "mongodb://localhost:27017/bugurt";

// Lower-case keywords that are searched for in thread subjects.
const keyWords = [
  "butthurt",
  "бугурт",
  "буггурт",
  "баттхерт",
  "батхерт",
];

// Base URL of the board and the suffix appended to build its JSON endpoints.
const host = "http://2ch.hk";
const ext = ".json";

// NOTE(review): mainLoopIntervalMin is not referenced by the visible code —
// the main loop is scheduled with a literal interval; confirm which is intended.
const mainLoopIntervalMin = 10;
// Period of the pool loop, in seconds.
const poolLoopIntervalSec = 5;
// Options for the VK API client. The secret, login and password are blank
// here and have to be filled in before the client can authenticate.
const vkClientOptions = {
  app: { id: 1, secret: "" },
  auth: { login: "", pass: "" },
};
const VK = new VKApi(vkClientOptions);

// Threads currently being watched; replaced wholesale on every pool refresh.
let threadsPool = [];
// MongoDB handle, assigned once the connection callback fires.
let bugurtDb = null;
/**
 * Tells whether the tag-stripped text of `str` is written entirely in capitals.
 *
 * Text without any cased letter (empty string, digits, punctuation) is NOT
 * considered capitalised; previously such text was reported as "caps on"
 * because upper-casing it is a no-op.
 *
 * @param {string} str - HTML fragment; tags are removed with striptags first.
 * @returns {boolean} true when the text has cased letters and none is lower-case.
 */
let checkCaps = (str) => {
  const text = striptags(str);
  const upper = text.toUpperCase();
  // If both case conversions agree, there is nothing in the text that has a case.
  const hasCasedLetters = text.toLowerCase() !== upper;
  return hasCasedLetters && upper === text;
};
/**
 * Counts the "@" characters in a string.
 *
 * @param {string} str - Text to scan.
 * @returns {number} Number of "@" occurrences; 0 for anything that is not a
 *   string (e.g. a post without a comment) instead of throwing.
 */
let countDogs = (str) => {
  if (typeof str !== "string") {
    return 0;
  }
  const dogs = str.match(/@/g);
  return dogs === null ? 0 : dogs.length;
};
/**
 * Shared rejection handler for the promise chains: prints the error.
 *
 * @param {*} error - Whatever the chain rejected with.
 * @returns {void}
 */
let handleError = (error) => {
  console.log(error);
};
/**
 * Prepares a post comment for publishing.
 *
 * Strips tags, puts every "@" on a line of its own, and removes ">>"
 * sequences together with any digits that directly follow them.
 *
 * @param {string} str - Raw comment markup.
 * @returns {string} The cleaned text.
 */
let cleanString = (str) => {
  const plainText = striptags(str);
  const dogsOnOwnLines = plainText.replace(/@/g, "\n@\n");
  return dogsOnOwnLines.replace(/>>[0-9]*/g, "");
};
/**
 * Cheap first-pass check of a thread taken from the board index.
 *
 * A thread is suspicious when its comment is non-empty and at least one of
 * these holds: the subject contains a keyword, the comment is all capitals,
 * or the comment contains an "@".
 *
 * A missing or non-string `subject`/`comment` is treated as empty text
 * rather than raising a TypeError.
 *
 * @param {{comment?: string, subject?: string}} thread - Thread summary.
 * @returns {boolean} true when the thread deserves a full download.
 */
let detectButthurt = (thread) => {
  const comment = typeof thread.comment === "string" ? thread.comment : "";
  // Nothing to analyse: bail out before doing any matching.
  if (comment.length === 0) {
    return false;
  }
  const subject = typeof thread.subject === "string" ? thread.subject : "";
  const keywordPattern = new RegExp(keyWords.join("|"));
  // Keywords are stored lower-case, so the subject is lower-cased to match.
  const subjContainsKw = keywordPattern.test(subject.toLowerCase());
  return subjContainsKw || checkCaps(comment) || countDogs(comment) > 0;
};
/**
 * Generator (meant to be driven by `co`) that downloads a JSON document and
 * returns its `threads` array.
 *
 * @param {string} url - Address of the JSON document.
 * @returns {Array} The `threads` property of the parsed body, or an empty
 *   array when the body has none, so callers can always iterate the result.
 * @throws {Error} When the server answers with a non-success HTTP status.
 */
let fetchThreadsLight = function*(url) {
  const response = yield fetch(url);
  // Fail with a clear message instead of a JSON parse error on an error page.
  if (!response.ok) {
    throw new Error("Failed to load " + url + ": HTTP " + response.status);
  }
  const parsedResponse = yield response.json();
  return parsedResponse.threads || [];
};
/**
 * Keeps only the threads that detectButthurt flags.
 *
 * @param {Iterable<Object>} threads - Thread summaries from the board index.
 * @returns {Array<Object>} Flagged threads, in their original order.
 */
let findSuspiciousThreads = (threads) =>
  Array.from(threads).filter((candidate) => detectButthurt(candidate));
/**
 * Generator (driven by `co`) that loads the /b/ thread index and returns the
 * threads flagged by the first-pass detector.
 *
 * @returns {Array<Object>} Suspicious thread summaries.
 */
let getSuspiciousThreads = function*() {
  const boardIndexUrl = `${host}/b/threads${ext}`;
  // The index lists threads without their posts.
  const lightThreads = yield* fetchThreadsLight(boardIndexUrl);
  return findSuspiciousThreads(lightThreads);
};
/**
 * Starts a download of the full JSON of every given thread.
 *
 * The thread id is taken from `num`, falling back to `current_thread`.
 *
 * @param {Iterable<Object>} threads - Threads to download.
 * @returns {Promise<Array>} Resolves with the fetch responses, in input order.
 */
let getFullThreads = (threads) => {
  const requests = Array.from(threads, (thread) => {
    const threadId = thread.num || thread.current_thread;
    return fetch(`${host}/b/res/${threadId}${ext}`);
  });
  return Promise.all(requests);
};
/**
 * Parses the JSON bodies of the responses that came back with status 200;
 * responses with any other status are dropped.
 *
 * @param {Iterable<Object>} threads - Fetch responses.
 * @returns {Promise<Array<Object>>} Parsed bodies of the successful responses.
 */
let parseLoadedThreads = (threads) => {
  const bodies = Array.from(threads)
    .filter((response) => Number(response.status) === 200)
    .map((response) => response.json());
  return Promise.all(bodies);
};
/**
 * Second-pass check on a fully loaded thread: sums the "@" count of every
 * post and flags the thread when the total exceeds three.
 *
 * @param {Object} thread - Parsed thread; posts are read from `threads[0].posts`
 *   (the first post is the OP post).
 * @returns {boolean} true when more than three "@" were found in total.
 */
let improvedButthurtDetector = (thread) => {
  const posts = Array.from(thread.threads[0].posts);
  const totalDogs = posts.reduce((sum, post) => sum + countDogs(post.comment), 0);
  return totalDogs > 3;
};
/**
 * Adds flagged threads that are not yet watched to the shared threadsPool.
 *
 * @param {Iterable<Object>} threads - Threads carrying a `butthurtDetected` flag.
 * @returns {Iterable<Object>} The same `threads` value that was passed in.
 */
let addThreadsToPool = (threads) => {
  for (const candidate of threads) {
    const alreadyPooled = findItemById(threadsPool, candidate.current_thread, 'current_thread');
    if (alreadyPooled || !candidate.butthurtDetected) {
      continue;
    }
    console.log('not in pool');
    threadsPool.push(candidate);
  }
  return threads;
};
/**
 * Runs the second-pass detector over fully loaded threads.
 *
 * Every thread gets its verdict stored in `butthurtDetected` (the input
 * objects are mutated); only the flagged ones are returned.
 *
 * @param {Array<Object>} parsedThreads - Parsed thread documents.
 * @returns {Array<Object>} Threads whose `butthurtDetected` is truthy.
 */
let detectButthurtThreads = (parsedThreads) => {
  const tagAndTest = (candidate) => {
    candidate.butthurtDetected = improvedButthurtDetector(candidate);
    return candidate.butthurtDetected;
  };
  return _.filter(parsedThreads, tagAndTest);
};
/**
 * Selects the posts whose comment contains at least one "@".
 *
 * @param {Array<Object>} posts - Posts with a `comment` field.
 * @returns {Array<Object>} The matching posts.
 */
let findButthurtPosts = (posts) => {
  const hasDogs = (post) => countDogs(post.comment) > 0;
  return _.filter(posts, hasDogs);
};
/**
 * Finds the first item whose `sourceIdName` property equals `id`, comparing
 * both sides as numbers (so "10" matches 10).
 *
 * @param {Array<Object>} items - Collection to search.
 * @param {number|string} id - Identifier to look for.
 * @param {string} sourceIdName - Name of the property holding the identifier.
 * @returns {Object|undefined} The matching item, or undefined.
 */
let findItemById = (items, id, sourceIdName) => {
  const wantedId = Number(id);
  return _.find(items, (item) => Number(item[sourceIdName]) === wantedId);
};
/**
 * Looks a butthurt up in the `docs` collection by its `bid`.
 *
 * @param {Object} butthurt - Post with a `bid`; on a completed lookup it is
 *   marked with `dbCheckDone = true` so it is not checked again.
 * @returns {Promise<Object>} Resolves with the stored document when one
 *   exists, otherwise with `butthurt` itself. Rejects with the driver error
 *   when the lookup fails; in that case the post is NOT marked as checked, so
 *   a failed query is no longer mistaken for "not stored yet".
 */
let checkUniqButthurt = (butthurt) => {
  return new Promise((resolve, reject) => {
    bugurtDb.collection('docs').findOne({ _id: butthurt.bid }, (err, res) => {
      if (err) {
        reject(err);
        return;
      }
      butthurt.dbCheckDone = true;
      console.log('db check reporting: butthurt ' + butthurt.bid + ' exists? -' + !!(res));
      resolve(res || butthurt);
    });
  });
};
/**
 * Collects the "@"-bearing posts of every pooled thread and keeps the ones
 * that are not stored in the database yet.
 *
 * Posts that have not been checked before get a `bid` (MD5 hex digest of
 * their comment) and are looked up through checkUniqButthurt; posts already
 * marked `dbCheckDone` are skipped.
 *
 * @param {Array<Object>} freshPool - Pooled threads (posts in `threads[0].posts`).
 * @returns {Promise<Array<Object>>} Lookup results that have a `bid` and no
 *   `_id`, i.e. posts for which no stored document came back.
 */
let removeDuplicateButthurts = (freshPool) => {
  console.log('fresh pool length ' + freshPool.length);
  const candidates = freshPool.reduce(
    (collected, thread) => collected.concat(findButthurtPosts(thread.threads[0].posts)),
    []
  );
  const pendingChecks = [];
  candidates.forEach((post) => {
    if (post.dbCheckDone) {
      return;
    }
    post.bid = crypto.createHash('md5').update(post.comment).digest('hex');
    console.log('db check new butthurt ' + post.bid);
    pendingChecks.push(checkUniqButthurt(post));
  });
  return Promise.all(pendingChecks)
    .then((checked) => checked.filter((entry) => entry.bid && !entry._id));
};
/**
 * Publishes a butthurt on the VK wall.
 *
 * Authenticates the VK client with the `wall` scope, then calls `wall.post`
 * with the cleaned comment as the message.
 *
 * The promise is now returned so callers can wait for the outcome, and a
 * single trailing handler covers both the authentication and the posting
 * step (an authentication failure used to be an unhandled rejection).
 *
 * @param {{comment: string}} butthurt - Post to publish.
 * @returns {Promise<*>} Resolves with the `wall.post` result, or with
 *   undefined after a failure has been logged. Never rejects.
 */
let postToVk = (butthurt) => {
  // The token passed on by auth.user is not used here.
  // NOTE(review): presumably the client keeps it internally — confirm against node-vkapi.
  return VK.auth.user({ scope: ['wall'] })
    .then(() => VK.call('wall.post', {
      // NOTE(review): presumably a negative owner_id addresses a community wall — confirm.
      owner_id: -133063215,
      friends_only: 0,
      from_group: 1,
      message: cleanString(butthurt.comment)
    }))
    .catch((error) => console.log(error));
};
/**
 * Inserts new butthurts into the `docs` collection.
 *
 * Each document is `{ _id: bid, comment, published_at }`, with `published_at`
 * set to the current time in milliseconds. Posts that share a `bid` with an
 * earlier post of the same batch are skipped, since a second insert with the
 * same `_id` could only fail. Insert errors now reject the returned promise
 * instead of being silently dropped.
 *
 * Publishing to VK is not triggered from here.
 *
 * @param {Array<Object>} uniqButthurts - Posts carrying `bid` and `comment`.
 * @returns {Promise<Array>} Resolves when every insert has succeeded;
 *   rejects with the driver error of the first failed insert.
 */
let storeUniqButthurts = (uniqButthurts) => {
  console.log('new butthurts ' + uniqButthurts.length);
  const seenIds = new Set();
  const queries = [];
  for (const uniqButthurt of uniqButthurts) {
    if (seenIds.has(uniqButthurt.bid)) {
      continue;
    }
    seenIds.add(uniqButthurt.bid);
    const bt = {
      _id: uniqButthurt.bid,
      comment: uniqButthurt.comment,
      published_at: +new Date()
    };
    queries.push(new Promise((resolve, reject) => {
      bugurtDb.collection('docs').insertOne(bt, (err) => {
        if (err) {
          reject(err);
          return;
        }
        resolve();
      });
    }));
  }
  return Promise.all(queries);
};
/**
 * Appends to the pooled thread every post of the fresh thread whose `num`
 * is not present there yet. The pooled thread is modified in place.
 *
 * @param {Object} freshThread - Newly downloaded thread.
 * @param {Object} threadInPool - The pooled copy of the same thread.
 * @returns {void}
 */
let pushNewPosts = (freshThread, threadInPool) => {
  for (const freshPost of freshThread.threads[0].posts) {
    const pooledPosts = threadInPool.threads[0].posts;
    const alreadyKnown = findItemById(pooledPosts, freshPost.num, 'num');
    if (alreadyKnown) {
      continue;
    }
    pooledPosts.push(freshPost);
  }
};
/**
 * Synchronises the shared threadsPool with a freshly downloaded set of threads.
 *
 * Threads missing from the pool are added, threads already pooled receive
 * their new posts, and finally every pooled thread that is absent from
 * `parsedThreads` is dropped. The module-level pool is replaced by the result.
 *
 * @param {Array<Object>} parsedThreads - Freshly parsed thread documents.
 * @returns {Array<Object>} The new content of threadsPool.
 */
let refreshThreadsPool = (parsedThreads) => {
  console.log('pool before refresh: ' + threadsPool.length);
  for (const fresh of parsedThreads) {
    const pooled = findItemById(threadsPool, fresh.current_thread, 'current_thread');
    if (pooled) {
      pushNewPosts(fresh, pooled);
      continue;
    }
    threadsPool.push(fresh);
  }
  const stillPresent = (pooled) =>
    findItemById(parsedThreads, pooled.current_thread, 'current_thread');
  threadsPool = _.filter(threadsPool, stillPresent);
  return threadsPool;
};
/**
 * One tick of the pool loop.
 *
 * Logs the pool state and, when the pool is not empty, re-downloads every
 * pooled thread, refreshes the pool, filters out butthurts that are already
 * stored and saves the new ones. Failures anywhere in the chain are passed
 * to handleError.
 *
 * @returns {void}
 */
let poolLoop = () => {
  console.log("\nnew pool tick");
  console.log("pool length: " + threadsPool.length);
  if (threadsPool.length === 0) {
    return;
  }
  threadsPool.forEach((thread) => {
    console.log(host + "/b/res/" + thread.current_thread + ".html");
  });
  getFullThreads(threadsPool)
    .then(parseLoadedThreads)
    .then(refreshThreadsPool)
    .then(removeDuplicateButthurts)
    .then(storeUniqButthurts)
    .catch(handleError);
};
/**
 * One tick of the main loop.
 *
 * Loads the board index, downloads the suspicious threads in full, runs the
 * second-pass detector on them and adds the flagged ones to the pool.
 * Failures anywhere in the chain are passed to handleError.
 *
 * @param {*} [db] - Not used by the function body.
 * @returns {void}
 */
let mainLoop = (db) => {
  console.log("\nnew main tick");
  co(getSuspiciousThreads)
    .then(getFullThreads)
    .then(parseLoadedThreads)
    .then(detectButthurtThreads)
    .then(addThreadsToPool)
    .catch(handleError);
};
// Entry point: connect to MongoDB, then start both loops.
// A failed connection is reported and nothing is scheduled; previously the
// error was ignored and the loops started without a usable database handle.
mongodb.MongoClient.connect(mongoUrl, (err, db) => {
  if (err) {
    handleError(err);
    process.exitCode = 1;
    return;
  }
  bugurtDb = db;
  // Run the first main tick right away instead of waiting for the timer.
  mainLoop();
  // NOTE(review): the main loop runs every 15 s while mainLoopIntervalMin (10)
  // is never used — confirm which period is intended.
  setInterval(mainLoop, 15 * 1000);
  setInterval(poolLoop, poolLoopIntervalSec * 1000);
});
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement