Guest User

Untitled

a guest
Nov 27th, 2016
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. "use strict";
  2.  
  3. // Modules
  4. var request = require("request");
  5. var readline = require("readline");
  6. var os = require("os");
  7. var fs = require("fs-extra");
  8. var Crawler = require("crawler");
  9. var url = require("url");
  10. var Promise = require("bluebird");
  11.  
  // Constants

  // Readline interface over the process's standard streams; the prompts for
  // the username and the keywords are asked through it.
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });

  // Prefix that the entered username is appended to when building the
  // profile URL.
  const redditUrl = "https://www.reddit.com/user/";

  // Mutable search state (declared with `let` because each is reassigned).

  // Profile URL of the current search; assigned in askQuestion.
  let usernameUrl = "";

  // Keywords of the current search; assigned in askQuestion.
  let keywords = [];

  // Scratch list of match records; reset at the start of askQuestion.
  let searchedArray = [];

  console.log("Reddit profile scraper... case insensitive and search is based off space delimeter (typos will not appear in filter)");
  27.  
  // Crawler

  /**
   * Escapes regular-expression metacharacters so that a user-supplied keyword
   * is matched literally instead of being interpreted as a pattern.
   *
   * @param {string} text - Raw keyword text.
   * @returns {string} The text with every regex metacharacter backslash-escaped.
   */
  function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }

  /**
   * Builds one matcher per usable keyword.
   *
   * Keywords are trimmed and lower-cased (the searched text is lower-cased
   * too), and empty entries are dropped because an empty keyword would match
   * almost any text. A keyword matches when it is preceded by the start of
   * the text or by whitespace and is not directly followed by a word
   * character.
   *
   * @param {Array<string>} rawKeywords - Keywords as entered by the user.
   * @returns {Array<RegExp>} Non-global matchers, so `test` keeps no state
   *   between calls.
   */
  function buildKeywordMatchers(rawKeywords) {
    return rawKeywords
      .map((keyword) => String(keyword).trim().toLowerCase())
      .filter((keyword) => keyword.length > 0)
      .map((keyword) => new RegExp("(^|\\s)" + escapeRegExp(keyword) + "(?!\\w)"));
  }

  /**
   * Reports whether the text contains at least one of the keywords.
   *
   * @param {string} text - Text to search; compared case-insensitively.
   * @param {Array<RegExp>} matchers - Matchers from buildKeywordMatchers.
   * @returns {boolean} True when any matcher hits.
   */
  function matchesAnyKeyword(text, matchers) {
    const haystack = text.toLowerCase();
    return matchers.some((matcher) => matcher.test(haystack));
  }

  const c = new Crawler({
    maxConnections: 10,
    /**
     * Handles one fetched page: prints every title and comment body that
     * contains a keyword, then queues the page behind the "next" button.
     *
     * @param {*} error - Truthy when the request failed.
     * @param {*} result - Response object; not used here.
     * @param {Function} $ - jQuery-style selector function for the page.
     */
    callback: function(error, result, $) {
      // Without a selector function there is nothing to scan; report the
      // problem instead of throwing on `$(...)` below.
      if (error || typeof $ !== "function") {
        const reason = error
          ? (error.message || String(error))
          : "no selector function was provided for the page";
        console.error("Failed to fetch page: " + reason);
        rl.close();
        return;
      }

      // Built once per page rather than once per element and keyword.
      const matchers = buildKeywordMatchers(keywords);
      const matches = [];

      $("a.title").each(function(index, a) {
        const title = $(a).text();
        // Each item is reported once even when several keywords hit it.
        if (matchesAnyKeyword(title, matchers)) {
          matches.push({
            "url": $(a).attr("href"),
            "matchType": "Title",
            "matchContent": title
          });
        }
      });

      $(".usertext-body").each(function(index, a) {
        const body = $(a).text();
        if (matchesAnyKeyword(body, matchers)) {
          matches.push({
            // The link is taken from the ".title" element found under the
            // comment body's third ancestor.
            "url": $(a).parent().parent().parent().find(".title").attr("href"),
            "matchType": "Comment",
            "matchContent": body
          });
        }
      });

      matches.forEach(function(match) {
        console.log("URL: " + match.url);
        console.log("MatchType: " + match.matchType);
        console.log("Content: " + match.matchContent);
      });

      const nextPageUrl = $(".next-button a").attr("href");
      if (nextPageUrl) {
        c.queue(nextPageUrl);
      } else {
        // No "next" link on this page: stop instead of queueing `undefined`,
        // and close the readline interface so it no longer holds stdin open.
        console.log("Search complete.");
        rl.close();
      }
    }
  });
  71.  
  /**
   * Prompts for a username and a comma-separated keyword list, stores them in
   * the file-level search state, and queues the user's profile page on the
   * crawler.
   *
   * Exits the process with status 1 when the username is blank or when no
   * usable keyword was entered.
   */
  function askQuestion() {
    searchedArray = [];
    rl.question("Search username: ", function(usernameInput) {
      const username = usernameInput.trim();
      if (!username) {
        process.exit(1);
      } else {
        // Encode the name so stray characters cannot alter the URL's path or
        // query string.
        usernameUrl = redditUrl + encodeURIComponent(username);
        rl.question("Search keywords separated by comma: ", function(keywordsInput) {
          // Normalize here: the crawler lower-cases the text it searches, and
          // "a, b" must yield "b" rather than " b". Blank entries are dropped.
          const parsedKeywords = keywordsInput
            .split(",")
            .map((keyword) => keyword.trim().toLowerCase())
            .filter((keyword) => keyword.length > 0);

          // Check the parsed input, not the `keywords` array, which is always
          // truthy.
          if (parsedKeywords.length === 0) {
            process.exit(1);
          } else {
            keywords = parsedKeywords;
            console.log("Starting to search " + username + "...");
            c.queue(usernameUrl);
          }
        });
      }
    });
  }

  askQuestion();
Add Comment
Please, Sign In to add comment