Advertisement
Guest User

Untitled

a guest
Sep 23rd, 2016
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.98 KB | None | 0 0
  1. // Load config for mysql and other things
  2. var config = require('./config.json');
  3.  
  4. // Load nodejs modules
  5. var mysql = require('mysql');
  6. var cheerio = require('cheerio');
  7. var sleep = require('sleep');
  8. var fs = require("fs");
  9. var async = require("async");
  10. var colors = require('colors');
  11.  
  12. var each = require('async-each');
  13. var entities = require("entities");
  14. var moment = require("moment");
  15.  
  16.  
  // Resume point: how many leading entries of the archive listing are skipped
  // before indexing starts.
  var offset = 16000;

  // Shared MySQL pool; every query in this script is issued through it.
  var connection = mysql.createPool({
    connectionLimit: 10,
    host: "localhost",
    user: "root",
    password: "root",
    database: "Scratch_forums"
  });

  // Listener for the pool's 'enqueue' event. It is deliberately a no-op; it is
  // kept as a hook for logging when a query has to wait for a connection slot.
  connection.on('enqueue', function onEnqueue() {
    // console.log('Waiting for available connection slot'.rainbow);
  });
  31.  
  /**
   * Record `author` in the `user` table unless a row with that username is
   * already present.
   *
   * Fire-and-forget: the outcome is only reported on the console. The
   * check-then-insert sequence is not atomic, so two concurrent calls for the
   * same name can both insert.
   *
   * @param {string} author - Username to store.
   */
  function addUser(author) {
    connection.query('SELECT count(*) FROM `user` where username=? ', [author], function (error, results) {
      if (error) {
        console.log('Error: ' + error.message);
        // `results` is undefined after a failed query; reading it would throw
        // inside the driver callback, so stop here.
        return;
      }

      if (Number(results[0]['count(*)']) !== 0) {
        console.log(author.blue + " already in DB".red);
        return;
      }

      connection.query('INSERT INTO `user` (`username`) VALUES (?)', [author], function (err) {
        if (err) {
          // Report the failure instead of claiming the row was inserted.
          console.log('Error: ' + err.message);
          return;
        }
        console.log(author.blue + " inserted into DB".yellow);
      });
    });
  }
  46.  
  /**
   * Store one forum post in the `posts` table unless an identical row (same
   * author, post time, topic and forum) already exists.
   *
   * Fire-and-forget: the outcome is only reported on the console, and the
   * caller is not told when the queries have finished.
   *
   * @param {string} parentForum - Written to the `forum_name` column.
   * @param {string} childForum - Written to the `topic_name` column.
   * @param {string} author - Written to the `author` column.
   * @param {string} post - Post body, written to the `post` column.
   * @param {string} time - Written to the `postTime` column.
   */
  function addPost(parentForum, childForum, author, post, time) {
    connection.query('SELECT count(*) FROM `posts` where `author` = ? and `postTime` = ? and `topic_name` = ? and `forum_name` = ? ', [author, time, childForum, parentForum], function (error, results) {
      if (error) {
        console.log('Error: ' + error.message);
        // `results` is undefined after a failed query; reading it would throw
        // inside the driver callback, so stop here.
        return;
      }

      if (Number(results[0]['count(*)']) !== 0) {
        console.log('post already in db'.yellow);
        return;
      }

      connection.query('INSERT INTO `posts` (`author`,`post`,`postTime`,`topic_name`, `forum_name`) VALUES (?,?,?,?,?)', [author, post, time, childForum, parentForum], function (err) {
        if (err) {
          // Report the failure instead of claiming the row was inserted.
          console.log('Error: ' + err.message);
          return;
        }
        console.log(author.magenta + "'s post inserted into DB".magenta);
      });
    });
  }
  62.  
  /**
   * Parse one archived page of a thread and hand every post on it to addPost.
   *
   * The page is read from '../ScratchForumArchive/<threadID>/<pageID>'. The
   * forum and topic names are taken from the '.linkst ul' list, and each
   * '.blockpost' element yields one post (author, text, timestamp).
   *
   * @param {string} threadID - Directory name of the thread inside the archive.
   * @param {string} pageID - File name of the page inside the thread directory.
   * @param {function} cb - Called with no arguments when the page has been
   *   handled. An unreadable page is logged and skipped, so one bad file does
   *   not abort the surrounding iteration.
   */
  function indexpage(threadID, pageID, cb) {
    console.log("Page ".blue + pageID.blue + "being indexed in thread ".blue + threadID.blue);
    fs.readFile('../ScratchForumArchive/' + threadID + '/' + pageID, 'utf8', function (err, content) {
      if (err) {
        // `content` is undefined here; do not feed it to the parser.
        console.log('Error: ' + err.message);
        cb();
        return;
      }

      // Declared locally: a bare `$ = ...` would leak a global (and throws in
      // strict mode).
      var $ = cheerio.load(content, {decodeEntities: true});

      var breadcrumbItems = $('.linkst ul').children('li');
      var parentThread = breadcrumbItems.next().children('a').text();
      var threadName = breadcrumbItems.next().next().text().replace(' » ', '');
      console.log(threadName.red + " in ".red + parentThread.red + "being indexed".red);

      $('.blockpost').each(function (index, element) {
        var blockpost = $(element);
        var inbox = blockpost.children('.box').children('.inbox');

        var time = blockpost.children('h2').children('span').children('a').text();
        var post = inbox.children('.postright').children('.postmsg').text();
        var author = inbox.children('.postleft').children('dl').children('dt').text();

        addPost(parentThread, threadName, author, post.trim(), moment(time, "YYYY-MM-DD HH:mm:ss").format());
      });

      // addPost does not signal completion, so pause briefly before reporting
      // the page as done to give the queued queries time to run.
      setTimeout(function () {
        cb();
      }, 200);
    });
  }
  86.  
  /**
   * Index every page file of one archived thread, one page at a time.
   *
   * Lists '../ScratchForumArchive/<threadID>' and passes each entry to
   * indexpage sequentially (concurrency limit 1).
   *
   * @param {string} threadID - Directory name of the thread inside the archive.
   * @param {function} cb - Called with no arguments once the thread has been
   *   handled. Failures are logged rather than passed on, so one bad thread
   *   does not abort the caller's iteration.
   */
  function indexthread(threadID, cb) {
    fs.readdir('../ScratchForumArchive/' + threadID, function (err, pageIDs) {
      if (err) {
        // For example, the entry is not a directory or cannot be read.
        console.log('Error: ' + err.message);
        cb();
        return;
      }

      async.eachLimit(pageIDs, 1, function (pageID, pageDone) {
        indexpage(threadID, pageID, pageDone);
      }, function (iterationError) {
        if (iterationError) {
          console.log('Error: ' + iterationError.message);
        }
        cb();
      });
    });
  }
  96.  
  // Entry point: list the archive and index its threads one at a time,
  // skipping the first `offset` entries so an interrupted run can be resumed.
  fs.readdir('../ScratchForumArchive', function (err, threadID) {
    if (err) {
      // Without a listing there is nothing to iterate; `threadID` is undefined.
      console.log('Error: ' + err.message);
      return;
    }

    var remaining = threadID.slice(offset);
    // Index of the next item within the full listing. Threads are processed
    // strictly in order (limit 1), so a running counter replaces a linear
    // `indexOf` scan of the whole listing for every thread.
    var position = threadID.length - remaining.length;

    async.eachLimit(remaining, 1,
      function (id, cb) {
        console.log("Starting work on item ".black.bgRed + position.toString().black.bgRed);
        position += 1;
        indexthread(id, cb);
      }
    );
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement