Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Project configuration (MySQL settings and other options).
var config = require('./config.json');

// Node.js modules used by the indexer.
var mysql = require('mysql');
var cheerio = require('cheerio');
var sleep = require('sleep');
var fs = require("fs");
var async = require("async");
var colors = require('colors');
var each = require('async-each');
var entities = require("entities");
var moment = require("moment");

// Index into the thread directory listing at which a resumed run starts.
var offset = 16000;

// Options for the shared MySQL connection pool.
var poolOptions = {
    connectionLimit: 10,
    host: "localhost",
    user: "root",
    password: "root",
    database: "Scratch_forums"
};

// Pool used by every query in this script.
var connection = mysql.createPool(poolOptions);

// Hook for the pool's 'enqueue' event; intentionally a no-op.
// A debug log can be placed here to see when queries wait for a free slot.
connection.on('enqueue', function onEnqueue() {
});
/**
 * Insert `author` into the `user` table unless a row with that username
 * already exists.
 *
 * Runs a `SELECT count(*)` first and only issues the `INSERT` when the count
 * is zero. The check and the insert are two separate queries, so they are not
 * atomic. Progress and failures are reported through `console.log`; nothing
 * is returned and no callback is invoked.
 *
 * @param {string} author - Username to store. The colour properties used in
 *   the log lines (`.blue`, `.red`, `.yellow`) must exist on strings.
 */
function addUser(author) {
    connection.query('SELECT count(*) FROM `user` where username=? ', [author], function (error, results) {
        if (error) {
            console.log('Error: ' + error.message);
            // `results` is not usable after a failed query; stop here instead
            // of dereferencing it and crashing the whole run.
            return;
        }
        if (Number(results[0]['count(*)']) !== 0) {
            console.log(author.blue + " already in DB".red);
            return;
        }
        connection.query('INSERT INTO `user` (`username`) VALUES (?)', [author], function (err) {
            if (err) {
                // Do not claim success when the insert failed.
                console.log('Error: ' + err.message);
                return;
            }
            console.log(author.blue + " inserted into DB".yellow);
        });
    });
}
/**
 * Insert a forum post into the `posts` table unless an identical one is
 * already stored.
 *
 * A post counts as already stored when a row with the same author, post time,
 * topic name and forum name exists. The post text itself is not compared.
 * The check and the insert are two separate queries, so they are not atomic.
 * Progress and failures are reported through `console.log`; nothing is
 * returned and no callback is invoked.
 *
 * @param {string} parentForum - Stored in the `forum_name` column.
 * @param {string} childForum - Stored in the `topic_name` column.
 * @param {string} author - Stored in the `author` column. The `.magenta`
 *   colour property used in the log line must exist on strings.
 * @param {string} post - Post body, stored in the `post` column.
 * @param {string} time - Post timestamp, stored in the `postTime` column.
 */
function addPost(parentForum, childForum, author, post, time) {
    var lookupValues = [author, time, childForum, parentForum];
    connection.query('SELECT count(*) FROM `posts` where `author` = ? and `postTime` = ? and `topic_name` = ? and `forum_name` = ? ', lookupValues, function (error, results) {
        if (error) {
            console.log('Error: ' + error.message);
            // `results` is not usable after a failed query; stop here instead
            // of dereferencing it and crashing the whole run.
            return;
        }
        if (Number(results[0]['count(*)']) !== 0) {
            console.log('post already in db'.yellow);
            return;
        }
        var insertValues = [author, post, time, childForum, parentForum];
        connection.query('INSERT INTO `posts` (`author`,`post`,`postTime`,`topic_name`, `forum_name`) VALUES (?,?,?,?,?)', insertValues, function (err) {
            if (err) {
                // Do not claim success when the insert failed.
                console.log('Error: ' + err.message);
                return;
            }
            console.log(author.magenta + "'s post inserted into DB".magenta);
        });
    });
}
/**
 * Parse one archived thread page and queue every post on it for insertion.
 *
 * Reads `../ScratchForumArchive/<threadID>/<pageID>` as UTF-8, extracts the
 * forum and topic names from the `.linkst ul` list and passes each
 * `.blockpost` element's author, text and timestamp to `addPost`.
 *
 * `cb` is called with no arguments 200 ms after the posts have been handed to
 * `addPost`; it does not wait for the database queries to finish. If the file
 * cannot be read, the error is logged and `cb` is called right away so the
 * caller can move on to the next page.
 *
 * @param {string} threadID - Directory name of the thread inside the archive.
 * @param {string} pageID - File name of the page inside the thread directory.
 * @param {function} cb - Completion callback, invoked without arguments.
 */
function indexpage(threadID, pageID, cb) {
    console.log("Page ".blue + pageID.blue + "being indexed in thread ".blue + threadID.blue);
    fs.readFile('../ScratchForumArchive/' + threadID + '/' + pageID, 'utf8', function (err, content) {
        if (err) {
            // An unreadable page must not abort the run or reach the parser.
            console.log('Error: ' + err.message);
            cb();
            return;
        }

        // Declared locally so the parsed document does not leak into a global.
        var $ = cheerio.load(content, {decodeEntities: true});

        var parentThread = $('.linkst ul').children('li').next().children('a').text();
        var threadName = $('.linkst ul').children('li').next().next().text().replace(' » ', '');
        console.log(threadName.red + " in ".red + parentThread.red + "being indexed".red);

        $('.blockpost').each(function (i, element) {
            var postNode = $(element);
            var inbox = postNode.children('.box').children('.inbox');
            var time = postNode.children('h2').children('span').children('a').text();
            var post = inbox.children('.postright').children('.postmsg').text();
            var author = inbox.children('.postleft').children('dl').children('dt').text();
            addPost(parentThread, threadName, author, post.trim(), moment(time, "YYYY-MM-DD HH:mm:ss").format());
        });

        // Short pause before reporting completion, as in the original flow.
        setTimeout(function () {
            cb();
        }, 200);
    });
}
/**
 * Index every page file of one archived thread, one page at a time.
 *
 * Lists `../ScratchForumArchive/<threadID>` and runs `indexpage` on each
 * entry through `async.eachLimit` with a limit of 1. `cb` is called with no
 * arguments once all pages have been processed. If the directory cannot be
 * listed, the error is logged and `cb` is called right away so the caller can
 * continue with the next thread.
 *
 * @param {string} threadID - Directory name of the thread inside the archive.
 * @param {function} cb - Completion callback, invoked without arguments.
 */
function indexthread(threadID, cb) {
    fs.readdir('../ScratchForumArchive/' + threadID, function (err, pageIDs) {
        if (err) {
            // Report the failure explicitly instead of silently skipping it.
            console.log('Error: ' + err.message);
            cb();
            return;
        }
        async.eachLimit(pageIDs, 1, function (pageID, pageDone) {
            indexpage(threadID, pageID, pageDone);
        }, function () {
            cb();
        });
    });
}
// Entry point: list the archive directory and index its threads one at a
// time, skipping the first `offset` entries so an interrupted run can resume.
// NOTE(review): the connection pool is never closed, so the process will
// probably stay alive after the last thread. Ending the pool safely needs the
// insert queries to report completion first.
fs.readdir('../ScratchForumArchive', function (err, threadID) {
    if (err) {
        // Without a listing there is nothing to index; report and stop.
        console.log('Error: ' + err.message);
        return;
    }

    // Position of the current thread in the full listing. Threads are handled
    // strictly in order (limit 1), so a counter replaces a per-item indexOf
    // scan over the whole listing.
    var position = offset;

    async.eachLimit(threadID.slice(offset), 1,
        function (id, cb) {
            console.log("Starting work on item ".black.bgRed + position.toString().black.bgRed);
            position += 1;
            indexthread(id, cb);
        }
    );
});
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement