Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- var express = require('express');
- var app = express();
- var cheerio = require('cheerio');
- var request = require('request');
- var async = require('async');
- var fs = require('fs');
// Expose everything under ./public (relative to this script) as static files.
app.use(
  express.static(__dirname + '/public')
);

// Root route: respond with the index.html that sits next to this script.
app.get('/', function serveIndex(httpReq, httpRes) {
  var indexPage = __dirname + '/index.html';
  httpRes.sendfile(indexPage);
});
/**
 * GET /process_get?artist=<slug>
 *
 * Walks the artist's paginated song listing on metrolyrics.com, downloads the
 * lyrics of every listed song whose link title contains the artist name,
 * and streams back an HTML page with the most frequent words (as computed by
 * `clean`).
 *
 * Query parameters:
 *   artist - hyphen-separated artist slug taken from the HTML form
 *            (presumably something like "daft-punk" - confirm against the form).
 *
 * Responses:
 *   400 - `artist` is missing, empty, or not a plain string.
 *   200 - HTML page; if a listing page cannot be fetched, an error paragraph
 *         is written instead of the word list (headers are already sent).
 */
app.get('/process_get', function (req, res) {
  // Escape HTML-significant characters so user input cannot inject markup.
  var escapeHtml = function (text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  var artist = req.query.artist; // get from html form
  // The query parser can yield arrays/objects (e.g. ?artist[]=x); only accept a non-empty string.
  if (typeof artist !== 'string' || artist.trim() === '') {
    res.writeHead(400, {'Content-Type': 'text/html'});
    res.end('Missing "artist" query parameter.');
    return;
  }
  artist = artist.trim();

  var words = "";   // lyrics of every matching song, concatenated
  var counter = {}; // passed through to clean(), which builds the tally
  // Link titles are compared against the artist name with spaces instead of hyphens.
  var artistNeedle = artist.replace(/-/g, " ").toLowerCase();

  res.writeHead(200, {'Content-Type': 'text/html'});
  res.write("<h1>" + escapeHtml(artist) + "</h1>");

  // Encode the slug so it cannot add extra path segments or a query string.
  var nextUrl = "http://www.metrolyrics.com/" + encodeURIComponent(artist) + "-alpage-1.html";

  async.whilst(
    // Keep paging until the "next" button is absent or is the inert placeholder link.
    function () {
      return nextUrl !== "javascript:void(0)" && nextUrl !== undefined;
    },
    function (next) {
      request(nextUrl, function (err, resp, html) {
        if (err) {
          // Without the listing page there is nothing to parse and no next link to follow.
          return next(err);
        }

        var $ = cheerio.load(html);
        var urls = $(".songs-table.compact a");
        var followingUrl = $('.button.next').attr("href");

        async.each(urls, function (url, doneCallback) {
          var urll = $(url).attr("href");
          // Links without a title attribute simply never match the artist.
          var title = ($(url).attr('title') || "").toLowerCase();

          if (!urll || title.indexOf(artistNeedle) === -1) {
            return doneCallback();
          }

          console.log(title + " <br>");
          request(urll, function (songErr, songResp, songHtml) {
            if (songErr) {
              // Best effort: one unreachable song must not abort the whole analysis.
              console.error("Could not fetch " + urll + ":", songErr);
            } else {
              var $page = cheerio.load(songHtml);
              words += $page(".js-lyric-text").text() + " ";
            }
            // Signal completion only after the lyrics have been appended.
            doneCallback();
          });
        }, function () {
          // Every song on this page has been handled; move on to the next page.
          nextUrl = followingUrl;
          next();
        });
      });
    },
    function (err) {
      if (err) {
        console.error("Could not fetch song listing:", err);
        res.write("<p>Could not fetch the song listing for this artist.</p>");
        res.end();
        return;
      }

      var cleaned = clean(words, counter);
      for (var i = 0; i < cleaned.length; i++) {
        res.write(cleaned[i].word + " : ");
        res.write(cleaned[i].count + "<br>");
      }
      res.end();
    }
  );
});
// Bind to the host/port supplied by the environment. When PORT is not set,
// fall back to a fixed port instead of letting the OS pick an arbitrary one,
// and log the port so the server can actually be found.
var port = process.env.PORT || 3000;
app.listen(port, process.env.IP, function () {
  console.log("Server listening on port " + port + "...");
});
/**
 * Find the most frequent words in a blob of lyrics.
 *
 * The text is lower-cased, stripped of everything except ASCII letters and
 * whitespace, and split on whitespace. Words listed in `common.txt` (one stop
 * word per line, resolved against the current working directory) and words
 * longer than 20 characters are ignored.
 *
 * @param {string} words - Raw lyrics text; null/undefined is treated as empty.
 * @param {Object} [counter] - Ignored; accepted only so existing callers keep working.
 * @returns {Array<{word: string, count: number}>} At most 30 entries, highest count first.
 * @throws {Error} If `common.txt` cannot be read.
 */
var clean = function (words, counter) {
  var MAX_WORD_LENGTH = 20;
  var MAX_RESULTS = 30;

  // Stop words get the same normalisation as the lyrics (letters only,
  // lower case) so entries such as "Don't" or lines ending in "\r" still match.
  // A null-prototype object is used as a set so inherited keys cannot match.
  var stopWords = Object.create(null);
  fs.readFileSync('common.txt', 'utf8').split(/\r?\n/).forEach(function (line) {
    var stopWord = line.replace(/[^a-zA-Z]/g, "").toLowerCase();
    if (stopWord) {
      stopWords[stopWord] = true;
    }
  });

  var tokens = String(words == null ? "" : words)
    .replace(/[^a-zA-Z\s]/g, "")
    .toLowerCase()
    .split(/\s+/);

  // Null prototype: a lyric word like "constructor" must start from 0, not
  // from a property inherited from Object.prototype.
  counter = Object.create(null);
  tokens.forEach(function (word) {
    // Skip the empty tokens produced by leading/trailing whitespace.
    if (word === "" || word.length > MAX_WORD_LENGTH || stopWords[word]) {
      return;
    }
    counter[word] = (counter[word] || 0) + 1;
  });

  var count = Object.keys(counter).map(function (word) {
    return { word: word, count: counter[word] };
  });
  count.sort(function (a, b) {
    return b.count - a.count;
  });
  return count.slice(0, MAX_RESULTS);
};
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement