Advertisement
Guest User

Untitled

a guest
May 3rd, 2016
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.84 KB | None | 0 0
  1. var express = require('express');
  2. var app = express();
  3. var cheerio = require('cheerio');
  4. var request = require('request');
  5. var async = require('async');
  6. var fs = require('fs');
  7.  
  // Serve everything under ./public as static assets.
  app.use(express.static(__dirname + '/public'));

  // Landing page: send index.html from the application directory.
  // NOTE(review): presumably this page holds the form that submits `artist`
  // to /process_get — confirm against index.html.
  app.get('/', function(req,res){
    // res.sendfile (lowercase f) is deprecated and removed in Express 5.
    // res.sendFile needs an absolute path, which __dirname already provides.
    res.sendFile(__dirname+'/index.html');
  });
  13.  
  /**
   * GET /process_get?artist=<slug>
   *
   * Walks the artist's paginated song listing, downloads the lyric page of
   * every song whose link title mentions the artist, concatenates the lyric
   * text and streams back an HTML list of the most frequent words as computed
   * by clean().
   *
   * Responds 400 when `artist` is missing or not a single string. Once the
   * 200 header has been sent, a listing-page failure is reported in the body
   * and the response is still ended, so the client never hangs.
   */
  app.get('/process_get', function(req, res) {
    // Minimal HTML escaping for request data echoed into the response.
    var escapeHtml = function(text){
      return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
    };

    var artist = req.query.artist; //get from html form

    // A repeated query parameter arrives as an array and a missing one as
    // undefined; either would make the string calls below throw.
    if (typeof artist !== 'string' || artist.trim() === "") {
      res.writeHead(400, {'Content-Type': 'text/html'});
      res.end("<h1>Missing artist</h1>");
      return;
    }

    var artistName = artist.replace(/-/g," ").toLowerCase(); //form used in link titles
    var words = ""; //words from each song in string
    var counter = {}; //passed through to clean(), which builds its own tally

    res.writeHead(200, {'Content-Type': 'text/html'});
    res.write("<h1>"+escapeHtml(artist)+"</h1>");

    // Encode the slug so user input cannot alter the path or query of the URL.
    var nextUrl = "http://www.metrolyrics.com/"+encodeURIComponent(artist)+"-alpage-1.html";

    // Keep following the "next" link until it is absent or is the
    // "javascript:void(0)" placeholder href.
    async.whilst(function(){ return nextUrl !== "javascript:void(0)" && nextUrl !== undefined; },
    function(next){
      request(nextUrl, function(err, resp, html){
        if (err) {
          // Without a document there is no "next" link to read; stop the loop.
          return next(err);
        }

        var $ = cheerio.load(html);
        var links = $(".songs-table.compact a").get(); //plain array of elements

        async.each(links, function(link, doneCallback){
          var href = $(link).attr("href");
          var title = ($(link).attr('title') || "").toLowerCase();

          if (!href || title.indexOf(artistName) === -1) {
            return doneCallback();
          }

          console.log(title+" <br>");
          request(href, function(songErr, songResp, songHtml){
            if (songErr) {
              // Best effort: one unreachable song must not abort the whole run.
              console.error("Skipping " + href + ": " + songErr.message);
            } else {
              var $page = cheerio.load(songHtml);
              words += $page(".js-lyric-text").text() + " ";
            }
            // Signal completion only after the lyrics have been appended.
            doneCallback();
          });
        }, function(){
          // Every song on this page is done; advance to the next listing page.
          nextUrl = $('.button.next').attr("href");
          next();
        });
      });
    }, function(err){
      if (err) {
        console.error(err);
        res.write("<p>Could not fetch the song list.</p>");
        return res.end();
      }

      var cleaned = clean(words, counter);
      for(var i = 0; i < cleaned.length; i++){
        res.write(cleaned[i].word + " : ");
        res.write(cleaned[i].count+"<br>");
      }
      res.end();
    });
  });
  64.  
  // Start the HTTP server on the port and interface given by the PORT and IP
  // environment variables, and log once it is accepting connections.
  app.listen(
    process.env.PORT,
    process.env.IP,
    function onListening(){
      console.log("Server listening...");
    }
  );
  68.  
  /**
   * Build a ranked word-frequency list from raw lyric text.
   *
   * The text is reduced to lowercase ASCII letters separated by single
   * spaces, then split into words. A word is dropped when it is empty, longer
   * than 20 characters, or listed in common.txt (one stop word per line, read
   * relative to the process working directory). Stop-list entries go through
   * the same letter-only, lowercase normalisation as the text.
   *
   * @param {string} words   Raw text to analyse; null/undefined count as "".
   * @param {Object} counter Ignored. Kept for call compatibility; a fresh
   *                         tally is always built.
   * @returns {Array<{word: string, count: number}>} At most 30 entries,
   *          sorted by descending count.
   * @throws {Error} Whatever fs.readFileSync throws when common.txt cannot
   *                 be read.
   */
  var clean = function(words, counter){
    // Prototype-less maps so that words such as "constructor" are ordinary keys.
    var stopWords = Object.create(null);
    var tally = Object.create(null);

    // Split on LF or CRLF so a Windows-edited list does not leave "\r" on
    // every entry; blank lines normalise to "" and are skipped.
    fs.readFileSync('common.txt', 'utf8').split(/\r?\n/).forEach(function (entry) {
      var key = entry.replace(/[^a-zA-Z]/g, "").toLowerCase();
      if (key) {
        stopWords[key] = true;
      }
    });

    // Filtering token by token, rather than regex-replacing " word " in the
    // whole string, also removes stop words that repeat back to back or sit
    // at the very start or end of the text.
    var kept = String(words == null ? "" : words)
      .replace(/\s+/g, " ")
      .replace(/[^a-zA-Z ]/g, "")
      .toLowerCase()
      .split(" ")
      .filter(function (word) {
        return word.length > 0 && word.length <= 20 && !stopWords[word];
      });

    console.log(kept.join(" "));

    kept.forEach(function (word) {
      tally[word] = (tally[word] || 0) + 1;
    });

    var ranked = Object.keys(tally).map(function (word) {
      return { word: word, count: tally[word] };
    });
    ranked.sort(function (a, b) {
      return b.count - a.count;
    });

    return ranked.slice(0, 30);
  };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement