Guest User

Untitled

a guest
Jul 15th, 2018
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.17 KB | None | 0 0
  1. var request = require('request'),
  2. jsdom = require('jsdom'),
  3. net = require('net'),
  4. DocumentSaver = require('./scrobblepage.js'),
  5. filter = require('./documentfilter.js');
  6.  
  7.  
  8.  
  9. exports.handleRequests = function(response, URLlst){
  10. //Crawl a page, gather text, filter it
  11. //Send it to the server and get the topic representations back here.
  12. //Calculate the similarity based on topics and the representative words.
  13. //return similarity based on topics...
  14. this.URLList = URLlst;//ARRAY OF URL's
  15. this.response = response;
  16. //What is to be sent back on this response object.
  17. this.topicDist = {};//{'URL':'Topic Distribution'}
  18. this.repWords = {};//{'URL':'Rep words'}
  19. this.docStrings = {};//{'URL': 'word occurances'}
  20. this.docsDownLoaded =0;
  21. this.docsProcessed =0;
  22.  
  23. this.init = function(){
  24. //Make the URL List by crawling the pages
  25. //After crawling a page, filter it to make the word occurance thingy
  26. //TODO: Cache last 10000 documents in DB
  27. //TODO: How to regulate the traffic to the C++ server?
  28. //TODO: Will this scale as # of connection from the users go up?
  29. var self = this;
  30. for(u in self.URLList){
  31.  
  32. (function f(url, urlnum){
  33.  
  34. request({uri:url}, function (error, response, body) {
  35.  
  36. if (!error && response.statusCode == 200) {
  37.  
  38. var window = jsdom.jsdom(body).createWindow();
  39.  
  40. jsdom.jQueryify(window, 'jquery.min.js', function (window, jQuery) {
  41. var saver = new DocumentSaver();
  42. //get the text from the document
  43. saver.processDocument(window.document, function(text){
  44. self.docsDownLoaded++;
  45. console.log('Got some text...')
  46. filter.filterPage(text, function(wordOccurance){
  47. //console.log(wordOccurance);
  48. //got the text after filtering
  49. //Send it to C++ process
  50. self.calculateTopicDist(wordOccurance, urlnum);
  51. })
  52.  
  53. }, jQuery);
  54. });
  55. }else{
  56. //Increase document count
  57. console.log('Unprocessed doc:: ');
  58. console.log(url);
  59. self.docStrings[url] ='!!UNPROCESSED!!';
  60. self.docsProcessed++;
  61. }
  62. });
  63. })(self.URLList[u],u)
  64.  
  65. }
  66. }
  67.  
  68. this.calculateTopicDist = function(wordOccurance, urlnum){
  69.  
  70. var s = new net.Stream();
  71. var self = this;
  72. //Make a new connection and send it to the C++ process
  73. s.setEncoding('utf8');
  74. s.on('connect', function(){
  75.  
  76. console.log('***Sending data for document:'+urlnum);
  77. console.log(wordOccurance.length);
  78. s.write(wordOccurance, encoding='utf8');
  79.  
  80. })
  81.  
  82. //handler to recieve the data back after the inference. /Recieve the inference results.
  83. s.on('data', function(data){
  84.  
  85. console.log('*******DATA RECIEVED FOR:'+urlnum);
  86.  
  87. console.log(data);
  88. self.docStrings[self.URLList[urlnum]] = data.trim();
  89. self.docsProcessed++;
  90. if(self.docsProcessed > self.URLList.length-1){
  91. console.log(JSON.stringify(self.docStrings));
  92. self.response.writeHead(200, {'Content-Type': 'text/html'});
  93. self.response.write(JSON.stringify(self.docStrings), encoding ='utf8');
  94. self.response.end();
  95. }
  96. s.end();
  97. })
  98. s.on('close', function(had_error){
  99. console.log('Closed the stream');
  100. })
  101. s.connect('/tmp/ts1');
  102. }
  103.  
  104. }
Add Comment
Please, Sign In to add comment