Need a unique gift idea?
A Pastebin account makes a great Christmas gift
SHARE
TWEET

Untitled

a guest Sep 24th, 2018 56 Never
Upgrade to PRO!
ENDING IN 00 days 00 hours 00 mins 00 secs
 
  1. // Salita.js
  2. // Extracts words from dialogues.
  3. // Prerequisites: Node.js
  4. // How to run: `node salita.js <file>`
  5. // (c) 2018 Ned Palacios
  6.  
  7. const fs = require('fs');
  8.  
  // Words that are dropped from the result (compared against the lower-cased,
  // symbol-stripped form of each word).
  // NOTE(review): 'ang mga' contains a space, and sentences are split on spaces
  // before the lookup, so that entry can never match a single word.
  const blacklist = ['ang', 'ang mga', 'si', 'sina', 'so', 'ah', 'ahh', 'at', 'kung', 'po', 'sa', 'ka', 'ko', 'ay', 'ba', 'na', 'ng', 'hay', 'eh', 'oo', 'o', 'op', 'nyo', 'mo', 'umm', 'may', 'oh', 'ok', 'kahit', 'duterte', 'bali', 'nalang', 'tsaka', 'mga', 'ninyo', 'niyo', 'ako', 'din', 'pero'];

  // The dialogue file is taken from the first command-line argument. Fail with a
  // usage hint instead of the opaque TypeError readFileSync raises for `undefined`.
  if (!process.argv[2]) {
      console.error('Usage: node salita.js <file>');
      process.exit(1);
  }

  // Get contents of the file
  const textContents = fs.readFileSync(process.argv[2], 'utf8');

  // Keep only the spoken part of each "speaker: sentence" line.
  //  - Lines without ': ' (blank lines, the empty string after a trailing
  //    newline) are skipped rather than turned into `undefined`.
  //  - Everything after the FIRST ': ' is kept, so a sentence that itself
  //    contains ': ' is no longer cut short.
  //  - Both \n and \r\n line endings are accepted.
  const sentences = textContents
      .split(/\r?\n/)
      .filter(line => line.includes(': '))
      .map(line => line.slice(line.indexOf(': ') + 2));
  15.  
  /**
   * Sanitize: splits a sentence into words, strips every character that is not
   * an ASCII letter from each word, and drops blacklisted words.
   *
   * The blacklist comparison is case-insensitive; the returned words keep their
   * original letter case. Words that are empty after stripping (numbers,
   * punctuation, repeated spaces) are discarded. Note that non-ASCII letters
   * are removed by the pattern as well.
   *
   * @param {string} sentence - One dialogue sentence.
   * @returns {string[]} The remaining words, in their original order. An empty
   *   array is returned when `sentence` is not a string.
   */
  function sanitize(sentence) {
      // A missing sentence has no words; return early instead of throwing on
      // `.split`.
      if (typeof sentence !== 'string') {
          return [];
      }

      return sentence
          .split(' ')
          // Strip symbols once per word (previously done twice per word).
          .map(word => word.replace(/[^a-zA-Z ]/g, ''))
          .filter(word => word !== '' && !blacklist.includes(word.toLowerCase()));
  }
  26.  
  /**
   * Analyze: collects the unique, lower-cased words of all sentences, writes
   * them to a text file (one word per line, separated by ", ") and prints
   * summary counts to the console.
   *
   * @param {string} [outputPath='./output.txt'] - File the word list is written to.
   * @param {number} [members=5] - Divisor for the "per member" figure in the summary.
   * @returns {void}
   */
  function analyze(outputPath = './output.txt', members = 5) {
      // Defensive: skip any entry that is not a string so a single malformed
      // line cannot abort the whole run.
      const spoken = sentences.filter(s => typeof s === 'string');

      // Extract all words from all sentences (one word list per sentence).
      const wordsArray = spoken.map(s => sanitize(s));

      // A Set de-duplicates in constant time per word and preserves
      // first-seen order, so the output order matches a linear scan.
      const unique = new Set();
      wordsArray.forEach(list => {
          list.forEach(word => unique.add(word.toLowerCase()));
      });
      const words = Array.from(unique);

      // Output final result.
      fs.writeFileSync(outputPath, words.join(', \n'), { encoding: 'utf-8' });

      // An explicit initial value keeps reduce from throwing on empty input.
      const originalCount = spoken.reduce((total, s) => total + s.split(' ').length, 0);
      const sanitizedCount = wordsArray.reduce((total, list) => total + list.length, 0);

      // Indicates success.
      console.log('Done.');
      console.log(`Original: ${originalCount} Words`);
      console.log(`Sanitized: ${sanitizedCount} Words`);
      console.log(`Reduced to ${words.length} words; ${Math.floor(words.length / members)} per member`);
  }
  51.  
  52. // Entry point: run the analysis immediately when the script is executed.
  53. analyze();
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top