Guest User

Untitled

a guest
Sep 24th, 2018
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.99 KB | None | 0 0
  1. // Salita.js
  2. // Extracts words from dialogues.
  3. // Prerequisites: Node.js
  4. // How to run: `node salita.js <file>`
  5. // (c) 2018 Ned Palacios
  6.  
  7. const fs = require('fs');
  8.  
  // Filler words, particles and pronouns that should never appear in the output.
  // Entries are compared against single, lower-cased, symbol-stripped words.
  // NOTE: 'ang mga' contains a space, so it can never equal one space-delimited
  // word; 'ang' and 'mga' are also listed individually and do the actual work.
  const blacklist = [
    'ang', 'ang mga', 'si', 'sina', 'so', 'ah', 'ahh', 'at', 'kung', 'po',
    'sa', 'ka', 'ko', 'ay', 'ba', 'na', 'ng', 'hay', 'eh', 'oo',
    'o', 'op', 'nyo', 'mo', 'umm', 'may', 'oh', 'ok', 'kahit', 'duterte',
    'bali', 'nalang', 'tsaka', 'mga', 'ninyo', 'niyo', 'ako', 'din', 'pero',
  ];

  // Fail with a readable message instead of the opaque TypeError that
  // fs.readFileSync raises when the path argument is missing.
  if (!process.argv[2]) {
    throw new Error('Usage: node salita.js <file>');
  }

  // Raw contents of the dialogue file named on the command line.
  const textContents = fs.readFileSync(process.argv[2], 'utf8');

  // Spoken text of every dialogue line, i.e. everything after the first ': '.
  // Lines without that separator (blank lines, a trailing newline) are skipped
  // so that no undefined entry reaches the later processing steps. Slicing at
  // the first separator keeps any further ': ' that occurs inside the text.
  const sentences = textContents
    .split(/\r?\n/)
    .filter(line => line.includes(': '))
    .map(line => line.slice(line.indexOf(': ') + 2));
  15.  
  /**
   * Splits a sentence into words, strips every non-ASCII-letter character from
   * each word, and drops blacklisted or empty results.
   *
   * The comparison against the module-level `blacklist` is case-insensitive,
   * but the returned words keep their original letter case.
   *
   * @param {string} sentence - Text to break into words. Non-string values
   *   (for example `undefined` from a malformed input line) yield an empty list.
   * @returns {string[]} Cleaned words in their original order.
   */
  function sanitize(sentence) {
    // A missing sentence must not abort the whole run.
    if (typeof sentence !== 'string') {
      return [];
    }

    return sentence
      // Split on any whitespace run so tab-separated words are not glued
      // together once the symbols are stripped.
      .split(/\s+/)
      // Strip punctuation, digits and other symbols once per word.
      .map(word => word.replace(/[^a-zA-Z]/g, ''))
      // Discard words that vanished entirely or that are blacklisted.
      .filter(word => word !== '' && !blacklist.includes(word.toLowerCase()));
  }
  26.  
  /**
   * Extracts the unique words from the module-level `sentences`, writes them
   * to a file, and prints summary statistics to the console.
   *
   * Words are compared case-insensitively and written in lower case, in order
   * of first appearance, joined by ', \n'.
   *
   * @param {string} [outputPath='./output.txt'] - File that receives the word list.
   * @param {number} [memberCount=5] - Group size used for the "per member" figure.
   * @returns {void}
   */
  function analyze(outputPath = './output.txt', memberCount = 5) {
    // Skip entries that are not strings so one malformed line cannot abort
    // the whole run.
    const spoken = sentences.filter(s => typeof s === 'string');

    // One list of sanitized words per sentence.
    const wordsArray = spoken.map(s => sanitize(s));

    // A Set keeps first-seen order and removes duplicates in a single pass.
    const unique = new Set();
    for (const list of wordsArray) {
      for (const word of list) {
        unique.add(word.toLowerCase());
      }
    }
    const words = [...unique];

    // Output final result.
    fs.writeFileSync(outputPath, words.join(', \n'), { encoding: 'utf-8' });

    // Explicit initial values keep reduce from throwing on empty input.
    const originalCount = spoken.reduce((total, s) => total + s.split(' ').length, 0);
    const sanitizedCount = wordsArray.reduce((total, list) => total + list.length, 0);

    // Indicates success.
    console.log('Done.');
    console.log(`Original: ${originalCount} Words`);
    console.log(`Sanitized: ${sanitizedCount} Words`);
    console.log(`Reduced to ${words.length} words; ${Math.floor(words.length / memberCount)} per member`);
  }
  51.  
  52. // Entry point: run the analysis as soon as the script is executed.
  53. analyze();
Add Comment
Please, Sign In to add comment