SHARE
TWEET

Untitled

a guest Dec 15th, 2018 73 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/ruby
  2. #CS132A - Ruby - Lab 3 - Text Analyzer
  3. #Author:  Zach
  4. #Purpose:  Utilize Ruby's File class to analyze text files
  5. #          and generate statistical information therein.
  6.  
  require "set"

  # Integer average of +total+ over +count+. Returns 0 when +count+ is zero so
  # that an empty text cannot raise ZeroDivisionError.
  def whole_average(total, count)
    count.zero? ? 0 : total / count
  end

  # Prints the analysis report for a single text to standard output.
  #
  # text      - String holding the full contents of one text file.
  # stopwords - collection of words (anything responding to include?) that
  #             are skipped when picking meaningful and common words.
  #             Matching is exact and case-sensitive.
  def report_text_statistics(text, stopwords)
    # Count the lines and characters
    line_count = text.lines.count
    tot_chars = text.length
    tot_chars_no_space = text.gsub(/\s+/, '').length

    # Count the words, sentences and paragraphs
    words = text.split
    word_count = words.length
    # Collapse whitespace first, then drop blank fragments so that a trailing
    # newline or a run such as "..." is not counted as a sentence.
    sentences = text.gsub(/\s+/, ' ').split(/\.|\?|!/).map(&:strip).reject(&:empty?)
    sent_count = sentences.length
    para_count = text.split(/\n\n/).length

    # Stop Words Processing
    puts "Meaningful Words"
    puts
    keywords = words.reject { |word| stopwords.include?(word) }
    puts keywords.join(' ')
    # Integer arithmetic keeps the percentage exact and avoids NaN on empty text.
    good_percent = word_count.zero? ? 0 : keywords.length * 100 / word_count

    # Ideal Sentences: order the sentences by length, take a window starting
    # one seventh of the way in (window_start + 1 sentences long) and keep
    # those that contain " is" or " are" followed by a non-word character.
    puts "Ideal sentences from text"
    sentences_sorted = sentences.sort_by(&:length)
    window_start = sentences_sorted.length / 7
    window = sentences_sorted.slice(window_start, window_start + 1)
    ideal_sentences = window.select { |sentence| sentence =~ /\sis\W|\sare\W/ }
    puts

    # Common Words (keywords already exclude the stop words)
    puts "10 Most common words from text"
    hits = Hash.new(0)
    keywords.each { |word| hits[word] += 1 }
    puts hits.sort_by { |_word, count| -count }.first(10).map(&:first)
    puts

    # Output Statements
    puts "Innagural Speech Statistics"
    puts "The total number of lines in the inaugural speech is #{line_count}."
    puts "The total number of characters in the first part is #{tot_chars}."
    puts "The total number of characters less whitespace is #{tot_chars_no_space}."
    puts "The total number of words is #{word_count}."
    puts "The total number of sentences is #{sent_count}."
    puts "The total number of paragraphs is #{para_count}."
    puts "The average sentences per paragraph is #{whole_average(sent_count, para_count)}."
    puts "The average words per sentence is #{whole_average(word_count, sent_count)}."
    puts "#{good_percent}% of all words in the text are non-fluff words."
    puts
    puts "The ideal sentences are:\n\n" + ideal_sentences.join(". ")
    puts
  end

  # Analyzes every file matching +pattern+ (relative to the current directory)
  # and prints one report per file.
  #
  # pattern         - glob selecting the text files to analyze.
  # stop_words_file - path of a file with one stop word per line. It is read
  #                   once, and it is never analyzed itself even though it
  #                   matches the default pattern.
  #
  # Raises Errno::ENOENT when there are files to analyze but the stop-word
  # file does not exist.
  def analyze_text_files(pattern = "*.txt", stop_words_file = "stop_words.txt")
    stop_words_path = File.expand_path(stop_words_file)
    text_files = Dir.glob(pattern).reject { |path| File.expand_path(path) == stop_words_path }
    return if text_files.empty?

    # A Set gives constant-time membership checks for every word in the text.
    stopwords = Set.new(IO.readlines(stop_words_file).map(&:chomp))
    text_files.each do |text_file|
      report_text_statistics(File.read(text_file), stopwords)
    end
  end

  analyze_text_files
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top