Guest User

Untitled

a guest
Dec 16th, 2018
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.66 KB | None | 0 0
  1. #!/usr/bin/ruby
  2. #CS132A - Ruby - Lab 3 - Text Analyzer
  3. #Author: Zach
  4. #Purpose: Utilize Ruby File.class to analyze Text files
  5. # and generate statistical information therein.
  6.  
  7.  
  8.  
  # File holding one stop word per line. It is configuration for the analyzer,
  # not a text to be analyzed, so the run below skips it.
  STOP_WORDS_FILE = 'stop_words.txt'
  STOPWORDS = IO.readlines(STOP_WORDS_FILE).map(&:chomp)

  # One [char_stats, chunk_stats] pair is appended here per analyzed file.
  file_stats = []

  # Integer division that yields 0 for a zero denominator, so an empty text
  # file produces a report instead of a ZeroDivisionError.
  #
  # @param numerator [Integer]
  # @param denominator [Integer]
  # @return [Integer] the truncated quotient, or 0 when denominator is 0
  def safe_quotient(numerator, denominator)
    denominator.zero? ? 0 : numerator / denominator
  end

  # Reads one text file, prints its report to standard output and returns the
  # statistics gathered for it.
  #
  # @param text_file [String] path of the file to analyze
  # @return [Array<Hash>] the pair [char_stats, chunk_stats] as returned by
  #   count_chars and count_chunks
  def analyze_text_file(text_file)
    text = File.read(text_file)
    # Count the lines of the file's content (not the length of its name).
    line_count = text.lines.count

    # Count the characters
    char_stats = count_chars(text)
    tot_chars = char_stats[:tot_chars]
    tot_chars_no_space = char_stats[:tot_chars_no_space]

    # Count the words, sentences and paragraphs
    chunk_stats = count_chunks(text)
    word_count = chunk_stats[:word_count]
    sent_count = chunk_stats[:sent_count]
    para_count = chunk_stats[:para_count]

    # Stop Words Processing
    puts "Meaningful Words"
    puts
    # Plain split (no pattern) ignores leading whitespace, so no empty "word"
    # sneaks into the keyword list.
    keywords = text.split.reject { |word| STOPWORDS.include?(word) }
    puts keywords.join(' ')
    # Integer arithmetic: exact truncation and no NaN when the file has no words.
    good_percent = safe_quotient(keywords.length * 100, word_count)

    # Ideal Sentences
    puts "Ideal sentences from text"
    sentences = text.gsub(/\s+/, ' ').strip.split(/\.|\?|!/)
    sentences_sorted = sentences.sort_by { |sentence| sentence.length }
    # Skip the shortest seventh of the sentences, take the next run of that
    # size plus one, and keep only those containing " is" / " are".
    one_seventh = sentences_sorted.length / 7
    ideal_sentences = sentences_sorted.slice(one_seventh, one_seventh + 1)
    ideal_sentences = ideal_sentences.select { |sentence| sentence =~ /\sis\W|\sare\W/ }
    puts

    # Common Words
    puts "10 Most common words from text"
    # keywords already excludes the stop words, so it is grouped directly.
    words_less_stop = keywords.group_by { |word| word }
                              .sort_by { |_word, hits| -hits.length }
                              .first(10)
                              .map(&:first)
    puts words_less_stop
    puts

    # Output Statements
    puts "Innagural Speech Statistics"
    puts "The total number of lines in the inaugural speech is #{line_count}."
    puts "The total number of characters in the first part is #{tot_chars}."
    puts "The total number of characters less whitespace is #{tot_chars_no_space}."
    puts "The total number of words is #{word_count}."
    puts "The total number of sentences is #{sent_count}."
    puts "The total number of paragraphs is #{para_count}."
    puts "The average sentences per paragraph is #{safe_quotient(sent_count, para_count)}."
    puts "The average words per sentence is #{safe_quotient(word_count, sent_count)}."
    puts "#{good_percent}% of all words in the text are non-fluff words."
    puts
    puts "The ideal sentences are:\n\n" + ideal_sentences.join(". ")
    puts

    [char_stats, chunk_stats]
  end

  # count_chars and count_chunks are defined further down this file, so the
  # run is deferred until the whole script has been loaded; calling them from
  # here directly would fail because their definitions have not been evaluated yet.
  at_exit do
    Dir.glob("*.txt").each do |text_file|
      next if text_file == STOP_WORDS_FILE

      # Collect our stats and stuff them in the results array
      file_stats << analyze_text_file(text_file)
    end
  end
  65.  
  # Counts the characters of a text.
  #
  # @param text [String] the text to measure
  # @return [Hash{Symbol => Integer}] :tot_chars is the length of the text,
  #   whitespace included; :tot_chars_no_space is its length once every
  #   whitespace character has been removed
  def count_chars(text)
    {
      # The pairs must be comma-separated; without the comma the hash literal
      # does not parse.
      :tot_chars => text.length,
      :tot_chars_no_space => text.gsub(/\s+/, '').length
    }
  end
  73.  
  # Counts the words, sentences and paragraphs of a text.
  #
  # @param text [String] the text to measure
  # @return [Hash{Symbol => Integer}] :word_count (whitespace-separated
  #   tokens), :sent_count (non-blank fragments between runs of '.', '?' and
  #   '!') and :para_count (non-blank chunks separated by blank lines)
  def count_chunks(text)
    # Splitting on a run of terminators keeps "..." or "?!" from producing
    # empty sentences; dropping blank fragments keeps the whitespace after the
    # final terminator (e.g. the file's trailing newline) from being counted.
    sentences = text.split(/[.?!]+/).reject { |fragment| fragment.strip.empty? }
    # A paragraph break is a newline, optional further whitespace (which covers
    # extra blank lines and "\r"), then a newline.
    paragraphs = text.split(/\n\s*\n/).reject { |chunk| chunk.strip.empty? }

    {
      :word_count => text.split.length,
      :sent_count => sentences.length,
      :para_count => paragraphs.length
    }
  end
Add Comment
Please, Sign In to add comment