#!/usr/bin/ruby
#CS132A - Ruby - Lab 3 - Text Analyzer
#Author:  Zach
#Purpose:  Use Ruby's File class to analyze text files
#          and generate statistical information about them.


# Name of the newline-separated stop-word list, resolved against the
# current working directory.
STOPWORDS_FILE = 'stop_words.txt'

# Words considered "fluff" when picking out meaningful words. If the list is
# missing the analysis still runs, with every word treated as meaningful.
STOPWORDS =
  if File.exist?(STOPWORDS_FILE)
    IO.readlines(STOPWORDS_FILE).map(&:chomp)
  else
    warn "#{STOPWORDS_FILE} not found; treating every word as meaningful."
    []
  end

# Integer quotient that yields 0 instead of raising ZeroDivisionError when the
# denominator is 0 (e.g. an empty file has no sentences or paragraphs).
def safe_quotient(numerator, denominator)
  denominator.zero? ? 0 : numerator / denominator
end

# Picks "ideal" sentences: after collapsing whitespace, sentences are sorted by
# length, a slice starting one seventh of the way in is taken, and only
# fragments containing " is" / " are" followed by a non-word character are kept.
#
# text - String with the full contents of a document.
#
# Returns an Array of sentence fragments (terminators removed).
def pick_ideal_sentences(text)
  fragments = text.gsub(/\s+/, ' ').strip.split(/\.|\?|!/)
  by_length = fragments.sort_by { |fragment| fragment.length }
  start = by_length.length / 7
  # slice(start, length): start never exceeds the array size, so this is
  # always an Array (possibly empty), never nil.
  by_length.slice(start, start + 1).select { |fragment| fragment =~ /\sis\W|\sare\W/ }
end

# Computes all statistics for one document.
#
# text      - String with the full contents of a document.
# stopwords - Array of words to exclude from the meaningful-word list
#             (matching is case-sensitive).
#
# Returns a Hash with the keys :line_count, :tot_chars, :tot_chars_no_space,
# :word_count, :sent_count, :para_count, :keywords, :good_percent,
# :common_words, :ideal_sentences, :sentences_per_paragraph and
# :words_per_sentence.
def analyze_text(text, stopwords = STOPWORDS)
  words = text.split
  # Array#- keeps duplicates and order of the receiver while dropping every
  # stop word, without a linear scan of the stop list per word.
  keywords = words - stopwords
  word_count = words.length

  # Whitespace-only fragments (such as the newline after the final period, or
  # the gaps inside "...") are not sentences or paragraphs.
  sent_count = text.split(/\.|\?|!/).count { |fragment| !fragment.strip.empty? }
  para_count = text.split(/\n\n/).count { |para| !para.strip.empty? }

  # Most frequent first; equally frequent words keep first-appearance order.
  ranked = keywords.group_by { |word| word }
                   .sort_by.with_index { |(_word, hits), position| [-hits.length, position] }

  {
    line_count: text.lines.count,
    tot_chars: text.length,
    tot_chars_no_space: text.gsub(/\s+/, '').length,
    word_count: word_count,
    sent_count: sent_count,
    para_count: para_count,
    keywords: keywords,
    # Integer arithmetic: exact floor, and no NaN when there are no words.
    good_percent: safe_quotient(keywords.length * 100, word_count),
    common_words: ranked.first(10).map(&:first),
    ideal_sentences: pick_ideal_sentences(text),
    sentences_per_paragraph: safe_quotient(sent_count, para_count),
    words_per_sentence: safe_quotient(word_count, sent_count)
  }
end

# Prints the report for one document to standard output.
#
# stats - Hash as returned by analyze_text.
def print_report(stats)
  # Stop Words Processing
  puts "Meaningful Words"
  puts
  puts stats[:keywords].join(' ')

  # Ideal Sentences (heading only; the sentences are listed at the end)
  puts "Ideal sentences from text"
  puts

  # Common Words
  puts "10 Most common words from text"
  puts stats[:common_words]
  puts

  # Output Statements
  puts "Innagural Speech Statistics"
  puts "The total number of lines in the inaugural speech is #{stats[:line_count]}."
  puts "The total number of characters in the first part is #{stats[:tot_chars]}."
  puts "The total number of characters less whitespace is #{stats[:tot_chars_no_space]}."
  puts "The total number of words is #{stats[:word_count]}."
  puts "The total number of sentences is #{stats[:sent_count]}."
  puts "The total number of paragraphs is #{stats[:para_count]}."
  puts "The average sentences per paragraph is #{stats[:sentences_per_paragraph]}."
  puts "The average words per sentence is #{stats[:words_per_sentence]}."
  puts "#{stats[:good_percent]}% of all words in the text are non-fluff words."
  puts
  puts "The ideal sentences are:\n\n" + stats[:ideal_sentences].join(". ")
  puts
end

# The stop-word list lives next to the texts and matches the glob, but it is
# configuration, not a document to analyze.
your_text_files = Dir.glob("*.txt") - [STOPWORDS_FILE]

your_text_files.each do |text_file|
  print_report(analyze_text(File.read(text_file)))
end