# frozen_string_literal: true

# Walks a directory tree of e-mail messages stored as *.txt files and writes
# four CSV reports into the output directory:
#
#   mail.csv         - one row per message that has a From header
#   tokens.csv       - one row per (message_id, lower-cased alphabetic token)
#   token_counts.csv - number of occurrences of each token over all messages
#   state_counts.csv - number of occurrences of each token that is one of the
#                      state words listed below
#
# Usage: ruby <this script> MAIL_DIR [OUTPUT_DIR]
#   MAIL_DIR   - root directory searched recursively for *.txt messages
#   OUTPUT_DIR - optional; where the CSV files are written (defaults to the
#                original hard-coded homework directory)

require 'mail'
require 'find'
require 'csv'
require 'set'

source_dir = ARGV[0]
abort "usage: #{$PROGRAM_NAME} MAIL_DIR [OUTPUT_DIR]" if source_dir.nil?

output_dir = ARGV[1] || '/home/cc/cs186/fa12/class/cs186-me/fa12/hw1'

# Single-word tokens counted as state mentions. Because message bodies are
# split on every non-letter, multi-word state names are represented by one of
# their words ("hampshire", "jersey", "mexico", "york", "dakota", "carolina",
# "rhode"). Built once, as a Set, so the per-token membership test is cheap.
state_tokens = Set.new(%w[
  alabama alaska arizona arkansas california colorado connecticut delaware
  florida georgia hawaii idaho illinois indiana iowa kansas kentucky louisiana
  maine maryland massachusetts michigan minnesota mississippi missouri montana
  nebraska nevada hampshire jersey mexico york dakota ohio oklahoma oregon
  pennsylvania rhode carolina tennessee texas utah vermont virginia washington
  wisconsin wyoming
])

token_counts = Hash.new(0) # token => occurrences over all messages
state_counts = Hash.new(0) # state token => occurrences over all messages

open_outputs = [] # every CSV handle opened so far, so `ensure` can close them

begin
  mail_csv, tokens_csv, token_counts_csv, state_counts_csv =
    %w[mail.csv tokens.csv token_counts.csv state_counts.csv].map do |name|
      CSV.open(File.join(output_dir, name), 'wb').tap { |csv| open_outputs << csv }
    end

  mail_csv << ['file_name', 'from', 'to', 'cc', 'subject', 'date', 'message_id', 'body']
  tokens_csv << ['message_id', 'token']
  token_counts_csv << ['token', 'count']
  state_counts_csv << ['token', 'count']

  Find.find(source_dir) do |path|
    # Only regular files whose name ends in ".txt"; directories and names that
    # merely contain ".txt" somewhere in the path are skipped.
    next unless path.end_with?('.txt') && File.file?(path)

    mail = Mail.read(path)
    # Messages without a From header are ignored entirely.
    next if mail.from.nil?

    body = mail.body.decoded
    message_id = mail.message_id

    mail_csv << [path, mail.from, mail.to, mail.cc, mail.subject,
                 mail.date.to_s, message_id, body]

    # A token is a maximal run of ASCII letters; splitting on each non-letter
    # yields empty strings between adjacent delimiters, which are dropped.
    body.split(/[^a-zA-Z]/).each do |raw_token|
      next if raw_token.empty?

      token = raw_token.downcase
      tokens_csv << [message_id, token]
      token_counts[token] += 1
      state_counts[token] += 1 if state_tokens.include?(token)
    end
  end

  # The aggregate reports can only be written after every message was seen.
  token_counts.each { |token, count| token_counts_csv << [token, count] }
  state_counts.each { |token, count| state_counts_csv << [token, count] }
ensure
  # Close (and thereby flush) every file that was opened, even on error.
  open_outputs.each(&:close)
end