Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Given a time ordered list of pageviews ("events"),
- # determine how many "visits" each user had,
- # how long each visit lasted and how many page views occurred during each visit.
- # A visit ends when the user has not had another page view for 1 hour;
- # a page view arriving 1 hour or more after the previous one starts a new visit.
- require 'enumerator'
# Core extension so a plain Array of fields can be turned into an Event.
class Array
  # Builds a new Event holding the same elements as this array.
  #
  # @return [Event]
  def to_event
    Event.new(self)
  end
end
# One page-view record: an Array of fields with named readers for the two
# fields this script uses (index 2 and index 3).
class Event < Array
  # Field at index 2, used throughout the script as the user identifier.
  # Returns nil when the record has fewer than three fields.
  def user_id
    at(2)
  end

  # Field at index 3 converted with to_i and interpreted as seconds since the
  # Unix epoch. A missing or non-numeric field converts to 0.
  #
  # @return [Time]
  def when
    seconds = at(3).to_i
    Time.at(seconds)
  end

  # A record is valid unless its user id is the literal string "NULL".
  def valid?
    user_id != "NULL"
  end
end
# Per-user visit statistics, keyed by user id.
#
# Each value has the shape
#   { :visits => [ { :count => Integer, :begin => Time, :end => Time }, ... ] }
# where :count is the number of page views in the visit and :begin / :end are
# the times of its first and last page view.
#
# Events are expected to be added in time order for each user.
class UserStadistics < Hash
  # Seconds of inactivity after which the next page view starts a new visit.
  VISIT_TIMEOUT = 3600

  # Records one page view.
  #
  # The event either extends the user's most recent visit (when it arrives
  # less than VISIT_TIMEOUT seconds after that visit's last page view) or
  # opens a new visit.
  #
  # @param event [#user_id, #when] object answering the user id and a Time
  # @return [UserStadistics] self, so calls can be chained
  def <<(event)
    user_id = event.user_id
    seen_at = event.when

    # key? is a direct hash lookup; scanning keys would be linear per event.
    self[user_id] = { :visits => [] } unless key?(user_id)
    visits = self[user_id][:visits]
    current = visits.last

    if current && (seen_at - current[:end]) < VISIT_TIMEOUT
      current[:count] += 1
      current[:end] = seen_at
    else
      visits << { :count => 1, :begin => seen_at, :end => seen_at }
    end

    self
  end

  # Reads a tab-separated file and returns its valid events in file order.
  #
  # Blank lines are skipped; otherwise they would yield an event with a nil
  # user id. Rows for which Event#valid? is false are dropped.
  #
  # @param filename [String] path of the tab-separated events file
  # @return [Array<Event>]
  def get_file_hash(filename)
    events = []
    # The block form of File.foreach closes the file when iteration ends.
    File.foreach(filename) do |line|
      fields = line.strip.split("\t")
      next if fields.empty?

      event = Event.new(fields)
      events << event if event.valid?
    end
    events
  end

  # Loads every valid event from the file into this statistics table.
  #
  # @param filename [String] path of the tab-separated events file
  # @return [Array<Event>] the events that were loaded
  def get_visits(filename)
    get_file_hash(filename).each { |event| self << event }
  end

  # Writes a per-user report to standard output.
  #
  # Lines are emitted with Kernel#p, so each one appears in inspect form
  # (wrapped in double quotes); this is kept to preserve the existing output.
  def print
    each do |user_id, stats|
      visits = stats[:visits]
      p "-------------------------------------------------------------"
      p "User ID: #{user_id}"
      p " Visits: #{visits.size} "
      visits.each_with_index do |visit, index|
        p " Visit #{index + 1} - #{visit[:count]} pages views Duration: #{visit[:end] - visit[:begin]} secs"
      end
    end
    p "================================================================"
  end
end
# Script entry point: load the events file from the working directory and
# print the per-user visit report.
stats = UserStadistics.new
stats.get_visits('./events.csv')
stats.print
Add Comment
Please, Sign In to add comment