Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Basic descriptive statistics and simple (one-predictor) linear regression.
#
# Every method is a module-level function taking plain Arrays of numbers.
# Spread measures use the sample (n - 1) denominator throughout.
module Statistics
  # Residuals (observed y minus fitted y) of the least-squares line of
  # ydata on xdata, one per element of xdata, in input order.
  #
  # Returns [] when xdata is empty. Otherwise raises ArgumentError (via
  # correlation) when xdata and ydata differ in length.
  def self.residuals(xdata, ydata)
    # Nothing to fit; also avoids validating ydata against an empty xdata.
    return [] if xdata.empty?

    # Fit the line once instead of refitting it for every point.
    slope = lin_reg_b(xdata, ydata)
    intercept = mean(ydata) - (slope * mean(xdata))

    fitted = []
    xdata.each_with_index do |x, i|
      fitted << ydata[i] - (intercept + slope * x.to_f)
    end
    fitted
  end

  # Value predicted at x by the least-squares line fitted to
  # (xdata, ydata): intercept + slope * x.
  #
  # Raises ArgumentError when xdata and ydata differ in length.
  def self.y_hat(x, xdata, ydata)
    # Compute the slope once and derive the intercept from it.
    slope = lin_reg_b(xdata, ydata)
    intercept = mean(ydata) - (slope * mean(xdata))
    intercept + slope * x.to_f
  end

  # Slope of the least-squares line: r * (s_y / s_x), where r is the
  # correlation and s_x, s_y are the sample standard deviations.
  #
  # Raises ArgumentError when xdata and ydata differ in length.
  def self.lin_reg_b(xdata, ydata)
    spread_x = standard_deviation(xdata)
    spread_y = standard_deviation(ydata)
    correlation(xdata, ydata) * (spread_y / spread_x)
  end

  # Intercept of the least-squares line: mean(y) - slope * mean(x).
  #
  # Raises ArgumentError when xdata and ydata differ in length.
  def self.lin_reg_a(xdata, ydata)
    mean(ydata) - (lin_reg_b(xdata, ydata) * mean(xdata))
  end

  # Sample correlation coefficient of the paired observations: the sum of
  # the products of the paired z-scores divided by (n - 1).
  #
  # Raises ArgumentError when xdata and ydata differ in length.
  def self.correlation(xdata, ydata)
    unless xdata.length == ydata.length
      raise ArgumentError, "xdata and ydata must have the same length"
    end

    # Means and deviations are loop-invariant, so compute them once.
    centre_x = mean(xdata)
    centre_y = mean(ydata)
    spread_x = standard_deviation(xdata)
    spread_y = standard_deviation(ydata)

    total = 0.0
    xdata.each_with_index do |x, i|
      total += ((x - centre_x) / spread_x) * ((ydata[i] - centre_y) / spread_y)
    end
    total / (xdata.size - 1)
  end

  # Standard score of datum relative to data:
  # (datum - mean) / sample standard deviation.
  def self.z_score(datum, data)
    (datum - mean(data)).to_f / standard_deviation(data)
  end

  # Sample standard deviation (n - 1 denominator) of data, as a Float.
  # A single-element input yields NaN because the denominator is zero.
  def self.standard_deviation(data)
    centre = mean(data) # hoisted: the mean does not change per element
    squared_error = data.inject(0.0) { |acc, value| acc + (value - centre)**2 }
    Math.sqrt(squared_error / (data.size - 1))
  end

  # Most frequent value in data.
  #
  # Returns nil when data is empty, or when it holds several distinct
  # values that all occur equally often (no single mode). When several
  # values tie for the highest count but not all counts are equal, the
  # one that appears first in data is returned.
  def self.mode(data)
    counts = data.inject(Hash.new(0)) { |tally, value| tally[value] += 1; tally }
    return nil if counts.empty?
    # A lone distinct value is trivially the mode; only a tie across two
    # or more distinct values means there is no mode.
    return nil if counts.size > 1 && counts.values.uniq.size == 1

    counts.max_by { |_value, count| count }[0]
  end

  # Median of data. The input is sorted into a copy first, so the caller's
  # array is not modified and need not be ordered.
  #
  # Returns nil for empty data, the middle element for an odd count, and
  # the Float average of the two middle elements for an even count.
  def self.median(data)
    ordered = data.sort
    count = ordered.size
    return nil if count.zero?

    upper = count / 2
    if count.even?
      (ordered[upper - 1] + ordered[upper]).to_f / 2.0
    else
      ordered[upper]
    end
  end

  # Arithmetic mean of data as a Float (NaN for empty data).
  def self.mean(data)
    sum(data) / data.size.to_f
  end

  # Sum of data as a Float (0.0 for empty data).
  def self.sum(data)
    data.inject(0) { |acc, value| acc + value }.to_f
  end
end
Add Comment
Please sign in to add a comment.