from math import sqrt, pow
from operator import mul, sub


def mean(xs):
    """Return the arithmetic mean of the sequence ``xs``.

    An empty sequence yields ``0`` rather than raising.
    """
    if len(xs) == 0:
        return 0
    return sum(xs) / len(xs)


def stddev(xs):
    """Return the population standard deviation of ``xs``.

    The squared deviations are divided by ``len(xs)`` (not ``len(xs) - 1``).
    An empty sequence yields ``0`` rather than raising.
    """
    if len(xs) == 0:
        return 0
    m = mean(xs)
    return sqrt(sum(pow(x - m, 2) for x in xs) / float(len(xs)))


def linreg(data):
    """Fit a least-squares line to ``data`` and return ``(slope, intercept)``.

    ``data`` is an iterable of ``(x, y)`` pairs.  It is materialised once so
    that one-shot iterables (generators, iterators) are handled correctly.

    Raises ``ZeroDivisionError`` in the same cases as :func:`slope`.
    """
    pairs = list(data)
    xs = [x for x, _ in pairs]
    ys = [y for _, y in pairs]
    # Compute the slope once and derive the intercept from it, instead of
    # letting intercept() recompute the slope.
    m = slope(xs, ys)
    b = mean(ys) - (m * mean(xs))
    return (m, b)


def linregs(data):
    """Return the fitted line for ``data`` formatted as ``"f(x) = {m}x + {b}"``."""
    return "f(x) = {0}x + {1}".format(*linreg(data))


def slope(xs, ys):
    """Return the least-squares slope for the paired sequences ``xs``, ``ys``.

    Computed as ``(n*sum(xy) - sum(x)*sum(y)) / (n*sum(x*x) - sum(x)**2)``.

    Raises ``ZeroDivisionError`` when the denominator is zero, which happens
    for empty input and when all ``x`` values are identical.
    """
    n = len(xs)
    numerator = n * sum(map(mul, xs, ys)) - (sum(xs) * sum(ys))
    # math.pow always returns a float, so the denominator is a float.
    denominator = n * sum(map(mul, xs, xs)) - pow(sum(xs), 2)
    return numerator / denominator


def intercept(xs, ys):
    """Return the least-squares intercept for the paired sequences.

    Raises ``ZeroDivisionError`` in the same cases as :func:`slope`.
    """
    return mean(ys) - (slope(xs, ys) * mean(xs))


def correlation(data):
    """Return the Pearson correlation coefficient of ``data``.

    ``data`` is an iterable of ``(x, y)`` pairs.  It is materialised once so
    that one-shot iterables (generators, iterators) are handled correctly.

    Raises ``ZeroDivisionError`` when ``data`` is empty or when either
    variable has a standard deviation of zero.
    """
    pairs = list(data)
    xs = [x for x, _ in pairs]
    ys = [y for _, y in pairs]
    n = len(xs)
    x_mean = mean(xs)
    y_mean = mean(ys)
    x_dev = (x - x_mean for x in xs)
    y_dev = (y - y_mean for y in ys)
    return sum(map(mul, x_dev, y_dev)) / (n * stddev(xs) * stddev(ys))