Check out the Pastebin Gadgets Shop. We have thousands of fun, geeky & affordable gadgets on sale :-)Want more features on Pastebin? Sign Up, it's FREE!
tweet

# python

By: a guest on Sep 18th, 2011  |  syntax: Python  |  size: 7.93 KB  |  views: 127  |  expires: Never
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
1. from __future__ import division
2. from scipy import c_, ones, dot, stats, diff
3. from scipy.linalg import inv, solve, det
4. import numpy as np
5. from numpy import log, pi, sqrt, square, diagonal
6. from numpy.random import randn, seed
7. import time
8.
class ols:
    """
    Multi-variate regression using ordinary least squares (OLS).

    Author: Vincent Nijs (+ ?)

    Email: v-nijs at kellogg.northwestern.edu

    Dependencies: see the import statements at the top of this file.

    For usage examples of other class methods see the class tests at the
    bottom of this file. To see the class in action simply run this file
    using 'python ols.py'. This will generate some simulated data and run
    various analyses. If you have rpy installed the same model will also
    be estimated by R for confirmation.

    Input:
        y = dependent variable
        y_varnm = string with the variable label for y
        x = independent variables, note that a constant is added by default
        x_varnm = string or list of variable labels for the independent variables

    Output:
        There are no values returned by the class. Summary provides printed output.
        All other measures can be accessed as follows:

        Step 1: Create an OLS instance by passing data to the class

            m = ols(y,x,y_varnm = 'y',x_varnm = ['x1','x2','x3','x4'])

        Step 2: Get specific metrics

            To print the coefficients:
                >>> print m.b
            To print the coefficients p-values:
                >>> print m.p
    """
47.
# Sample data: 36 annual observations per series.
# Dependent variable (series meaning not stated in the paste — verify).
y = [29.4, 29.9, 31.4, 32.8, 33.6, 34.6, 35.5, 36.3, 37.2, 37.8, 38.5, 38.8,
     38.6, 38.8, 39, 39.7, 40.6, 41.3, 42.5, 43.9, 44.9, 45.3, 45.8, 46.5,
     77.1, 48.2, 48.8, 50.5, 51, 51.3, 50.7, 50.7, 50.6, 50.7, 50.6, 50.7]
# NOTE(review): the 77.1 above breaks the otherwise smooth 46.5 -> 48.2
# progression; possibly a transcription error (47.1?) — verify against the
# original data source before trusting any fit.

# tuition
x1 = [376, 407, 438, 432, 433, 479, 512, 543, 583, 635, 714, 798, 891,
      971, 1045, 1106, 1218, 1285, 1356, 1454, 1624, 1782, 1942, 2057, 2179,
      2271, 2360, 2506, 2562, 2700, 2903, 3319, 3629, 3874, 4102, 4291]

# research and development
x2 = [28740.00, 30952.00, 33359.00, 35671.00, 39435.00, 43338.00, 48719.00, 55379.00, 63224.00,
      72292.00, 80748.00, 89950.00, 102244.00, 114671.00, 120249.00, 126360.00, 133881.00, 141891.00,
      151993.00, 160876.00, 165350.00, 165730.00, 169207.00, 183625.00, 197346.00, 212152.00, 226402.00,
      267298.00, 277366.00, 276022.00, 288324.00, 299201.00, 322104.00, 347048.00, 372535.00,
      397629.00]

# one/none parents
x3 = [11610, 12143, 12486, 13015, 13028, 13327, 14074, 14094, 14458, 14878, 15610, 15649,
      15584, 16326, 16379, 16923, 17237, 17088, 17634, 18435, 19327, 19712, 21424, 21978,
      22684, 22597, 22735, 22217, 22214, 22655, 23098, 23602, 24013, 24003, 21593, 22319]
65.
def __init__(self, y, x1, y_varnm='y', x_varnm=''):
    """
    Initialize the ols class and immediately estimate the model.

    Parameters
    ----------
    y : sequence of float
        Dependent variable, one value per observation.
    x1 : sequence of float or 2-d array
        Independent variable(s); a constant column is prepended here.
    y_varnm : str
        Label for the dependent variable, used by summary().
    x_varnm : str or list of str
        Label(s) for the independent variable(s); 'const' is prepended.
    """
    # Coerce to arrays so .shape / .T work even when plain lists are passed.
    self.y = np.asarray(y, dtype=float)
    x = np.asarray(x1, dtype=float)
    if x.ndim == 1:
        x = x[:, None]  # promote a single regressor to a column vector
    # Bug fix: the design matrix was never stored (the assignment was
    # commented out), which made estimate() fail on self.x.
    self.x = c_[ones(x.shape[0]), x]
    self.y_varnm = y_varnm
    if not isinstance(x_varnm, list):
        # Bug fix: list('x1') would split a label string into characters;
        # wrap the single label instead (empty default -> no extra labels).
        self.x_varnm = ['const'] + ([x_varnm] if x_varnm else [])
    else:
        self.x_varnm = ['const'] + x_varnm

    # Estimate model using OLS
    self.estimate()
80.
def estimate(self):
    """
    Estimate the model by OLS and store coefficients and fit statistics.

    Reads self.x (design matrix incl. constant) and self.y; sets
    b, se, t, p, nobs, ncoef, df_e, df_r, e, sse, R2, R2adj, F, Fpv.
    """
    # estimating coefficients, and basic stats
    # (X'X)^-1 is kept because the coefficient covariance reuses it.
    self.inv_xx = inv(dot(self.x.T, self.x))
    xy = dot(self.x.T, self.y)
    self.b = dot(self.inv_xx, xy)                   # estimated coefficients

    self.nobs = self.y.shape[0]                     # number of observations
    self.ncoef = self.x.shape[1]                    # number of coef. (incl. constant)
    self.df_e = self.nobs - self.ncoef              # degrees of freedom, error
    self.df_r = self.ncoef - 1                      # degrees of freedom, regression

    self.e = self.y - dot(self.x, self.b)           # residuals
    self.sse = dot(self.e, self.e) / self.df_e      # residual variance (SSE/df_e)
    self.se = sqrt(diagonal(self.sse * self.inv_xx))  # coef. standard errors
    self.t = self.b / self.se                       # coef. t-statistics
    self.p = (1 - stats.t.cdf(abs(self.t), self.df_e)) * 2  # two-sided p-values

    self.R2 = 1 - self.e.var() / self.y.var()       # model R-squared
    # Bug fix: summary() reads self.R2adj but the pasted code never set it
    # (the transcription skipped a line here); restore the adjusted R-squared.
    self.R2adj = 1 - (1 - self.R2) * ((self.nobs - 1) / (self.nobs - self.ncoef))

    self.F = (self.R2 / self.df_r) / ((1 - self.R2) / self.df_e)  # model F-statistic
    self.Fpv = 1 - stats.f.cdf(self.F, self.df_r, self.df_e)      # F-statistic p-value
104.
def dw(self):
    """
    Return the Durbin-Watson statistic of the residuals.

    Computed as sum(diff(e)^2) / sum(e^2); values near 2 suggest no
    first-order autocorrelation.
    """
    lagged = diff(self.e, 1)
    return dot(lagged, lagged) / dot(self.e, self.e)
113.
def omni(self):
    """
    Omnibus normality test on the residuals.

    Returns the (statistic, p-value) pair produced by
    scipy.stats.normaltest.
    """
    result = stats.normaltest(self.e)
    return result
119.
def JB(self):
    """
    Jarque-Bera test for normality of the residuals.

    Returns
    -------
    (JB statistic, its chi-square(2) p-value, residual skewness,
     residual kurtosis) — kurtosis is reported raw (normal == 3).
    """
    resid = self.e

    # Residual shape statistics; scipy.stats.kurtosis returns the
    # excess kurtosis, so add 3 back to report the raw value.
    skew = stats.skew(resid)
    kurtosis = 3 + stats.kurtosis(resid)

    # Jarque-Bera statistic and its asymptotic chi-square(2) p-value.
    jb_stat = (self.nobs / 6) * (square(skew) + 0.25 * square(kurtosis - 3))
    jb_pv = 1 - stats.chi2.cdf(jb_stat, 2)

    return jb_stat, jb_pv, skew, kurtosis
134.
def ll(self):
    """
    Return (log-likelihood, AIC, BIC) of the fitted model.

    Uses the Gaussian log-likelihood with the ML variance estimate
    (RSS / nobs); both criteria are scaled per observation.
    """
    n = self.nobs
    rss = dot(self.e, self.e)

    # Model log-likelihood, AIC, and BIC criterion values
    loglik = -(n * 1 / 2) * (1 + log(2 * pi)) - (n / 2) * log(rss / n)
    aic = -2 * loglik / n + (2 * self.ncoef / n)
    bic = -2 * loglik / n + (self.ncoef * log(n)) / n

    return loglik, aic, bic
146.
def summary(self):
    """
    Print the full regression report (coefficient table plus model and
    residual diagnostics) to stdout. Nothing is returned.

    NOTE(review): Python 2 print statements. This method reads
    self.R2adj, so confirm it is assigned during estimation — the
    pasted estimate() appears to be missing that line.
    """

    # local time & date for the report header
    t = time.localtime()

    # extra stats, computed on demand from the stored residuals
    ll, aic, bic = self.ll()
    JB, JBpv, skew, kurtosis = self.JB()
    omni, omnipv = self.omni()

    # printing output to screen
    print '\n=============================================================================='
    print "Dependent Variable: " + self.y_varnm
    print "Method: Least Squares"
    print "Date: ", time.strftime("%a, %d %b %Y",t)
    print "Time: ", time.strftime("%H:%M:%S",t)
    print '# obs:               %5.0f' % self.nobs
    print '# variables:     %5.0f' % self.ncoef
    print '=============================================================================='
    # coefficient table: one row per variable label (const first)
    print 'variable     coefficient     std. Error      t-statistic     prob.'
    print '=============================================================================='
    for i in range(len(self.x_varnm)):
        print '''% -5s          % -5.6f     % -5.6f     % -5.6f     % -5.6f''' % tuple([self.x_varnm[i],self.b[i],self.se[i],self.t[i],self.p[i]])
    print '=============================================================================='
    # two-column footer: model fit stats on the left, residual stats on the right
    print 'Models stats                         Residual stats'
    print '=============================================================================='
    print 'R-squared            % -5.6f         Durbin-Watson stat  % -5.6f' % tuple([self.R2, self.dw()])
    print 'Adjusted R-squared   % -5.6f         Omnibus stat        % -5.6f' % tuple([self.R2adj, omni])
    print 'F-statistic          % -5.6f         Prob(Omnibus stat)  % -5.6f' % tuple([self.F, omnipv])
    print 'Prob (F-statistic)   % -5.6f                 JB stat             % -5.6f' % tuple([self.Fpv, JB])
    print 'Log likelihood       % -5.6f                 Prob(JB)            % -5.6f' % tuple([ll, JBpv])
    print 'AIC criterion        % -5.6f         Skew                % -5.6f' % tuple([aic, skew])
    print 'BIC criterion        % -5.6f         Kurtosis            % -5.6f' % tuple([bic, kurtosis])
    print '=============================================================================='
184.
if __name__ == '__main__':

    ##########################
    ### testing the ols class
    ##########################

    # Bug fix: 'x' was undefined; stack the three regressors column-wise
    # to form the design matrix (the constant is added inside the class).
    x = np.c_[x1, x2, x3]

    # intercept is added, by default
    m = ols(y, x, y_varnm = 'y', x_varnm = ['x1','x2','x3'])
    m.summary()
clone this paste RAW Paste Data
Top