Pastebin launched a little side project called HostCabi.net, check it out ;-) Don't like ads? PRO users don't see any ads ;-)
Guest

python

By: a guest on Sep 18th, 2011  |  syntax: Python  |  size: 7.93 KB  |  hits: 121  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. from __future__ import division
  2. from scipy import c_, ones, dot, stats, diff
  3. from scipy.linalg import inv, solve, det
  4. import numpy as np
  5. from numpy import log, pi, sqrt, square, diagonal
  6. from numpy.random import randn, seed
  7. import time
  8.  
class ols:
    """
    Class for multi-variate regression using ordinary least squares (OLS).

    Author: Vincent Nijs (+ ?)
    Email: v-nijs at kellogg.northwestern.edu
    Last Modified: Mon Jan 15 17:56:17 CST 2007
    Dependencies: see the import statements at the top of this file.

    For usage examples of other class methods see the class tests at the
    bottom of this file. To see the class in action simply run this file
    using 'python ols.py'. This will generate some simulated data and run
    various analyses. If you have rpy installed the same model will also
    be estimated by R for confirmation.

    Input:
        y       = dependent variable (1-D array)
        y_varnm = string with the variable label for y
        x1      = independent variables; note that a constant column is
                  added by default (the parameter is named x1 in __init__,
                  not x as older docs said)
        x_varnm = string or list of variable labels for the independent
                  variables

    Output:
        There are no values returned by the class. summary() provides
        printed output. All other measures can be accessed as attributes:

        Step 1: Create an OLS instance by passing data to the class

            m = ols(y, x, y_varnm='y', x_varnm=['x1', 'x2', 'x3', 'x4'])

        Step 2: Get specific metrics

            To print the coefficients:
                >>> print m.b
            To print the coefficients' p-values:
                >>> print m.p
    """
  47.  
# Example data set (36 annual observations) used by the __main__ demo below.
# NOTE(review): in y, 77.1 breaks an otherwise smooth trend
# (..., 46.5, 77.1, 48.2, ...) and looks like a transcription error for
# something near 47.1 -- confirm against the original data source.
y = [29.4, 29.9, 31.4, 32.8, 33.6, 34.6, 35.5, 36.3, 37.2, 37.8, 38.5, 38.8,
     38.6, 38.8, 39, 39.7, 40.6, 41.3, 42.5, 43.9, 44.9, 45.3, 45.8, 46.5,
     77.1, 48.2, 48.8, 50.5, 51, 51.3, 50.7, 50.7, 50.6, 50.7, 50.6, 50.7]
# tuition
x1 = [376, 407, 438, 432, 433, 479, 512, 543, 583, 635, 714, 798, 891,
      971, 1045, 1106, 1218, 1285, 1356, 1454, 1624, 1782, 1942, 2057, 2179,
      2271, 2360, 2506, 2562, 2700, 2903, 3319, 3629, 3874, 4102, 4291]
# research and development
x2 = [28740.00, 30952.00, 33359.00, 35671.00, 39435.00, 43338.00, 48719.00, 55379.00, 63224.00,
      72292.00, 80748.00, 89950.00, 102244.00, 114671.00, 120249.00, 126360.00, 133881.00, 141891.00,
      151993.00, 160876.00, 165350.00, 165730.00, 169207.00, 183625.00, 197346.00, 212152.00, 226402.00,
      267298.00, 277366.00, 276022.00, 288324.00, 299201.00, 322104.00, 347048.00, 372535.00,
      397629.00]
# one/none parents
x3 = [11610, 12143, 12486, 13015, 13028, 13327, 14074, 14094, 14458, 14878, 15610, 15649,
      15584, 16326, 16379, 16923, 17237, 17088, 17634, 18435, 19327, 19712, 21424, 21978,
      22684, 22597, 22735, 22217, 22214, 22655, 23098, 23602, 24013, 24003, 21593, 22319]
  65.  
  66. def __init__(self,y,x1,y_varnm = 'y',x_varnm = ''):
  67.     """
  68.    Initializing the ols class.
  69.    """
  70.     self.y = y
  71.     #self.x1 = c_[ones(x1.shape[0]),x1]
  72.     self.y_varnm = y_varnm
  73.     if not isinstance(x_varnm,list):
  74.         self.x_varnm = ['const'] + list(x_varnm)
  75.     else:
  76.         self.x_varnm = ['const'] + x_varnm
  77.  
  78.     # Estimate model using OLS
  79.     self.estimate()
  80.  
  81. def estimate(self):
  82.  
  83.     # estimating coefficients, and basic stats
  84.     self.inv_xx = inv(dot(self.x.T,self.x))
  85.     xy = dot(self.x.T,self.y)
  86.     self.b = dot(self.inv_xx,xy)                    # estimate coefficients
  87.  
  88.     self.nobs = self.y.shape[0]                     # number of observations
  89.     self.ncoef = self.x.shape[1]                    # number of coef.
  90.     self.df_e = self.nobs - self.ncoef              # degrees of freedom, error
  91.     self.df_r = self.ncoef - 1                      # degrees of freedom, regression
  92.  
  93.     self.e = self.y - dot(self.x,self.b)            # residuals
  94.     self.sse = dot(self.e,self.e)/self.df_e         # SSE
  95.     self.se = sqrt(diagonal(self.sse*self.inv_xx))  # coef. standard errors
  96.     self.t = self.b / self.se                       # coef. t-statistics
  97.     self.p = (1-stats.t.cdf(abs(self.t), self.df_e)) * 2    # coef. p-values
  98.  
  99.     self.R2 = 1 - self.e.var()/self.y.var()         # model R-squared
  100.     self.R2adj = 1-(1-self.R2)*((self.nobs-1)/(self.nobs-self.ncoef))   # adjusted R-square
  101.  
  102.     self.F = (self.R2/self.df_r) / ((1-self.R2)/self.df_e)  # model F-statistic
  103.     self.Fpv = 1-stats.f.cdf(self.F, self.df_r, self.df_e)  # F-statistic p-value
  104.  
  105. def dw(self):
  106.     """
  107.    Calculates the Durbin-Waston statistic
  108.    """
  109.     de = diff(self.e,1)
  110.     dw = dot(de,de) / dot(self.e,self.e);
  111.  
  112.     return dw
  113.  
  114. def omni(self):
  115.     """
  116.    Omnibus test for normality
  117.    """
  118.     return stats.normaltest(self.e)
  119.  
  120. def JB(self):
  121.     """
  122.    Calculate residual skewness, kurtosis, and do the JB test for normality
  123.    """
  124.  
  125.     # Calculate residual skewness and kurtosis
  126.     skew = stats.skew(self.e)
  127.     kurtosis = 3 + stats.kurtosis(self.e)
  128.    
  129.     # Calculate the Jarque-Bera test for normality
  130.     JB = (self.nobs/6) * (square(skew) + (1/4)*square(kurtosis-3))
  131.     JBpv = 1-stats.chi2.cdf(JB,2);
  132.  
  133.     return JB, JBpv, skew, kurtosis
  134.  
  135. def ll(self):
  136.     """
  137.    Calculate model log-likelihood and two information criteria
  138.    """
  139.    
  140.     # Model log-likelihood, AIC, and BIC criterion values
  141.     ll = -(self.nobs*1/2)*(1+log(2*pi)) - (self.nobs/2)*log(dot(self.e,self.e)/self.nobs)
  142.     aic = -2*ll/self.nobs + (2*self.ncoef/self.nobs)
  143.     bic = -2*ll/self.nobs + (self.ncoef*log(self.nobs))/self.nobs
  144.  
  145.     return ll, aic, bic
  146.  
def summary(self):
    """
    Print a formatted regression summary to stdout: header, one row per
    coefficient (estimate, std. error, t-stat, p-value), then model fit
    and residual diagnostics. NOTE: Python 2 print statements, matching
    the rest of this file.
    """

    # local time & date for the report header
    t = time.localtime()

    # extra stats computed on demand from the residuals
    ll, aic, bic = self.ll()
    JB, JBpv, skew, kurtosis = self.JB()
    omni, omnipv = self.omni()

    # printing output to screen
    print '\n=============================================================================='
    print "Dependent Variable: " + self.y_varnm
    print "Method: Least Squares"
    print "Date: ", time.strftime("%a, %d %b %Y",t)
    print "Time: ", time.strftime("%H:%M:%S",t)
    print '# obs:               %5.0f' % self.nobs
    print '# variables:     %5.0f' % self.ncoef
    print '=============================================================================='
    print 'variable     coefficient     std. Error      t-statistic     prob.'
    print '=============================================================================='
    # one row per coefficient; 'const' comes first, matching x_varnm order
    for i in range(len(self.x_varnm)):
        print '''% -5s          % -5.6f     % -5.6f     % -5.6f     % -5.6f''' % tuple([self.x_varnm[i],self.b[i],self.se[i],self.t[i],self.p[i]])
    print '=============================================================================='
    print 'Models stats                         Residual stats'
    print '=============================================================================='
    print 'R-squared            % -5.6f         Durbin-Watson stat  % -5.6f' % tuple([self.R2, self.dw()])
    print 'Adjusted R-squared   % -5.6f         Omnibus stat        % -5.6f' % tuple([self.R2adj, omni])
    print 'F-statistic          % -5.6f         Prob(Omnibus stat)  % -5.6f' % tuple([self.F, omnipv])
    print 'Prob (F-statistic)   % -5.6f                 JB stat             % -5.6f' % tuple([self.Fpv, JB])
    print 'Log likelihood       % -5.6f                 Prob(JB)            % -5.6f' % tuple([ll, JBpv])
    print 'AIC criterion        % -5.6f         Skew                % -5.6f' % tuple([aic, skew])
    print 'BIC criterion        % -5.6f         Kurtosis            % -5.6f' % tuple([bic, kurtosis])
    print '=============================================================================='
  184.  
  185. if __name__ == '__main__':
  186.  
  187.         ##########################
  188.         ### testing the ols class
  189.         ##########################
  190.  
  191.  
  192.         # intercept is added, by default
  193.     m = ols(y, x, y_varnm = 'y',x_varnm = ['x1','x2','x3'])
  194.     m.summary()