# ------------------------------------------------------------------------
# The following Python code is implemented by Professor Terje Haukaas at
# the University of British Columbia in Vancouver, Canada. It is made
# freely available online at terje.civil.ubc.ca together with notes,
# examples, and additional Python code. Please be cautious when using
# this code; it may contain bugs and comes without warranty of any kind.
#
# In fact, this is a particularly quick-and-dirty implementation of
# Ordinary and Bayesian Linear Regression. One part of this code is a
# search for good explanatory functions. That isn't always meaningful,
# but it provides insight for the specific data given below, which was
# actually generated with a deterministic model with four input
# parameters. If you know structural analysis, can you guess which
# model generated the data?
# ------------------------------------------------------------------------
# Import useful libraries
import sys
import numpy as np
import matplotlib.pyplot as plt
# Input data (first column=response, second=intercept, the rest are regressors)
data = np.array([(6.8, 1.0, 1000.0, 2000.0, 9500.0, 41096604.2),
                 (22.1, 1.0, 1572.0, 2532.0, 11573.0, 33260953.5),
                 (59.8, 1.0, 1288.0, 2801.0, 13647.0, 11565502.7),
                 (5.9, 1.0, 1543.0, 1370.0, 14614.0, 15284895.2),
                 (20.5, 1.0, 1572.0, 1113.0, 9872.0, 3562069.3),
                 (30.1, 1.0, 877.0, 2323.0, 12614.0, 9653979.2),
                 (176.1, 1.0, 1345.0, 2980.0, 12950.0, 5202934.7),
                 (11.0, 1.0, 1233.0, 2301.0, 15342.0, 29747448.2),
                 (26.4, 1.0, 1675.0, 1503.0, 10812.0, 6640981.3),
                 (21.2, 1.0, 1777.0, 2117.0, 12609.0, 21041461.3),
                 (4.7, 1.0, 843.0, 1448.0, 8606.0, 21041461.3),
                 (0.9, 1.0, 1909.0, 736.0, 9514.0, 31034422.7),
                 (35.9, 1.0, 1971.0, 1301.0, 13521.0, 2980441.3),
                 (0.6, 1.0, 1122.0, 759.0, 14311.0, 17859214.7),
                 (0.8, 1.0, 1833.0, 854.0, 11561.0, 40574196.0),
                 (24.0, 1.0, 1943.0, 2382.0, 9712.0, 37532448.0),
                 (39.3, 1.0, 1225.0, 1588.0, 11686.0, 3562069.3),
                 (21.8, 1.0, 1332.0, 1866.0, 15565.0, 8504460.2),
                 (40.0, 1.0, 1354.0, 1792.0, 15418.0, 4214833.3),
                 (63.3, 1.0, 548.0, 2988.0, 8489.0, 9067078.7),
                 (4.1, 1.0, 1257.0, 790.0, 12152.0, 4100925.2),
                 (9.6, 1.0, 1280.0, 2098.0, 12282.0, 33260953.5),
                 (48.2, 1.0, 1456.0, 2464.0, 13828.0, 10902678.2),
                 (0.5, 1.0, 1178.0, 797.0, 15198.0, 25333333.3),
                 (9.8, 1.0, 940.0, 2154.0, 9893.0, 32357991.2),
                 (66.0, 1.0, 1342.0, 2703.0, 12034.0, 11120725.3),
                 (41.7, 1.0, 1125.0, 2976.0, 10736.0, 22064924.8),
                 (8.0, 1.0, 738.0, 2139.0, 15346.0, 19726762.7),
                 (5.9, 1.0, 1982.0, 1148.0, 13479.0, 12490321.3),
                 (1.8, 1.0, 1882.0, 955.0, 8676.0, 34180559.8),
                 (5.6, 1.0, 1533.0, 1055.0, 10570.0, 10058989.5),
                 (13.0, 1.0, 1331.0, 2302.0, 11716.0, 35591509.3),
                 (55.5, 1.0, 712.0, 2693.0, 8302.0, 10058989.5),
                 (29.7, 1.0, 1002.0, 2848.0, 8850.0, 29326500.0),
                 (32.4, 1.0, 1787.0, 2024.0, 8113.0, 18777513.2),
                 (9.1, 1.0, 1303.0, 1932.0, 8888.0, 38528833.3),
                 (18.6, 1.0, 690.0, 1686.0, 9824.0, 6037642.7),
                 (0.8, 1.0, 1339.0, 922.0, 13510.0, 31034422.7),
                 (4.3, 1.0, 1084.0, 1905.0, 15503.0, 37532448.0),
                 (100.9, 1.0, 1274.0, 2290.0, 8373.0, 6037642.7)])
# Extract the y-vector and the X-matrix
rows = data.shape[0]
columns = len(data[0])
y = data[:,0]
X = data[:,1:columns]
# Determine number of regressors (k) and observations (n)
n = rows
k = columns-1
nu = n-k
# Print basic information about the data set
print('\n'"There are", n, "observations and", k, "regressors")
# Compute the ordinary least squares estimate, i.e., the mean of the thetas
XX = np.transpose(X).dot(X)
XXinv = np.linalg.inv(XX)
XXinvX = XXinv.dot(np.transpose(X))
mean_theta = XXinvX.dot(y)
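# The four lines above evaluate the closed-form least-squares solution
# theta_hat = (X^T X)^{-1} X^T y. As an alternative sketch (not part of the
# original script), the same estimate can be obtained more stably with
#   mean_theta = np.linalg.lstsq(X, y, rcond=None)[0]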
# Plot predictions vs. observations
plt.ion()
predictions = []
for i in range(n):
    model = 0
    for j in range(k):
        model += mean_theta[j] * X[i, j]
    predictions.append(model)
plt.plot(y, predictions, 'ko')
plt.xlabel("Observations")
plt.ylabel("Predictions")
plt.show()
print('\n'"Pausing a few seconds before closing the plot...")
plt.pause(2)
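# The scatter above compares each fitted value X*theta with the corresponding
# observation; a perfect model would place every point on the 1:1 line. An
# optional reference line (an addition, not in the original script) could be
# drawn before plt.show() with:
#   plt.plot([min(y), max(y)], [min(y), max(y)], 'k--')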
# Compute X*theta and the ordinary least squares error variance
Xtheta = X.dot(mean_theta)
y_minus_Xtheta = y - Xtheta
dot_product_y_minus_Xtheta = (np.transpose(y_minus_Xtheta)).dot(y_minus_Xtheta)
s_squared = dot_product_y_minus_Xtheta/nu
# Compute the covariance matrix for the model parameters
covarianceMatrix = s_squared * XXinv
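# The two quantities above are the error-variance estimate
# s^2 = (y - X*theta)^T (y - X*theta) / nu, with nu = n - k degrees of freedom,
# and the parameter covariance matrix s^2 * (X^T X)^{-1}.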
# Collect the standard deviations from the covariance matrix
stdv_theta = []
for i in range(k):
    stdv_theta.append(np.sqrt(covarianceMatrix[i, i]))
# Collect the coefficient of variation in a vector (in percent!)
cov_theta = []
for i in range(k):
    cov_theta.append(abs(100.0 * stdv_theta[i] / mean_theta[i]))
# Place 1/stdvs on the diagonal of the D^-1 matrix and extract the correlation matrix
Dinv = np.eye(k)
for i in range(k):
    Dinv[i,i] = 1.0/stdv_theta[i]
correlationMatrix = (Dinv.dot(covarianceMatrix)).dot(Dinv)
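# The matrix computed above is the parameter correlation matrix
# R = D^{-1} * covarianceMatrix * D^{-1}, where D holds the standard
# deviations of the thetas on its diagonal.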
# Compute statistics for the epsilon variable
mean_sigma = np.sqrt(s_squared)
variance_sigma = s_squared/(2.0*(nu-4.0))
stdv_sigma = np.sqrt(variance_sigma)
cov_sigma = abs(stdv_sigma/mean_sigma)
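# Here sigma is the standard deviation of the model error epsilon: its mean
# is taken as s = sqrt(s^2) and its variance as s^2 / (2*(nu - 4)), from
# which the coefficient of variation printed below follows.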
# Print the results
print('\n'"Mean of Sigma = %.2f C.o.v. of Sigma = %.2f" % (mean_sigma, 100.0*cov_sigma), "percent")
print('\n'"Posterior statistics for the model parameters (thetas):\n")
for i in range(k):
    print(" Mean(%i) = %9.3g Cov(%i) = %.2f" % (i+1, mean_theta[i], i+1, cov_theta[i]), "percent")
# Loop over all possible explanatory functions of the form x1^m1 * x2^m2 * ...
if int(sum(data[:,1])) != n:
    print("WARNING: The search algorithm assumes 1.0 in the first column of the data")
powerLimit = 3
numCombinations = (2 * powerLimit + 1)**(k-1)
mValues = np.full(k-1, -3)
print('\n'"Results from the search for explanatory functions, if any:")
for combinations in range(numCombinations):
    newX = np.ones((n, 2))
    myString = ""
    for j in range(k-1):
        # Record a string with the full explanatory function
        if mValues[j] != 0:
            if j != 0:
                myString += " "
            if mValues[j] < 0:
                myString += "/ "
            else:
                myString += "* "
            if mValues[j] == 1 or mValues[j] == -1:
                myString += ("x%i" % (j + 1))
            else:
                myString += ("x%i^%i" % (j + 1, abs(mValues[j])))
        # Calculate the value of this explanatory function for each observation
        for i in range(n):
            newX[i, 1] *= X[i, j+1]**mValues[j]
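    # At this point newX holds two columns: the intercept (all ones) and the
    # value of the current candidate explanatory function for each observation.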
    # Compute the ordinary least squares estimate, i.e., the mean of the thetas
    XX = np.transpose(newX).dot(newX)
    if np.linalg.cond(XX) < 1 / sys.float_info.epsilon:
        XXinv = np.linalg.inv(XX)
        XXinvX = XXinv.dot(np.transpose(newX))
        mean_theta = XXinvX.dot(y)
        # Mean and standard deviation of this explanatory function
        vectorSum = 0.0
        vectorSumSquared = 0.0
        for i in range(n):
            value = newX[i, 1]
            vectorSum += value
            vectorSumSquared += value * value
        meanExp = vectorSum / (float(n))
        varianceExp = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanExp * meanExp)
        stdvExp = np.sqrt(varianceExp)
        # Proceed only if stdvExp is reasonable
        if (stdvExp / meanExp > 1e-8):
            # Mean and standard deviation of y
            vectorSum = 0.0
            vectorSumSquared = 0.0
            for i in range(n):
                value = y[i]
                vectorSum += value
                vectorSumSquared += value * value
            meanY = vectorSum / (float(n))
            varianceY = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanY * meanY)
            stdvY = np.sqrt(varianceY)
            # For now, do a simple check of correlation between y and the explanatory function
            productSum = 0.0
            for i in range(n):
                productSum += newX[i, 1] * y[i]
            correlation = 1.0 / (float(n) - 1.0) * (productSum - n * meanExp * meanY) / (stdvExp * stdvY)
            if correlation > 0.95:
                print("Correlation = %.2f for explanatory function %.3g" % (correlation, mean_theta[1]), myString)
    # Increment m-values
    counter = 0
    for a in range(k-1):
        if mValues[counter] < powerLimit:
            mValues[counter] += 1
            break
        else:
            mValues[counter] = -powerLimit
            counter += 1
        if a == k-1:
            break