# ------------------------------------------------------------------------
# The following Python code is implemented by Professor Terje Haukaas at
# the University of British Columbia in Vancouver, Canada. It is made
# freely available online at terje.civil.ubc.ca together with notes,
# examples, and additional Python code. Please be cautious when using
# this code; it may contain bugs and comes without warranty of any kind.
#
# In fact, this is a particularly quick-and-dirty implementation of
# Ordinary and Bayesian Linear Regression. One part of this code is a
# search for good explanatory functions. That isn't always meaningful,
# but it provides insight for the specific data given below, which
# was actually generated with a deterministic model with four input
# parameters. If you know structural analysis, can you guess which
# model generated the data?
# ------------------------------------------------------------------------


# Import useful libraries
import sys
import numpy as np
import matplotlib.pyplot as plt

# Input data (first column = response, second = intercept, the rest are regressors)
data = np.array([(6.8, 1.0, 1000.0, 2000.0, 9500.0, 41096604.2),
                 (22.1, 1.0, 1572.0, 2532.0, 11573.0, 33260953.5),
                 (59.8, 1.0, 1288.0, 2801.0, 13647.0, 11565502.7),
                 (5.9, 1.0, 1543.0, 1370.0, 14614.0, 15284895.2),
                 (20.5, 1.0, 1572.0, 1113.0, 9872.0, 3562069.3),
                 (30.1, 1.0, 877.0, 2323.0, 12614.0, 9653979.2),
                 (176.1, 1.0, 1345.0, 2980.0, 12950.0, 5202934.7),
                 (11.0, 1.0, 1233.0, 2301.0, 15342.0, 29747448.2),
                 (26.4, 1.0, 1675.0, 1503.0, 10812.0, 6640981.3),
                 (21.2, 1.0, 1777.0, 2117.0, 12609.0, 21041461.3),
                 (4.7, 1.0, 843.0, 1448.0, 8606.0, 21041461.3),
                 (0.9, 1.0, 1909.0, 736.0, 9514.0, 31034422.7),
                 (35.9, 1.0, 1971.0, 1301.0, 13521.0, 2980441.3),
                 (0.6, 1.0, 1122.0, 759.0, 14311.0, 17859214.7),
                 (0.8, 1.0, 1833.0, 854.0, 11561.0, 40574196.0),
                 (24.0, 1.0, 1943.0, 2382.0, 9712.0, 37532448.0),
                 (39.3, 1.0, 1225.0, 1588.0, 11686.0, 3562069.3),
                 (21.8, 1.0, 1332.0, 1866.0, 15565.0, 8504460.2),
                 (40.0, 1.0, 1354.0, 1792.0, 15418.0, 4214833.3),
                 (63.3, 1.0, 548.0, 2988.0, 8489.0, 9067078.7),
                 (4.1, 1.0, 1257.0, 790.0, 12152.0, 4100925.2),
                 (9.6, 1.0, 1280.0, 2098.0, 12282.0, 33260953.5),
                 (48.2, 1.0, 1456.0, 2464.0, 13828.0, 10902678.2),
                 (0.5, 1.0, 1178.0, 797.0, 15198.0, 25333333.3),
                 (9.8, 1.0, 940.0, 2154.0, 9893.0, 32357991.2),
                 (66.0, 1.0, 1342.0, 2703.0, 12034.0, 11120725.3),
                 (41.7, 1.0, 1125.0, 2976.0, 10736.0, 22064924.8),
                 (8.0, 1.0, 738.0, 2139.0, 15346.0, 19726762.7),
                 (5.9, 1.0, 1982.0, 1148.0, 13479.0, 12490321.3),
                 (1.8, 1.0, 1882.0, 955.0, 8676.0, 34180559.8),
                 (5.6, 1.0, 1533.0, 1055.0, 10570.0, 10058989.5),
                 (13.0, 1.0, 1331.0, 2302.0, 11716.0, 35591509.3),
                 (55.5, 1.0, 712.0, 2693.0, 8302.0, 10058989.5),
                 (29.7, 1.0, 1002.0, 2848.0, 8850.0, 29326500.0),
                 (32.4, 1.0, 1787.0, 2024.0, 8113.0, 18777513.2),
                 (9.1, 1.0, 1303.0, 1932.0, 8888.0, 38528833.3),
                 (18.6, 1.0, 690.0, 1686.0, 9824.0, 6037642.7),
                 (0.8, 1.0, 1339.0, 922.0, 13510.0, 31034422.7),
                 (4.3, 1.0, 1084.0, 1905.0, 15503.0, 37532448.0),
                 (100.9, 1.0, 1274.0, 2290.0, 8373.0, 6037642.7)])


# Extract the y-vector and the X-matrix
rows = data.shape[0]
columns = len(data[0])
y = data[:, 0]
X = data[:, 1:columns]


# Determine number of regressors (k) and observations (n)
n = rows
k = columns - 1
nu = n - k


# Print info about the number of observations and regressors
print('\n'"There are", n, "observations and", k, "regressors")


# Compute the ordinary least squares estimate, i.e., the mean of the thetas
XX = np.transpose(X).dot(X)
XXinv = np.linalg.inv(XX)
XXinvX = XXinv.dot(np.transpose(X))
mean_theta = XXinvX.dot(y)
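
# Optional sanity check (an addition, not part of the original script): NumPy's
# built-in least-squares solver should reproduce the normal-equation estimate above.
theta_check = np.linalg.lstsq(X, y, rcond=None)[0]
print('\n'"Max. difference between normal-equation and lstsq estimates:", np.max(np.abs(mean_theta - theta_check)))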


# Plot predictions vs. observations
plt.ion()
predictions = []
for i in range(n):
    model = 0
    for j in range(k):
        model += mean_theta[j] * X[i, j]

    predictions.append(model)

plt.plot(y, predictions, 'ko')
plt.xlabel("Observations")
plt.ylabel("Predictions")
plt.show()
print('\n'"Pausing a few seconds before closing the plot...")
plt.pause(2)


# Compute X*theta and the ordinary least squares error variance
Xtheta = X.dot(mean_theta)
y_minus_Xtheta = y - Xtheta
dot_product_y_minus_Xtheta = (np.transpose(y_minus_Xtheta)).dot(y_minus_Xtheta)
s_squared = dot_product_y_minus_Xtheta/nu
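
# Optional diagnostic (an addition, not in the original script): the coefficient
# of determination R^2, computed from the residual and total sums of squares,
# gives a quick indication of how much of the response variability the model explains.
SS_res = dot_product_y_minus_Xtheta
SS_tot = np.sum((y - np.mean(y)) ** 2)
print('\n'"R-squared = %.3f" % (1.0 - SS_res / SS_tot))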


# Compute the covariance matrix for the model parameters
covarianceMatrix = s_squared * XXinv


# Collect the standard deviations from the covariance matrix
stdv_theta = []
for i in range(k):
    stdv_theta.append(np.sqrt(covarianceMatrix[i, i]))


# Collect the coefficient of variation in a vector (in percent!)
cov_theta = []
for i in range(k):
    cov_theta.append(abs(100.0 * stdv_theta[i] / mean_theta[i]))


# Place 1/stdvs on the diagonal of the D^-1 matrix and extract the correlation matrix
Dinv = np.eye(k)
for i in range(k):
    Dinv[i, i] = 1.0 / stdv_theta[i]

correlationMatrix = (Dinv.dot(covarianceMatrix)).dot(Dinv)
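
# Optional sanity check (an addition, not in the original script): the same
# correlation matrix is obtained by elementwise division of the covariance
# matrix by the outer product of the standard deviations.
corr_check = covarianceMatrix / np.outer(stdv_theta, stdv_theta)
print('\n'"Max. difference between correlation matrix computations:", np.max(np.abs(correlationMatrix - corr_check)))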


# Compute statistics for the epsilon variable
mean_sigma = np.sqrt(s_squared)
variance_sigma = s_squared/(2.0*(nu-4.0))
stdv_sigma = np.sqrt(variance_sigma)
cov_sigma = abs(stdv_sigma/mean_sigma)


# Print the results
print('\n'"Mean of Sigma = %.2f C.o.v. of Sigma = %.2f" % (mean_sigma, 100.0*cov_sigma), "percent")
print('\n'"Posterior statistics for the model parameters (thetas):\n")
for i in range(k):
    print(" Mean(%i) = %9.3g Cov(%i) = %.2f" % (i+1, mean_theta[i], i+1, cov_theta[i]), "percent")


# Loop over all possible explanatory functions of the form x1^m1 * x2^m2 * ...
if int(sum(data[:, 1])) != n:
    print("WARNING: The search algorithm assumes 1.0 in the first column of the data")
powerLimit = 3
numCombinations = (2 * powerLimit + 1)**(k-1)
mValues = np.full(k-1, -3)
print('\n'"Results from the search for explanatory functions, if any:")

for combinations in range(numCombinations):

    newX = np.ones((n, 2))
    myString = ""

    for j in range(k-1):

        # Record a string with the full explanatory function
        if mValues[j] != 0:

            if j != 0:
                myString += " "

            if mValues[j] < 0:
                myString += "/ "
            else:
                myString += "* "

            if mValues[j] == 1 or mValues[j] == -1:
                myString += ("x%i" % (j + 1))
            else:
                myString += ("x%i^%i" % (j + 1, abs(mValues[j])))

        # Calculate the value of this explanatory function for each observation
        for i in range(n):

            newX[i, 1] *= X[i, j+1]**mValues[j]

    # Compute the ordinary least squares estimate, i.e., the mean of the thetas,
    # but only if the normal-equation matrix is well conditioned
    XX = np.transpose(newX).dot(newX)

    if np.linalg.cond(XX) < 1 / sys.float_info.epsilon:
        XXinv = np.linalg.inv(XX)
        XXinvX = XXinv.dot(np.transpose(newX))
        mean_theta = XXinvX.dot(y)

        # Mean and standard deviation of this explanatory function
        vectorSum = 0.0
        vectorSumSquared = 0.0
        for i in range(n):
            value = newX[i, 1]
            vectorSum += value
            vectorSumSquared += value * value

        meanExp = vectorSum / (float(n))
        varianceExp = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanExp * meanExp)
        stdvExp = np.sqrt(varianceExp)

        # Proceed only if stdvExp is reasonable
        if (stdvExp / meanExp > 1e-8):

            # Mean and standard deviation of y
            vectorSum = 0.0
            vectorSumSquared = 0.0
            for i in range(n):
                value = y[i]
                vectorSum += value
                vectorSumSquared += value * value

            meanY = vectorSum / (float(n))
            varianceY = 1.0 / (float(n) - 1.0) * (vectorSumSquared - float(n) * meanY * meanY)
            stdvY = np.sqrt(varianceY)

            # For now, do a simple check of correlation between y and the explanatory function
            productSum = 0.0
            for i in range(n):
                productSum += newX[i, 1] * y[i]

            correlation = 1.0 / (float(n) - 1.0) * (productSum - n * meanExp * meanY) / (stdvExp * stdvY)

            if correlation > 0.95:
                print("Correlation = %.2f for explanatory function %.3g" % (correlation, mean_theta[1]), myString)

    # Increment m-values
    counter = 0
    for a in range(k-1):
        if mValues[counter] < powerLimit:
            mValues[counter] += 1
            break
        else:
            mValues[counter] = -powerLimit
            counter += 1
            if a == k-1:
                break
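
# Optional note (an addition, not in the original script): the loop-based
# correlation check in the search above can also be done with NumPy directly.
# The hypothetical helper below returns the same Pearson correlation for a
# single candidate explanatory-function column, e.g. correlation_with_response(newX[:, 1], y).
def correlation_with_response(explanatory_column, response):
    # np.corrcoef returns the 2x2 correlation matrix; the off-diagonal
    # entry is the correlation between the two vectors
    return np.corrcoef(explanatory_column, response)[0, 1]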