Advertisement
Guest User

Untitled

a guest
Sep 21st, 2017
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.02 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from matplotlib import pyplot as plt
  4.  
  5. from sklearn.gaussian_process import GaussianProcessRegressor
  6. from sklearn.gaussian_process.kernels \
  7. import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
  8. from sklearn.datasets import fetch_mldata
  9.  
  10. merdgedData = pd.read_csv('../data/exportovanDataSet.csv')
  11.  
  12. data = merdgedData[merdgedData['zelja']== 1]
  13. data = data.drop_duplicates(subset='IDUcenik',keep='first')
  14. from sklearn.preprocessing import LabelEncoder, Imputer
  15.  
  16. lb_make = LabelEncoder()
  17.  
  18. data['sifraprofila'] = data['sifraprofila'].factorize()[0]
  19. data["sifraprofila"] = lb_make.fit_transform(data["sifraprofila"])
  20.  
  21. data['skolaprofil'] = data['skolaprofil'].factorize()[0]
  22. data["skolaprofil"] = lb_make.fit_transform(data["skolaprofil"])
  23.  
  24. data['NazivProfila'] = data['NazivProfila'].factorize()[0]
  25. data["NazivProfila"] = lb_make.fit_transform(data["NazivProfila"])
  26.  
  27. # print(data)
  28.  
  29. dataFinal = data.apply(lambda x:x.fillna(x.value_counts().index[0]))
  30.  
  31. X = dataFinal
  32. y =dataFinal['bodova']
  33.  
  34. del X['bodova']
  35.  
  36. print(X)
  37.  
  38. # Kernel with parameters given in GPML book
  39. k1 = 66.0**2 * RBF(length_scale=67.0) # long term smooth rising trend
  40. k2 = 2.4**2 * RBF(length_scale=90.0) \
  41. * ExpSineSquared(length_scale=1.3, periodicity=1.0) # seasonal component
  42. # medium term irregularity
  43. k3 = 0.66**2 \
  44. * RationalQuadratic(length_scale=1.2, alpha=0.78)
  45. k4 = 0.18**2 * RBF(length_scale=0.134) \
  46. + WhiteKernel(noise_level=0.19**2) # noise terms
  47. kernel_gpml = k1 + k2 + k3 + k4
  48.  
  49. gp = GaussianProcessRegressor(kernel=kernel_gpml, alpha=0,
  50. optimizer=None, normalize_y=True)
  51. gp.fit(X, y)
  52.  
  53. print("GPML kernel: %s" % gp.kernel_)
  54. print("Log-marginal-likelihood: %.3f"
  55. % gp.log_marginal_likelihood(gp.kernel_.theta))
  56.  
  57. # Kernel with optimized parameters
  58. k1 = 50.0**2 * RBF(length_scale=50.0) # long term smooth rising trend
  59. k2 = 2.0**2 * RBF(length_scale=100.0) \
  60. * ExpSineSquared(length_scale=1.0, periodicity=1.0,
  61. periodicity_bounds="fixed") # seasonal component
  62. # medium term irregularities
  63. k3 = 0.5**2 * RationalQuadratic(length_scale=1.0, alpha=1.0)
  64. k4 = 0.1**2 * RBF(length_scale=0.1) \
  65. + WhiteKernel(noise_level=0.1**2,
  66. noise_level_bounds=(1e-3, np.inf)) # noise terms
  67. kernel = k1 + k2 + k3 + k4
  68.  
  69. gp = GaussianProcessRegressor(kernel=kernel, alpha=0,
  70. normalize_y=True)
  71. gp.fit(X, y)
  72.  
  73. print("\nLearned kernel: %s" % gp.kernel_)
  74. print("Log-marginal-likelihood: %.3f"
  75. % gp.log_marginal_likelihood(gp.kernel_.theta))
  76.  
# Predict on a dense grid and plot the GP mean with a +/- 1 std band.
# NOTE(review): this section is lifted from the sklearn Mauna Loa CO2
# example, where X is a single column of years.  Here X is a multi-column
# DataFrame, so X.min()/X.max() return per-column Series and np.linspace
# broadcasts them into a 2-D array; the [:, np.newaxis] then yields a 3-D
# input that gp.predict is unlikely to accept — confirm intent before
# relying on this part.
X_ = np.linspace(X.min(), X.max() + 30, 1000)[:, np.newaxis]
y_pred, y_std = gp.predict(X_, return_std=True)

# Illustration
# NOTE(review): plt.scatter expects 1-D x values; X is multi-column here.
plt.scatter(X, y, c='k')
plt.plot(X_, y_pred)
plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std,
alpha=0.5, color='k')
plt.xlim(X_.min(), X_.max())
# Axis labels/title are copied verbatim from the CO2 example and do not
# describe this data set.
plt.xlabel("Year")
plt.ylabel(r"CO$_2$ in ppm")
plt.title(r"Atmospheric CO$_2$ concentration at Mauna Loa")
plt.tight_layout()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement