Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- from matplotlib import pyplot as plt
- from sklearn.gaussian_process import GaussianProcessRegressor
- from sklearn.gaussian_process.kernels \
- import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
- from sklearn.datasets import fetch_mldata
# Load the exported data set, keep only the rows whose 'zelja' column equals 1
# and, among those, the first row for every distinct 'IDUcenik' value.
merdgedData = pd.read_csv('../data/exportovanDataSet.csv')
zelja_is_one = merdgedData['zelja'] == 1
data = merdgedData.loc[zelja_is_one].drop_duplicates(
    subset='IDUcenik', keep='first')
- from sklearn.preprocessing import LabelEncoder, Imputer
# Work on an independent copy so the column assignments below can never write
# through to (or raise chained-assignment warnings about) the frame that
# `data` was filtered from.
data = data.copy()

lb_make = LabelEncoder()

# Replace each categorical column with integer codes.  `factorize` numbers the
# values in order of first appearance (missing values get -1); the
# LabelEncoder pass then re-maps those codes onto 0..k-1, so in a column that
# contained missing values the missing marker ends up encoded as 0.
for column in ('sifraprofila', 'skolaprofil', 'NazivProfila'):
    data[column] = data[column].factorize()[0]
    data[column] = lb_make.fit_transform(data[column])


def _fill_with_most_frequent(column):
    """Return *column* with missing values replaced by its most frequent value.

    A column without any non-missing value has no most frequent value; it is
    returned unchanged instead of failing with an IndexError.
    """
    counts = column.value_counts()  # most frequent first, missing excluded
    if counts.empty:
        return column
    return column.fillna(counts.index[0])


dataFinal = data.apply(_fill_with_most_frequent)

# Split into target (y) and features (X).  `drop` returns a new frame, so
# dataFinal itself keeps its 'bodova' column instead of being mutated through
# an alias.
y = dataFinal['bodova']
X = dataFinal.drop(columns='bodova')
print(X)
# Kernel with the hyperparameter values given in the GPML book.  The optimizer
# is disabled below, so these values are used exactly as written.
# NOTE(review): the component descriptions are the ones that came with these
# values (see the Mauna Loa plot title further down); confirm they are
# meaningful for this data set.

# k1: long term smooth rising trend
k1 = 66.0**2 * RBF(length_scale=67.0)

# k2: seasonal component (a smooth envelope times a periodic term)
seasonal_envelope = RBF(length_scale=90.0)
seasonal_cycle = ExpSineSquared(length_scale=1.3, periodicity=1.0)
k2 = 2.4**2 * seasonal_envelope * seasonal_cycle

# k3: medium term irregularity
k3 = 0.66**2 * RationalQuadratic(length_scale=1.2, alpha=0.78)

# k4: noise terms (a short-length-scale RBF plus white noise)
correlated_noise = 0.18**2 * RBF(length_scale=0.134)
k4 = correlated_noise + WhiteKernel(noise_level=0.19**2)

kernel_gpml = k1 + k2 + k3 + k4

gp = GaussianProcessRegressor(
    kernel=kernel_gpml,
    alpha=0,
    optimizer=None,
    normalize_y=True,
)
gp.fit(X, y)

print("GPML kernel: %s" % gp.kernel_)
fixed_kernel_lml = gp.log_marginal_likelihood(gp.kernel_.theta)
print("Log-marginal-likelihood: %.3f" % fixed_kernel_lml)
# Kernel with optimized parameters: same four-component structure, but the
# values below are only starting points, because this regressor is created
# without optimizer=None and therefore tunes them during fit().

# k1: long term smooth rising trend
k1 = 50.0**2 * RBF(length_scale=50.0)

# k2: seasonal component; the periodicity is held fixed during optimization
seasonal_envelope = RBF(length_scale=100.0)
seasonal_cycle = ExpSineSquared(
    length_scale=1.0,
    periodicity=1.0,
    periodicity_bounds="fixed",
)
k2 = 2.0**2 * seasonal_envelope * seasonal_cycle

# k3: medium term irregularities
k3 = 0.5**2 * RationalQuadratic(length_scale=1.0, alpha=1.0)

# k4: noise terms; the white-noise level is bounded below by 1e-3
correlated_noise = 0.1**2 * RBF(length_scale=0.1)
white_noise = WhiteKernel(
    noise_level=0.1**2,
    noise_level_bounds=(1e-3, np.inf),
)
k4 = correlated_noise + white_noise

kernel = k1 + k2 + k3 + k4

gp = GaussianProcessRegressor(kernel=kernel, alpha=0, normalize_y=True)
gp.fit(X, y)

print("\nLearned kernel: %s" % gp.kernel_)
learned_kernel_lml = gp.log_marginal_likelihood(gp.kernel_.theta)
print("Log-marginal-likelihood: %.3f" % learned_kernel_lml)
# Build a prediction grid of 1000 points running, for every feature, from its
# minimum to its maximum plus 30.  The grid is kept 2-D
# (n_points, n_features), the shape gp.predict() requires.  Appending a
# further axis is only correct for a single 1-D input vector; with a
# multi-column X it produces a 3-D array that predict() rejects.
X_values = np.asarray(X, dtype=float)
if X_values.ndim == 1:
    X_values = X_values[:, np.newaxis]
X_ = np.linspace(X_values.min(axis=0), X_values.max(axis=0) + 30, 1000)
y_pred, y_std = gp.predict(X_, return_std=True)

# Illustration
# NOTE(review): when X has more than one column the grid moves along all
# features at once and the first feature column is used as the x-axis; pick
# the feature to plot explicitly if a different view is wanted.
x_observed = X_values[:, 0]
x_grid = X_[:, 0]
plt.scatter(x_observed, y, c='k')
plt.plot(x_grid, y_pred)
# Shade one predictive standard deviation either side of the mean.
plt.fill_between(x_grid, y_pred - y_std, y_pred + y_std,
                 alpha=0.5, color='k')
plt.xlim(x_grid.min(), x_grid.max())
# NOTE(review): the labels and title below describe a CO2 time series, not
# the 'bodova' target fitted above -- replace them with text that matches
# this data set.
plt.xlabel("Year")
plt.ylabel(r"CO$_2$ in ppm")
plt.title(r"Atmospheric CO$_2$ concentration at Mauna Loa")
plt.tight_layout()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement