Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import matplotlib.pyplot as plt
- import numpy as np
- from sklearn import linear_model
- from sklearn.linear_model import Ridge
- from sklearn.preprocessing import PolynomialFeatures
- from sklearn.pipeline import make_pipeline
# 1) Generate training data
# Fake data collected (age, distance_to_feet) for TOTAL_SAMPLES people.
# Each sample contributes 50 points at ages 2, 4, ..., 100; the underlying
# "true" curve is y = 2*ln(age), with zero-mean Gaussian noise on both axes
# (the y-noise is 50% wider). abs() keeps distances non-negative.
TOTAL_SAMPLES = 1
train_X = []
train_Y = []
for _ in range(TOTAL_SAMPLES):
    # Noise to add to the data
    mu, sigma = 0, 0.3  # mean and standard deviation
    ages = range(2, 101, 2)  # 50 even ages, matching the original 2..100 grid
    noise_x = np.random.normal(mu, sigma, len(ages))
    noise_y = np.random.normal(mu, sigma * 1.5, len(ages))
    train_X += [age + nx for age, nx in zip(ages, noise_x)]
    train_Y += [abs(np.log(age) * 2 + ny) for age, ny in zip(ages, noise_y)]
# Plotting the training data and the ground-truth curve
fig, ax = plt.subplots()
ax.set_title('Distance to reach the feet depending on age')  # fixed typo "depeding"
ax.set_ylabel('Distance to feet (cm)')
ax.set_xlabel('Age')
fig.set_size_inches(20, 10)
plt.scatter(train_X, train_Y, color='g', label='"Real" data points')
# Ideal (noise-free) curve: y = 2*ln(age + 1)
plt.plot(range(100), [np.log(i + 1) * 2 for i in range(100)], label='Ideal curve')
plt.legend(loc='lower right')
# 2) Create linear regression object and train it
regr = linear_model.LinearRegression()
# scikit-learn expects a 2-D feature matrix; our single feature becomes one column.
train_X = np.array(train_X).reshape(-1, 1)
regr.fit(train_X, train_Y)
# 3) Predict a few values with the new model on a regular 0-100 age grid
# (the original grid skipped 90, jumping from 80 straight to 100).
X_test = [[age] for age in range(0, 101, 10)]
Y_pred = regr.predict(X_test)
print(Y_pred)
# Sample output captured with the old 10-point grid (values vary with the noise):
# >> [ 4.5010285 5.06034505 5.61966161 6.17897817 6.73829473 7.29761128 7.85692784 8.4162444 8.97556095 10.09419407]
# Plot outputs: training data together with the fitted linear model
fig, ax = plt.subplots()
ax.set_title('Distance to reach the feet depending on age')  # fixed typo "depeding"
ax.set_ylabel('Distance to feet (cm)')
ax.set_xlabel('Age')
fig.set_size_inches(20, 10)
plt.scatter(train_X, train_Y, color='g', label='"Real" data points')
plt.plot(X_test, Y_pred, color='blue', linewidth=5, label="Regression model")
plt.legend(loc='lower right')
# 4) Polynomial regression: plot ridge models with degree 3 and 11
colors = ['orange', 'red']
legends = ['Polynomial interpolation (degree=3)',
           'Polynomial interpolation (degree=11)']
lw = 2
# Dense grid over the full age range so the fitted curves look smooth.
x_plot = np.linspace(0, 100, 10000).reshape(-1, 1)
for count, degree in enumerate([3, 11]):
    fig, ax = plt.subplots()
    fig.set_size_inches(20, 10)
    ax.set_title('Distance to reach the feet depending on age')  # fixed typo "depeding"
    ax.set_ylabel('Distance to feet (cm)')
    ax.set_xlabel('Age')  # was missing; keeps axes consistent with the other plots
    # Polynomial feature expansion followed by ridge regression (regularized
    # least squares, which tames the degree-11 fit somewhat).
    model = make_pipeline(PolynomialFeatures(degree), Ridge())
    model.fit(train_X, train_Y)
    y_plot = model.predict(x_plot)
    plt.scatter(train_X, train_Y, color='g', label='"Real" data points')
    plt.plot(x_plot, y_plot, color=colors[count], linewidth=lw, label=legends[count])
    plt.legend(loc='lower right')
# NOTE(review): no plt.show() anywhere in the script — figures only appear in an
# interactive/notebook backend; add plt.show() if running as a plain script.
Add Comment
Please sign in to add a comment.