Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import matplotlib.pyplot as plt
- import numpy as np
- from sklearn import linear_model
- from sklearn.linear_model import Ridge
- from sklearn.preprocessing import PolynomialFeatures
- from sklearn.pipeline import make_pipeline
# 1) Generate training data
# Fake data collected (age, distance_to_feet) for TOTAL_SAMPLES people.
# Each sample contributes 50 points at ages 2, 4, ..., 100; the underlying
# "true" curve is y = 2*ln(age), with zero-mean Gaussian noise on both axes
# (the y-noise is 50% wider). abs() keeps distances non-negative.
TOTAL_SAMPLES = 1
train_X = []
train_Y = []
for _ in range(TOTAL_SAMPLES):
    # Noise to add to the data
    mu, sigma = 0, 0.3  # mean and standard deviation
    ages = range(2, 101, 2)  # 50 even ages, matching the original 2..100 grid
    noise_x = np.random.normal(mu, sigma, len(ages))
    noise_y = np.random.normal(mu, sigma * 1.5, len(ages))
    train_X += [age + nx for age, nx in zip(ages, noise_x)]
    train_Y += [abs(np.log(age) * 2 + ny) for age, ny in zip(ages, noise_y)]
# Plotting the training data and the ground-truth curve
fig, ax = plt.subplots()
ax.set_title('Distance to reach the feet depending on age')  # fixed typo "depeding"
ax.set_ylabel('Distance to feet (cm)')
ax.set_xlabel('Age')
fig.set_size_inches(20, 10)
plt.scatter(train_X, train_Y, color='g', label='"Real" data points')
# Ideal (noise-free) curve: y = 2*ln(age + 1)
plt.plot(range(100), [np.log(i + 1) * 2 for i in range(100)], label='Ideal curve')
plt.legend(loc='lower right')
# 2) Create linear regression object and train it
regr = linear_model.LinearRegression()
# scikit-learn expects a 2-D feature matrix; our single feature becomes one column.
train_X = np.array(train_X).reshape(-1, 1)
regr.fit(train_X, train_Y)
# 3) Predict a few values with the new model on a regular 0-100 age grid
# (the original grid skipped 90, jumping from 80 straight to 100).
X_test = [[age] for age in range(0, 101, 10)]
Y_pred = regr.predict(X_test)
print(Y_pred)
# Sample output captured with the old 10-point grid (values vary with the noise):
# >> [ 4.5010285 5.06034505 5.61966161 6.17897817 6.73829473 7.29761128 7.85692784 8.4162444 8.97556095 10.09419407]
# Plot outputs: training data together with the fitted linear model
fig, ax = plt.subplots()
ax.set_title('Distance to reach the feet depending on age')  # fixed typo "depeding"
ax.set_ylabel('Distance to feet (cm)')
ax.set_xlabel('Age')
fig.set_size_inches(20, 10)
plt.scatter(train_X, train_Y, color='g', label='"Real" data points')
plt.plot(X_test, Y_pred, color='blue', linewidth=5, label="Regression model")
plt.legend(loc='lower right')
# 4) Polynomial regression: plot ridge models with degree 3 and 11
colors = ['orange', 'red']
legends = ['Polynomial interpolation (degree=3)',
           'Polynomial interpolation (degree=11)']
lw = 2
# Dense grid over the full age range so the fitted curves look smooth.
x_plot = np.linspace(0, 100, 10000).reshape(-1, 1)
for count, degree in enumerate([3, 11]):
    fig, ax = plt.subplots()
    fig.set_size_inches(20, 10)
    ax.set_title('Distance to reach the feet depending on age')  # fixed typo "depeding"
    ax.set_ylabel('Distance to feet (cm)')
    ax.set_xlabel('Age')  # was missing; keeps axes consistent with the other plots
    # Polynomial feature expansion followed by ridge regression (regularized
    # least squares, which tames the degree-11 fit somewhat).
    model = make_pipeline(PolynomialFeatures(degree), Ridge())
    model.fit(train_X, train_Y)
    y_plot = model.predict(x_plot)
    plt.scatter(train_X, train_Y, color='g', label='"Real" data points')
    plt.plot(x_plot, y_plot, color=colors[count], linewidth=lw, label=legends[count])
    plt.legend(loc='lower right')
# NOTE(review): no plt.show() anywhere in the script — figures only appear in an
# interactive/notebook backend; add plt.show() if running as a plain script.
Add Comment
Please sign in to add a comment.