Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Linear Regression
#
# Flat script: loads 'london_merged.csv', fits a linear model that predicts
# the 'cnt' column from the 'hum' and 'wind_speed' columns, reports R^2, MSE
# and RMSE on a held-out 30% split, predicts one hand-written observation,
# and finally plots predictions against the true test targets.
import matplotlib.pyplot as plt
import pandas
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
# The three regressors below are imported but not used by this script.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# --- Load and inspect the data ------------------------------------------
df = pandas.read_csv('london_merged.csv')
print(df)
print(df.shape)
print(df.describe())

# --- Select features and target -----------------------------------------
# Only these three columns are used; order matters for the slicing below.
subset = df[['hum', 'wind_speed', 'cnt']]
array = subset.values
X = array[:, 0:2]  # all rows, columns 0..1 of the subset: 'hum', 'wind_speed'
y = array[:, 2]    # all rows, column 2 of the subset: 'cnt' (the target)

# Hold out 30% of the rows for evaluation; fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, y, test_size=0.30, random_state=42
)

# --- Fit ----------------------------------------------------------------
model = LinearRegression()
model.fit(X_train, Y_train)
print('Learning completed!')

# --- Evaluate on the held-out rows --------------------------------------
predictions = model.predict(X_test)
print(predictions)

# R squared: fraction of the target's variance explained by the model.
print('R squared: ', r2_score(Y_test, predictions))

mse = mean_squared_error(Y_test, predictions)
print('Mean squared error ', mse)
# MSE is in squared target units; its square root (RMSE) is in the same
# units as 'cnt'. Computed with ** 0.5 to avoid needing an extra import.
print('Root mean squared error ', mse ** 0.5)

# --- Predict a single new observation -----------------------------------
# One row in the same column order as X: [hum, wind_speed].
new = [[94.0, 8.0]]
observation = model.predict(new)
print('You will share ', observation, 'bikes')

# --- Plot predictions against the true test values ----------------------
# The bare 'seaborn' style was deprecated in matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and removed in 3.8, so use whichever name this
# installation provides and fall back to the default style otherwise.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

fig, ax = plt.subplots()
ax.scatter(Y_test, predictions)
# Reference line y = x: a perfect model would put every point on it.
ax.plot(Y_test, Y_test)
ax.set_title('Predictions vs Y_test')
ax.set_xlabel('Y test')
ax.set_ylabel('Predictions')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement