Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import datetime
- import seaborn as sns
- #PLOTLY
- import plotly
- import plotly.plotly as py
- import plotly.offline as offline
- import plotly.graph_objs as go
- from plotly.offline import download_plotlyjs, init_notebook_mode,plot,iplot
- import cufflinks as cf
- cf.set_config_file(offline=True)
- from plotly.graph_objs import Scatter, Figure, Layout
- data = pd.read_csv('/Users/rustamislamnurov/Desktop/train.csv', nrows=1101)
- print(data.head())
- print(data.isnull().sum())
- #data = data.dropna()
- #sns.distplot(data['fare'])
- #plt.title('asdda')
- from sklearn.model_selection import train_test_split
- from sklearn import metrics
- from sklearn.metrics import mean_squared_error
- from sklearn.model_selection import train_test_split, cross_val_score
- from sklearn.linear_model import LinearRegression
- import xgboost as xgb
- ml = data[1000:]
- y = ml['fare']
- X = ml.drop(['fare','date','time'],axis = 1)
- X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43, test_size = 0.3)
- lr = LinearRegression()
- lr.fit(X_train, y_train)
- print(lr.score(X_train,y_train))
- print(lr.score(X_test,y_test))
- y_pred = lr.predict(X_test)
- lrmse = np.sqrt(metrics.mean_squared_error(y_pred, y_test))
- print(lrmse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement