Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Fit a baseline DummyRegressor on the seaborn ``mpg`` dataset.

The script loads the feature matrix ``X`` and target ``y`` (fuel economy in
the ``mpg`` column), caching them on disk as a pickle so later runs do not
need to fetch the dataset again.  It then makes an 80/20 train/test split,
fits a ``DummyRegressor`` and prints its score on both partitions.
"""
import os
import pickle

import seaborn as sns
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split (and therefore the scores) is
# reproducible, making different models comparable across runs.
random_seed = 1

# On-disk cache holding the pickled ``(X, y)`` tuple.
DATA_CACHE = "regression_data.pkl"

# Load data from the internet, or from cache if available on disk.
if not os.path.isfile(DATA_CACHE):
    df_mpg = sns.load_dataset("mpg")
    # Drop rows with missing values, then the two columns that are not used
    # as features ('origin' and 'name').
    df_mpg = df_mpg.dropna()
    df_mpg = df_mpg.drop(columns=["origin", "name"])
    X = df_mpg.drop("mpg", axis=1)
    y = df_mpg["mpg"]
    # Context manager guarantees the file is flushed and closed even if
    # pickling raises part-way through.
    with open(DATA_CACHE, "wb") as cache_file:
        pickle.dump((X, y), cache_file)
else:
    print("Loading data...")
    # NOTE: pickle.load executes arbitrary code from the file; this is only
    # acceptable because the cache is written by this script itself.  Do not
    # point DATA_CACHE at a file from an untrusted source.
    with open(DATA_CACHE, "rb") as cache_file:
        X, y = pickle.load(cache_file)

# Separate train/test data from original data.
print("Splitting test and train data...")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=random_seed,
)

# Create the model.  The variable keeps its historical name ``classifier``
# even though DummyRegressor is a regressor; it serves as a trivial baseline
# to compare real models against.
classifier = DummyRegressor()

# Fit the model.
print("Fitting the classifier...")
classifier.fit(X_train, y_train)

train_score = classifier.score(X_train, y_train)
test_score = classifier.score(X_test, y_test)
print(f"Training set score: {train_score:f}")
print(f"Test set score: {test_score:f}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement