Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Elementwise product of two constant tensors, evaluated inside a
# TensorFlow 1.x session.
# NOTE(review): tf.Session is TF1-only (removed in TF2) — under TF2 use
# tf.compat.v1 or rely on eager execution; confirm the installed version.
import tensorflow as tf

# Two rank-1 integer constants of equal length.
x1 = tf.constant([1, 2, 3, 4])
x2 = tf.constant([5, 6, 7, 8])

# Graph node for the elementwise multiplication (not yet evaluated).
result = tf.multiply(x1, x2)

# The context manager closes the session even if run() raises.
with tf.Session() as sess:
    output = sess.run(result)
    print(output)
# Elementwise product of two constant tensors using an explicitly
# managed TensorFlow 1.x session.
# NOTE(review): tf.Session is TF1-only (removed in TF2); confirm version.
import tensorflow as tf

# Two rank-1 integer constants of equal length.
x1 = tf.constant([1, 2, 3, 4])
x2 = tf.constant([5, 6, 7, 8])

# Graph node for the elementwise multiplication (not yet evaluated).
result = tf.multiply(x1, x2)

# Open the session explicitly; try/finally guarantees it is released
# even if run() raises — the original leaked the session on error.
sess = tf.Session()
try:
    # Evaluate the node and print the resulting array.
    print(sess.run(result))
finally:
    # Always release the session's resources.
    sess.close()
# Grid-search an SVM pipeline (feature scaling + SVC) and report metrics
# on a held-out test set.
# FIX: the snippet used Pipeline/StandardScaler/SVC/GridSearchCV/
# train_test_split/classification_report without importing them; the
# imports below make it runnable standalone.
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Scale features first so the SVM sees comparable feature ranges.
steps = [('scaler', StandardScaler()),
         ('SVM', SVC())]
pipeline = Pipeline(steps)

# Hyperparameter grid; '<step>__<param>' addresses a parameter of a
# named step inside the pipeline.
parameters = {'SVM__C': [1, 10, 100],
              'SVM__gamma': [0.1, 0.01]}

# Hold out 20% of the data for final evaluation.
# NOTE(review): X and y must be defined by the surrounding context.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=21)

# Exhaustive search over the grid with default cross-validation.
cv = GridSearchCV(pipeline, parameters)
cv.fit(X_train, y_train)

# Evaluate the refit best estimator on the held-out test set.
y_pred = cv.predict(X_test)
print("Accuracy: {}".format(cv.score(X_test, y_test)))
print(classification_report(y_test, y_pred))
print("Tuned Model Parameters: {}".format(cv.best_params_))
# Pipeline that fills in missing values, then classifies with an SVM.
# FIX: sklearn.preprocessing.Imputer was deprecated in 0.20 and removed
# in 0.22; SimpleImputer is the supported replacement.  It imputes
# column-wise (matching the old axis=0) and uses np.nan as the missing
# marker (the old 'NaN' string form).
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Replace each missing value with the most frequent value of its column,
# then feed the completed matrix to the SVM.
steps = [('imputation', SimpleImputer(missing_values=np.nan,
                                      strategy='most_frequent')),
         ('SVM', SVC())]

pipeline = Pipeline(steps)

# Hold out 30% of the data for evaluation.
# NOTE(review): X, y, train_test_split and classification_report must be
# provided by the surrounding context.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Fit the whole pipeline (imputer statistics + SVM) on the training set.
pipeline.fit(X_train, y_train)

# Predict and report per-class precision/recall/F1 on the test set.
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))
# Build the (imputation -> SVM) step list for a Pipeline.
# FIX: sklearn.preprocessing.Imputer was deprecated in 0.20 and removed
# in 0.22; SimpleImputer is the supported replacement (column-wise, with
# np.nan as the missing-value marker).
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC

# Transformer that replaces each missing value with the most frequent
# value of its column.
imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

# Default-configuration support-vector classifier.
clf = SVC()

# Named steps consumed by sklearn.pipeline.Pipeline.
steps = [('imputation', imp),
         ('SVM', clf)]
# 5-fold cross-validated scores for ridge regression.
# FIX: Ridge's `normalize` parameter was deprecated in scikit-learn 1.0
# and removed in 1.2.  The documented migration is to scale inside a
# pipeline.
# NOTE(review): StandardScaler (zero mean, unit variance) is not
# bit-for-bit identical to the old normalize=True (l2 scaling), so the
# scores may differ slightly from the original snippet's output.
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Scale features, then fit a ridge regressor with alpha=0.5.
ridge = make_pipeline(StandardScaler(), Ridge(alpha=0.5))

# One score per fold (default regression metric).
# NOTE(review): X and y must be defined by the surrounding context.
ridge_cv = cross_val_score(ridge, X, y, cv=5)

# Print the five per-fold scores.
print(ridge_cv)
# One-hot encode the categorical columns of df, then repeat with the
# first level of each category dropped to avoid the dummy-variable trap
# (perfectly collinear columns).
# NOTE(review): df and pd must be provided by the surrounding context.
df_region = pd.get_dummies(df)
print(df_region.columns)

# drop_first=True removes one dummy column per categorical feature.
df_region = pd.get_dummies(df, drop_first=True)
print(df_region.columns)
# Load the gapminder data and draw a boxplot of life expectancy
# grouped by region.
import pandas as pd

# Read the CSV into a DataFrame.
df = pd.read_csv('gapminder.csv')

# Boxplot of the 'life' column grouped by 'Region'; rot=60 tilts the
# x-axis labels so long region names stay readable.
df.boxplot('life', 'Region', rot=60)

# NOTE(review): plt (matplotlib.pyplot) must be imported by the
# surrounding context.
plt.show()
# Evaluate a fitted logistic-regression model with ROC AUC, both on the
# held-out test set and via 5-fold cross-validation.
# NOTE(review): logreg, X, y, X_test and y_test must be provided by the
# surrounding context.
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score

# Probability of the positive class (column 1 of predict_proba).
y_pred_prob = logreg.predict_proba(X_test)[:, 1]

# Single AUC on the held-out test set.
print("AUC: {}".format(roc_auc_score(y_test, y_pred_prob)))

# One AUC per fold, scored with 'roc_auc'.
cv_auc = cross_val_score(logreg, X, y, cv=5, scoring='roc_auc')
print("AUC scores computed using 5-fold cross-validation: {}".format(cv_auc))
# Plot the ROC curve of a fitted logistic-regression model against the
# diagonal chance line.
# NOTE(review): logreg, X_test, y_test and plt must be provided by the
# surrounding context.
from sklearn.metrics import roc_curve

# Probability of the positive class (column 1 of predict_proba).
y_pred_prob = logreg.predict_proba(X_test)[:, 1]

# False-positive rate, true-positive rate and the thresholds that
# produced them.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Dashed diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
# Model-complexity curve for k-NN: train vs. test accuracy over
# k = 1..8, to visualize under- and over-fitting.
# NOTE(review): np, plt, KNeighborsClassifier and the train/test splits
# must be provided by the surrounding context.
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Fit one classifier per neighbor count and record both accuracies.
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Plot both curves on the same axes.
plt.title('k-NN: Varying Number of Neighbors')
plt.plot(neighbors, test_accuracy, label='Testing Accuracy')
plt.plot(neighbors, train_accuracy, label='Training Accuracy')
plt.legend()
plt.xlabel('Number of Neighbors')
plt.ylabel('Accuracy')
plt.show()
# Fit ordinary least squares on a train split and report R^2 and RMSE
# on the held-out test split.
# NOTE(review): X, y and np must be provided by the surrounding context.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

# Hold out 30% of the data for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Plain linear regression, fit on the training portion only.
reg_all = LinearRegression()
reg_all.fit(X_train, y_train)

# Predictions on the unseen test portion.
y_pred = reg_all.predict(X_test)

# score() returns R^2 for regressors; RMSE is the root of the MSE.
print("R^2: {}".format(reg_all.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: {}".format(rmse))
- By default, scikit-learn's cross_val_score() function uses $R^2$ as the metric of choice for regression. Since you are performing 5-fold cross-validation, the function will return 5 scores. Your job is to compute these 5 scores and then take their average.
# 5-fold cross-validation of ordinary least squares, reporting the
# per-fold scores and their mean.
# NOTE(review): X, y and np must be provided by the surrounding context.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Plain linear regression estimator.
reg = LinearRegression()

# One score per fold (default regression metric).
cv_scores = cross_val_score(reg, X, y, cv=5)
print(cv_scores)

# Average score across the five folds.
print("Average 5-Fold CV Score: {}".format(np.mean(cv_scores)))
# Fit a 6-nearest-neighbors classifier and report its confusion matrix
# and classification report on a held-out test set.
# FIX: the snippet used train_test_split and KNeighborsClassifier
# without importing them; the imports below make it runnable standalone.
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Hold out 40% of the data for evaluation.
# NOTE(review): X and y must be defined by the surrounding context.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42)

# k-NN classifier voting over the 6 nearest training points.
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)

# Predictions on the unseen test portion.
y_pred = knn.predict(X_test)

# Confusion matrix plus per-class precision/recall/F1.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
Add Comment
Please, Sign In to add comment