Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Fit a depth-3 decision tree on ``train.csv`` and print train/test scores.

The script reads ``train.csv`` from the current working directory, drops the
``Name``, ``Ticket`` and ``Cabin`` columns, holds out 20% of the rows for
evaluation, and fits a pipeline (one-hot encoder -> mean imputer -> decision
tree) that predicts the ``Survived`` column.
"""
import pandas as pd
from category_encoders import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

# Load the dataset.
df = pd.read_csv("train.csv")

# Remove the name, ticket and cabin columns before modelling.
df = df.drop(columns=["Name", "Ticket", "Cabin"])

# Split into training and test sets: 20% held out, fixed seed so the split
# is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=["Survived"]),
    df["Survived"],
    test_size=0.2,
    random_state=42,
)

# Build the model as a pipeline so the encoder and imputer are fitted on the
# training data only and then re-applied unchanged when scoring.
model = Pipeline(
    steps=[
        ("one-hot encoder", OneHotEncoder()),
        ("imputer", SimpleImputer(strategy="mean")),
        ("tree", DecisionTreeClassifier(max_depth=3, random_state=0)),
    ]
)

# Train the model.
model.fit(X_train, y_train)
train_score = model.score(X_train, y_train)

# Evaluate the model on the held-out rows.
test_score = model.score(X_test, y_test)

print(f"Train score: {train_score}")
print(f"Test score: {test_score}")

# Output recorded by the original author:
# Train score: 0.8342696629213483
# Test score: 0.7988826815642458
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement