SHARE
TWEET

Untitled

a guest Jul 23rd, 2019 56 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  import pandas as pd
  from sklearn.tree import DecisionTreeClassifier
  from sklearn.model_selection import train_test_split
  from sklearn.pipeline import Pipeline
  from sklearn.impute import SimpleImputer
  from category_encoders import OneHotEncoder

  # Load the dataset, leaving out the name, ticket and cabin columns.
  df = pd.read_csv("train.csv").drop(columns=["Name", "Ticket", "Cabin"])

  # "Survived" is the label; every remaining column is passed on as a feature.
  features = df.drop(columns="Survived")
  target = df["Survived"]

  # Hold out 20% of the rows as a test set; the fixed seed keeps the
  # split reproducible.
  X_train, X_test, y_train, y_test = train_test_split(
      features, target, test_size=0.2, random_state=42
  )

  # Build the model as a pipeline: one-hot encoding first, then mean
  # imputation of missing values, then a depth-3 decision tree.
  encoder = OneHotEncoder()
  imputer = SimpleImputer(strategy="mean")
  classifier = DecisionTreeClassifier(max_depth=3, random_state=0)
  model = Pipeline(
      steps=[
          ("one-hot encoder", encoder),
          ("imputer", imputer),
          ("tree", classifier),
      ]
  )

  # Train on the training split, then score the fitted pipeline on the
  # data it was trained on.
  model.fit(X_train, y_train)
  train_score = model.score(X_train, y_train)

  # Evaluate on the held-out split.
  test_score = model.score(X_test, y_test)

  print("Train score: {}".format(train_score))
  print("Test score: {}".format(test_score))
  # Output recorded with the original paste:
  # Train score: 0.8342696629213483
  # Test score: 0.7988826815642458
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top