Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2019
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.26 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.tree import DecisionTreeClassifier
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.pipeline import Pipeline
  5. from sklearn.impute import SimpleImputer
  6. from category_encoders import OneHotEncoder
  7.  
  8. # lendo o dataset
  9. df = pd.read_csv("train.csv")
  10.  
  11. # retirando colunas com nome, ingresso e cabine dos conjuntos
  12. df.drop(["Name", "Ticket", "Cabin"], axis=1, inplace=True)
  13.  
  14. # dividindo em conjunto de treino e test
  15. X_train, X_test, y_train, y_test = train_test_split(df.drop(['Survived'], axis=1),
  16. df['Survived'],
  17. test_size=0.2,
  18. random_state=42)
  19.  
  20. # criando o modelo usando pipeline
  21. model = Pipeline(steps=[
  22. ('one-hot encoder', OneHotEncoder()),
  23. ('imputer', SimpleImputer(strategy='mean')),
  24. ('tree', DecisionTreeClassifier(max_depth=3, random_state=0))
  25. ])
  26.  
  27. # treinando o modelo
  28. model.fit(X_train, y_train)
  29. train_score = model.score(X_train, y_train)
  30.  
  31. # avaliando o modelo
  32. test_score = model.score(X_test, y_test)
  33.  
  34. print("Train score: {}".format(train_score))
  35. print("Test score: {}".format(test_score))
  36. # OUTPUT
  37. # Train score: 0.8342696629213483
  38. # Test score: 0.7988826815642458
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement