Advertisement
Guest User

Untitled

a guest
Dec 15th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.21 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5.  
  6. from sklearn.model_selection import train_test_split as tr_tst_splt
  7. from catboost import FeaturesData
  8. from catboost import CatBoostRegressor
  9. from catboost import Pool
  10. from catboost import CatBoost
  11.  
  12. df = pd.read_csv('https://github.com/NikitaKurkov/datasets/raw/master/Video_Games.csv')
  13. df = df.drop(columns = 'Name')
  14. df = df.drop(columns = 'Rating')
  15. df['Critic_Score'].fillna(df['Critic_Score'].median(), inplace = True)
  16. df['Critic_Count'].fillna(df['Critic_Count'].median(), inplace = True)
  17. df['User_Score'].fillna(df['User_Score'].median(), inplace = True)
  18. df['User_Count'].fillna(df['User_Count'].median(), inplace = True)
  19. df['Developer'].fillna('None', inplace = True)
  20. df['Year_of_Release'].clip(upper = 2015.0, inplace = True)
  21. df['Target'] = df['JP_Sales']
  22. df = df.drop(columns = 'JP_Sales')
  23. group = df.groupby('Target')
  24. train, test = tr_tst_splt(df, test_size = 0.3)
  25.  
  26. cols_cat = test.select_dtypes(include = 'object').columns.values.tolist()
  27. cols_num = test.select_dtypes(include = 'number').columns.values.tolist()
  28.  
  29. label_train = np.array(train['Target'], dtype = np.float32)
  30. categorical_train = np.array(train[cols_cat])
  31. numeric_train = np.array(train[cols_num], dtype = np.float32)
  32.  
  33. label_test = np.array(test['Target'], dtype = np.float32)
  34. categorical_test = np.array(test[cols_cat])
  35. numeric_test = np.array(test[cols_num], dtype = np.float32)
  36.  
  37. train_data = Pool(FeaturesData(num_feature_data = numeric_train,
  38. cat_feature_data = categorical_train,
  39. num_feature_names = cols_num,
  40. cat_feature_names = cols_cat),
  41. label = label_train)
  42.  
  43. test_data = Pool(FeaturesData(num_feature_data = numeric_test,
  44. cat_feature_data = categorical_test,
  45. num_feature_names = cols_num,
  46. cat_feature_names = cols_cat),
  47. label = label_test)
  48.  
  49. model = CatBoostRegressor(iterations = 100, depth = 10, loss_function = 'LogLoss',
  50. logging_level = 'Verbose', eval_metric = 'RMSE')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement