Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
- from sklearn.model_selection import train_test_split as tr_tst_splt
- from catboost import FeaturesData
- from catboost import CatBoostRegressor
- from catboost import Pool
- from catboost import CatBoost
# Load the video-game sales table and prepare it for modelling.
df = pd.read_csv('https://github.com/NikitaKurkov/datasets/raw/master/Video_Games.csv')

# 'Name' and 'Rating' are dropped and therefore never reach the model.
df = df.drop(columns=['Name', 'Rating'])

# NOTE(review): 'User_Score' is coerced to numeric before its median is taken,
# in case the column holds non-numeric placeholders -- confirm against the
# dataset. For an already-numeric column this is a no-op.
df['User_Score'] = pd.to_numeric(df['User_Score'], errors='coerce')

# Fill gaps: review columns get their own median, 'Developer' gets the literal
# string 'None'. The result is assigned back to ``df`` rather than calling
# ``df[col].fillna(..., inplace=True)``: the in-place form mutates an
# intermediate Series and, with pandas Copy-on-Write, never updates ``df``.
df = df.fillna({
    'Critic_Score': df['Critic_Score'].median(),
    'Critic_Count': df['Critic_Count'].median(),
    'User_Score': df['User_Score'].median(),
    'User_Count': df['User_Count'].median(),
    'Developer': 'None',
})

# Cap release years at 2015 (same in-place caveat as above, so assign back).
df['Year_of_Release'] = df['Year_of_Release'].clip(upper=2015.0)

# Japanese sales become the regression label. The column is appended as
# 'Target' and the original removed, which keeps the remaining column order.
df['Target'] = df['JP_Sales']
df = df.drop(columns='JP_Sales')
# Rows grouped by label value (not referenced by the visible part of the script).
group = df.groupby('Target')

# Random split: 30% of the rows are held out for evaluation.
train, test = tr_tst_splt(df, test_size=0.3)

# Feature columns, chosen by dtype. 'Target' is the label, so it is excluded
# from the numeric feature list; leaving it in would hand the model the answer
# as one of its inputs.
cols_cat = test.select_dtypes(include='object').columns.values.tolist()
cols_num = [
    name
    for name in test.select_dtypes(include='number').columns.values.tolist()
    if name != 'Target'
]

# Plain NumPy arrays for CatBoost: float32 for labels and numeric features,
# object dtype for the categorical ones.
label_train = train['Target'].to_numpy(dtype=np.float32)
categorical_train = train[cols_cat].to_numpy(dtype=object)
numeric_train = train[cols_num].to_numpy(dtype=np.float32)

label_test = test['Target'].to_numpy(dtype=np.float32)
categorical_test = test[cols_cat].to_numpy(dtype=object)
numeric_test = test[cols_num].to_numpy(dtype=np.float32)
# Wrap the training arrays in a CatBoost Pool; FeaturesData carries the
# numeric and categorical matrices together with their column names.
train_data = Pool(
    data=FeaturesData(
        num_feature_names=cols_num,
        cat_feature_names=cols_cat,
        num_feature_data=numeric_train,
        cat_feature_data=categorical_train,
    ),
    label=label_train,
)

# Same construction for the held-out rows.
test_data = Pool(
    data=FeaturesData(
        num_feature_names=cols_num,
        cat_feature_names=cols_cat,
        num_feature_data=numeric_test,
        cat_feature_data=categorical_test,
    ),
    label=label_test,
)
# Gradient-boosted regressor: 100 trees of depth 10 with verbose logging.
# The objective is RMSE, matching the evaluation metric. The previous
# 'LogLoss' value names a classification objective and is not valid for
# CatBoostRegressor.
model = CatBoostRegressor(
    iterations=100,
    depth=10,
    loss_function='RMSE',
    eval_metric='RMSE',
    logging_level='Verbose',
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement