Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- import os
def load_data(base_dir, n_parts=4):
    """Load the training features, training targets and test features.

    The training features are read from ``x_train_1.npz`` ...
    ``x_train_<n_parts>.npz`` inside ``base_dir`` and stacked along the
    first axis in that order. The targets come from ``y_train.npz`` and the
    test features from ``x_test.npz``. Only the first array stored in each
    archive is used.

    Args:
        base_dir: Directory that contains the ``.npz`` archives.
        n_parts: Number of ``x_train_<i>.npz`` chunk files to read
            (numbered from 1). Defaults to 4.

    Returns:
        A tuple ``(x_train, y_train, x_test)`` of numpy arrays.

    Raises:
        ValueError: If ``n_parts`` is smaller than 1.
    """
    if n_parts < 1:
        raise ValueError(f"n_parts must be at least 1, got {n_parts}")

    print(f"base_dir = {base_dir}")
    print(os.listdir(f"{base_dir}"))

    def _first_array(filename):
        # Print the archive's member names and return its first array.
        with np.load(filename) as data:
            print(f"files in {filename}: {data.files}")
            return data[data.files[0]]

    # Gather every chunk first and concatenate once; concatenating inside
    # the loop would re-copy the accumulated array for each new chunk.
    chunks = [
        _first_array(f"{base_dir}/x_train_{i}.npz")
        for i in range(1, n_parts + 1)
    ]
    x_train = np.concatenate(chunks)

    y_train = _first_array(f"{base_dir}/y_train.npz")
    x_test = _first_array(f"{base_dir}/x_test.npz")
    return x_train, y_train, x_test
def save_submission(submission, name="prediction.csv"):
    """Write predictions to a CSV file with a 1-based ``Id`` index.

    Args:
        submission: Data accepted by ``pd.DataFrame``; a column labelled
            ``0`` (as produced from a 1-D array) is renamed to ``Label``.
        name: Path of the CSV file to write.
    """
    frame = pd.DataFrame(submission).rename(columns={0: "Label"})
    # Shift the row labels so the written ids start at 1 instead of 0.
    shifted = frame.index + 1
    shifted.name = "Id"
    frame.index = shifted
    frame.to_csv(name)
import catboost as cb

# Training features, training targets and test features from ../input.
x, y, t = load_data("../input")

# Only the settings that differ from CatBoost's defaults are listed. The
# earlier version of this call also spelled out every other constructor
# keyword as None, which left those options at their defaults and made the
# script depend on each of those keyword names still existing.
CBR = cb.CatBoostRegressor(
    iterations=10000,
    learning_rate=0.1,
    depth=8,
    l2_leaf_reg=0.1,
    loss_function='RMSE',
    thread_count=2,
    task_type="GPU",
    random_state=42,
)

# The model is fitted on targets multiplied by 5.
# NOTE(review): presumably this maps the labels onto the 0..20 integer grid
# that the clamp below assumes -- confirm against the task description.
CBR.fit(x, y * 5)

# astype(int) truncates toward zero (it does not round to nearest).
submission = CBR.predict(t).astype(int)

# Clamp predictions to the closed range [0, 20].
# NOTE(review): the values are written in the scaled (x5) units and are not
# divided back by 5 -- verify this is what the submission format expects.
submission = np.clip(submission, 0, 20)
save_submission(submission)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement