Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train a multi-output random forest on the three most recent training months
# and write, for every test customer, up to seven recommended products that
# the customer did not already hold in the last training month.
#
# Expects `train`, `test` (DataFrames) and `dates` (column labels to exclude
# from the features) to be defined earlier in the file.

import time

# Target columns are the ones present in train but missing from test.
# NOTE: Index.difference returns its result sorted, so `t` is in sorted label
# order -- every product index used below refers to a position in `t`.
t = train.columns.difference(test.columns)
# Feature columns: everything shared with test, minus the date columns.
c = train.columns.difference(t).difference(dates)

X_test = test[c]

# Compute the training-row filter once and reuse it for features and targets.
in_training_months = train.fecha_dato.isin(['2016-05-28', '2016-04-28', '2016-03-28'])
X_train = train[in_training_months][c]
y_train = train[in_training_months][t]

clf = RandomForestClassifier(n_estimators=100, criterion='entropy', n_jobs=-1, random_state=5)

t0 = time.time()
clf.fit(X_train, y_train)
# The original bare expression `time.time()-t0` discarded its value when run
# as a script; keep the measurement and report it.
fit_seconds = time.time() - t0
print('fit time: %.1fs' % fit_seconds)

# Products each customer held in the last training month, stored as sets of
# positions into `t` (column 0 of each row is the customer id).
last_month_target = np.array(train[train.fecha_dato == '2016-05-28'][['ncodpers'] + list(t)])
last_products = {
    row[0]: set(np.flatnonzero(row[1:] == 1))
    for row in last_month_target
}

# predict_proba returns one (n_samples, n_classes) array per target. Pick the
# column belonging to class 1 explicitly: a product that never (or always)
# occurs in the training slice has a single class, and blindly taking column 1
# of a stacked array would fail in that case.
positive_proba = []
for target_proba, target_classes in zip(clf.predict_proba(X_test), clf.classes_):
    positive_idx = np.flatnonzero(np.asarray(target_classes) == 1)
    if positive_idx.size:
        positive_proba.append(target_proba[:, positive_idx[0]])
    else:
        # Class 1 was never seen for this product, so its probability is 0.
        positive_proba.append(np.zeros(target_proba.shape[0]))
y_pred = np.column_stack(positive_proba)

# Product names in the same order as the columns of y_train / y_pred. A
# hand-written list in a different order would map indices to wrong names.
target_cols = np.array(list(t))

# Customer ids in test-row order so that ncodpers[i] matches y_pred[i].
# (Going through set() would drop the row order and any duplicates.)
ncodpers = np.array(test['ncodpers'])

# Rank products per customer from most to least likely.
y_pred = np.argsort(y_pred, axis=1)
y_pred = np.fliplr(y_pred)

no_products = frozenset()
preds = []
for cust_id, ranked_products in zip(ncodpers, y_pred):
    used_products = last_products.get(cust_id, no_products)
    # Keep the seven best-ranked products the customer does not already hold.
    pred_top_products = [
        product_id for product_id in ranked_products
        if product_id not in used_products
    ][:7]
    # Explicit integer dtype keeps an empty recommendation list usable as an
    # index array (np.array([]) would default to float and fail to index).
    preds.append(np.array(pred_top_products, dtype=int))

final_preds = [' '.join(list(target_cols[pred])) for pred in preds]
out = pd.DataFrame({'ncodpers': ncodpers, 'added_products': final_preds})
out.to_csv('submission.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement