Advertisement
Guest User

Untitled

a guest
Dec 11th, 2016
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.07 KB | None | 0 0
  1. t = train.columns.difference(test.columns)
  2. c = train.columns.difference(t).difference(dates)
  3. X_test = test[c]
  4. X_train = train[train.fecha_dato.isin(['2016-05-28', '2016-04-28', '2016-03-28'])][c]
  5. y_train = train[train.fecha_dato.isin(['2016-05-28', '2016-04-28', '2016-03-28'])][t]
  6. clf = RandomForestClassifier(n_estimators=100, criterion='entropy', n_jobs=-1, random_state=5)
  7. import time
  8. t0 = time.time()
  9. clf.fit(X_train, y_train)
  10. time.time()-t0
  11.  
  12. last_month_target = np.array(train[train.fecha_dato == '2016-05-28'][['ncodpers']+list(t)])
  13. last_products = {}
  14. for i in range(last_month_target.shape[0]):
  15. row = last_month_target[i]
  16. cust_id = row[0]
  17. used_products = set(np.where(row[1:] == 1)[0])
  18. last_products[cust_id] = used_products
  19.  
  20. y_pred = np.array(clf.predict_proba(X_test))[:, :, 1].T
  21.  
  22.  
  23.  
  24. target_cols = np.array([
  25. 'ind_ahor_fin_ult1',
  26. 'ind_aval_fin_ult1',
  27. 'ind_cco_fin_ult1',
  28. 'ind_cder_fin_ult1',
  29. 'ind_cno_fin_ult1',
  30. 'ind_ctju_fin_ult1',
  31. 'ind_ctma_fin_ult1',
  32. 'ind_ctop_fin_ult1',
  33. 'ind_ctpp_fin_ult1',
  34. 'ind_deco_fin_ult1',
  35. 'ind_deme_fin_ult1',
  36. 'ind_dela_fin_ult1',
  37. 'ind_ecue_fin_ult1',
  38. 'ind_fond_fin_ult1',
  39. 'ind_hip_fin_ult1',
  40. 'ind_plan_fin_ult1',
  41. 'ind_pres_fin_ult1',
  42. 'ind_reca_fin_ult1',
  43. 'ind_tjcr_fin_ult1',
  44. 'ind_valo_fin_ult1',
  45. 'ind_viv_fin_ult1',
  46. 'ind_nomina_ult1',
  47. 'ind_nom_pens_ult1',
  48. 'ind_recibo_ult1'
  49. ])
  50.  
  51.  
  52. ncodpers = np.array(list(set(test['ncodpers'])))
  53.  
  54. y_pred = np.argsort(y_pred, axis=1)
  55. y_pred = np.fliplr(y_pred)
  56.  
  57. preds = []
  58. for i in range(y_pred.shape[0]):
  59. cust_id = ncodpers[i]
  60. used_products = last_products.get(cust_id, {})
  61.  
  62. pred_top_products = []
  63. for product_id in y_pred[i]:
  64. if product_id not in used_products:
  65. pred_top_products.append(product_id)
  66. if len(pred_top_products) == 7:
  67. break
  68.  
  69. preds.append(np.array(pred_top_products))
  70.  
  71. final_preds = [' '.join(list(target_cols[pred])) for pred in preds]
  72. out = pd.DataFrame({'ncodpers': ncodpers, 'added_products': final_preds})
  73. out.to_csv('submission.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement