Guest User

Untitled

a guest
May 21st, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.87 KB | None | 0 0
  1. # import xgboost as xgb
  2. import numpy as np
  3. from logger import log
  4. from presample import sample
  5.  
  6.  
  7. sample_x = np.load(r"...\Trends\h03v09_independent.npy", mmap_mode="r")
  8. sample_y = np.load(r"...\Trends\h03v09_dependent.npy", mmap_mode="r")
  9.  
  10. sample_indices = sample(dependent=sample_y, n=20000, min=600, max=8000)
  11. log.info("Number of indices taken: %s" % str(sample_indices.shape))
  12. """
  13. 2018-05-16 14:24:03,914 INFO MainProcess: Masking exclusions from Dependent array
  14. 2018-05-16 14:24:03,953 INFO MainProcess: Getting class stats from Dependent array
  15. 2018-05-16 14:24:04,071 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
  16. 2018-05-16 14:24:04,071 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  17. 2018-05-16 14:24:04,071 INFO MainProcess: Adj class percents: [4.63, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
  18. 2018-05-16 14:24:04,535 INFO MainProcess: Number of indices taken: (21585,)
  19. """
  20. sample_indices = sample(dependent=sample_y, n=200000, min=6000, max=80000)
  21. log.info("Number of indices taken: %s" % str(sample_indices.shape))
  22. """
  23. 2018-05-16 14:24:04,542 INFO MainProcess: Masking exclusions from Dependent array
  24. 2018-05-16 14:24:04,567 INFO MainProcess: Getting class stats from Dependent array
  25. 2018-05-16 14:24:04,695 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
  26. 2018-05-16 14:24:04,695 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  27. 2018-05-16 14:24:04,697 INFO MainProcess: Adj class percents: [4.63, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
  28. 2018-05-16 14:24:05,170 INFO MainProcess: Number of indices taken: (211431,)
  29. """
  30. sample_indices = sample(dependent=sample_y, n=2000000, min=60000, max=800000)
  31. log.info("Number of indices taken: %s" % str(sample_indices.shape))
  32. """
  33. 2018-05-16 14:24:05,182 INFO MainProcess: Masking exclusions from Dependent array
  34. 2018-05-16 14:24:05,207 INFO MainProcess: Getting class stats from Dependent array
  35. 2018-05-16 14:24:05,331 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
  36. 2018-05-16 14:24:05,332 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  37. 2018-05-16 14:24:05,332 INFO MainProcess: Adj class percents: [4.62, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
  38. 2018-05-16 14:24:06,016 INFO MainProcess: Number of indices taken: (2099835,)
  39. """
  40. sample_indices = sample(dependent=sample_y, n=20000000, min=600000, max=8000000)
  41. log.info("Number of indices taken: %s" % str(sample_indices.shape))
  42. """
  43. 2018-05-16 14:24:06,023 INFO MainProcess: Masking exclusions from Dependent array
  44. 2018-05-16 14:24:06,049 INFO MainProcess: Getting class stats from Dependent array
  45. 2018-05-16 14:24:06,159 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
  46. 2018-05-16 14:24:06,175 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  47. 2018-05-16 14:24:06,176 INFO MainProcess: Adj class percents: [4.62, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
  48. 2018-05-16 14:24:07,485 INFO MainProcess: Number of indices taken: (8415216,)
  49. """
  50. sample_indices = sample(dependent=sample_y, n=100000000, min=6000000, max=80000000)
  51. log.info("Number of indices taken: %s" % str(sample_indices.shape))
  52. """
  53. 2018-05-16 14:24:07,498 INFO MainProcess: Masking exclusions from Dependent array
  54. 2018-05-16 14:24:07,524 INFO MainProcess: Getting class stats from Dependent array
  55. 2018-05-16 14:24:07,642 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
  56. 2018-05-16 14:24:07,642 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  57. 2018-05-16 14:24:07,642 INFO MainProcess: Adj class percents: [4.96, 22.9, 28.94, 23.35, 4.96, 4.96, 4.96, 4.96]
  58. 2018-05-16 14:24:08,994 INFO MainProcess: Number of indices taken: (8415216,)
  59. """
  60.  
  61. """No Pre-Sample (Full dataset used)"""
  62. log.info("No Presample taken")
  63. log.info("Number of indices used: %s" % str(sample_y.shape))
  64. vals, cnts = np.unique(sample_y, return_counts=True)
  65. prct = cnts / np.sum(cnts)
  66. disp_prct = [round(p * 100, 2) for p in prct]
  67. adj_counts = np.ceil(sample_y.shape[0] * prct)
  68. prct_2 = adj_counts / np.sum(adj_counts)
  69. disp_prct_2 = [round(p * 100, 2) for p in prct_2]
  70. log.info("Classes present: %s" % str(vals))
  71. log.info("Class percentages: %s" % str(disp_prct))
  72. log.info("Adj class percents: %s " % str(disp_prct_2))
  73. """
  74. 2018-05-16 14:24:09,011 INFO MainProcess: No Presample taken
  75. 2018-05-16 14:24:09,013 INFO MainProcess: Number of indices used: (8415216,)
  76. 2018-05-16 14:24:09,115 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
  77. 2018-05-16 14:24:09,115 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  78. 2018-05-16 14:24:09,115 INFO MainProcess: Adj class percents: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
  79. """
Add Comment
Please, Sign In to add comment