Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Exploratory script: calls presample.sample() with increasingly large
# requested sample sizes and logs how many indices come back, then logs the
# class distribution of the full (un-sampled) dependent array for comparison.
# The comment blocks after each step are the log output recorded from a run
# on 2018-05-16.

# import xgboost as xgb
import numpy as np
from logger import log
from presample import sample

# mmap_mode="r" opens the arrays read-only and memory-mapped instead of
# reading them fully into memory up front.
sample_x = np.load(r"...\Trends\h03v09_independent.npy", mmap_mode="r")
sample_y = np.load(r"...\Trends\h03v09_dependent.npy", mmap_mode="r")

# NOTE(review): the exact meaning of n/min/max is defined in presample.sample,
# which is not visible here. From the recorded output they look like the
# requested total and per-class lower/upper bounds -- confirm against presample.

# Run 1: n=20,000
sample_indices = sample(dependent=sample_y, n=20000, min=600, max=8000)
log.info("Number of indices taken: %s", sample_indices.shape)
# Recorded output:
# 2018-05-16 14:24:03,914 INFO MainProcess: Masking exclusions from Dependent array
# 2018-05-16 14:24:03,953 INFO MainProcess: Getting class stats from Dependent array
# 2018-05-16 14:24:04,071 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
# 2018-05-16 14:24:04,071 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
# 2018-05-16 14:24:04,071 INFO MainProcess: Adj class percents: [4.63, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
# 2018-05-16 14:24:04,535 INFO MainProcess: Number of indices taken: (21585,)

# Run 2: n=200,000
sample_indices = sample(dependent=sample_y, n=200000, min=6000, max=80000)
log.info("Number of indices taken: %s", sample_indices.shape)
# Recorded output:
# 2018-05-16 14:24:04,542 INFO MainProcess: Masking exclusions from Dependent array
# 2018-05-16 14:24:04,567 INFO MainProcess: Getting class stats from Dependent array
# 2018-05-16 14:24:04,695 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
# 2018-05-16 14:24:04,695 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
# 2018-05-16 14:24:04,697 INFO MainProcess: Adj class percents: [4.63, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
# 2018-05-16 14:24:05,170 INFO MainProcess: Number of indices taken: (211431,)

# Run 3: n=2,000,000
sample_indices = sample(dependent=sample_y, n=2000000, min=60000, max=800000)
log.info("Number of indices taken: %s", sample_indices.shape)
# Recorded output:
# 2018-05-16 14:24:05,182 INFO MainProcess: Masking exclusions from Dependent array
# 2018-05-16 14:24:05,207 INFO MainProcess: Getting class stats from Dependent array
# 2018-05-16 14:24:05,331 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
# 2018-05-16 14:24:05,332 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
# 2018-05-16 14:24:05,332 INFO MainProcess: Adj class percents: [4.62, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
# 2018-05-16 14:24:06,016 INFO MainProcess: Number of indices taken: (2099835,)

# Run 4: n=20,000,000 -- the recorded count (8415216) equals the full length
# of sample_y logged in the no-presample section below.
sample_indices = sample(dependent=sample_y, n=20000000, min=600000, max=8000000)
log.info("Number of indices taken: %s", sample_indices.shape)
# Recorded output:
# 2018-05-16 14:24:06,023 INFO MainProcess: Masking exclusions from Dependent array
# 2018-05-16 14:24:06,049 INFO MainProcess: Getting class stats from Dependent array
# 2018-05-16 14:24:06,159 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
# 2018-05-16 14:24:06,175 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
# 2018-05-16 14:24:06,176 INFO MainProcess: Adj class percents: [4.62, 25.66, 32.43, 26.16, 2.78, 2.78, 2.78, 2.78]
# 2018-05-16 14:24:07,485 INFO MainProcess: Number of indices taken: (8415216,)

# Run 5: n=100,000,000 -- again the recorded count equals the full length of
# sample_y.
sample_indices = sample(dependent=sample_y, n=100000000, min=6000000, max=80000000)
log.info("Number of indices taken: %s", sample_indices.shape)
# Recorded output:
# 2018-05-16 14:24:07,498 INFO MainProcess: Masking exclusions from Dependent array
# 2018-05-16 14:24:07,524 INFO MainProcess: Getting class stats from Dependent array
# 2018-05-16 14:24:07,642 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
# 2018-05-16 14:24:07,642 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
# 2018-05-16 14:24:07,642 INFO MainProcess: Adj class percents: [4.96, 22.9, 28.94, 23.35, 4.96, 4.96, 4.96, 4.96]
# 2018-05-16 14:24:08,994 INFO MainProcess: Number of indices taken: (8415216,)

# No Pre-Sample (Full dataset used)
log.info("No Presample taken")
log.info("Number of indices used: %s", sample_y.shape)

# Class labels and their share of the full dependent array.
vals, cnts = np.unique(sample_y, return_counts=True)
prct = cnts / np.sum(cnts)
disp_prct = [round(p * 100, 2) for p in prct]

# Per-class counts rebuilt from the percentages (rounded up) and turned back
# into percentages; with no presample applied these match the raw
# percentages in the recorded output below.
adj_counts = np.ceil(sample_y.shape[0] * prct)
prct_2 = adj_counts / np.sum(adj_counts)
disp_prct_2 = [round(p * 100, 2) for p in prct_2]

log.info("Classes present: %s", vals)
log.info("Class percentages: %s", disp_prct)
log.info("Adj class percents: %s ", disp_prct_2)
# Recorded output:
# 2018-05-16 14:24:09,011 INFO MainProcess: No Presample taken
# 2018-05-16 14:24:09,013 INFO MainProcess: Number of indices used: (8415216,)
# 2018-05-16 14:24:09,115 INFO MainProcess: Classes present: [1 2 3 4 5 6 7 8]
# 2018-05-16 14:24:09,115 INFO MainProcess: Class percentages: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
# 2018-05-16 14:24:09,115 INFO MainProcess: Adj class percents: [4.99, 27.69, 35.0, 28.23, 0.99, 1.09, 0.02, 1.99]
Add Comment
Please, Sign In to add comment