Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the census training set and discard the 'fnlwgt' column, which is not
# used as a feature below.
census = pd.read_csv('hw5_census_dist/train_data.csv')
census = census.drop('fnlwgt', axis=1)

# Impute the '?' placeholder in every non-numeric column with that column's
# most frequent *known* value.
for category in census.columns:
    column = census[category]

    # A numeric column cannot contain the string '?', so there is nothing to
    # replace there.
    if pd.api.types.is_numeric_dtype(column):
        continue

    # Exclude the placeholder before taking the mode; otherwise a column
    # dominated by '?' would be "imputed" with '?' itself.
    known = column[column != '?']
    modes = known.mode()
    if modes.empty:
        # No known value to impute from (column is all '?' and/or NaN).
        continue

    replace = modes[0]
    census[category] = column.replace('?', replace)

# `shuffle` comes from outside this snippet (presumably sklearn.utils.shuffle
# -- confirm against the file's imports); it randomises the row order so the
# positional split below is not biased by the file's ordering.
census = shuffle(census)

# The first 20% of the shuffled rows form the validation set; the remaining
# rows form the training set.
length = int(0.2 * len(census))
census_features = census.drop('label', axis=1)
census_labels = census['label']

# `.values` replaces `DataFrame.as_matrix`, which was removed in pandas 1.0,
# and yields the same NumPy arrays on older pandas releases as well.
census_training_data = census_features[length:].values
census_validation_data = census_features[:length].values
census_training_labels = census_labels[length:].values
census_validation_labels = census_labels[:length].values
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement