Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- from sklearn import preprocessing
- from sklearn.feature_extraction import DictVectorizer
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.datasets import make_classification
def ranfor_prediction(train, labels, test, *, n_estimators=10, max_depth=7,
                      random_state=None):
    """Fit a random-forest classifier and return its predictions for ``test``.

    Args:
        train: Training feature matrix handed to ``RandomForestClassifier.fit``.
        labels: Target values handed to ``fit`` together with ``train``.
        test: Feature matrix handed to ``RandomForestClassifier.predict``.
        n_estimators: Number of trees in the forest. Defaults to 10, the
            value this function previously hard-coded.
        max_depth: Maximum depth of each tree. Defaults to 7, the value this
            function previously hard-coded.
        random_state: Seed forwarded to the classifier. The default ``None``
            keeps the previous unseeded behaviour; pass an integer to make
            repeated runs reproducible.

    Returns:
        The result of ``RandomForestClassifier.predict(test)``.
    """
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
    )
    forest.fit(train, labels)
    return forest.predict(test)
# Feature columns removed from both the training and the test data.
DROPPED_COLUMNS = ['T2_V10', 'T2_V7', 'T1_V13', 'T1_V10']

train = pd.read_csv('train.csv', index_col=0)
test = pd.read_csv('test.csv', index_col=0)

# Split the target off the training frame. Leaving 'Hazard' among the features
# would leak the label into the model and give `train` one more column than
# `test`, which breaks the position-based encoding loop below.
labels = train.Hazard
train.drop('Hazard', axis=1, inplace=True)

train.drop(DROPPED_COLUMNS, axis=1, inplace=True)
test.drop(DROPPED_COLUMNS, axis=1, inplace=True)

# The encoding loop pairs train/test columns by position, so force the test
# frame into exactly the training column order (raises KeyError if a training
# feature is missing from the test data).
columns = train.columns
test = test[columns]
test_index = test.index

train_temp = np.array(train)
test_temp = np.array(test)

# Replace the values of every column with integer codes. Each encoder is
# fitted on the train and test values together so that a value occurring only
# in the test data still receives a code.
for i in range(train_temp.shape[1]):
    le = preprocessing.LabelEncoder()
    le.fit(list(train_temp[:, i]) + list(test_temp[:, i]))
    train_temp[:, i] = le.transform(train_temp[:, i])
    test_temp[:, i] = le.transform(test_temp[:, i])

train_temp = train_temp.astype(float)
test_temp = test_temp.astype(float)

# Train on the encoded features and write one predicted Hazard per test Id.
prediction = ranfor_prediction(train_temp, labels, test_temp)
prediction = pd.DataFrame({"Id": test_index, "Hazard": prediction})
prediction = prediction.set_index('Id')
prediction.to_csv('result.csv')
Add Comment
Please, Sign In to add comment