Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Transport-class labelling script.
#
# NOTE(review): the source this was recovered from had lost all indentation;
# the nesting below is reconstructed from the logic -- confirm the two
# time-limit checks near the end are nested as the author intended.
#
# Pipeline, as implemented by the statements below:
#   1. fit a random forest on the rows labelled '0' / '1' / '2', using the
#      first two columns as features and column 4 as the target;
#   2. predict a class for every '?' row;
#   3. drop a hand-picked set of '-' rows and add day / time-of-day columns;
#   4. overwrite the label of unlabelled ('-' or '?') rows lying in the outer
#      longitude zone with the forest's prediction;
#   5. from those relabelled rows derive, per day and per class, the smallest
#      and largest value found in column 3, and use these limits to correct
#      the predictions of the remaining '?' rows;
#   6. write one predicted class per '?' row to 'your_file.txt'.
#
# NOTE(review): the positional accesses assume the column layout
#   0 = log, 1 = lat, 2 and 3 = unix timestamps, 4 = label, 5 = day
# (day is appended below) -- confirm against the CSV header.
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
from datetime import datetime
import time

data = pd.read_csv('//home//alex//Загрузки//transport_data.csv')

# Training rows: those whose label is one of the known classes.  `.copy()`
# makes df1 an independent frame, so the dtype conversion below is not a
# chained assignment into a slice of `data`.
df1 = data[data.label.isin(['0', '1', '2'])].copy()
df1['label'] = df1['label'].astype(int)

mod = RandomForestClassifier()
mod.fit(df1.iloc[:, :2], df1.iloc[:, 4])

# Predict every '?' row and remember which slot of y_pred belongs to which
# row.  Later steps drop rows, renumber the index and overwrite some '?'
# labels, so re-selecting `label == '?'` afterwards would no longer line up
# with y_pred.
is_question = data.label == '?'
y_pred = mod.predict(data[is_question].iloc[:, :2])
pred_slot = pd.Series(np.arange(len(y_pred)), index=data.index[is_question])

# Hand-picked rectangles of '-' rows to discard, plus one hard-coded row.
sl = data[
    (data.label == '-')
    & (
        ((data.log < 30.3754) & (data.lat > 59.9432) & (data.log > 30.3333))
        | ((data.log < 30.3959) & (data.log > 30.35) & (data.lat < 59.925))
        | ((data.lat > 59.9584) & (data.log < 30.2798) & (data.log > 30.2636))
    )
]
data = data.drop(np.append(sl.index.values, 19053))
# Carry the row -> y_pred slot mapping through the drop; rows that were
# never '?' get NaN.  After reset_index the array lines up positionally.
pred_slot = pred_slot.reindex(data.index).to_numpy()
data = data.reset_index(drop=True)

# Time-of-day strings for columns 2 and 3, and a 0..4 day number taken from
# the day-of-month of column 3.  Built in one pass each instead of growing
# arrays with np.append inside a loop.
translation_day = {29: 0, 30: 1, 1: 2, 2: 3, 3: 4}
request_moments = [datetime.utcfromtimestamp(ts) for ts in data.iloc[:, 2]]
trans_moments = [datetime.utcfromtimestamp(ts) for ts in data.iloc[:, 3]]
request = np.array([m.strftime('%H:%M:%S') for m in request_moments])
trans = np.array([m.strftime('%H:%M:%S') for m in trans_moments])
# float dtype matches what the original np.append accumulation produced
day = np.array([translation_day[m.day] for m in trans_moments], dtype=float)
data["day"] = day
data["request"] = request
data["trans"] = trans

# Outer zone: rows west of min_1, west of min_2 while south of 59.9364, or
# east of the largest longitude seen among class 0 / class 1 training rows.
max_ = max(max(df1[df1.label == 0].log), max(df1[df1.label == 1].log))
min_1 = 30.309
min_2 = 30.3149
outer_zone = (
    (data.log < min_1)
    | ((data.log < min_2) & (data.lat < 59.9364))
    | (data.log > max_)
).to_numpy()

# Unlabelled rows inside the outer zone take the forest's prediction as
# their label (stored as an int, unlike the original string labels).
unlabelled = ((data.label == '-') | (data.label == '?')).to_numpy()
relabel_positions = np.flatnonzero(unlabelled & outer_zone)
unknown = mod.predict(data.iloc[relabel_positions, 0:2])
for slot, row_pos in enumerate(relabel_positions):
    data.iloc[row_pos, 4] = unknown[slot]

# Per day (0..4) and per class (0..2): smallest / largest column-3 value.
# `data.label == cls` compares against an int, so only the rows relabelled
# above can match; string labels such as '0' do not.
# min()/max() raise ValueError if a day/class combination has no rows.
time_limits_min = []
time_limits_max = []
for day_idx in range(5):
    cur_time_min = []
    cur_time_max = []
    for cls in range(3):
        candidates = data[(data.day == day_idx) & (data.label == cls)].values
        moments = [
            row[3] for row in candidates
            if row[4] == 2 or row[0] < 30.35
        ]
        cur_time_max.append(max(moments))
        cur_time_min.append(min(moments))
    time_limits_min.append(cur_time_min)
    time_limits_max.append(cur_time_max)

# Correct the predictions of '?' rows outside the outer zone.  Rows are
# located through pred_slot, so each correction lands on the y_pred entry
# that was actually computed for that row.
rows = data.values
for row_pos in np.flatnonzero(~np.isnan(pred_slot)):
    if outer_zone[row_pos]:
        continue  # outer-zone rows keep the forest's prediction
    row = rows[row_pos]
    slot = int(pred_slot[row_pos])
    day_i = int(row[5])
    moment = row[3]
    guess = y_pred[slot]

    # Earlier than anything seen for the predicted class: rotate to the next
    # class whose earliest value is below this row's, otherwise the third.
    # Skipped on day 0.
    if day_i != 0 and time_limits_min[day_i][guess] > moment:
        if time_limits_min[day_i][(guess + 1) % 3] < moment:
            guess = (guess + 1) % 3
        else:
            guess = (guess + 2) % 3

    # Later than anything seen for the (possibly updated) class: same
    # rotation against the upper limits.  Skipped on day 4.
    if day_i != 4 and time_limits_max[day_i][guess] < moment:
        if time_limits_max[day_i][(guess + 1) % 3] > moment:
            guess = (guess + 1) % 3
        else:
            guess = (guess + 2) % 3

    y_pred[slot] = guess

# Output file the predictions are written to, one class per line.
with open('your_file.txt', 'w') as f:
    f.writelines("%s\n" % int(item) for item in y_pred)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement