Advertisement
Guest User

Untitled

a guest
Jan 17th, 2020
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.29 KB | None | 0 0
  # Transport-label prediction script, reconstructed from a numbered paste whose indentation was lost.
"""Predict the transport label of every row marked '?' in the input CSV.

Assumed column order (the original code mixes names and positions, so both
must hold): 0 = ``log``, 1 = ``lat``, 2 = request timestamp, 3 = transmission
timestamp, 4 = ``label``. Timestamps are Unix seconds. ``log``/``lat`` are
presumably longitude/latitude -- confirm against the data description.

Pipeline:
1. Train a random forest on (log, lat) of the rows labelled '0', '1' or '2'.
2. Drop unlabelled ('-') rows that fall inside three hard-coded rectangles.
3. Give '-' and '?' rows outside the central longitude band the label the
   model predicts for them (stored as an integer).
4. From the integer-labelled rows derive, per day and per class, the
   earliest and latest transmission timestamp.
5. For '?' rows inside the band, switch the predicted class when the row's
   timestamp falls outside the predicted class's time window.
6. Write one prediction per line to ``OUTPUT_PATH``.
"""

import time
from datetime import datetime

import numpy as np
import pandas as pd

DATA_PATH = '//home//alex//Загрузки//transport_data.csv'
OUTPUT_PATH = 'your_file.txt'

# Bounds of the central band. NOTE(review): hand-tuned values taken verbatim
# from the original script; their derivation is not recorded.
MIN_LOG_1 = 30.309
MIN_LOG_2 = 30.3149
LAT_LIMIT = 59.9364

# Rows labelled 0 or 1 only contribute to the time limits below this ``log``.
TIME_REF_MAX_LOG = 30.35

# Day of month -> consecutive day number (the data spans the 29th to the 3rd).
TRANSLATION_DAY = {29: 0, 30: 1, 1: 2, 2: 3, 3: 4}


def outside_core_area(log, lat, max_log):
    """Return True where a point lies outside the central band.

    Works on scalars and element-wise on pandas Series / numpy arrays.
    """
    return (
        (log < MIN_LOG_1)
        | ((log < MIN_LOG_2) & (lat < LAT_LIMIT))
        | (log > max_log)
    )


def add_time_columns(frame):
    """Add ``day``, ``request`` and ``trans`` columns to *frame* in place.

    ``request`` / ``trans`` hold the UTC time of day ('%H:%M:%S') of columns
    2 and 3; ``day`` is the consecutive day number of column 3.

    Raises:
        KeyError: if a timestamp falls on a day missing from TRANSLATION_DAY.
    """
    # Vectorised conversion: the original appended to numpy arrays row by row
    # (quadratic) and used the deprecated datetime.utcfromtimestamp.
    request_times = pd.to_datetime(frame.iloc[:, 2], unit='s')
    trans_times = pd.to_datetime(frame.iloc[:, 3], unit='s')

    # Explicit dict lookup (not Series.map) so an unexpected day still fails loudly.
    frame["day"] = np.array(
        [TRANSLATION_DAY[d] for d in trans_times.dt.day], dtype=float
    )
    frame["request"] = request_times.dt.strftime('%H:%M:%S')
    frame["trans"] = trans_times.dt.strftime('%H:%M:%S')
    return frame


def time_limits(frame):
    """Return ``(mins, maxs)``: earliest/latest column-3 timestamp per day and class.

    ``mins[day][cls]`` and ``maxs[day][cls]`` cover days 0-4 and classes 0-2.
    Only rows whose label equals the *integer* class are used; when labels
    are read from the CSV as strings these are exactly the rows relabelled
    by the model. NOTE(review): confirm that excluding the original
    '0'/'1'/'2' string rows is intended.

    Raises:
        ValueError: if some (day, class) pair has no qualifying row.
    """
    label = frame["label"]
    timestamps = frame.iloc[:, 3]
    usable = (label == 2) | (frame["log"] < TIME_REF_MAX_LOG)

    mins, maxs = [], []
    for day in range(5):
        day_rows = (frame["day"] == day) & usable
        day_min, day_max = [], []
        for cls in range(3):
            times = timestamps[day_rows & (label == cls)]
            if times.empty:
                raise ValueError(
                    "no reference rows for day %d, label %d" % (day, cls)
                )
            day_min.append(times.min())
            day_max.append(times.max())
        mins.append(day_min)
        maxs.append(day_max)
    return mins, maxs


def adjust_prediction(label, day, timestamp, time_limits_min, time_limits_max):
    """Return *label*, moved to another class if *timestamp* is outside its window.

    If the timestamp precedes the predicted class's earliest time, the next
    class (cyclically) is chosen when it has already started, otherwise the
    remaining one. The same is then done against the latest times, using the
    possibly updated label.
    """
    # The lower bound is not applied on day 0 and the upper bound not on
    # day 4 -- presumably because recording starts/ends mid-window there.
    if day != 0 and time_limits_min[day][label] > timestamp:
        if time_limits_min[day][(label + 1) % 3] < timestamp:
            label = (label + 1) % 3
        else:
            label = (label + 2) % 3

    if day != 4 and time_limits_max[day][label] < timestamp:
        if time_limits_max[day][(label + 1) % 3] > timestamp:
            label = (label + 1) % 3
        else:
            label = (label + 2) % 3
    return label


def main():
    """Run the whole pipeline: read DATA_PATH, predict, write OUTPUT_PATH."""
    # Deferred import: keeps the pure helpers above importable without scikit-learn.
    from sklearn.ensemble import RandomForestClassifier

    data = pd.read_csv(DATA_PATH)

    # .copy() so the dtype conversion does not write into a slice of `data`.
    labelled = data[data.label.isin(['0', '1', '2'])].copy()
    labelled['label'] = labelled['label'].astype(int)

    mod = RandomForestClassifier()
    mod.fit(labelled.iloc[:, :2], labelled.iloc[:, 4])

    # One prediction per '?' row of the *original* file, in file order.
    question_rows = data.index[data.label == '?']
    y_pred = mod.predict(data.loc[question_rows].iloc[:, :2])
    # Original row index -> position in y_pred, so later steps cannot misalign.
    pred_slot = pd.Series(np.arange(len(question_rows)), index=question_rows)

    # Unlabelled rows inside three hand-picked rectangles are discarded.
    in_rectangles = (
        ((data.log < 30.3754) & (data.lat > 59.9432) & (data.log > 30.3333))
        | ((data.log < 30.3959) & (data.log > 30.35) & (data.lat < 59.925))
        | ((data.lat > 59.9584) & (data.log < 30.2798) & (data.log > 30.2636))
    )
    sl = data[(data.label == '-') & in_rectangles]
    # NOTE(review): row 19053 is dropped unconditionally; the reason is not
    # recorded in the original script (presumably an outlier).
    data = data.drop(np.append(sl.index.values, 19053))
    # The original index is kept (no reset_index) so rows stay addressable
    # through `pred_slot`.

    add_time_columns(data)

    max_ = labelled.loc[labelled.label.isin([0, 1]), 'log'].max()

    # Remember which rows are '?' *before* relabelling overwrites some of them.
    # The original re-selected `label == '?'` afterwards while still counting
    # y_pred positions from zero, which shifted every later correction.
    was_question = data.label == '?'
    outside = outside_core_area(data.log, data.lat, max_)
    relabel = ((data.label == '-') | was_question) & outside
    data.loc[relabel, 'label'] = mod.predict(data[relabel].iloc[:, 0:2])

    time_limits_min, time_limits_max = time_limits(data)

    questions = data[was_question]
    slots = pred_slot.loc[questions.index]
    rows = zip(slots, outside[was_question], questions["day"], questions.iloc[:, 3])
    for slot, is_outside, day, timestamp in rows:
        if is_outside:
            # Outside the band the location-based prediction is kept as is.
            continue
        y_pred[slot] = adjust_prediction(
            int(y_pred[slot]), int(day), timestamp,
            time_limits_min, time_limits_max,
        )

    with open(OUTPUT_PATH, 'w') as f:  # output file
        f.writelines("%s\n" % int(item) for item in y_pred)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement