Advertisement
Guest User

Untitled

a guest
Feb 10th, 2016
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.85 KB | None | 0 0
  1. __author__ = 'Ivan'
  2. from datetime import datetime, timedelta
  3. import matplotlib.pyplot as plt
  4. import math
  5. import numpy as np
  6. from scipy.optimize import minimize
  7.  
  8.  
  9. def load_real_data(period, start_date):
  10. with open("./../data/GI_C1NB_C1FG_SHEP_restored_20130901000000_01.txt", "r") as data:
  11. data = data.read()
  12. s1 = {}
  13. end_date = start_date + timedelta(days=period)
  14. flag_date = datetime.strptime("2013-12-30 00:00:00", "%Y-%m-%d %H:%M:%S")
  15. if end_date > flag_date:
  16. print "Too big period"
  17. return Exception
  18. for line in data.split("\n"):
  19. line = line.split("\t")
  20. if start_date == end_date:
  21. return s1
  22. else:
  23. s1[start_date] = float(line[2])
  24. start_date += timedelta(hours=1)
  25. return s1
  26.  
  27.  
  28. def load_forecast(forecast, period, start_date):
  29. with open("../data/" + forecast) as data:
  30. data = data.read()
  31. end_date = start_date + timedelta(days=period)
  32. flag_date = datetime.strptime("2013-12-30 00:00:00", "%Y-%m-%d %H:%M:%S")
  33. if end_date > flag_date:
  34. print "Too big forecast period"
  35. return Exception
  36. forecast = {}
  37. for line in data.split("\n"):
  38. line = line.split("\t") if len(line.split("\t")) != 0 else line.split(" ")
  39. for elem in line[1:7]:
  40. if start_date == end_date:
  41. return forecast
  42. else:
  43. forecast[start_date] = float(elem)
  44. start_date += timedelta(hours=1)
  45.  
  46.  
  47. def print_plot(real_data, forecast, start_date, period):
  48. real_data_values = []
  49. forecast_values = []
  50. for i in range(0, period * 24):
  51. real_data_values.append(float(real_data[start_date]))
  52. forecast_values.append(float(forecast[start_date]))
  53. start_date += timedelta(hours=1)
  54. plt.plot(real_data_values, "r", forecast_values, "b")
  55. plt.xticks(range(0, period * 24, 6))
  56. plt.grid()
  57. plt.show()
  58.  
  59.  
  60. def print_multiplot(real_data, forecast, forecast_opt, start_date, period):
  61. real_data_values = []
  62. forecast_values = []
  63. opt_forecast_values = []
  64. for i in range(0, period * 24):
  65. real_data_values.append(float(real_data[start_date]))
  66. forecast_values.append(float(forecast[start_date]))
  67. opt_forecast_values.append(float(forecast_opt[start_date]))
  68. start_date += timedelta(hours=1)
  69. plt.plot(real_data_values, "r", forecast_values, "b", opt_forecast_values, "g")
  70. plt.xticks(range(0, period * 24, 6))
  71. plt.grid()
  72. plt.show()
  73.  
  74.  
  75. def print_multiplot3(real_data, forecast1, forecast2, ensemble, start_date, period):
  76. real_data_values = []
  77. forecast1_values = []
  78. forecast2_values = []
  79. ensemble_values = []
  80. for i in range(0, period * 24):
  81. real_data_values.append(float(real_data[start_date]))
  82. forecast1_values.append(float(forecast1[start_date]))
  83. forecast2_values.append(float(forecast2[start_date]))
  84. ensemble_values.append(float(ensemble[start_date]))
  85. start_date += timedelta(hours=1)
  86. plt.plot(real_data_values, "r", forecast1_values, "b", forecast2_values, "g", ensemble_values, "*")
  87. plt.xticks(range(0, period * 24, 6))
  88. plt.grid()
  89. plt.show()
  90.  
  91.  
  92. def calc_delta(s1, fc, start_date, period):
  93. sum_delta = 0.
  94. for i in range(0, period * 24):
  95. sum_delta += float(fc[start_date]) - float(s1[start_date])
  96. start_date += timedelta(hours=1)
  97. return sum_delta / (period * 24)
  98.  
  99.  
  100. def subtract_delta(forecast, delta, start_date, period):
  101. for i in range(0, period * 24):
  102. forecast[start_date] = float(forecast[start_date]) - delta
  103. start_date += timedelta(hours=1)
  104. return forecast
  105.  
  106.  
  107. def calc_derivatives(forecast, start_date, period):
  108. derivative = {}
  109. for i in range(0, period * 24 - 1):
  110. df = float(forecast[start_date + timedelta(hours=1)] - forecast[start_date])
  111. derivative[start_date] = df
  112. start_date += timedelta(hours=1)
  113. derivative[start_date] = forecast[start_date] - forecast[start_date - timedelta(hours=1)]
  114. return derivative
  115.  
  116.  
  117. def calc_optforecast(forecast, s1, start_date, period):
  118. a_0_ls = np.linspace(-5.0, 5.0, 101)
  119. a_1_ls = np.linspace(0.0, 1.0, 11)
  120. alpha = 0.00005
  121. m = period * 24
  122. ens = {}
  123. lowest_mae = 10000
  124. k = 0
  125. for a_1 in a_1_ls:
  126. print str(k) + "% processed"
  127. k += 10
  128. for a_0 in a_0_ls:
  129. core = create_ensemble(forecast, a_1, a_0, start_date, period)
  130.  
  131. a_1 -= alpha * forecast[start_date] * (core[start_date] - s1[start_date]) / m
  132. a_0 -= alpha * (core[start_date] - s1[start_date]) / m
  133.  
  134. new_ens = create_ensemble(forecast, a_1, a_0, start_date, period)
  135. new_mae = calc_mae(new_ens, s1, start_date, period)
  136. if new_mae < lowest_mae:
  137. print new_mae, a_1, a_0
  138. lowest_mae = new_mae
  139.  
  140.  
  141.  
  142. def calc_optforecast2(forecast1, forecast2, s1, start_date, period):
  143.  
  144. alpha = 0.005
  145. old_mae = 999999
  146. old_forecast = {}
  147. m = period * 24
  148. a_1 = np.linspace(0.0, 1.0, 11)
  149. a_2 = np.linspace(0.0, 1.0, 11)
  150. a_0 = np.linspace(-5.0, 5.0, 101)
  151. k = 0
  152. lowest_mae = 10000
  153. lowest_mae_a0 = 1000
  154. lowest_mae_a1 = 1000
  155. lowest_mae_a2 = 1000
  156. for elem1 in a_1:
  157.  
  158. for elem2 in a_2:
  159. print str(k) + "% processed"
  160. k += 1
  161. for elem0 in a_0:
  162. for i in range(0, m):
  163. core = create_ensemble2(forecast1, forecast2, elem1, elem2, elem0, start_date, period)
  164. elem1 -= alpha * (core[start_date] - s1[start_date]) * forecast1[start_date] / m
  165. elem2 -= alpha * (core[start_date] - s1[start_date]) * forecast2[start_date] / m
  166. elem0 -= alpha * (core[start_date] - s1[start_date]) / m
  167. new_ens = create_ensemble2(forecast1, forecast2, elem1, elem2, elem0, start_date, period)
  168. new_mae = calc_mae(new_ens, s1, start_date, period)
  169. if new_mae < lowest_mae:
  170. print new_mae, elem1, elem2, elem0
  171. lowest_mae = new_mae
  172. lowest_mae_a1 = elem1
  173. lowest_mae_a2 = elem2
  174. lowest_mae_a0 = elem0
  175. # if new_mae > old_mae:
  176. # return old_forecast
  177. # elif i == (period * 24 - 1):
  178. # return new_ens
  179. old_mae = new_mae
  180. # old_forecast = new_ens
  181. return str(lowest_mae) + "|" + str(lowest_mae_a1) + "|" + str(lowest_mae_a2) + "|" + str(lowest_mae_a0)
  182.  
  183.  
  184.  
  185. def cacl_optforecast3(forecast1, forecast2, forecast3, s1, start_date, period):
  186. a_0 = 0
  187. a_1 = 0
  188. a_2 = 0
  189. a_3 = 0
  190. alpha = 0.005
  191. old_mae = 9999999
  192. old_forecast = {}
  193. for i in range(0, period*24):
  194. h = abs(forecast1[start_date] * a_1 + forecast2[start_date] * a_2 + forecast3[start_date] * a_3 + a_0 - s1[start_date]) / (period * 24)
  195. a_0 -= (h - s1[start_date]) * alpha / (period * 24)
  196. a_1 -= (h - s1[start_date]) * forecast1[start_date] * alpha / (period * 24)
  197. a_2 -= (h - s1[start_date]) * forecast2[start_date] * alpha / (period * 24)
  198. a_3 -= (h - s1[start_date]) * forecast3[start_date] * alpha / (period * 24)
  199.  
  200. new_forecast = create_ensemble3(forecast1, forecast2, forecast3, a_1, a_2, a_3, a_0, start_date, period)
  201. new_mae = calc_mae(new_forecast, s1, start_date, period)
  202. print new_mae
  203. if new_mae > old_mae:
  204. return old_forecast
  205. elif i == (period * 24 - 1):
  206. return new_forecast
  207. old_mae = new_mae
  208. old_forecast = new_forecast
  209.  
  210.  
  211. def calc_mae(forecast, real_data, start_date, period):
  212. diff = 0
  213. for i in range(0, period * 24):
  214. diff += abs(forecast[start_date] - real_data[start_date])
  215. start_date += timedelta(hours=1)
  216. return diff / (period * 24)
  217.  
  218.  
  219. def create_ensemble(forecast, a_1, a_0, start_date, period):
  220. new_forecast = {}
  221. for i in range(0, period * 24):
  222. new_forecast[start_date] = a_1 * forecast[start_date] + a_0
  223. start_date += timedelta(hours=1)
  224. return new_forecast
  225.  
  226.  
  227. def create_ensemble2(forecast1, forecast2, a_1, a_2, a_0, start_date, period):
  228. new_forecast = {}
  229. for i in range(0, period * 24):
  230. new_forecast[start_date] = a_1 * forecast1[start_date] + a_2 * forecast2[start_date] + a_0
  231. start_date += timedelta(hours=1)
  232. return new_forecast
  233.  
  234.  
  235. def create_ensemble3(forecast1, forecast2, forecast3, a_1, a_2, a_3, a_0, start_date, period):
  236. new_forecast = {}
  237. for i in range(0, period * 24):
  238. new_forecast[start_date] = a_1 * forecast1[start_date] + a_2 * forecast2[start_date] + a_3 * forecast3[start_date] + a_0
  239. start_date += timedelta(hours=1)
  240. return new_forecast
  241.  
  242.  
  243. def write_into_file(ensemble, ensemble_file, start_date, period):
  244. output = open(ensemble_file, "w")
  245. for i in range(0, period * 24):
  246. row_to_write = str(start_date) + "\t" + str(ensemble[start_date]) + "\n"
  247. start_date += timedelta(hours=1)
  248. output.write(row_to_write)
  249.  
  250.  
  251. def biplot(prog_data, real_data, start_date, period):
  252. real_data_list = []
  253. prog_data_list = []
  254. for iter in range(0, period*24):
  255. real_data_list.append(real_data[start_date])
  256. prog_data_list.append(prog_data[start_date])
  257. start_date += timedelta(hours=1)
  258. plt.scatter(prog_data_list, real_data_list)
  259. plt.xlim(-50,250)
  260. plt.ylim(-50,200)
  261. plt.xlabel("PC{}".format(prog_data_list))
  262. plt.ylabel("PC{}".format(real_data_list))
  263. plt.grid()
  264. plt.show()
  265.  
  266.  
  267. def main():
  268. hiromb = "HIROMB-S1.txt"
  269. bsm_wowc_hirlam = "BSM-WOWC-HIRLAM-S1.txt"
  270. baltp_hirlam = "BALTP_HIRLAM_2m-S1.txt"
  271. period = 10
  272. start_date = datetime.strptime("2013-10-21 00:00:00", "%Y-%m-%d %H:%M:%S")
  273. s1 = load_real_data(period, start_date)
  274.  
  275. # fc1 = load_forecast(hiromb, period, start_date)
  276. # fc2 = load_forecast(bsm_wowc_hirlam, period, start_date)
  277. # fc3 = load_forecast(baltp_hirlam, period, start_date)
  278. #
  279. # delta1 = calc_delta(s1, fc1, start_date, period)
  280. # delta2 = calc_delta(s1, fc2, start_date, period)
  281. # delta3 = calc_delta(s1, fc3, start_date, period)
  282. #
  283. # fc1 = subtract_delta(fc1, delta1, start_date, period)
  284. # fc2 = subtract_delta(fc2, delta2, start_date, period)
  285. # fc3 = subtract_delta(fc3, delta3, start_date, period)
  286.  
  287. for i in range(period * 24):
  288. print str(start_date) + " :: " + str(s1[start_date])
  289. start_date += timedelta(hours=1)
  290.  
  291. # print "12"
  292. # ens12 = calc_optforecast2(fc1, fc2, s1, start_date, period)
  293. # print ens12
  294. # ens13 = calc_optforecast2(fc1, fc3, s1, start_date, period)
  295. # print ens13
  296. # ens23 = calc_optforecast2(fc2, fc3, s1, start_date, period)
  297. # print ens23
  298.  
  299. # biplot(fc1, s1, start_date, period)
  300. # biplot(fc2, s1, start_date, period)
  301. # biplot(fc3, s1, start_date, period)
  302. # ens12 = create_ensemble2(fc1, fc2, 0.240848817032, -0.339333042156, -4.80130300901, start_date, period)
  303. # ens13 = create_ensemble2(fc1, fc2, 0.000124166666667, 0.0027669471875, -5.0000625, start_date, period)
  304. # ens23 = create_ensemble2(fc1, fc2, -0.145222151392, 0.0814548941271, -4.99732229471, start_date, period)
  305. # print_multiplot3(s1, fc1, fc2, ens12, start_date, period)
  306. # print_multiplot3(s1, fc1, fc3, ens13, start_date, period)
  307. # print_multiplot3(s1, fc2, fc3, ens23, start_date, period)
  308.  
  309.  
  310.  
  311.  
  312.  
  313. # print_multiplot(s1, fc1, ens123, start_date, period)
  314. # print_multiplot(s1, fc1, ens1, start_date, period)
  315. # print_multiplot(s1, fc2, ens13, start_date, period)
  316. # print_multiplot(s1, fc3, ens23, start_date, period)
  317.  
  318.  
  319.  
  320. if __name__ == '__main__':
  321. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement