Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __author__ = 'Ivan'
- from datetime import datetime, timedelta
- import matplotlib.pyplot as plt
- import math
- import numpy as np
- from scipy.optimize import minimize
- def load_real_data(period, start_date):
- with open("./../data/GI_C1NB_C1FG_SHEP_restored_20130901000000_01.txt", "r") as data:
- data = data.read()
- s1 = {}
- end_date = start_date + timedelta(days=period)
- flag_date = datetime.strptime("2013-12-30 00:00:00", "%Y-%m-%d %H:%M:%S")
- if end_date > flag_date:
- print "Too big period"
- return Exception
- for line in data.split("\n"):
- line = line.split("\t")
- if start_date == end_date:
- return s1
- else:
- s1[start_date] = float(line[2])
- start_date += timedelta(hours=1)
- return s1
- def load_forecast(forecast, period, start_date):
- with open("../data/" + forecast) as data:
- data = data.read()
- end_date = start_date + timedelta(days=period)
- flag_date = datetime.strptime("2013-12-30 00:00:00", "%Y-%m-%d %H:%M:%S")
- if end_date > flag_date:
- print "Too big forecast period"
- return Exception
- forecast = {}
- for line in data.split("\n"):
- line = line.split("\t") if len(line.split("\t")) != 0 else line.split(" ")
- for elem in line[1:7]:
- if start_date == end_date:
- return forecast
- else:
- forecast[start_date] = float(elem)
- start_date += timedelta(hours=1)
- def print_plot(real_data, forecast, start_date, period):
- real_data_values = []
- forecast_values = []
- for i in range(0, period * 24):
- real_data_values.append(float(real_data[start_date]))
- forecast_values.append(float(forecast[start_date]))
- start_date += timedelta(hours=1)
- plt.plot(real_data_values, "r", forecast_values, "b")
- plt.xticks(range(0, period * 24, 6))
- plt.grid()
- plt.show()
- def print_multiplot(real_data, forecast, forecast_opt, start_date, period):
- real_data_values = []
- forecast_values = []
- opt_forecast_values = []
- for i in range(0, period * 24):
- real_data_values.append(float(real_data[start_date]))
- forecast_values.append(float(forecast[start_date]))
- opt_forecast_values.append(float(forecast_opt[start_date]))
- start_date += timedelta(hours=1)
- plt.plot(real_data_values, "r", forecast_values, "b", opt_forecast_values, "g")
- plt.xticks(range(0, period * 24, 6))
- plt.grid()
- plt.show()
- def print_multiplot3(real_data, forecast1, forecast2, ensemble, start_date, period):
- real_data_values = []
- forecast1_values = []
- forecast2_values = []
- ensemble_values = []
- for i in range(0, period * 24):
- real_data_values.append(float(real_data[start_date]))
- forecast1_values.append(float(forecast1[start_date]))
- forecast2_values.append(float(forecast2[start_date]))
- ensemble_values.append(float(ensemble[start_date]))
- start_date += timedelta(hours=1)
- plt.plot(real_data_values, "r", forecast1_values, "b", forecast2_values, "g", ensemble_values, "*")
- plt.xticks(range(0, period * 24, 6))
- plt.grid()
- plt.show()
- def calc_delta(s1, fc, start_date, period):
- sum_delta = 0.
- for i in range(0, period * 24):
- sum_delta += float(fc[start_date]) - float(s1[start_date])
- start_date += timedelta(hours=1)
- return sum_delta / (period * 24)
- def subtract_delta(forecast, delta, start_date, period):
- for i in range(0, period * 24):
- forecast[start_date] = float(forecast[start_date]) - delta
- start_date += timedelta(hours=1)
- return forecast
- def calc_derivatives(forecast, start_date, period):
- derivative = {}
- for i in range(0, period * 24 - 1):
- df = float(forecast[start_date + timedelta(hours=1)] - forecast[start_date])
- derivative[start_date] = df
- start_date += timedelta(hours=1)
- derivative[start_date] = forecast[start_date] - forecast[start_date - timedelta(hours=1)]
- return derivative
- def calc_optforecast(forecast, s1, start_date, period):
- a_0_ls = np.linspace(-5.0, 5.0, 101)
- a_1_ls = np.linspace(0.0, 1.0, 11)
- alpha = 0.00005
- m = period * 24
- ens = {}
- lowest_mae = 10000
- k = 0
- for a_1 in a_1_ls:
- print str(k) + "% processed"
- k += 10
- for a_0 in a_0_ls:
- core = create_ensemble(forecast, a_1, a_0, start_date, period)
- a_1 -= alpha * forecast[start_date] * (core[start_date] - s1[start_date]) / m
- a_0 -= alpha * (core[start_date] - s1[start_date]) / m
- new_ens = create_ensemble(forecast, a_1, a_0, start_date, period)
- new_mae = calc_mae(new_ens, s1, start_date, period)
- if new_mae < lowest_mae:
- print new_mae, a_1, a_0
- lowest_mae = new_mae
- def calc_optforecast2(forecast1, forecast2, s1, start_date, period):
- alpha = 0.005
- old_mae = 999999
- old_forecast = {}
- m = period * 24
- a_1 = np.linspace(0.0, 1.0, 11)
- a_2 = np.linspace(0.0, 1.0, 11)
- a_0 = np.linspace(-5.0, 5.0, 101)
- k = 0
- lowest_mae = 10000
- lowest_mae_a0 = 1000
- lowest_mae_a1 = 1000
- lowest_mae_a2 = 1000
- for elem1 in a_1:
- for elem2 in a_2:
- print str(k) + "% processed"
- k += 1
- for elem0 in a_0:
- for i in range(0, m):
- core = create_ensemble2(forecast1, forecast2, elem1, elem2, elem0, start_date, period)
- elem1 -= alpha * (core[start_date] - s1[start_date]) * forecast1[start_date] / m
- elem2 -= alpha * (core[start_date] - s1[start_date]) * forecast2[start_date] / m
- elem0 -= alpha * (core[start_date] - s1[start_date]) / m
- new_ens = create_ensemble2(forecast1, forecast2, elem1, elem2, elem0, start_date, period)
- new_mae = calc_mae(new_ens, s1, start_date, period)
- if new_mae < lowest_mae:
- print new_mae, elem1, elem2, elem0
- lowest_mae = new_mae
- lowest_mae_a1 = elem1
- lowest_mae_a2 = elem2
- lowest_mae_a0 = elem0
- # if new_mae > old_mae:
- # return old_forecast
- # elif i == (period * 24 - 1):
- # return new_ens
- old_mae = new_mae
- # old_forecast = new_ens
- return str(lowest_mae) + "|" + str(lowest_mae_a1) + "|" + str(lowest_mae_a2) + "|" + str(lowest_mae_a0)
- def cacl_optforecast3(forecast1, forecast2, forecast3, s1, start_date, period):
- a_0 = 0
- a_1 = 0
- a_2 = 0
- a_3 = 0
- alpha = 0.005
- old_mae = 9999999
- old_forecast = {}
- for i in range(0, period*24):
- h = abs(forecast1[start_date] * a_1 + forecast2[start_date] * a_2 + forecast3[start_date] * a_3 + a_0 - s1[start_date]) / (period * 24)
- a_0 -= (h - s1[start_date]) * alpha / (period * 24)
- a_1 -= (h - s1[start_date]) * forecast1[start_date] * alpha / (period * 24)
- a_2 -= (h - s1[start_date]) * forecast2[start_date] * alpha / (period * 24)
- a_3 -= (h - s1[start_date]) * forecast3[start_date] * alpha / (period * 24)
- new_forecast = create_ensemble3(forecast1, forecast2, forecast3, a_1, a_2, a_3, a_0, start_date, period)
- new_mae = calc_mae(new_forecast, s1, start_date, period)
- print new_mae
- if new_mae > old_mae:
- return old_forecast
- elif i == (period * 24 - 1):
- return new_forecast
- old_mae = new_mae
- old_forecast = new_forecast
- def calc_mae(forecast, real_data, start_date, period):
- diff = 0
- for i in range(0, period * 24):
- diff += abs(forecast[start_date] - real_data[start_date])
- start_date += timedelta(hours=1)
- return diff / (period * 24)
- def create_ensemble(forecast, a_1, a_0, start_date, period):
- new_forecast = {}
- for i in range(0, period * 24):
- new_forecast[start_date] = a_1 * forecast[start_date] + a_0
- start_date += timedelta(hours=1)
- return new_forecast
- def create_ensemble2(forecast1, forecast2, a_1, a_2, a_0, start_date, period):
- new_forecast = {}
- for i in range(0, period * 24):
- new_forecast[start_date] = a_1 * forecast1[start_date] + a_2 * forecast2[start_date] + a_0
- start_date += timedelta(hours=1)
- return new_forecast
- def create_ensemble3(forecast1, forecast2, forecast3, a_1, a_2, a_3, a_0, start_date, period):
- new_forecast = {}
- for i in range(0, period * 24):
- new_forecast[start_date] = a_1 * forecast1[start_date] + a_2 * forecast2[start_date] + a_3 * forecast3[start_date] + a_0
- start_date += timedelta(hours=1)
- return new_forecast
- def write_into_file(ensemble, ensemble_file, start_date, period):
- output = open(ensemble_file, "w")
- for i in range(0, period * 24):
- row_to_write = str(start_date) + "\t" + str(ensemble[start_date]) + "\n"
- start_date += timedelta(hours=1)
- output.write(row_to_write)
- def biplot(prog_data, real_data, start_date, period):
- real_data_list = []
- prog_data_list = []
- for iter in range(0, period*24):
- real_data_list.append(real_data[start_date])
- prog_data_list.append(prog_data[start_date])
- start_date += timedelta(hours=1)
- plt.scatter(prog_data_list, real_data_list)
- plt.xlim(-50,250)
- plt.ylim(-50,200)
- plt.xlabel("PC{}".format(prog_data_list))
- plt.ylabel("PC{}".format(real_data_list))
- plt.grid()
- plt.show()
- def main():
- hiromb = "HIROMB-S1.txt"
- bsm_wowc_hirlam = "BSM-WOWC-HIRLAM-S1.txt"
- baltp_hirlam = "BALTP_HIRLAM_2m-S1.txt"
- period = 10
- start_date = datetime.strptime("2013-10-21 00:00:00", "%Y-%m-%d %H:%M:%S")
- s1 = load_real_data(period, start_date)
- # fc1 = load_forecast(hiromb, period, start_date)
- # fc2 = load_forecast(bsm_wowc_hirlam, period, start_date)
- # fc3 = load_forecast(baltp_hirlam, period, start_date)
- #
- # delta1 = calc_delta(s1, fc1, start_date, period)
- # delta2 = calc_delta(s1, fc2, start_date, period)
- # delta3 = calc_delta(s1, fc3, start_date, period)
- #
- # fc1 = subtract_delta(fc1, delta1, start_date, period)
- # fc2 = subtract_delta(fc2, delta2, start_date, period)
- # fc3 = subtract_delta(fc3, delta3, start_date, period)
- for i in range(period * 24):
- print str(start_date) + " :: " + str(s1[start_date])
- start_date += timedelta(hours=1)
- # print "12"
- # ens12 = calc_optforecast2(fc1, fc2, s1, start_date, period)
- # print ens12
- # ens13 = calc_optforecast2(fc1, fc3, s1, start_date, period)
- # print ens13
- # ens23 = calc_optforecast2(fc2, fc3, s1, start_date, period)
- # print ens23
- # biplot(fc1, s1, start_date, period)
- # biplot(fc2, s1, start_date, period)
- # biplot(fc3, s1, start_date, period)
- # ens12 = create_ensemble2(fc1, fc2, 0.240848817032, -0.339333042156, -4.80130300901, start_date, period)
- # ens13 = create_ensemble2(fc1, fc2, 0.000124166666667, 0.0027669471875, -5.0000625, start_date, period)
- # ens23 = create_ensemble2(fc1, fc2, -0.145222151392, 0.0814548941271, -4.99732229471, start_date, period)
- # print_multiplot3(s1, fc1, fc2, ens12, start_date, period)
- # print_multiplot3(s1, fc1, fc3, ens13, start_date, period)
- # print_multiplot3(s1, fc2, fc3, ens23, start_date, period)
- # print_multiplot(s1, fc1, ens123, start_date, period)
- # print_multiplot(s1, fc1, ens1, start_date, period)
- # print_multiplot(s1, fc2, ens13, start_date, period)
- # print_multiplot(s1, fc3, ens23, start_date, period)
- if __name__ == '__main__':
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement