Guest User

Untitled

a guest
Jun 23rd, 2018
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.84 KB | None | 0 0
  function [eplus_in_curr, userdata] = RadiantControlFileBaseline(cmd, eplus_out_prev, eplus_in_prev, time, stepNumber, userdata) %#ok<INUSL>
  %RADIANTCONTROLFILEBASELINE Tabular Q-learning setpoint controller for three zones.
  %
  %   [eplus_in_curr, userdata] = RadiantControlFileBaseline(cmd, eplus_out_prev,
  %       eplus_in_prev, time, stepNumber, userdata)
  %
  %   Inputs
  %     cmd            'init' on the first call, 'normal' on every later call.
  %                    Any other value leaves eplus_in_curr unassigned.
  %     eplus_out_prev struct with fields temp1, temp2, temp3; the last element
  %                    of each is used as the zone's latest temperature.
  %     eplus_in_prev  struct with fields tsp1, tsp2, tsp3 (read only in the
  %                    'normal' branch; echoed to the console in 'init').
  %     time, stepNumber  unused; kept for the caller's calling convention.
  %     userdata       struct carried from call to call (see below).
  %
  %   Outputs
  %     eplus_in_curr  struct with fields tsp1, tsp2, tsp3 (new setpoints).
  %     userdata       updated with Q1..Q3, states, R, currState1..3,
  %                    old_tsp1..3 and epsilon1..3.
  %
  %   Side effects: adds ./RL_lib to the path, writes Q1/Q2/Q3 (and in 'init'
  %   also states and R) to ./RL_lib/*.mat, and echoes a few diagnostic values
  %   to the console.
  %
  %   A state is a row [setpoint, temperature] of the `states` matrix; the
  %   current state is the row nearest (squared Euclidean distance) to the
  %   observed pair. R is indexed by state row.

  actions      = [0, -0.1, 0.1];   % setpoint increments selectable per step
  discount     = 0.8;
  learnRate    = 0.99;
  successRate  = 1;
  epsilonDecay = 0.98;             % exploration decay factor per call

  if strcmp(cmd, 'init')
      addpath('./RL_lib')

      % Initial exploration rates.
      epsilon1 = 0.7;
      epsilon2 = 0.7;
      epsilon3 = 0.7;

      % Load the tables into explicit variables instead of letting a bare
      % `load` inject names into the function workspace.
      % (The tables are presumably produced offline by RL_setup -- TODO confirm.)
      Q1     = loadVariable('Q1.mat', 'Q1');
      Q2     = loadVariable('Q2.mat', 'Q2');
      Q3     = loadVariable('Q3.mat', 'Q3');
      states = loadVariable('states.mat', 'states');
      R      = loadVariable('R.mat', 'R');

      % All three zones start from the same assumed state, so one lookup
      % and one shared action serve every zone on this first step.
      z_start = [20, 15];
      state_index3 = nearestStateIndex(states, z_start);

      action_idx3 = chooseAction(Q3, state_index3, epsilon1, successRate, length(actions));
      action_idx3
      act = actions(action_idx3);
      eplus_in_prev

      % Observed state per zone after the step.
      z3_new = [20, eplus_out_prev.temp3(end)];
      z2_new = [20, eplus_out_prev.temp2(end)];
      z1_new = [20, eplus_out_prev.temp1(end)];
      new_state_index3 = nearestStateIndex(states, z3_new);
      new_state_index2 = nearestStateIndex(states, z2_new);
      new_state_index1 = nearestStateIndex(states, z1_new);

      % Q-learning update (bootstraps with the max over next-state actions).
      % Each zone's table uses that zone's own observed next state.
      Q3 = qLearningUpdate(Q3, state_index3, action_idx3, new_state_index3, R, learnRate, discount);
      Q2 = qLearningUpdate(Q2, state_index3, action_idx3, new_state_index2, R, learnRate, discount);
      Q1 = qLearningUpdate(Q1, state_index3, action_idx3, new_state_index1, R, learnRate, discount);

      userdata.currState3 = z3_new;
      userdata.currState2 = z2_new;
      userdata.currState1 = z1_new;

      eplus_in_curr.tsp1 = 20 + act;
      eplus_in_curr.tsp2 = 20 + act;
      eplus_in_curr.tsp3 = 20 + act;
      userdata.old_tsp3 = eplus_in_curr.tsp3;
      userdata.old_tsp2 = eplus_in_curr.tsp2;
      userdata.old_tsp1 = eplus_in_curr.tsp1;

      userdata.Q1 = Q1;
      userdata.Q2 = Q2;
      userdata.Q3 = Q3;
      userdata.states = states;
      userdata.R = R;

      save('./RL_lib/Q1.mat', 'Q1');
      save('./RL_lib/Q2.mat', 'Q2');
      save('./RL_lib/Q3.mat', 'Q3');
      save('./RL_lib/states.mat', 'states');
      save('./RL_lib/R.mat', 'R');

      userdata.epsilon1 = epsilon1 * epsilonDecay;
      userdata.epsilon2 = epsilon2 * epsilonDecay;
      userdata.epsilon3 = epsilon3 * epsilonDecay;

  elseif strcmp(cmd, 'normal')
      addpath('./RL_lib')

      % Latest observed [setpoint, temperature] pair per zone.
      z3_new = [eplus_in_prev.tsp3(end), eplus_out_prev.temp3(end)];
      z2_new = [eplus_in_prev.tsp2(end), eplus_out_prev.temp2(end)];
      z1_new = [eplus_in_prev.tsp1(end), eplus_out_prev.temp1(end)];
      temperature = [z1_new(2), z2_new(2), z3_new(2)]

      Q3 = userdata.Q3;
      Q2 = userdata.Q2;
      Q1 = userdata.Q1;
      R = userdata.R;
      states = userdata.states;

      epsilon1 = userdata.epsilon1;
      epsilon2 = userdata.epsilon2;
      epsilon3 = userdata.epsilon3;

      % State stored at the end of the previous call.
      state_index3 = nearestStateIndex(states, userdata.currState3);
      state_index2 = nearestStateIndex(states, userdata.currState2);
      state_index1 = nearestStateIndex(states, userdata.currState1);

      % NOTE(review): the action is selected from the previously stored state
      % and the transition (stored state -> latest observation) is credited to
      % the action chosen now, not the one applied last step. Confirm whether
      % this is intended before changing; kept as in the original.
      % Selection order 3, 2, 1 is kept so the random stream is consumed in
      % the same order as before.
      nActions = length(actions);
      action_idx3 = chooseAction(Q3, state_index3, epsilon3, successRate, nActions);
      action_idx2 = chooseAction(Q2, state_index2, epsilon2, successRate, nActions);
      action_idx1 = chooseAction(Q1, state_index1, epsilon1, successRate, nActions);

      act3 = actions(action_idx3);
      act2 = actions(action_idx2);
      act1 = actions(action_idx1);

      % Nearest grid state for each zone's latest observation, then update.
      new_state_index3 = nearestStateIndex(states, z3_new);
      new_state_index2 = nearestStateIndex(states, z2_new);
      new_state_index1 = nearestStateIndex(states, z1_new);

      Q3 = qLearningUpdate(Q3, state_index3, action_idx3, new_state_index3, R, learnRate, discount);
      Q2 = qLearningUpdate(Q2, state_index2, action_idx2, new_state_index2, R, learnRate, discount);
      Q1 = qLearningUpdate(Q1, state_index1, action_idx1, new_state_index1, R, learnRate, discount);

      % Console diagnostics: [previous, new] state row per zone.
      indexes = [state_index1, new_state_index1; state_index2, new_state_index2; state_index3, new_state_index3;]

      userdata.currState3 = z3_new;
      userdata.currState2 = z2_new;
      userdata.currState1 = z1_new;

      eplus_in_curr.tsp1 = userdata.old_tsp1 + act1;
      eplus_in_curr.tsp2 = userdata.old_tsp2 + act2;
      eplus_in_curr.tsp3 = userdata.old_tsp3 + act3;

      userdata.old_tsp3 = eplus_in_curr.tsp3;
      userdata.old_tsp2 = eplus_in_curr.tsp2;
      userdata.old_tsp1 = eplus_in_curr.tsp1;

      userdata.Q3 = Q3;
      userdata.Q2 = Q2;
      userdata.Q1 = Q1;

      % Show all three zone setpoints in zone order 1, 2, 3.
      TSPs = [userdata.old_tsp1, userdata.old_tsp2, userdata.old_tsp3]

      save('./RL_lib/Q1.mat', 'Q1');
      save('./RL_lib/Q2.mat', 'Q2');
      save('./RL_lib/Q3.mat', 'Q3');

      userdata.epsilon1 = epsilon1 * epsilonDecay;
      userdata.epsilon2 = epsilon2 * epsilonDecay;
      userdata.epsilon3 = epsilon3 * epsilonDecay;
  end


  function value = loadVariable(fileName, varName)
  % Load one named variable from a MAT-file found in the cwd or on the path.
  contents = load(fileName);
  value = contents.(varName);


  function idx = nearestStateIndex(states, z)
  % Row of `states` with the smallest squared Euclidean distance to row vector z.
  delta = bsxfun(@minus, states, z);
  [~, idx] = min(sum(delta.^2, 2));


  function action_idx = chooseAction(Q, state_index, epsilon, successRate, nActions)
  % Epsilon-greedy choice: best-valued action for the state, else a random one.
  % The short-circuit && means the second rand() is drawn only when exploiting.
  if (rand() > epsilon) && rand() <= successRate
      [~, action_idx] = max(Q(state_index, :));
  else
      action_idx = randi(nActions, 1);
  end


  function Q = qLearningUpdate(Q, s, a, sNext, R, learnRate, discount)
  % One tabular Q-learning step for entry (s, a) given the observed next state.
  Q(s, a) = Q(s, a) + learnRate * (R(sNext) ...
      + discount * max(Q(sNext, :)) - Q(s, a));
Add Comment
Please, Sign In to add comment