Advertisement
Guest User

Untitled

a guest
Apr 25th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.71 KB | None | 0 0
  1. /*
  2. * Reinforcement Learning Book / Example 4.2
  3. *
  4. * $ sudo apt-get install libgsl-dev libgsl2
  5. * $ gcc car_rental.c -lgsl -lgslcblas -lm -o car_rental
  6. */
  7.  
  8.  
  9. #include <stdio.h>
  10. #include <stdlib.h>
  11. #include <gsl/gsl_randist.h>
  12.  
  13. #define Size 20  // largest car count tracked per location; the tables below are (Size + 1) x (Size + 1)
  14. #define Iteration 100  // exclusive upper bound of the sweep counter in run(), which starts at 1
  15. #define Rent1_mean 3  // Poisson mean passed to gsl_ran_poisson_pdf for the c1 rental count
  16. #define Rent2_mean 4  // Poisson mean passed to gsl_ran_poisson_pdf for the c2 rental count
  17. #define Return1_mean 3  // Poisson mean passed to gsl_ran_poisson_pdf for the c1 return count
  18. #define Return2_mean 2  // Poisson mean passed to gsl_ran_poisson_pdf for the c2 return count
  19.  
  20. // Global tables shared by every function below; both are indexed as [c1][c2] with each index in 0..Size.
  21. float value[Size + 1][Size + 1];  // current value estimate per state, read by calc_q_val() and written by run()
  22. int policy[Size + 1][Size + 1];  // action `a` that run() last selected for each state
  23.  
  24.  
  25. void show_value(){
  26. printf("[");
  27. for (int c1 = 0; c1 <= Size; c1++) {
  28. printf("[");
  29. for (int c2 = 0; c2 <= Size; c2++) {
  30. printf("%3.0f", value[c1][c2]);
  31. if (c2 < Size) {
  32. printf(", ");
  33. }
  34. }
  35. printf("]");
  36. if (c1 < Size) {
  37. printf(",\n ");
  38. }
  39. }
  40. printf("]\n");
  41. }
  42.  
  43.  
  44. void show_policy(){
  45. printf("[");
  46. for (int c1 = 0; c1 <= Size; c1++) {
  47. printf("[");
  48. for (int c2 = 0; c2 <= Size; c2++) {
  49. printf("%2d", policy[c1][c2]);
  50. if (c2 < Size) {
  51. printf(", ");
  52. }
  53. }
  54. printf("]");
  55. if (c1 < Size) {
  56. printf(",\n ");
  57. }
  58. }
  59. printf("]\n");
  60. }
  61.  
  62.  
  63. float calc_q_val(int c1, int c2, int a){
  64. float q_val = 0;
  65. int _c1, _c2;
  66. float c1_rent_prob, c2_rent_prob, c1_return_prob, c2_return_prob, prob;
  67.  
  68. // Moving cars
  69. c1 -= a;
  70. c2 += a;
  71. q_val -= 2*abs(a);
  72.  
  73. for (int c1_rent = 0; c1_rent <= c1; c1_rent++) {
  74. for (int c2_rent = 0; c2_rent <= c2; c2_rent++) {
  75. for (int c1_return = 0; c1_return <= Size-(c1-c1_rent); c1_return++) {
  76. for (int c2_return = 0; c2_return <= Size-(c2-c2_rent); c2_return++) {
  77. _c1 = c1 - c1_rent + c1_return;
  78. _c2 = c2 - c2_rent + c2_return;
  79.  
  80. if (c1_rent == c1) {
  81. c1_rent_prob = 1;
  82. for (int n = 0; n < c1_rent; n++) {
  83. c1_rent_prob -= gsl_ran_poisson_pdf(n, Rent1_mean);
  84. }
  85. } else {
  86. c1_rent_prob = gsl_ran_poisson_pdf(c1_rent, Rent1_mean);
  87. }
  88.  
  89. if (c2_rent == c2) {
  90. c2_rent_prob = 1;
  91. for (int n = 0; n < c2_rent; n++) {
  92. c2_rent_prob -= gsl_ran_poisson_pdf(n, Rent2_mean);
  93. }
  94. } else {
  95. c2_rent_prob = gsl_ran_poisson_pdf(c2_rent, Rent2_mean);
  96. }
  97.  
  98. if (c1_return == Size-(c1-c1_rent)) {
  99. c1_return_prob = 1;
  100. for (int n = 0; n < c1_return; n++) {
  101. c1_return_prob -= gsl_ran_poisson_pdf(n, Return1_mean);
  102. }
  103. } else {
  104. c1_return_prob = gsl_ran_poisson_pdf(c1_return, Return1_mean);
  105. }
  106.  
  107. if (c2_return == Size-(c2-c2_rent)) {
  108. c2_return_prob = 1;
  109. for (int n = 0; n < c2_return; n++) {
  110. c2_return_prob -= gsl_ran_poisson_pdf(n, Return2_mean);
  111. }
  112. } else {
  113. c2_return_prob = gsl_ran_poisson_pdf(c2_return, Return2_mean);
  114. }
  115.  
  116. prob = c1_rent_prob * c2_rent_prob * c1_return_prob * c2_return_prob;
  117. q_val += prob * (10 * (c1_rent + c2_rent) + 0.9 * value[_c1][_c2]);
  118. }
  119. }
  120. }
  121. }
  122. return q_val;
  123. }
  124.  
  125.  
  /* Return the smaller of two ints; when they are equal, that common value. */
  int min(int a, int b){
      return (a < b) ? a : b;
  }
  133.  
  134.  
  135. void run() {
  136. float q_val;
  137. float value_update;
  138. int policy_update;
  139.  
  140. for (int c1 = 0; c1 <= Size; c1++) {
  141. for (int c2 = 0; c2 <= Size; c2++) {
  142. value[c1][c2] = 0;
  143. policy[c1][c2] = 0;
  144. }
  145. }
  146.  
  147. printf("# Iteration 0\n");
  148. show_policy();
  149. show_value();
  150.  
  151. for (int i = 1; i < Iteration; i++) {
  152. for (int c1 = 0; c1 <= Size; c1++) {
  153. for (int c2 = 0; c2 <= Size; c2++) {
  154. int update = 0;
  155. for (int a = -min(c2, 5); a <= min(c1, 5); a++) {
  156. q_val = calc_q_val(c1, c2, a);
  157. if (update == 0 || q_val > value_update) {
  158. value_update = q_val;
  159. policy_update = a;
  160. update = 1;
  161. }
  162. }
  163. value[c1][c2] = value_update;
  164. policy[c1][c2] = policy_update;
  165. }
  166. }
  167. printf("# Iteration %d\n", i);
  168. show_policy();
  169. show_value();
  170. }
  171. }
  172.  
  173.  
  /* Program entry point: run the whole computation and exit successfully. */
  int main() {
      run();
      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement