Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Reinforcement Learning Book / Example 4.2
- *
- * $ sudo apt-get install libgsl-dev libgsl2
- * $ gcc car_rental.c -lgsl -lgslcblas -lm -o car_rental
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <gsl/gsl_randist.h>
- #define Size 20
- #define Iteration 100
- #define Rent1_mean 3
- #define Rent2_mean 4
- #define Return1_mean 3
- #define Return2_mean 2
- // Global variables
- float value[Size + 1][Size + 1];
- int policy[Size + 1][Size + 1];
- void show_value(){
- printf("[");
- for (int c1 = 0; c1 <= Size; c1++) {
- printf("[");
- for (int c2 = 0; c2 <= Size; c2++) {
- printf("%3.0f", value[c1][c2]);
- if (c2 < Size) {
- printf(", ");
- }
- }
- printf("]");
- if (c1 < Size) {
- printf(",\n ");
- }
- }
- printf("]\n");
- }
- void show_policy(){
- printf("[");
- for (int c1 = 0; c1 <= Size; c1++) {
- printf("[");
- for (int c2 = 0; c2 <= Size; c2++) {
- printf("%2d", policy[c1][c2]);
- if (c2 < Size) {
- printf(", ");
- }
- }
- printf("]");
- if (c1 < Size) {
- printf(",\n ");
- }
- }
- printf("]\n");
- }
- float calc_q_val(int c1, int c2, int a){
- float q_val = 0;
- int _c1, _c2;
- float c1_rent_prob, c2_rent_prob, c1_return_prob, c2_return_prob, prob;
- // Moving cars
- c1 -= a;
- c2 += a;
- q_val -= 2*abs(a);
- for (int c1_rent = 0; c1_rent <= c1; c1_rent++) {
- for (int c2_rent = 0; c2_rent <= c2; c2_rent++) {
- for (int c1_return = 0; c1_return <= Size-(c1-c1_rent); c1_return++) {
- for (int c2_return = 0; c2_return <= Size-(c2-c2_rent); c2_return++) {
- _c1 = c1 - c1_rent + c1_return;
- _c2 = c2 - c2_rent + c2_return;
- if (c1_rent == c1) {
- c1_rent_prob = 1;
- for (int n = 0; n < c1_rent; n++) {
- c1_rent_prob -= gsl_ran_poisson_pdf(n, Rent1_mean);
- }
- } else {
- c1_rent_prob = gsl_ran_poisson_pdf(c1_rent, Rent1_mean);
- }
- if (c2_rent == c2) {
- c2_rent_prob = 1;
- for (int n = 0; n < c2_rent; n++) {
- c2_rent_prob -= gsl_ran_poisson_pdf(n, Rent2_mean);
- }
- } else {
- c2_rent_prob = gsl_ran_poisson_pdf(c2_rent, Rent2_mean);
- }
- if (c1_return == Size-(c1-c1_rent)) {
- c1_return_prob = 1;
- for (int n = 0; n < c1_return; n++) {
- c1_return_prob -= gsl_ran_poisson_pdf(n, Return1_mean);
- }
- } else {
- c1_return_prob = gsl_ran_poisson_pdf(c1_return, Return1_mean);
- }
- if (c2_return == Size-(c2-c2_rent)) {
- c2_return_prob = 1;
- for (int n = 0; n < c2_return; n++) {
- c2_return_prob -= gsl_ran_poisson_pdf(n, Return2_mean);
- }
- } else {
- c2_return_prob = gsl_ran_poisson_pdf(c2_return, Return2_mean);
- }
- prob = c1_rent_prob * c2_rent_prob * c1_return_prob * c2_return_prob;
- q_val += prob * (10 * (c1_rent + c2_rent) + 0.9 * value[_c1][_c2]);
- }
- }
- }
- }
- return q_val;
- }
/* Return the smaller of two ints; when they are equal, `b` is returned. */
int min(int a, int b){
    return (a < b) ? a : b;
}
- void run() {
- float q_val;
- float value_update;
- int policy_update;
- for (int c1 = 0; c1 <= Size; c1++) {
- for (int c2 = 0; c2 <= Size; c2++) {
- value[c1][c2] = 0;
- policy[c1][c2] = 0;
- }
- }
- printf("# Iteration 0\n");
- show_policy();
- show_value();
- for (int i = 1; i < Iteration; i++) {
- for (int c1 = 0; c1 <= Size; c1++) {
- for (int c2 = 0; c2 <= Size; c2++) {
- int update = 0;
- for (int a = -min(c2, 5); a <= min(c1, 5); a++) {
- q_val = calc_q_val(c1, c2, a);
- if (update == 0 || q_val > value_update) {
- value_update = q_val;
- policy_update = a;
- update = 1;
- }
- }
- value[c1][c2] = value_update;
- policy[c1][c2] = policy_update;
- }
- }
- printf("# Iteration %d\n", i);
- show_policy();
- show_value();
- }
- }
/* Program entry point: run the whole computation and exit successfully. */
int main() {
    run();
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement