Advertisement
Guest User

Untitled

a guest
Apr 25th, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.64 KB | None | 0 0
  1. import numpy as np
  2. from math import *
  3.  
  4. MAX_NUM_CARS = 20
  5. MAX_MOVE_CARS = 5
  6. RENT_FIRST_LOC = 3
  7. RENT_SECOND_LOC = 4
  8. RETURN_FIRST_LOC = 3
  9. RETURN_SECOND_LOC = 2
  10. DISCOUNT = 0.9
  11. MONEY_PER_CAR = 10
  12. MOVE_CAR_COST = 2
  13.  
  14. matrixPoisson = dict()
  15. def poisson(n, lambd):
  16. global matrixPoisson
  17. key = 10 * n + lambd
  18. if key not in matrixPoisson.keys():
  19. matrixPoisson[key] = exp(-lambd) * pow(lambd, n) / factorial(n)
  20. return matrixPoisson[key]
  21.  
  22. def cacheProfit(estimated_rented):
  23. profitRented = []
  24. for i in range(MAX_NUM_CARS+1):
  25. profitRented.append(0)
  26.  
  27. for car_available in range(MAX_NUM_CARS + 1):
  28. for car_rented in range(MAX_NUM_CARS + 1):
  29. profitRented[car_available] += MONEY_PER_CAR * min(car_available, car_rented) * poisson(car_rented, estimated_rented)
  30. return profitRented
  31.  
  32. def cacheRentReturnsProbability(estimated_rented, estimated_return):
  33. probabilityComplexCache = []
  34. for i in range(MAX_NUM_CARS + 1):
  35. probabilityComplexCache.append([])
  36. for j in range(MAX_NUM_CARS + 1):
  37. probabilityComplexCache[i].append(0)
  38.  
  39. for car in range(MAX_NUM_CARS + 1):
  40. for car_rented in range(MAX_NUM_CARS + 1):
  41. for car_return in range(MAX_NUM_CARS + 1):
  42. car_real_rented = min(car, car_rented)
  43. car_available = max(0, min(MAX_NUM_CARS, car + car_return - car_real_rented))
  44. probabilityComplexCache[car][car_available] += poisson(car_rented, estimated_rented) * poisson(car_return, estimated_return)
  45. return probabilityComplexCache
  46.  
  47.  
  48. def evaluatePolicy(returnsLoc1, returnsLoc2, rentReturnsLoc1, rentReturnsLoc2, policy):
  49. valueMatrix = []
  50. for i in range(MAX_NUM_CARS + 1):
  51. valueMatrix.append([])
  52. for j in range(MAX_NUM_CARS + 1):
  53. valueMatrix[i].append(0)
  54.  
  55. delta = 1
  56. while (delta > 1e-6):
  57. delta = 0
  58. for car_loc_1 in range(MAX_NUM_CARS + 1):
  59. for car_loc_2 in range(MAX_NUM_CARS + 1):
  60. profit_prev = valueMatrix[car_loc_1][car_loc_2]
  61. car_moved = int(policy[car_loc_1][car_loc_2])
  62. profit = -MOVE_CAR_COST * abs(car_moved)
  63. for car_real_loc_1 in range(MAX_NUM_CARS + 1):
  64. for car_real_loc_2 in range(MAX_NUM_CARS + 1):
  65. if (car_loc_1 - car_moved >= 0) & (car_loc_2 + car_moved >= 0) & (car_loc_1 - car_moved <= MAX_NUM_CARS) & (car_loc_2 + car_moved <= MAX_NUM_CARS):
  66. profit += rentReturnsLoc1[car_loc_1 - car_moved][car_real_loc_1] * rentReturnsLoc2[car_loc_2 + car_moved][car_real_loc_2] * (returnsLoc1[car_loc_1 - car_moved] + returnsLoc2[car_loc_2 + car_moved] + DISCOUNT * valueMatrix[car_real_loc_1][car_real_loc_2])
  67. valueMatrix[car_loc_1][car_loc_2] = profit
  68. delta = max(delta, abs(profit_prev - profit))
  69.  
  70. return valueMatrix
  71.  
  72. def improvePolicy(returnsLoc1, returnsLoc2, rentReturnsLoc1, rentReturnsLoc2, valueMatrix):
  73. policy = []
  74. for i in range(MAX_NUM_CARS + 1):
  75. policy.append([])
  76. for j in range(MAX_NUM_CARS + 1):
  77. policy[i].append(0)
  78.  
  79. for car_loc_1 in range(MAX_NUM_CARS + 1):
  80. for car_loc_2 in range(MAX_NUM_CARS + 1):
  81. max_profit = 0
  82. for car_moved in range(-MAX_MOVE_CARS, MAX_MOVE_CARS+1):
  83. if (car_loc_1 - car_moved >= 0) & (car_loc_2 + car_moved >= 0) & (car_loc_1 - car_moved <= MAX_NUM_CARS) & (car_loc_2 + car_moved <= MAX_NUM_CARS):
  84. profit = -MOVE_CAR_COST * abs(car_moved)
  85. for car_real_loc_1 in range(MAX_NUM_CARS + 1):
  86. for car_real_loc_2 in range(MAX_NUM_CARS + 1):
  87. profit += rentReturnsLoc1[car_loc_1 - car_moved][car_real_loc_1] * rentReturnsLoc2[car_loc_2 + car_moved][car_real_loc_2] * (returnsLoc1[car_loc_1 - car_moved] + returnsLoc2[car_loc_2 + car_moved] + DISCOUNT * valueMatrix[car_real_loc_1][car_real_loc_2])
  88. if profit > max_profit:
  89. policy[car_loc_1][car_loc_2] = car_moved
  90. max_profit = profit
  91. return policy
  92.  
  93. def initCache():
  94. cachedReturnsLoc1 = cacheProfit(RENT_FIRST_LOC)
  95. cachedReturnsLoc2 = cacheProfit(RENT_SECOND_LOC)
  96.  
  97. cachedRentReturnsLoc1 = cacheRentReturnsProbability(RENT_FIRST_LOC, RETURN_FIRST_LOC)
  98. cachedRentReturnsLoc2 = cacheRentReturnsProbability(RENT_SECOND_LOC, RETURN_SECOND_LOC)
  99.  
  100. valueMatrix = []
  101. for i in range(MAX_NUM_CARS + 1):
  102. valueMatrix.append([])
  103. for j in range(MAX_NUM_CARS + 1):
  104. valueMatrix[i].append(0)
  105.  
  106. policy = []
  107. for i in range(MAX_NUM_CARS + 1):
  108. policy.append([])
  109. for j in range(MAX_NUM_CARS + 1):
  110. policy[i].append(0)
  111.  
  112. return cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, valueMatrix, policy
  113.  
  114. def main():
  115. cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, valueMatrix, policy = initCache()
  116.  
  117. while True:
  118. valueMatrix = evaluatePolicy(cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, policy)
  119. newPolicy = improvePolicy(cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, valueMatrix)
  120. policyStable = np.sum(newPolicy != policy)
  121. policy = newPolicy
  122.  
  123. if policyStable == 0:
  124. break
  125.  
  126. for i in range(len(policy)):
  127. for j in range(len(policy[0])):
  128. print(int(policy[i][j]), end="\t")
  129. print()
  130.  
  131. pass
  132.  
  133. if __name__ == "__main__":
  134. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement