Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from math import *
- MAX_NUM_CARS = 20
- MAX_MOVE_CARS = 5
- RENT_FIRST_LOC = 3
- RENT_SECOND_LOC = 4
- RETURN_FIRST_LOC = 3
- RETURN_SECOND_LOC = 2
- DISCOUNT = 0.9
- MONEY_PER_CAR = 10
- MOVE_CAR_COST = 2
- matrixPoisson = dict()
- def poisson(n, lambd):
- global matrixPoisson
- key = 10 * n + lambd
- if key not in matrixPoisson.keys():
- matrixPoisson[key] = exp(-lambd) * pow(lambd, n) / factorial(n)
- return matrixPoisson[key]
- def cacheProfit(estimated_rented):
- profitRented = []
- for i in range(MAX_NUM_CARS+1):
- profitRented.append(0)
- for car_available in range(MAX_NUM_CARS + 1):
- for car_rented in range(MAX_NUM_CARS + 1):
- profitRented[car_available] += MONEY_PER_CAR * min(car_available, car_rented) * poisson(car_rented, estimated_rented)
- return profitRented
- def cacheRentReturnsProbability(estimated_rented, estimated_return):
- probabilityComplexCache = []
- for i in range(MAX_NUM_CARS + 1):
- probabilityComplexCache.append([])
- for j in range(MAX_NUM_CARS + 1):
- probabilityComplexCache[i].append(0)
- for car in range(MAX_NUM_CARS + 1):
- for car_rented in range(MAX_NUM_CARS + 1):
- for car_return in range(MAX_NUM_CARS + 1):
- car_real_rented = min(car, car_rented)
- car_available = max(0, min(MAX_NUM_CARS, car + car_return - car_real_rented))
- probabilityComplexCache[car][car_available] += poisson(car_rented, estimated_rented) * poisson(car_return, estimated_return)
- return probabilityComplexCache
- def evaluatePolicy(returnsLoc1, returnsLoc2, rentReturnsLoc1, rentReturnsLoc2, policy):
- valueMatrix = []
- for i in range(MAX_NUM_CARS + 1):
- valueMatrix.append([])
- for j in range(MAX_NUM_CARS + 1):
- valueMatrix[i].append(0)
- delta = 1
- while (delta > 1e-6):
- delta = 0
- for car_loc_1 in range(MAX_NUM_CARS + 1):
- for car_loc_2 in range(MAX_NUM_CARS + 1):
- profit_prev = valueMatrix[car_loc_1][car_loc_2]
- car_moved = int(policy[car_loc_1][car_loc_2])
- profit = -MOVE_CAR_COST * abs(car_moved)
- for car_real_loc_1 in range(MAX_NUM_CARS + 1):
- for car_real_loc_2 in range(MAX_NUM_CARS + 1):
- if (car_loc_1 - car_moved >= 0) & (car_loc_2 + car_moved >= 0) & (car_loc_1 - car_moved <= MAX_NUM_CARS) & (car_loc_2 + car_moved <= MAX_NUM_CARS):
- profit += rentReturnsLoc1[car_loc_1 - car_moved][car_real_loc_1] * rentReturnsLoc2[car_loc_2 + car_moved][car_real_loc_2] * (returnsLoc1[car_loc_1 - car_moved] + returnsLoc2[car_loc_2 + car_moved] + DISCOUNT * valueMatrix[car_real_loc_1][car_real_loc_2])
- valueMatrix[car_loc_1][car_loc_2] = profit
- delta = max(delta, abs(profit_prev - profit))
- return valueMatrix
- def improvePolicy(returnsLoc1, returnsLoc2, rentReturnsLoc1, rentReturnsLoc2, valueMatrix):
- policy = []
- for i in range(MAX_NUM_CARS + 1):
- policy.append([])
- for j in range(MAX_NUM_CARS + 1):
- policy[i].append(0)
- for car_loc_1 in range(MAX_NUM_CARS + 1):
- for car_loc_2 in range(MAX_NUM_CARS + 1):
- max_profit = 0
- for car_moved in range(-MAX_MOVE_CARS, MAX_MOVE_CARS+1):
- if (car_loc_1 - car_moved >= 0) & (car_loc_2 + car_moved >= 0) & (car_loc_1 - car_moved <= MAX_NUM_CARS) & (car_loc_2 + car_moved <= MAX_NUM_CARS):
- profit = -MOVE_CAR_COST * abs(car_moved)
- for car_real_loc_1 in range(MAX_NUM_CARS + 1):
- for car_real_loc_2 in range(MAX_NUM_CARS + 1):
- profit += rentReturnsLoc1[car_loc_1 - car_moved][car_real_loc_1] * rentReturnsLoc2[car_loc_2 + car_moved][car_real_loc_2] * (returnsLoc1[car_loc_1 - car_moved] + returnsLoc2[car_loc_2 + car_moved] + DISCOUNT * valueMatrix[car_real_loc_1][car_real_loc_2])
- if profit > max_profit:
- policy[car_loc_1][car_loc_2] = car_moved
- max_profit = profit
- return policy
- def initCache():
- cachedReturnsLoc1 = cacheProfit(RENT_FIRST_LOC)
- cachedReturnsLoc2 = cacheProfit(RENT_SECOND_LOC)
- cachedRentReturnsLoc1 = cacheRentReturnsProbability(RENT_FIRST_LOC, RETURN_FIRST_LOC)
- cachedRentReturnsLoc2 = cacheRentReturnsProbability(RENT_SECOND_LOC, RETURN_SECOND_LOC)
- valueMatrix = []
- for i in range(MAX_NUM_CARS + 1):
- valueMatrix.append([])
- for j in range(MAX_NUM_CARS + 1):
- valueMatrix[i].append(0)
- policy = []
- for i in range(MAX_NUM_CARS + 1):
- policy.append([])
- for j in range(MAX_NUM_CARS + 1):
- policy[i].append(0)
- return cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, valueMatrix, policy
- def main():
- cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, valueMatrix, policy = initCache()
- while True:
- valueMatrix = evaluatePolicy(cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, policy)
- newPolicy = improvePolicy(cachedReturnsLoc1, cachedReturnsLoc2, cachedRentReturnsLoc1, cachedRentReturnsLoc2, valueMatrix)
- policyStable = np.sum(newPolicy != policy)
- policy = newPolicy
- if policyStable == 0:
- break
- for i in range(len(policy)):
- for j in range(len(policy[0])):
- print(int(policy[i][j]), end="\t")
- print()
- pass
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement