Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import numpy as np
class MDP:
    """Problem data and value-iteration history loaded from a text file.

    Presumably a Markov decision process (the driver script loads
    'test-data-for-MDP.txt') -- the class itself only stores and compares
    numbers.

    Expected file layout (whitespace-separated tokens):

    * the first two-token line is the header ``n m`` (two integers);
    * blank lines, or lines holding a single token, separate groups;
    * every line with exactly ``n`` tokens is a row of floats added to the
      current group; lines of any other length are ignored;
    * the last non-empty group supplies the reward vector (its first row);
      all earlier groups are kept, in order, as ``matrix``.

    Attributes:
        n: first integer of the header; also the expected row length.
        m: second integer of the header (stored, not otherwise used here).
        beta: factor used by ``calc_delta``; defaults to 0.1.
        matrix: list of groups, each a list of float rows.
            ``generate_utility`` indexes it as
            ``matrix[action][state][next_state]``.
        reward: list of ``n`` floats.
        utilities: history of utility vectors; starts with ``reward``.
        best_policy: one slot per reward entry, initially ``None``.
    """

    # Immutable class-level defaults only. The list-valued attributes are
    # created per instance in __init__ so separate MDP objects never share
    # (and silently corrupt) each other's state.
    m = None
    n = None
    beta = 0.1

    def __init__(self, file):
        """Parse *file* (a path) and initialise the instance state.

        Raises:
            ValueError: if the file contains no usable data rows, or a
                token cannot be converted to int/float.
        """
        n = None
        m = None
        groups = []
        # `with` guarantees the handle is closed even if parsing fails.
        with open(file) as handle:
            for line in handle:
                # split() with no argument copes with tabs, repeated
                # spaces, trailing blanks and '\r\n' line endings.
                tokens = line.split()
                if len(tokens) <= 1:
                    # Blank or single-token line: start a new group.
                    groups.append([])
                elif n is None and len(tokens) == 2:
                    # Only the first two-token line is the header, so
                    # two-column data rows (n == 2) are not mistaken for it.
                    n, m = int(tokens[0]), int(tokens[1])
                elif len(tokens) == n:
                    if not groups:
                        # Rows appearing before any separator still need
                        # a group to live in.
                        groups.append([])
                    groups[-1].append([float(tok) for tok in tokens])

        # Doubled or trailing blank lines create empty groups; drop them so
        # they neither shift the action indices nor hide the reward row.
        groups = [group for group in groups if group]
        if not groups:
            raise ValueError("no data rows found in %r" % (file,))

        reward = groups.pop()[0]

        self.n = n
        self.m = m
        self.matrix = groups
        self.reward = reward
        self.utilities = [reward]
        self.best_policy = [None] * len(reward)

    def calc_delta(self):
        """Return exp(-10) * (1 - beta)**2 / (2 * beta**2) as a float.

        The driver script stops iterating once ``greatest_change()`` drops
        below this value.
        """
        num = np.exp(-10) * np.power((1 - self.beta), 2)
        den = 2 * np.power(self.beta, 2)
        return float(num / den)

    def greatest_change(self):
        """Return the largest absolute per-entry difference between the two
        most recent utility vectors (0.0 if fewer than two are stored)."""
        if len(self.utilities) < 2:
            return 0.0
        latest = self.utilities[-1]
        previous = self.utilities[-2]
        # Seed with 0.0 so the result is never negative and max() never
        # sees an empty sequence.
        return max([0.0] + [abs(new - old) for new, old in zip(latest, previous)])

    def set_beta(self, b):
        """Set ``beta`` on this instance to *b*."""
        self.beta = b
def generate_utility(MDP, i):
    """Run one value-iteration sweep and record the result on *MDP*.

    For every state ``s`` the new utility is::

        reward[s] + beta * max over actions a of
            sum(matrix[a][s][s2] * utilities[i][s2] for each s2)

    Args:
        MDP: object exposing ``reward``, ``matrix``, ``utilities`` and
            ``beta`` (the parameter name shadows the class of the same
            name; it is kept so existing callers are unaffected).
        i: index into ``MDP.utilities`` of the utility vector to improve.

    Side effects:
        Appends the new utility vector to ``MDP.utilities`` and rebinds
        ``MDP.best_policy`` to a fresh list holding, per state, the index
        of the maximising action (the first one wins ties).
    """
    previous = MDP.utilities[i]
    discount = MDP.beta

    new_utilities = []
    policy = []
    # State and action counts come from the data itself instead of being
    # fixed at 10 and 4.
    for state, state_reward in enumerate(MDP.reward):
        # Start from -inf rather than -1.0 so states whose every candidate
        # value is <= -1 still get a real utility and a real action.
        best_value = float("-inf")
        best_action = None
        for action, transitions in enumerate(MDP.matrix):
            expected = sum(
                prob * util for prob, util in zip(transitions[state], previous)
            )
            value = state_reward + discount * expected
            if value > best_value:
                best_value = value
                best_action = action
        new_utilities.append(best_value)
        policy.append(best_action)

    # A fresh list avoids writing into a fixed-size (or class-level,
    # shared) best_policy list.
    MDP.best_policy = policy
    MDP.utilities.append(new_utilities)
if __name__ == "__main__":
    # Driver: load the problem, sweep until the largest utility change
    # falls below the threshold, then report the results.
    array = MDP('test-data-for-MDP.txt')

    # beta is not modified inside the loop, so the threshold is constant
    # and only needs to be computed once.
    threshold = array.calc_delta()

    # `iterator` doubles as the index of the utility vector fed to the next
    # sweep; it is only advanced after a non-converged sweep, so the number
    # printed below is one less than the count of sweeps performed.
    iterator = 0
    while True:
        generate_utility(array, iterator)
        if array.greatest_change() < threshold:
            break
        iterator += 1

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Final Utility: " + str(array.utilities[-1]))
    print("Final Policy " + str(array.best_policy))
    print("Iterations: " + str(iterator))
    print("Delta Threshold " + str(threshold))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement