Advertisement
Guest User

Final Result

a guest
May 29th, 2016
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.46 KB | None | 0 0
  1. import re
  2. import numpy as np
  3.  
  4. class MDP:
  5.         m = None
  6.         n = None
  7.     beta = 0.1
  8.         matrix = []
  9.         reward = []
  10.         utilities = []
  11.         best_policy = [None] * 10
  12.     def __init__(self, file):
  13.         f = open(file)
  14.             n = 0
  15.             m = 0
  16.             matrix = []
  17.             i = -1 #so that we allocate to the correct group
  18.             for line in f:
  19.                 temp = line.replace('\n', '').replace('    ', ' ').split(' ')
  20.                 if len(temp) == 1:
  21.                         i += 1
  22.                         matrix.append([])
  23.                         continue
  24.                 elif len(temp) == 2:
  25.                         temp = map(int, temp)
  26.                         n = temp[0]
  27.                     m = temp[1]
  28.                 elif len(temp) == n:
  29.                         temp = map(float, temp)
  30.                         matrix[i].append(temp)
  31.             reward = matrix[i][0]
  32.             matrix.pop(i)
  33.         self.n = n
  34.         self.m = m
  35.         self.matrix = matrix
  36.         self.reward = reward
  37.         self.utilities.append(self.reward)
  38.  
  39.         def calc_delta(self):
  40.                 num = (1 * np.exp(-10)) * np.power((1 - self.beta), 2)
  41.                 den = 2 * np.power(self.beta, 2)
  42.                 return float(num / den)
  43.  
  44.     def greatest_change(self):
  45.         max_val = 0.0
  46.         for count in range(0,10):
  47.             diff = self.utilities[(len(self.utilities)-1)][count] - self.utilities[(len(self.utilities)-2)][count]
  48.             if abs(diff) > max_val:
  49.                 max_val = abs(diff)
  50.         return max_val
  51.  
  52.         def set_beta(self, b):
  53.                 self.beta = b
  54.  
  55. def generate_utility(MDP,i):
  56.     current_utility = []
  57.     for state in range(0,10):
  58.         decision = -1.0
  59.         best_utility = -1.0
  60.         for action in range(0,4):
  61.             total_consequence = 0.0
  62.             total_consequence += MDP.reward[state]
  63.             for consequence in range(0,10):
  64.                 total_consequence += MDP.matrix[action][state][consequence] * MDP.utilities[i][consequence] * MDP.beta
  65.             if total_consequence > best_utility:
  66.                 best_utility = total_consequence
  67.                 decision = action
  68.         current_utility.append(best_utility)
  69.         MDP.best_policy[state] = decision
  70.     MDP.utilities.append(current_utility)
  71.    
  72. if __name__ == "__main__":
  73.         array = MDP('test-data-for-MDP.txt')
  74.     iterator = 0
  75.     while True:
  76.         generate_utility(array,iterator)
  77.         diff = array.greatest_change()
  78.         if diff < array.calc_delta():
  79.             break
  80.         iterator = iterator + 1
  81.     print "Final Utility: " + str(array.utilities[len(array.utilities)-1])
  82.     print "Final Policy " + str(array.best_policy)
  83.     print "Iterations: " + str(iterator)
  84.     print "Delta Threshold " + str(array.calc_delta())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement