import numpy as np
import tensorflow as tf


class Model:
    """Linear Q-function approximator trained with semi-gradient Q-learning."""

    def __init__(self, alpha=0.1, discount=0.9):
        # One weight per feature: bias, action, and the two state coordinates.
        self.W = np.random.random(4)
        self.alpha = alpha        # learning rate
        self.discount = discount  # discount factor (gamma)

    def predict(self, a, y, x, W=None):
        """Q(s, a) as a linear combination of a bias, the action, and the state."""
        if W is None:
            W = self.W
        b = 1  # bias feature
        return b * W[0] + a * W[1] + y * W[2] + x * W[3]

    def gradient(self, a, y, x):
        """Gradient of Q(s, a) with respect to the weights, via tf.GradientTape."""
        W = tf.convert_to_tensor(self.W)
        with tf.GradientTape() as t:
            t.watch(W)
            out = self.predict(a, y, x, W)
        # Return a NumPy array so self.W stays a plain ndarray after updates.
        return t.gradient(out, W).numpy()

    def updateW(self, oldState, action, newState, reward, terminal=False):
        if terminal:
            # Terminal transition: the target is just the reward.
            Qold = self.predict(action, oldState[0], oldState[1])
            Qoldgrad = self.gradient(action, oldState[0], oldState[1])
            self.W = self.W + self.alpha * (reward - Qold) * Qoldgrad
        else:
            # Find the highest-valued action in the new state
            # (the max term in the Q-learning target).
            actionspace = 4
            A = None
            highestVal = None
            for a in range(1, actionspace + 1):
                aval = self.predict(a, newState[0], newState[1])
                if highestVal is None or aval > highestVal:
                    highestVal = aval
                    A = a
            Qold = self.predict(action, oldState[0], oldState[1])
            Qnew = self.predict(A, newState[0], newState[1])
            Qoldgrad = self.gradient(action, oldState[0], oldState[1])
            print(self.W, self.alpha, reward, self.discount, Qnew, Qold, Qoldgrad)  # debug output
            # Semi-gradient Q-learning update:
            # W <- W + alpha * (r + gamma * max_a Q(s', a) - Q(s, action)) * grad_W Q(s, action)
            self.W = self.W + self.alpha * (reward + self.discount * Qnew - Qold) * Qoldgrad


model = Model()

for i in range(10):
    model.updateW([3, 2], 1, [3, 4], 0)
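Since predict is linear in W, the gradient that tf.GradientTape computes is just the feature vector [1, a, y, x]. A quick sanity check of that (a minimal sketch assuming TensorFlow 2.x with eager execution; the inputs are arbitrary):

# Sanity check: for a linear model the gradient equals the feature vector.
m = Model()
print(m.gradient(2, 3, 4))  # expected: [1. 2. 3. 4.]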