Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random
- class Environment:
- def __init__(self, c_1, c_2):
- self.c_1 = c_1
- self.c_2 = c_2
- def penalty(self, action):
- if action == 1:
- if random.random() <= self.c_1:
- return True
- else:
- return False
- elif action == 2:
- if random.random() <= self.c_2:
- return True
- else:
- return False
- class Tsetlin:
- def __init__(self, n):
- # n is the number of states per action
- self.n = n
- # Initial state selected randomly
- self.state = random.choice([self.n, self.n+1])
- def reward(self):
- if self.state <= self.n and self.state > 1:
- self.state -= 1
- elif self.state > self.n and self.state < 2*self.n:
- self.state += 1
- def penalize(self):
- if self.state <= self.n:
- self.state += 1
- elif self.state > self.n:
- self.state -= 1
- def makeDecision(self):
- if self.state <= self.n:
- return 1
- else:
- return 2
- env = Environment(0.1, 0.3)
- la = Tsetlin(3)
- action_count = [0, 0]
- for i in range(500):
- action = la.makeDecision()
- action_count[action - 1] += 1
- penalty = env.penalty(action)
- print("State:", la.state, "Action:", action)
- if penalty:
- print("Penalty")
- la.penalize()
- else:
- print("Reward")
- la.reward()
- print("New state: ", la.state, "\n")
- print("#Action 1: ", action_count[0], " #Action 2: ", action_count[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement