do_class_AI_1

import numpy
import random


class eGreedyAgent:
    # An n-armed-bandit agent supporting epsilon-greedy, pursuit, softmax
    # (exponential), and reinforcement-comparison action selection.

    def __init__(self, e1=0.2, n=10, temp=1):
        # All mutable state is created per instance so that lists are not
        # shared between agents via class attributes.
        self.e = e1          # exploration rate epsilon
        self.n = n           # number of arms
        self.temp = temp     # softmax temperature
        self.score = 0
        self.preference = 0  # reference reward for reinforcement comparison
        self.counter_exploration = 0
        self.counter_exploitation = 0
        self.value = [[s, 0] for s in range(n)]  # [action, value estimate]
        self.counter = [0] * n                   # pull count per arm
        self.probability = [1.0 / n] * n         # pursuit selection probabilities

    def random_choice(self):
        # Uniform-random arm, used as a baseline policy.
        action = random.randint(0, self.n - 1)
        self.counter[action] += 1
        return action

    def greedy_choice(self):
        # Epsilon-greedy: exploit the current best arm with probability
        # 1 - e, otherwise explore uniformly among the other arms.
        r = numpy.random.random()
        max_action = self.max_finder()
        if r > self.e:
            self.counter[max_action] += 1
            self.counter_exploitation += 1
            return max_action  # exploit
        else:
            # Draw from the n - 1 non-greedy arms, skipping max_action.
            action = random.randint(0, self.n - 2)
            if action >= max_action:
                action += 1
            self.counter[action] += 1
            self.counter_exploration += 1
            return action  # explore

    def pursuit_choice(self):
        # Pursuit method: nudge the selection probability of the current
        # best arm toward 1 and all others toward 0 (step size 0.01), then
        # sample an arm from the resulting distribution.
        r = numpy.random.random_sample()
        max_action = self.max_finder()
        self.probability[max_action] += 0.01 * (1 - self.probability[max_action])
        for u in range(self.n):
            if u != max_action:
                self.probability[u] += 0.01 * (0 - self.probability[u])
        # Inverse-CDF sampling: walk down the remaining probability mass
        # until it drops below r.
        remaining = 1
        for i in range(self.n):
            remaining -= self.probability[i]
            if r > remaining:
                self.counter[i] += 1
                return i
        # Floating-point round-off can leave the loop without a hit;
        # fall back to the last arm.
        self.counter[self.n - 1] += 1
        return self.n - 1

    def exponential_choice(self, temp=1):
        # Softmax (Boltzmann) selection: sample arm i with probability
        # exp(Q_i / temp) / sum_j exp(Q_j / temp).
        r = numpy.random.random()
        maxrange = max(v[1] for v in self.value)
        # Shift all estimates by a common constant before exponentiating so
        # exp() neither overflows nor underflows; the constant cancels in
        # the softmax ratio, so the distribution is unchanged.
        if maxrange > 30:
            shift = maxrange - 30
        elif maxrange < 10:
            shift = maxrange - 10
        else:
            shift = 0
        value_exp = [numpy.exp((v[1] - shift) / temp) for v in self.value]
        sum_value_exp = sum(value_exp)
        # Inverse-CDF sampling over the softmax distribution.
        sum_possibility = sum_value_exp
        for pointer in range(self.n):
            sum_possibility -= value_exp[pointer]
            if sum_possibility / sum_value_exp < r:
                self.counter[pointer] += 1
                return pointer
        # Fall back to the last arm if round-off exhausted the loop.
        self.counter[self.n - 1] += 1
        return self.n - 1

    def value_feedback(self, action=0, reward=0):
        # Sample-average update: Q <- Q + (reward - Q) / k, where k is the
        # number of times the arm has been pulled.
        self.score += reward
        self.value[action][1] += (reward - self.value[action][1]) / self.counter[action]

    def nonstationary_value_feedback(self, action=0, reward=0):
        # Constant step-size update (alpha = 0.1) for nonstationary tasks,
        # which weights recent rewards more heavily.
        self.score += reward
        self.value[action][1] += 0.1 * (reward - self.value[action][1])

    def reinforcement_comparison_feedback(self, action=0, reward=0):
        # Reinforcement comparison: grow the chosen arm's preference by how
        # much the reward beats the reference reward, scaled by (1 - pi_a)
        # so arms that are already likely to be picked change more slowly.
        self.score += reward
        maxrange = max(v[1] for v in self.value)
        # Same overflow-guarding shift as in exponential_choice.
        if maxrange > 30:
            shift = maxrange - 30
        elif maxrange < 10:
            shift = maxrange - 10
        else:
            shift = 0
        value_exp = [numpy.exp((v[1] - shift) / self.temp) for v in self.value]
        self.value[action][1] += 0.1 * (reward - self.preference) * (
                1 - value_exp[action] / sum(value_exp))
        # Track the reference reward as an exponential moving average.
        self.preference += 0.1 * (reward - self.preference)

    def reinforcement_comparison_feedback_without_factor(self, action=0, reward=0):
        # Same update as above but without the (1 - pi_a) factor, so the
        # softmax probabilities are not needed here at all.
        self.score += reward
        self.value[action][1] += 0.1 * (reward - self.preference)
        self.preference += 0.1 * (reward - self.preference)

    def max_finder(self):
        # Return the action with the highest value estimate, seeding the
        # search with arm 0 so negative estimates are handled correctly.
        max_value = self.value[0][1]
        max_action = self.value[0][0]
        for r1 in range(1, self.n):
            if max_value < self.value[r1][1]:
                max_value = self.value[r1][1]
                max_action = self.value[r1][0]
        return max_action

    def reset(self):
        # Restore the agent to its initial state so it can be reused on a
        # fresh subtask; value and counter must be rebuilt, not just
        # emptied, or every later choice and feedback call would fail.
        self.score = 0
        self.preference = 0
        self.counter_exploration = 0
        self.counter_exploitation = 0
        self.value = [[s, 0] for s in range(self.n)]
        self.counter = [0] * self.n
        self.probability = [1.0 / self.n] * self.n


class task:
    # A testbed of num independent 10-armed-bandit subtasks; each subtask
    # stores the true mean reward of its 10 arms.

    def __init__(self, num=2000, stationary=True):
        self.tasks = []
        if stationary:
            # Stationary testbed: each arm mean drawn once, uniform in [0, 1).
            for n in range(num):
                self.tasks.append([numpy.random.rand() for _ in range(10)])
        else:
            # Nonstationary testbed: all arm means start at 0 and drift
            # via nonstationarymove().
            for n in range(num):
                self.tasks.append([0 for _ in range(10)])

    def reward(self, subtask=0, action=0):
        # Reward is a unit-variance Gaussian centred on the arm's true mean.
        return numpy.random.normal(self.tasks[subtask][action])

    def nonstationarymove(self, subtask=0):
        # Random walk: each arm's true mean takes a uniform step up or
        # down with equal probability, making the task nonstationary.
        for n in range(10):
            dice = numpy.random.rand()
            if dice > 0.5:
                self.tasks[subtask][n] += numpy.random.rand()
            else:
                self.tasks[subtask][n] -= numpy.random.rand()
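

# --- Usage sketch (not from the original paste) ---------------------------
# A minimal, assumed driver showing how these classes might be wired
# together: one eGreedyAgent run over a stationary testbed. The subtask
# count, pull count, and epsilon below are illustrative choices only.
if __name__ == '__main__':
    bandits = task(num=100, stationary=True)
    agent = eGreedyAgent(e1=0.1, n=10)
    total = 0
    for subtask in range(100):
        agent.reset()
        for step in range(1000):
            action = agent.greedy_choice()
            agent.value_feedback(action, bandits.reward(subtask, action))
        total += agent.score
    print('average score per subtask:', total / 100)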