# AI_DLA_IDJOTOW.PY

from tensorforce import Configuration
from tensorforce.agents import TRPOAgent
from tensorforce.core.networks import layered_network_builder
import random

# Example of "reinforcement learning" based on the Tensorforce framework: https://github.com/reinforceio/tensorforce/

# The network learns to "bet" on the results of a simple random generator.
# It scores points for betting 0 or 1 when the generator draws a value < 5.
# For the highest possible "bet" on the largest number it gets the maximum score, i.e. 10.
# After 8192 loops the network's accuracy is 100% :)
# I do not understand the parameters below yet :F


# Agent configuration consumed by TRPOAgent further down in this script.
config = Configuration(
    batch_size=10,
    # Two-element state vector, matching the [draw, 0] list MyClass produces.
    states={'shape': (2,)},
    # Discrete (non-continuous) action space with four possible actions.
    actions={'continuous': False, 'num_actions': 4},
    # Network assembled from a single dense layer of size 3.
    network=layered_network_builder([{'type': 'dense', 'size': 3}]),
)

class MyClass:
    """One-shot betting game: holds a random draw and scores a bet on it."""

    def __init__(self):
        # Element 0 is the draw (0..10 inclusive); element 1 is a constant 0.
        # Original author's note: the state has to have two entries,
        # otherwise it does not work (reason unknown).
        drawn = random.randint(0, 10)
        self.state = [drawn, 0]

    def get_state(self):
        """Return the state list created in ``__init__`` (i.e. "the draw")."""
        return self.state

    def execute(self, action):
        """Return the reward for betting ``action`` on the current draw.

        Reward rules, keyed on the drawn value ``self.state[0]``:

        * draw < 5: 10 when ``action <= 1``, otherwise 0;
        * draw >= 9: a random integer in 8..10, whatever the action;
        * 5 <= draw < 9: a random integer in 6..8 when ``3 <= action <= 9``,
          otherwise 0.
        """
        drawn = self.state[0]

        if drawn < 5:
            # Low draw: only the two lowest bets pay out (fixed reward).
            return 10 if action <= 1 else 0

        if drawn >= 9:
            # Top draws always pay, independent of the bet.
            return random.randint(8, 10)

        # Remaining case: 5 <= drawn < 9.
        if 3 <= action <= 9:
            return random.randint(6, 8)
        return 0

# Build the TRPO agent from the configuration defined above.
agent = TRPOAgent(config=config)

# Training loop: 8192 independent rounds. Every round creates a fresh MyClass
# (and therefore a fresh random draw), asks the agent for one action, scores
# it, and reports the reward with terminal=True, so each round is a
# single-step episode.
for i in range(8192):
    client = MyClass()

    state = client.get_state()
    print("GENERATOR:",state)
    # Get prediction from agent, execute
    action = agent.act(state=state)
    print("(AI)STAWIAM:",action)
    reward = client.execute(action)
    print("NAGRODA:",reward)
    # Add experience, agent automatically updates model according to batch size
    agent.observe(reward=reward, terminal=True)