Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from __future__ import print_function
- import argparse
- import sys
- import random
- import numpy as np
- from collections import deque
- import json
- import socket
- from keras.initializers import normal, identity
- from keras.models import model_from_json
- from keras.models import Sequential
- from keras.layers.core import Dense, Dropout, Activation, Flatten
- from keras.optimizers import SGD , Adam
- #from keras.utils import plot_model
- import tensorflow as tf
GAME = 'glidethroughthesky' # the name of the game being played for log files
ACTIONS = 3 # number of valid actions
GAMMA = 0.99 # decay rate of past observations
OBSERVATION = 50 # timesteps to observe before training begins
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.01 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1 # act on every frame
LEARNING_RATE = 1e-4 # Adam learning rate
### init values
# Latest raw feature values received from the game; mutated IN PLACE by
# updateFeatures(). These are pre-connection defaults. The dict's insertion
# order doubles as the network's input ordering (np.fromiter over .values()).
features = {
    "targetYDistance": 25,
    "midYDistance": 25,
    # "targetDistance": 999,
    # "midDistance": 999,
    "distance": 25,
    "vY": 0,
    "mana": 3
}
# Per-message event flags; module-level defaults (the functions below mostly
# shadow these with locals/parameters of the same names).
passedWall = False
dashTarget = False
died = False
win = False
passedWalls = 0 # walls cleared in the current episode; logged on death/win
# TCP connection to the game client, assigned in main() after accept().
client = None
address = None
def buildmodel():
    """Build and compile the MLP used as the Q-network.

    Input: one scalar per entry in the module-level `features` dict.
    Output: one Q-value per action (ACTIONS of them).

    Returns the compiled Keras model (MSE loss, Adam optimizer).
    """
    model = Sequential()
    model.add(Dense(units=32, input_dim=len(features), activation='relu'))
    # input_dim on a non-first layer is ignored by Keras; dropped.
    model.add(Dense(units=32, activation='relu'))
    # Linear output head: the original used softmax, which bounds each output
    # to [0, 1] and forces them to sum to 1 — the Bellman targets written in
    # trainNetwork (reward + GAMMA*max Q, with rewards down to -10) lie far
    # outside that range, so MSE regression could never fit them.
    model.add(Dense(units=ACTIONS, activation='linear'))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse', optimizer=adam)
    model.summary()
    return model
def standardize(features):
    """Return a copy of *features* with each value rescaled to roughly [0, 1].

    The input dict is not modified; key order is preserved.
    """
    transforms = {
        "mana": lambda v: v / 10,
        "distance": lambda v: v / 25,
        "targetYDistance": lambda v: (v - 8) / 16,
        "midYDistance": lambda v: (v - 8) / 16,
        "vY": lambda v: (v + 20) / 40,
    }
    scaled = features.copy()
    for key, rescale in transforms.items():
        scaled[key] = rescale(scaled[key])
    #print(json.dumps(scaled))
    return scaled
def updateFeatures(dataMerge, died, win, dashTarget, passedWall):
    """Receive one chunk from the game socket and merge it into `features`.

    The chunk is appended to *dataMerge*; once a complete JSON object
    (terminated by '}') is present, it is parsed: numeric fields update the
    global `features` dict in place, and the presence of the event keys
    "died" / "win" / "passedWall" / "dashTarget" raises the matching flag.

    Always returns the tuple (died, win, dashTarget, passedWall), including
    on the incomplete/malformed-data paths — the original returned bare
    None there, which crashed the caller's 4-way unpack.

    NOTE(review): the leftover buffer is not returned, so a caller that
    resets *dataMerge* between calls discards partial messages.
    """
    global client
    size = 8192
    loadedData = client.recv(size).decode("utf-8")
    dataMerge += loadedData
    # The original computed a string slice here and compared it to -1, so
    # the "no complete object yet" check could never fire; use the integer
    # index, and search the merged buffer rather than only the last chunk.
    endIndex = dataMerge.find('}')
    if endIndex == -1:
        return died, win, dashTarget, passedWall
    try:
        data = json.loads(dataMerge[:endIndex + 1])
    except ValueError:
        # Malformed fragment — keep current features and flags.
        return died, win, dashTarget, passedWall
    # Numeric features: update only the keys present in this message.
    for key in ("targetYDistance", "midYDistance", "distance", "mana", "vY"):
        if key in data:
            features[key] = float(data[key])
    # Event flags are signalled by key presence alone.
    if "died" in data:
        died = True
    if "win" in data:
        win = True
    if "passedWall" in data:
        passedWall = True
    if "dashTarget" in data:
        dashTarget = True
    return died, win, dashTarget, passedWall
def waitForFeatures():
    """Receive the next game update and shape it into (state, reward, terminal).

    Returns the standardized feature dict, a scalar reward derived from the
    event flags (passedWall/dashTarget/died/win) and from mana spent since
    the previous state, and a terminal flag (True only on death).
    """
    global passedWalls
    # Snapshot BEFORE receiving. The original did `oldFeatures = features`,
    # aliasing the global dict that updateFeatures() mutates in place, so
    # every old-vs-new comparison below compared the dict with itself and
    # the mana-spend penalties could never trigger; .copy() restores the
    # intended before/after semantics. (The original's change-detection
    # for-loop was dead code for the same reason and is dropped.)
    oldFeatures = features.copy()
    died = False
    win = False
    dashTarget = False
    passedWall = False

    dataMerge = ""
    died, win, dashTarget, passedWall = updateFeatures(dataMerge, died, win, dashTarget, passedWall)

    terminal = False
    reward = 0
    if passedWall:
        print("passedWall")
        reward = 1
        passedWalls += 1
    else:
        # Penalize spending mana on anything that was not a dash target,
        # harder when it was the last point of mana.
        if oldFeatures["mana"] > features["mana"] and dashTarget == False:
            reward = -2
            if oldFeatures["mana"] == 1 and features["mana"] == 0:
                reward = -5
    if dashTarget:
        print("dashTarget")
        reward = 2
        if oldFeatures["mana"] == 1 and features["mana"] == 0:
            reward = -2
    if features["mana"] == -1:
        reward = -10
    if died:
        print("------------ DIED ------------")
        # Dying far from the gap, or with mana left unspent, costs more.
        reward = -1 - 10 * abs(oldFeatures["midYDistance"]) - 10 * features["mana"]
        terminal = True
    if win:
        print(" ** ------------ --- ------------")
        print(" ** ------------ WIN ------------")
        print(" ** ------------ --- ------------")
    if died or win:
        # Log how many walls this episode cleared, then reset the counter.
        with open("log.txt", "a") as outfile:
            outfile.write(str(passedWalls) + "\r\n")
        passedWalls = 0
    #print ("\t" + json.dumps(features))
    return standardize(features), reward, terminal
def sendAction(a_t):
    """Translate a one-hot action vector into game commands over the socket.

    Index 0 is a no-op; index 1 sends "dash", index 2 sends "jump".
    """
    global client
    commands = {1: "dash\n", 2: "jump\n"}
    for index, command in commands.items():
        if a_t[index] == 1:
            client.send(command.encode())
def trainNetwork(model,args):
    """Run the DQN loop: act epsilon-greedily, observe, store, replay-train.

    In '--mode Load' the saved weights are restored and epsilon starts low
    (mostly exploit); otherwise training starts from INITIAL_EPSILON.
    Runs forever; weights are checkpointed every 10000 steps.
    """
    ### # store the previous observations in replay memory
    D = deque()
    # Kick off with a no-op action so the game sends the first state.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    sendAction(do_nothing)
    s_t, r_0, terminal = waitForFeatures()
    OBSERVE = OBSERVATION # I think this should be 0 here, since there is no convolutional network
    if args['mode'] == 'Load':
        epsilon = FINAL_EPSILON
        # NOTE(review): immediately overrides the FINAL_EPSILON assignment above.
        epsilon = 0.008
        # epsilon = INITIAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else: #We go to training mode
        epsilon = INITIAL_EPSILON
    t = 0
    while (True):
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])
        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
                print("----------Random Action: ", a_t , " ---------- " , epsilon)
            else:
                # s_t is the standardized feature dict; flatten its values into
                # a 1 x len(features) input row (relies on dict insertion order).
                p = np.fromiter(s_t.values(), float).reshape((1, len(features)))
                q = model.predict(p)
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[max_Q] = 1
        #We reduced the epsilon gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        # global features
        # NOTE(review): this aliases the global dict (no copy) and
        # waitForFeatures() mutates it in place, so the "old" state stored
        # below is actually read AFTER the update — confirm whether
        # features.copy() was intended here.
        old_features = features
        sendAction(a_t)
        s_t1, r_t, terminal = waitForFeatures()
        # Store the transition (state, action, reward, next state, terminal).
        D.append((np.fromiter(old_features.values(), float).reshape((1, len(features))), action_index, r_t, (np.fromiter(s_t1.values(), float).reshape((1, len(features)))), terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()
        #only train if done observing
        if t > OBSERVE:
            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            #Now we do the experience replay
            state_t, action_t, reward_t, state_t1, terminal = zip(*minibatch)
            state_t = np.concatenate(state_t)
            state_t1 = np.concatenate(state_t1)
            targets = model.predict(state_t)
            Q_sa = model.predict(state_t1)
            # Bellman update on the taken actions; np.invert(terminal) zeroes
            # the bootstrap term for terminal transitions.
            targets[range(BATCH), action_t] = reward_t + GAMMA*np.max(Q_sa, axis=1)*np.invert(terminal)
            loss += model.train_on_batch(state_t, targets)
        s_t = s_t1
        t = t + 1
        # save progress every 10000 iterations
        if t % 10000 == 0:
            print("Now we save model")
            model.save_weights("model.h5", overwrite=True)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        #print("TIMESTEP", t, "/ STATE", state, \
        #    "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
        #    "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)
    # NOTE(review): unreachable — the while (True) loop above never breaks.
    print("Episode finished!")
    print("************************")
def main():
    """Parse CLI args, build the model, accept one game client, then train.

    Sets the module globals `client` and `address` for the socket helpers.
    """
    arg_parser = argparse.ArgumentParser(description='Description of your program')
    arg_parser.add_argument('-m','--mode', help='Train / Load', required=False)
    args = vars(arg_parser.parse_args())

    model = buildmodel()

    # Listen on all interfaces and block until the game connects.
    listen_address = ('', 50000)
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server.bind(listen_address)
    server.listen(5)

    global client
    global address
    client, address = server.accept()
    # while client != None:
    # client, address = s.accept() # TODO WAIT
    print("Client connected.")
    client.send(("Hello!\n").encode())

    trainNetwork(model,args)
- if __name__ == "__main__":
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True
- sess = tf.Session(config=config)
- from keras import backend as K
- K.set_session(sess)
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement