Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from __future__ import print_function
- import argparse
- import sys
- import random
- import numpy as np
- from collections import deque
- import json
- import socket
- from keras.initializers import normal, identity
- from keras.models import model_from_json
- from keras.models import Sequential
- from keras.layers.core import Dense, Dropout, Activation, Flatten
- from keras.optimizers import SGD , Adam
- #from keras.utils import plot_model
- import tensorflow as tf
GAME = 'glidethroughthesky' # the name of the game being played for log files
ACTIONS = 3 # number of valid actions
GAMMA = 0.99 # decay rate of past observations
OBSERVATION = 50 # timesteps to observe before training begins
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.01 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1 # act on every frame
LEARNING_RATE = 1e-4 # Adam learning rate
### init values
# Latest raw feature values received from the game; mutated IN PLACE by
# updateFeatures(). These are pre-connection defaults. The dict's insertion
# order doubles as the network's input ordering (np.fromiter over .values()).
features = {
    "targetYDistance": 25,
    "midYDistance": 25,
    # "targetDistance": 999,
    # "midDistance": 999,
    "distance": 25,
    "vY": 0,
    "mana": 3
}
# Per-message event flags; module-level defaults (the functions below mostly
# shadow these with locals/parameters of the same names).
passedWall = False
dashTarget = False
died = False
win = False
passedWalls = 0 # walls cleared in the current episode; logged on death/win
# TCP connection to the game client, assigned in main() after accept().
client = None
address = None
def buildmodel():
    """Build and compile the MLP used as the Q-network.

    Input: one scalar per entry in the module-level `features` dict.
    Output: one Q-value per action (ACTIONS of them).

    Returns the compiled Keras model (MSE loss, Adam optimizer).
    """
    model = Sequential()
    model.add(Dense(units=32, input_dim=len(features), activation='relu'))
    # input_dim on a non-first layer is ignored by Keras; dropped.
    model.add(Dense(units=32, activation='relu'))
    # Linear output head: the original used softmax, which bounds each output
    # to [0, 1] and forces them to sum to 1 — the Bellman targets written in
    # trainNetwork (reward + GAMMA*max Q, with rewards down to -10) lie far
    # outside that range, so MSE regression could never fit them.
    model.add(Dense(units=ACTIONS, activation='linear'))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse', optimizer=adam)
    model.summary()
    return model
def standardize(features):
    """Return a copy of *features* with each value rescaled to roughly [0, 1].

    The input dict is not modified; key order is preserved.
    """
    transforms = {
        "mana": lambda v: v / 10,
        "distance": lambda v: v / 25,
        "targetYDistance": lambda v: (v - 8) / 16,
        "midYDistance": lambda v: (v - 8) / 16,
        "vY": lambda v: (v + 20) / 40,
    }
    scaled = features.copy()
    for key, rescale in transforms.items():
        scaled[key] = rescale(scaled[key])
    #print(json.dumps(scaled))
    return scaled
def updateFeatures(dataMerge, died, win, dashTarget, passedWall):
    """Receive one chunk from the game socket and merge it into `features`.

    The chunk is appended to *dataMerge*; once a complete JSON object
    (terminated by '}') is present, it is parsed: numeric fields update the
    global `features` dict in place, and the presence of the event keys
    "died" / "win" / "passedWall" / "dashTarget" raises the matching flag.

    Always returns the tuple (died, win, dashTarget, passedWall), including
    on the incomplete/malformed-data paths — the original returned bare
    None there, which crashed the caller's 4-way unpack.

    NOTE(review): the leftover buffer is not returned, so a caller that
    resets *dataMerge* between calls discards partial messages.
    """
    global client
    size = 8192
    loadedData = client.recv(size).decode("utf-8")
    dataMerge += loadedData
    # The original computed a string slice here and compared it to -1, so
    # the "no complete object yet" check could never fire; use the integer
    # index, and search the merged buffer rather than only the last chunk.
    endIndex = dataMerge.find('}')
    if endIndex == -1:
        return died, win, dashTarget, passedWall
    try:
        data = json.loads(dataMerge[:endIndex + 1])
    except ValueError:
        # Malformed fragment — keep current features and flags.
        return died, win, dashTarget, passedWall
    # Numeric features: update only the keys present in this message.
    for key in ("targetYDistance", "midYDistance", "distance", "mana", "vY"):
        if key in data:
            features[key] = float(data[key])
    # Event flags are signalled by key presence alone.
    if "died" in data:
        died = True
    if "win" in data:
        win = True
    if "passedWall" in data:
        passedWall = True
    if "dashTarget" in data:
        dashTarget = True
    return died, win, dashTarget, passedWall
def waitForFeatures():
    """Receive the next game update and shape it into (state, reward, terminal).

    Returns the standardized feature dict, a scalar reward derived from the
    event flags (passedWall/dashTarget/died/win) and from mana spent since
    the previous state, and a terminal flag (True only on death).
    """
    global passedWalls
    # Snapshot BEFORE receiving. The original did `oldFeatures = features`,
    # aliasing the global dict that updateFeatures() mutates in place, so
    # every old-vs-new comparison below compared the dict with itself and
    # the mana-spend penalties could never trigger; .copy() restores the
    # intended before/after semantics. (The original's change-detection
    # for-loop was dead code for the same reason and is dropped.)
    oldFeatures = features.copy()
    died = False
    win = False
    dashTarget = False
    passedWall = False

    dataMerge = ""
    died, win, dashTarget, passedWall = updateFeatures(dataMerge, died, win, dashTarget, passedWall)

    terminal = False
    reward = 0
    if passedWall:
        print("passedWall")
        reward = 1
        passedWalls += 1
    else:
        # Penalize spending mana on anything that was not a dash target,
        # harder when it was the last point of mana.
        if oldFeatures["mana"] > features["mana"] and dashTarget == False:
            reward = -2
            if oldFeatures["mana"] == 1 and features["mana"] == 0:
                reward = -5
    if dashTarget:
        print("dashTarget")
        reward = 2
        if oldFeatures["mana"] == 1 and features["mana"] == 0:
            reward = -2
    if features["mana"] == -1:
        reward = -10
    if died:
        print("------------ DIED ------------")
        # Dying far from the gap, or with mana left unspent, costs more.
        reward = -1 - 10 * abs(oldFeatures["midYDistance"]) - 10 * features["mana"]
        terminal = True
    if win:
        print(" ** ------------ --- ------------")
        print(" ** ------------ WIN ------------")
        print(" ** ------------ --- ------------")
    if died or win:
        # Log how many walls this episode cleared, then reset the counter.
        with open("log.txt", "a") as outfile:
            outfile.write(str(passedWalls) + "\r\n")
        passedWalls = 0
    #print ("\t" + json.dumps(features))
    return standardize(features), reward, terminal
def sendAction(a_t):
    """Translate a one-hot action vector into game commands over the socket.

    Index 0 is a no-op; index 1 sends "dash", index 2 sends "jump".
    """
    global client
    commands = {1: "dash\n", 2: "jump\n"}
    for index, command in commands.items():
        if a_t[index] == 1:
            client.send(command.encode())
def trainNetwork(model,args):
    """Run the DQN loop: act epsilon-greedily, observe, store, replay-train.

    In '--mode Load' the saved weights are restored and epsilon starts low
    (mostly exploit); otherwise training starts from INITIAL_EPSILON.
    Runs forever; weights are checkpointed every 10000 steps.
    """
    ### # store the previous observations in replay memory
    D = deque()
    # Kick off with a no-op action so the game sends the first state.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    sendAction(do_nothing)
    s_t, r_0, terminal = waitForFeatures()
    OBSERVE = OBSERVATION # I think this should be 0 here, since there is no convolutional network
    if args['mode'] == 'Load':
        epsilon = FINAL_EPSILON
        # NOTE(review): immediately overrides the FINAL_EPSILON assignment above.
        epsilon = 0.008
        # epsilon = INITIAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else: #We go to training mode
        epsilon = INITIAL_EPSILON
    t = 0
    while (True):
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])
        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
                print("----------Random Action: ", a_t , " ---------- " , epsilon)
            else:
                # s_t is the standardized feature dict; flatten its values into
                # a 1 x len(features) input row (relies on dict insertion order).
                p = np.fromiter(s_t.values(), float).reshape((1, len(features)))
                q = model.predict(p)
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[max_Q] = 1
        #We reduced the epsilon gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        # global features
        # NOTE(review): this aliases the global dict (no copy) and
        # waitForFeatures() mutates it in place, so the "old" state stored
        # below is actually read AFTER the update — confirm whether
        # features.copy() was intended here.
        old_features = features
        sendAction(a_t)
        s_t1, r_t, terminal = waitForFeatures()
        # Store the transition (state, action, reward, next state, terminal).
        D.append((np.fromiter(old_features.values(), float).reshape((1, len(features))), action_index, r_t, (np.fromiter(s_t1.values(), float).reshape((1, len(features)))), terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()
        #only train if done observing
        if t > OBSERVE:
            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            #Now we do the experience replay
            state_t, action_t, reward_t, state_t1, terminal = zip(*minibatch)
            state_t = np.concatenate(state_t)
            state_t1 = np.concatenate(state_t1)
            targets = model.predict(state_t)
            Q_sa = model.predict(state_t1)
            # Bellman update on the taken actions; np.invert(terminal) zeroes
            # the bootstrap term for terminal transitions.
            targets[range(BATCH), action_t] = reward_t + GAMMA*np.max(Q_sa, axis=1)*np.invert(terminal)
            loss += model.train_on_batch(state_t, targets)
        s_t = s_t1
        t = t + 1
        # save progress every 10000 iterations
        if t % 10000 == 0:
            print("Now we save model")
            model.save_weights("model.h5", overwrite=True)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        #print("TIMESTEP", t, "/ STATE", state, \
        #    "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
        #    "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)
    # NOTE(review): unreachable — the while (True) loop above never breaks.
    print("Episode finished!")
    print("************************")
def main():
    """Parse CLI args, build the model, accept one game client, then train.

    Sets the module globals `client` and `address` for the socket helpers.
    """
    arg_parser = argparse.ArgumentParser(description='Description of your program')
    arg_parser.add_argument('-m','--mode', help='Train / Load', required=False)
    args = vars(arg_parser.parse_args())

    model = buildmodel()

    # Listen on all interfaces and block until the game connects.
    listen_address = ('', 50000)
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server.bind(listen_address)
    server.listen(5)

    global client
    global address
    client, address = server.accept()
    # while client != None:
    # client, address = s.accept() # TODO WAIT
    print("Client connected.")
    client.send(("Hello!\n").encode())

    trainNetwork(model,args)
- if __name__ == "__main__":
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True
- sess = tf.Session(config=config)
- from keras import backend as K
- K.set_session(sess)
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement