if not game:GetService("RunService"):IsRunMode() and script.Name ~= "AIsDeepQClone" then
    script.Enabled = false
elseif game:GetService("RunService"):IsRunMode() then
    script.AIShutdown.Value = true
end
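-- Outside Run mode, every copy of this script except the clone named
-- "AIsDeepQClone" disables itself; in Run mode the AIShutdown flag is raised so
-- the loader below resumes from the "ShutdownNetwork*" save slots.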

if script.Disabled then return end

local RunService = game:GetService("RunService")
local DataStoreService = game:GetService("DataStoreService")
local NetworkStorage = DataStoreService:GetDataStore("NetworkStorage")
local SS = game.ServerStorage
local OpenML = require(SS.OpenML)

local function sigmoid(x)
    return 1 / (1 + math.exp(-x))
end
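
-- Note: sigmoid is defined but never called in this script; the network runs on
-- ReLU activations (FUNCTION_ACTIVATION below).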

local ITERATION_GUI = workspace.Iteration.SurfaceGui.TextLabel
local FINISHES_GUI = workspace.Finishes.SurfaceGui.TextLabel
local GOAL = workspace.goal
local SPAWN = workspace.spawn

-- X thresholds of the 16 checkpoint platforms, in course order.
local PASSED_COORDS = {
    workspace.Passed1.Position.X, workspace.Passed2.Position.X,
    workspace.Passed3.Position.X, workspace.Passed4.Position.X,
    workspace.Passed5.Position.X, workspace.Passed6.Position.X,
    workspace.Passed7.Position.X, workspace.Passed8.Position.X,
    workspace.Passed9.Position.X, workspace.Passed10.Position.X,
    workspace.Passed11.Position.X, workspace.Passed12.Position.X,
    workspace.Passed13.Position.X, workspace.Passed14.Position.X,
    workspace.Passed15.Position.X, workspace.Passed16.Position.X,
}

FINISHES_GUI.FinishCount.Value = 0
FINISHES_GUI.Text = "Finishes: 0"
ITERATION_GUI.IterationCount.Value = 0
ITERATION_GUI.Text = "Iteration: 0"

workspace.Finishes.CFrame = CFrame.new(-128.949203, 90.3132095, 84.4396667, 0.999922872, 0.00161167711, -0.0123128593, 0, 0.991541922, 0.129786715, 0.0124178901, -0.129776701, 0.991465509)
workspace.Iteration.CFrame = CFrame.new(-128.988525, 86.216301, 87.6062622, 0.999922872, 0.00161167711, -0.0123128593, 0, 0.991541922, 0.129786715, 0.0124178901, -0.129776701, 0.991465509)

local AGENT_BODY = SS.AgentRig:Clone()
script.Agent.Value = AGENT_BODY
local HUMANOID_ROOT = AGENT_BODY.PrimaryPart
local HUMANOID = AGENT_BODY.Humanoid

local ADAM_OPTIMIZER = OpenML.Optimizers.Adam.new()
local PROPAGATOR = OpenML.Algorithms.Propagator
local FUNCTION_ACTIVATION = OpenML.ActivationFunctions.ReLU
local FUNCTION_LOSS = OpenML.Calculate.MSE
local DQL = OpenML.Algorithms.DQL.new()

local NETWORK
local NetworkIterationsSave, NetworkFinishesSave, steps = nil, nil, 0
local consecutiveFalls = 0
local livingPenalty = 0.002
local PREVIOUS_DIST -- declared up front so the restore branch below can seed it before the default is applied

if script.AIUpdated.Value then
    local data1 = NetworkStorage:GetAsync("EfficientNetwork1")
    NetworkIterationsSave = NetworkStorage:GetAsync("EfficientNetwork2")
    NetworkFinishesSave = NetworkStorage:GetAsync("EfficientNetwork3")
    steps = NetworkStorage:GetAsync("EfficientNetworkSteps")
    NETWORK = OpenML.Resources.MLP.Decompress(data1, "IEEE754")
    local savedCFrame = NetworkStorage:GetAsync("EfficientNetworkCFrame")
    if savedCFrame then
        HUMANOID_ROOT.CFrame = CFrame.new(table.unpack(savedCFrame))
    end
elseif script.AIShutdown.Value then
    local data2 = NetworkStorage:GetAsync("ShutdownNetwork1")
    NetworkIterationsSave = NetworkStorage:GetAsync("ShutdownNetwork2")
    NetworkFinishesSave = NetworkStorage:GetAsync("ShutdownNetwork3")
    steps = NetworkStorage:GetAsync("ShutdownNetworkSteps")
    NETWORK = OpenML.Resources.MLP.Decompress(data2, "IEEE754")
    local savedCFrame = NetworkStorage:GetAsync("ShutdownNetworkCFrame")
    if savedCFrame then
        HUMANOID_ROOT.CFrame = CFrame.new(table.unpack(savedCFrame))
    end
else
    NETWORK = OpenML.Resources.MLP.new({7, 32, 32, 16, 2}, function()
        return Random.new():NextNumber() * 2 - 1
    end)
    HUMANOID_ROOT.CFrame = SPAWN.CFrame
end
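
-- Fresh runs build a {7, 32, 32, 16, 2} MLP: 7 inputs (the state vector
-- assembled each step below), three hidden layers, and 2 outputs, one Q-value
-- per action (move toward the goal, or jump). Weights start uniform in (-1, 1).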

setmetatable(NETWORK, { __index = PROPAGATOR })

if NetworkIterationsSave and NetworkFinishesSave then
    ITERATION_GUI.IterationCount.Value = NetworkIterationsSave
    ITERATION_GUI.Text = "Iteration: " .. tostring(NetworkIterationsSave)
    FINISHES_GUI.FinishCount.Value = NetworkFinishesSave
    FINISHES_GUI.Text = "Finishes: " .. tostring(NetworkFinishesSave)
    PREVIOUS_DIST = (HUMANOID_ROOT.Position - GOAL.Position).Magnitude
end

AGENT_BODY.Parent = workspace
HUMANOID.AutoRotate = false

DQL.OnForwardPropagation = function(states)
    return NETWORK:ForwardPropagation(states, FUNCTION_ACTIVATION)
end

local LEARNING_RATE = 0.001
DQL.OnBackPropagation = function(activations, target)
    return NETWORK:BackPropagation(activations, target, {
        ActivationFunction = FUNCTION_ACTIVATION,
        Optimizer = ADAM_OPTIMIZER,
        LearningRate = LEARNING_RATE
    })
end
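
-- Training hooks: the DQL object delegates inference and weight updates to the
-- MLP, using ReLU activations and the Adam optimizer at a 0.001 learning rate.
-- FUNCTION_LOSS (MSE) is declared above but never passed in here, so the loss
-- is presumably applied inside OpenML's DQL/BackPropagation internals.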

local Params = RaycastParams.new()
Params.FilterDescendantsInstances = {AGENT_BODY}
Params.FilterType = Enum.RaycastFilterType.Exclude

-- Keep a restored distance if one was loaded above; otherwise start from spawn.
PREVIOUS_DIST = PREVIOUS_DIST or (SPAWN.Position - GOAL.Position).Magnitude
local LAST_PASSED = 0
local SECOND_LAST_PASSED = 0 -- value of LAST_PASSED at the end of the previous episode
local SAME_PASSED = 0 -- consecutive episodes ending on the same furthest platform (e.g. exactly platform 5 twice in a row gives 2)
local TOTAL_REWARD = 0
local TIME_STILL = 0

game:BindToClose(function()
    warn("shutting down")
    local compressed = OpenML.Resources.MLP.Compress(NETWORK, "IEEE754")
    NetworkStorage:SetAsync("ShutdownNetwork1", compressed)
    NetworkStorage:SetAsync("ShutdownNetwork2", ITERATION_GUI.IterationCount.Value)
    NetworkStorage:SetAsync("ShutdownNetwork3", FINISHES_GUI.FinishCount.Value)
    NetworkStorage:SetAsync("ShutdownNetworkCFrame", {HUMANOID_ROOT.CFrame:GetComponents()})
    NetworkStorage:SetAsync("ShutdownNetworkSteps", steps)
    warn("saved")
end)
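
-- Note: SetAsync can throw (throttling, outages) and BindToClose gets roughly
-- 30 seconds, so a hardened version would likely wrap each write, e.g.:
--   local ok, err = pcall(function()
--       NetworkStorage:SetAsync("ShutdownNetwork1", compressed)
--   end)
--   if not ok then warn("save failed:", err) end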

RunService.Stepped:Connect(function(_, deltaTime)
    steps += 1

    local distToGoal = (HUMANOID_ROOT.Position - GOAL.Position).Magnitude
    local downRay = workspace:Raycast(HUMANOID_ROOT.Position, Vector3.new(0, -1, 0) * 4, Params)
    local furthestRay = workspace:Raycast(HUMANOID_ROOT.Position, Vector3.new(-1, -1, 0) * 4, Params)
    local intermediateRay = workspace:Raycast(HUMANOID_ROOT.Position, Vector3.new(-0.5, -1, 0) * 4, Params)

    local function getDepth(ray)
        return ray and (ray.Position - HUMANOID_ROOT.Position).Magnitude or 4
    end
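
    -- State vector (7 features): distance to goal, a grounded flag, overall
    -- speed, vertical velocity, and the three downward depth probes above,
    -- each saturating at the 4-stud ray length when nothing is hit.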
    local inputs = {
        distToGoal,
        (downRay and downRay.Instance) and 1 or 0,
        HUMANOID_ROOT.AssemblyLinearVelocity.Magnitude,
        HUMANOID_ROOT.AssemblyLinearVelocity.Y,
        getDepth(downRay),
        getDepth(furthestRay),
        getDepth(intermediateRay),
    }

    local activations = NETWORK:ForwardPropagation(inputs, FUNCTION_ACTIVATION)
    local actions = activations[#activations] -- output layer: one Q-value per action
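
    -- Epsilon-greedy action selection: epsilon decays linearly from 1 to 0.05
    -- over two million steps, i.e. epsilon = max(0.05, 1 - (steps / 2e6) * 0.95),
    -- so early training acts mostly at random and later training mostly exploits.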
    local epsilon_start = 1
    local epsilon_end = 0.05
    local decay_steps = 2e6
    local epsilon = math.max(epsilon_end, epsilon_start - (steps / decay_steps) * (epsilon_start - epsilon_end))
    local isEpsilon = math.random() <= epsilon
    local chosenAction = isEpsilon and math.random(1, 2) or (actions[1] > actions[2] and 1 or 2)

    if chosenAction == 1 then
        HUMANOID:MoveTo(GOAL.Position)
    else
        HUMANOID:MoveTo(HUMANOID_ROOT.Position)
        HUMANOID.Jump = true
    end

    local reward
    local distDiff = PREVIOUS_DIST - distToGoal
    if distToGoal < PREVIOUS_DIST and math.abs(distDiff) > 0.26 then
        reward = distDiff * 0.3
        TIME_STILL = 0
    else
        reward = 0
        TIME_STILL += deltaTime
        warn("TIME_STILL:", TIME_STILL)
        if TIME_STILL > 3 then
            warn("Punishing")
            reward = -0.1 + ((TIME_STILL - 3) * -0.01) -- penalty grows with each second spent idle past the 3-second grace period
        end
    end
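
    -- Milestone shaping: the first crossing of checkpoint i in an episode
    -- replaces the step reward with i * 20 (platform 16 pays 320), overwriting
    -- rather than adding to the distance-based reward above.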
    local xPos = HUMANOID_ROOT.Position.X
    for i, threshold in ipairs(PASSED_COORDS) do
        if xPos < threshold and LAST_PASSED < i then
            LAST_PASSED = i
            reward = i * 20
            break
        end
    end

    TOTAL_REWARD += reward
    TOTAL_REWARD -= livingPenalty
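
    -- Falling onto workspace.bottom ends the episode: -10 by default, softened
    -- when the agent keeps ending on the same furthest platform (SAME_PASSED >= 5)
    -- or keeps falling repeatedly (consecutiveFalls >= 10); counters reset and
    -- the agent respawns.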
    local touchingBottom = downRay and downRay.Instance == workspace.bottom
    if touchingBottom then
        local fallPenalty = -10
        if (TIME_STILL - 3) * -0.02 >= fallPenalty then
            consecutiveFalls += 1
        else
            consecutiveFalls = 0
        end
        if SECOND_LAST_PASSED == LAST_PASSED and SECOND_LAST_PASSED > 0 then
            SAME_PASSED += 1
        else
            SAME_PASSED = 0
        end
        SECOND_LAST_PASSED = LAST_PASSED
        if SAME_PASSED >= 5 then
            fallPenalty = reward + 5
        elseif consecutiveFalls >= 10 then
            fallPenalty = ((TIME_STILL - 3) * -0.02) + 1
        end
        reward = fallPenalty
        TOTAL_REWARD = 0
        TIME_STILL = 0
        LAST_PASSED = 0
        HUMANOID_ROOT.CFrame = SPAWN.CFrame
        ITERATION_GUI.IterationCount.Value += 1
        warn("Agent fell; starting iteration " .. tostring(ITERATION_GUI.IterationCount.Value))
        ITERATION_GUI.Text = "Iteration: " .. tostring(ITERATION_GUI.IterationCount.Value)
        PREVIOUS_DIST = (HUMANOID_ROOT.Position - GOAL.Position).Magnitude
    end

    PREVIOUS_DIST = distToGoal

    print("Reward:", reward - livingPenalty)
    print("Total reward:", TOTAL_REWARD)
    print("Q-values:", unpack(actions))
    print("Current Epsilon:", epsilon)
    print("Selected Action:", if chosenAction == 1 then "Move" else "Jump", "Explored:", isEpsilon)

    if distToGoal < 2 then
        if HUMANOID_ROOT.CFrame ~= SPAWN.CFrame then
            reward = 480 -- finish reward: the Passed16 reward (320) * 1.5
        end
        warn("Finished Simulation!")
        HUMANOID_ROOT.CFrame = SPAWN.CFrame
        ITERATION_GUI.IterationCount.Value += 1
        warn("Agent finished; starting iteration " .. tostring(ITERATION_GUI.IterationCount.Value))
        ITERATION_GUI.Text = "Iteration: " .. tostring(ITERATION_GUI.IterationCount.Value)
        FINISHES_GUI.FinishCount.Value += 1
        FINISHES_GUI.Text = "Finishes: " .. tostring(FINISHES_GUI.FinishCount.Value)
        PREVIOUS_DIST = (HUMANOID_ROOT.Position - GOAL.Position).Magnitude

        local compressed = OpenML.Resources.MLP.Compress(NETWORK, "IEEE754")
        NetworkStorage:SetAsync("EfficientNetwork1", compressed)
        NetworkStorage:SetAsync("EfficientNetwork2", ITERATION_GUI.IterationCount.Value)
        NetworkStorage:SetAsync("EfficientNetwork3", FINISHES_GUI.FinishCount.Value)
        NetworkStorage:SetAsync("EfficientNetworkCFrame", {HUMANOID_ROOT.CFrame:GetComponents()})
        NetworkStorage:SetAsync("EfficientNetworkSteps", steps)
    end
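
    -- Only State, Action, and Reward are handed to OpenML's DQL here; classic
    -- DQN transitions also carry the next state for the bootstrapped target, so
    -- that bookkeeping (if any) has to live inside DQL:Learn itself.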
    DQL:Learn({
        State = inputs,
        Action = chosenAction,
        Reward = reward - livingPenalty,
    })
end)