Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random
- import numpy as np
- import matplotlib.pyplot as plt
- from math import sqrt
- #Parameters
- T = 360 #time span (days)
- RT = 1 # review time (days) - time between 2 placed orders
- LT = 1 # lead time (days) - time that goes from when an order is placed to the moment it arrives
- h = 1 # on-hand inventory cost (unit cost)
- b = 5 # backorder cost (unit cost)
- I0 = 300 # initial stock (units)
- dm = 100 # average demand
- dmdp = 10 # standard deviation for demand
- d = [] #demand following normal distribution (units per day)
- random.seed(100)
- for j in range(T):
- #d.append(round(np.random.normal(loc=0.0,scale=1.0)*dmdp+dm,0))
- d.append(random.randint(90, 110))
- k = 1.96 # safety factor
- ss = sqrt(RT+LT)*dmdp*k # safety stock (units) -> k=1.96, 1.64, 1.28
- s = [] # target stock (units)
- for j in range(T):
- s.append((RT+LT)*dm + ss)
- #Variable Initialization
- Qt = [0] * T # quantity to order each day (units)
- It = [0] * T # inventory at the end of each day (units)
- NIt = [0] * T # net inventory at the start of each day (units) = (on-hand + in transit) inventory
- Ip = [0] * T # on-hand inventory at the end of each day (units)
- Im = [0] * T # backorder at the end of each day (units)
- dsat = [0] * T #satisfied demand (units)
- #Initial Values
- It[0] = I0 - d[0]
- NIt[0] = I0
- if (It[0] >= 0):
- Ip[0] = It[0]
- else:
- Im[0] = -It[0]
- if (NIt[0] < s[0]):
- Qt[0] = s[0] - NIt[0]
- else:
- Qt[0] = 0
- if d[0] >= I0:
- dsat[0] = I0
- else:
- dsat[0] = d[0]
- def test(T, LT, d, s):
- i=1
- while i < T:
- j=1
- NIt[i] = Ip[i-1]
- while j <= LT:
- if (i-j >= 0):
- NIt[i] = NIt[i] + Qt[i-j]
- j += 1
- NIt[i] = round(NIt[i], 0)
- if (NIt[i] < s[i]):
- Qt[i] = s[i] - NIt[i]
- else:
- Qt[i] = 0
- Qt[i] = round(Qt[i], 0)
- if (i - LT >= 0):
- It[i] = Ip[i-1] - d[i] + Qt[i-LT]
- else:
- It[i] = Ip[i-1] - d[i]
- It[i] = round(It[i], 0)
- if It[i] >= 0:
- Ip[i] = It[i]
- else:
- Im[i] = -It[i]
- if (d[i] >= Ip[i-1] + Qt[i-LT]):
- dsat[i] = Ip[i-1] + Qt[i-LT]
- else:
- dsat[i] = d[i]
- i += 1
- return Qt, NIt, It, Ip, Im, dsat
- def objective(T, Ip, Im, h, b):
- sumIp = Ip[0]
- sumIm = Im[0]
- ym = 0
- if (Im[0] > 0):
- ym = 1
- i = 1
- while i < T:
- sumIp = sumIp + Ip[i]
- sumIm = sumIm + Im[i]
- if (Im[i] > 0):
- ym +=1
- i += 1
- return h*sumIp+b*sumIm, sumIm, ym
- [Qt, NIt, It, Ip, Im, dsat] = test(T, LT, d, s)
- [obj, sumIm, ym] = objective(T, Ip, Im, h, b)
- sumd = 0
- for j in range(T):
- sumd = sumd + d[j]
- alpha = ym/T
- beta = sumIm/sumd
- # Training
- # Ip - on-hand inventory
- # Im - Lost orders
- # T - no. of days (360)
- # Qt - order quantity
- def reward(t):
- return h*Ip[t]+b*Im[t]
- Q = np.matrix(np.zeros([9,9]))
- iteration = 0
- t = 0
- MAX_ITERATION = 500
- alp = 0.2 # learning rate (between 0 and 1)
- exploitation_p = 0.15 # exploitation probability (incresed after each iteration until it reaches 1)
- while iteration <= MAX_ITERATION:
- while t < T-1:
- if Ip[t] <= 8:
- state = 0
- if Ip[t] > 8 and Ip[t] <= 14:
- state = 1
- if Ip[t] > 14 and Ip[t] <= 20:
- state = 2
- if Ip[t] > 20 and Ip[t] <= 26:
- state = 3
- if Ip[t] > 26 and Ip[t] <= 32:
- state = 4
- if Ip[t] > 32 and Ip[t] <= 38:
- state = 5
- if Ip[t] > 38 and Ip[t] <= 44:
- state = 6
- if Ip[t] > 44 and Ip[t] <= 50:
- state = 7
- if Ip[t] > 50:
- state = 8
- rd = random.random()
- if rd < exploitation_p:
- action = np.where(Q[state,] == np.max(Q[state,]))[1]
- if np.size(action) > 1:
- action = np.random.choice(action,1)
- elif rd >= exploitation_p:
- av_act = np.where(Q[state,] < 999999)[1]
- action = np.random.choice(av_act,1)
- action = int(action)
- rew = reward(t+1)
- if Ip[t+1] <= 8:
- next_state = 0
- if Ip[t+1] > 8 and Ip[t+1] <= 14:
- next_state = 1
- if Ip[t+1] > 14 and Ip[t+1] <= 20:
- next_state = 2
- if Ip[t+1] > 20 and Ip[t+1] <= 26:
- next_state = 3
- if Ip[t+1] > 26 and Ip[t+1] <= 32:
- next_state = 4
- if Ip[t+1] > 32 and Ip[t+1] <= 38:
- next_state = 5
- if Ip[t+1] > 38 and Ip[t+1] <= 44:
- next_state = 6
- if Ip[t+1] > 44 and Ip[t+1] <= 50:
- next_state = 7
- if Ip[t+1] > 50:
- next_state = 8
- next_action = np.where(Q[next_state,] == np.max(Q[next_state,]))[1]
- if np.size(next_action) > 1:
- next_action = np.random.choice(next_action,1)
- next_action = int(next_action)
- Q[state, action] = Q[state, action] + alp*(-rew+Q[next_state, next_action]-Q[state, action])
- t += 1
- if (exploitation_p < 1):
- exploitation_p = exploitation_p + 0.05
- t = 0
- iteration += 1
- # Testing
- Ip = [0] * T
- Im = [0] * T
- It[0] = I0 - d[0]
- if (It[0] >= 0):
- Ip[0] = It[0]
- else:
- Im[0] = -It[0]
- Qt[0] = 0
- Qbase = 100
- sumIp = Ip[0]
- sumIm = Im[0]
- i = 1
- while i < T:
- if (i - LT >= 0):
- It[i] = Ip[i-1] - d[i] + Qt[i-LT]
- else:
- It[i] = Ip[i-1] - d[i]
- It[i] = round(It[i], 0)
- if It[i] >= 0:
- Ip[i] = It[i]
- else:
- Im[i] = -It[i]
- if Ip[i] <= 8:
- state = 0
- if Ip[i] > 8 and Ip[i] <= 14:
- state = 1
- if Ip[i] > 14 and Ip[i] <= 20:
- state = 2
- if Ip[i] > 20 and Ip[i] <= 26:
- state = 3
- if Ip[i] > 26 and Ip[i] <= 32:
- state = 4
- if Ip[i] > 32 and Ip[i] <= 38:
- state = 5
- if Ip[i] > 38 and Ip[i] <= 44:
- state = 6
- if Ip[i] > 44 and Ip[i] <= 50:
- state = 7
- if Ip[i] > 50:
- state = 8
- action = np.where(Q[state,] == np.max(Q[state,]))[1]
- if np.size(action) > 1:
- action = np.random.choice(action,1)
- action = int(action)
- if action == 0:
- Qt[i] = Qbase
- if action == 1:
- Qt[i] = Qbase * 0.95
- if action == 2:
- Qt[i] = Qbase * 0.9
- if action == 3:
- Qt[i] = Qbase * 0.85
- if action == 4:
- Qt[i] = Qbase * 0.8
- if action == 5:
- Qt[i] = Qbase * 0.75
- if action == 6:
- Qt[i] = Qbase * 0.7
- if action == 7:
- Qt[i] = Qbase * 0.65
- if action == 8:
- Qt[i] = Qbase * 0.6
- sumIp = sumIp + Ip[i]
- sumIm = sumIm + Im[i]
- i += 1
- objfunc = h*sumIp+b*sumIm
- print(objfunc)
Add Comment
Please, Sign In to add comment