# -*- coding: utf-8 -*-
"""
Created on Fri Jun 29 00:14:24 2018

@author: Mario
"""
import numpy as np


text = open('Hemingway.txt', 'r').read()
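# NOTE: this assumes a plain-text training corpus named 'Hemingway.txt' in the
# working directory; any reasonably long text file can be substituted.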
chars = list(set(text))
text_sz, vocab_sz = len(text), len(chars)

#print('text has %d characters, %d unique' % (text_sz, vocab_sz))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

#print(char_to_int, '\n\n', int_to_char)
vector_for_a = np.zeros((vocab_sz, 1))
vector_for_a[char_to_int['a']] = 1  # set the position for 'a' to 1 (one-hot encoding)
#print(vector_for_a)

# hyperparameters
hidden_sz = 100   # number of units in the hidden state
seq_len = 25      # characters unrolled per training step
learn_rt = 1e-1   # Adagrad learning rate

# model parameters (output dimension comes first because inputs are column vectors)
wx_h = np.random.randn(hidden_sz, vocab_sz) * 0.01   # input-to-hidden weights, shape (hidden_sz, vocab_sz), drawn from a standard normal
wh_h = np.random.randn(hidden_sz, hidden_sz) * 0.01  # hidden-to-hidden (recurrent) weights, shape (hidden_sz, hidden_sz)
wh_y = np.random.randn(vocab_sz, hidden_sz) * 0.01   # hidden-to-output weights, shape (vocab_sz, hidden_sz)
bh = np.zeros((hidden_sz, 1))                        # hidden bias
by = np.zeros((vocab_sz, 1))                         # output bias

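# The recurrence implemented below is the standard vanilla RNN:
#   h[t] = tanh(wx_h @ x[t] + wh_h @ h[t-1] + bh)
#   y[t] = wh_y @ h[t] + by
#   p[t] = softmax(y[t])
# where x[t] is the one-hot vector of the current character and p[t] is the
# predicted distribution over the next character.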
def Loss(inputs, targets, prev_h):
    x, h, y, p = {}, {}, {}, {}  # inputs, hidden states, outputs, probabilities per time step
    h[-1] = np.copy(prev_h)  # np.copy() creates a separate array
    loss = 0
    # feedforward
    for t in range(len(inputs)):
        x[t] = np.zeros((vocab_sz, 1))  # start with a zero vector at time t
        x[t][inputs[t]] = 1  # one-hot encode the current character
        h[t] = np.tanh(np.dot(wx_h, x[t]) + np.dot(wh_h, h[t-1]) + bh)  # hidden state
        y[t] = np.dot(wh_y, h[t]) + by  # unnormalized log probabilities for the next char
        p[t] = np.exp(y[t]) / np.sum(np.exp(y[t]))  # softmax probabilities for the next char
        loss += -np.log(p[t][targets[t], 0])  # cross-entropy loss

    # allocate a gradient for every parameter we are going to update
    dwx_h, dwh_h, dwh_y = np.zeros_like(wx_h), np.zeros_like(wh_h), np.zeros_like(wh_y)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(h[0])  # gradient flowing in from the next time step

    # backpropagation through time
    for t in reversed(range(len(inputs))):
        dy = np.copy(p[t])  # predicted probabilities at t
        # gradient of softmax + cross-entropy: subtract 1 at the target index
        dy[targets[t]] -= 1  # backprop into y
        dwh_y += np.dot(dy, h[t].T)  # from y back to the hidden layer
        # the gradient of the output bias is just dy
        dby += dy
        # backpropagate into the hidden state
        dh = np.dot(wh_y.T, dy) + dhnext  # backprop into h
        dhraw = (1 - h[t] * h[t]) * dh  # backprop through the tanh nonlinearity
        dbh += dhraw  # gradient of the hidden bias
        dwx_h += np.dot(dhraw, x[t].T)  # gradient of the input-to-hidden weights
        dwh_h += np.dot(dhraw, h[t-1].T)  # gradient of the hidden-to-hidden weights
        dhnext = np.dot(wh_h.T, dhraw)
    for dparam in [dwx_h, dwh_h, dwh_y, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return loss, dwx_h, dwh_h, dwh_y, dbh, dby, h[len(inputs)-1]
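# The final hidden state is returned along with the gradients so the next call
# to Loss can pick up where this chunk left off; gradients only flow back
# through the seq_len characters of the chunk (truncated backpropagation
# through time).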

def sample(h, seed_char, n):
    # start with a zero vector...
    x = np.zeros((vocab_sz, 1))
    # ...and set it to the one-hot encoding of the seed character
    x[seed_char] = 1
    # list to store the generated character indices
    gen_char = []
    # for as many characters as we want to generate
    for t in range(n):
        h = np.tanh(np.dot(wx_h, x) + np.dot(wh_h, h) + bh)
        # compute the (unnormalized) output
        y = np.dot(wh_y, h) + by
        # softmax probabilities for the next char
        p = np.exp(y) / np.sum(np.exp(y))
        # sample the next character index from that distribution
        ch = np.random.choice(range(vocab_sz), p=p.ravel())
        # build a fresh one-hot vector for the predicted character
        x = np.zeros((vocab_sz, 1))
        x[ch] = 1
        # add it to the list
        gen_char.append(ch)

    txt = ''.join(int_to_char[ch] for ch in gen_char)
    print('----\n %s \n----' % (txt, ))
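# A common variation (not in the original script) is temperature sampling:
# divide y by a temperature T before the softmax, e.g.
#   p = np.exp(y / T) / np.sum(np.exp(y / T))
# where T < 1 makes the output more conservative and T > 1 more varied.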

hprev = np.zeros((hidden_sz, 1))  # reset RNN memory
# predict the next 200 characters given 'a'
sample(hprev, char_to_int['a'], 200)
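# Since the weights are still random at this point, the sample above is
# essentially gibberish; it is here only to show that the sampler works.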

p = 0
inputs = [char_to_int[ch] for ch in text[p:p+seq_len]]
print("inputs", inputs)
targets = [char_to_int[ch] for ch in text[p+1:p+seq_len+1]]
print("targets", targets)
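# targets is simply inputs shifted one position to the right: for every
# character in the chunk, the model is trained to predict the character
# that follows it.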

n, p = 0, 0
mwx_h, mwh_h, mwh_y = np.zeros_like(wx_h), np.zeros_like(wh_h), np.zeros_like(wh_y)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)  # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_sz)*seq_len  # loss at iteration 0
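# At initialization every character is roughly equally likely, so the expected
# loss per character is -log(1/vocab_sz); multiplying by seq_len gives the
# expected loss for a whole chunk, which seeds the running average.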
while n <= 1000*100:
    # prepare inputs (we're sweeping from left to right in steps seq_len long)
    # see "How to feed the loss function" for how this part works
    if p+seq_len+1 >= len(text) or n == 0:
        hprev = np.zeros((hidden_sz, 1))  # reset RNN memory
        p = 0  # go back to the start of the text
    inputs = [char_to_int[ch] for ch in text[p:p+seq_len]]
    targets = [char_to_int[ch] for ch in text[p+1:p+seq_len+1]]

    # forward seq_len characters through the net and fetch the gradients
    loss, dwx_h, dwh_h, dwh_y, dbh, dby, hprev = Loss(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001  # exponentially smoothed loss

    # sample from the model now and then
    if n % 1000 == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))  # print progress
        sample(hprev, inputs[0], 200)

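    # Adagrad keeps a running sum of squared gradients per parameter (mem) and
    # divides each step by its square root, so parameters that have accumulated
    # large gradients get smaller effective steps; 1e-8 only avoids division by zero.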
    # perform the parameter update with Adagrad
    for param, dparam, mem in zip([wx_h, wh_h, wh_y, bh, by],
                                  [dwx_h, dwh_h, dwh_y, dbh, dby],
                                  [mwx_h, mwh_h, mwh_y, mbh, mby]):
        mem += dparam * dparam
        param += -learn_rt * dparam / np.sqrt(mem + 1e-8)  # adagrad update

    p += seq_len  # move the text pointer
    n += 1  # iteration counter
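# After (or during) training, longer passages can be generated from any seed
# character, e.g. (an arbitrary choice, assuming ' ' occurs in the corpus):
#   sample(hprev, char_to_int[' '], 1000)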