# -*- coding: utf-8 -*-
"""
Created on Fri Jun 29 00:14:24 2018
@author: Mario
"""
import numpy as np

text = open('Hemingway.txt', 'r').read()
chars = list(set(text))
text_sz, vocab_sz = len(text), len(chars)
#print('text has %d chars, %d unique' % (text_sz, vocab_sz))

char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
#print(char_to_int, '\n\n', int_to_char)

# example: one-hot encode the character 'a'
vector_for_a = np.zeros((vocab_sz, 1))
vector_for_a[char_to_int['a']] = 1  # set the position of 'a' to 1
#print(vector_for_a)
# hyperparameters
hidden_sz = 100
seq_len = 25
learn_rt = 1e-1

# model parameters (the output dimension comes first because features live in columns)
wx_h = np.random.randn(hidden_sz, vocab_sz) * .01   # input-to-hidden, shape (hidden_sz, vocab_sz), values from a standard normal
wh_h = np.random.randn(hidden_sz, hidden_sz) * .01  # hidden-to-hidden (recurrent) matrix, shape (hidden_sz, hidden_sz)
wh_y = np.random.randn(vocab_sz, hidden_sz) * .01   # hidden-to-output, shape (vocab_sz, hidden_sz), values from a standard normal
bh = np.zeros((hidden_sz, 1))  # hidden bias
by = np.zeros((vocab_sz, 1))   # output bias
def Loss(inputs, targets, prev_h):
    x, h, y, p = {}, {}, {}, {}  # inputs, hidden states, outputs, probabilities per char
    h[-1] = np.copy(prev_h)  # np.copy() creates a separate array
    loss = 0
    # feedforward
    for t in range(len(inputs)):
        x[t] = np.zeros((vocab_sz, 1))  # set x at step t to a zero vector
        x[t][inputs[t]] = 1  # mark a 1 wherever the char goes (one-hot encoding)
        h[t] = np.tanh(np.dot(wx_h, x[t]) + np.dot(wh_h, h[t-1]) + bh)  # hidden state
        y[t] = np.dot(wh_y, h[t]) + by  # unnormalized log probabilities for next chars
        p[t] = np.exp(y[t]) / np.sum(np.exp(y[t]))  # probabilities for next chars
        loss += -np.log(p[t][targets[t], 0])  # softmax (cross-entropy) loss
    # basically, if a parameter changes during training, we need its gradient
    dwx_h, dwh_h, dwh_y = np.zeros_like(wx_h), np.zeros_like(wh_h), np.zeros_like(wh_y)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(h[0])  # gradient flowing back from the next time step
    # backpropagation through time
    for t in reversed(range(len(inputs))):
        dy = np.copy(p[t])  # probabilities at step t
        # derive our first gradient
        dy[targets[t]] -= 1  # backprop into y
        dwh_y += np.dot(dy, h[t].T)  # from y back to the hidden layer
        # the derivative of the output bias is the shift in y
        dby += dy
        # backpropagate!
        dh = np.dot(wh_y.T, dy) + dhnext  # backprop into h
        dhraw = (1 - h[t] * h[t]) * dh  # backprop through the tanh nonlinearity
        dbh += dhraw  # gradient of the hidden bias
        dwx_h += np.dot(dhraw, x[t].T)  # gradient of the input-to-hidden weights
        dwh_h += np.dot(dhraw, h[t-1].T)  # gradient of the hidden-to-hidden weights
        dhnext = np.dot(wh_h.T, dhraw)
    for dparam in [dwx_h, dwh_h, dwh_y, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return loss, dwx_h, dwh_h, dwh_y, dbh, dby, h[len(inputs)-1]
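
# A minimal numerical gradient check (not part of the original paste): it
# perturbs a handful of random entries in each parameter and compares the
# analytic gradients from Loss() against centered finite differences, using
# the globals wx_h, wh_h, wh_y, bh, by defined above. Relative errors around
# 1e-7..1e-4 are expected; entries whose analytic gradient was clipped at +-5
# will disagree. Call it manually, e.g. grad_check(inputs, targets, hprev),
# once inputs/targets/hprev exist further down.
def grad_check(inputs, targets, prev_h, num_checks=5, delta=1e-5):
    _, dwx_h, dwh_h, dwh_y, dbh, dby, _ = Loss(inputs, targets, prev_h)
    for param, dparam, name in zip([wx_h, wh_h, wh_y, bh, by],
                                   [dwx_h, dwh_h, dwh_y, dbh, dby],
                                   ['wx_h', 'wh_h', 'wh_y', 'bh', 'by']):
        for _ in range(num_checks):
            ix = np.random.randint(param.size)
            old_val = param.flat[ix]
            param.flat[ix] = old_val + delta
            loss_plus = Loss(inputs, targets, prev_h)[0]
            param.flat[ix] = old_val - delta
            loss_minus = Loss(inputs, targets, prev_h)[0]
            param.flat[ix] = old_val  # restore the original value
            grad_numerical = (loss_plus - loss_minus) / (2 * delta)
            grad_analytic = dparam.flat[ix]
            denom = abs(grad_numerical) + abs(grad_analytic)
            rel_error = abs(grad_analytic - grad_numerical) / denom if denom > 0 else 0.0
            print('%s: analytic %e, numerical %e, relative error %e'
                  % (name, grad_analytic, grad_numerical, rel_error))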
def sample(h, seed_char, n):
    # one-hot vector for our seed char
    x = np.zeros((vocab_sz, 1))
    x[seed_char] = 1
    # list to store the generated chars
    gen_char = []
    # for as many characters as we want to generate
    for t in range(n):
        h = np.tanh(np.dot(wx_h, x) + np.dot(wh_h, h) + bh)
        # compute the (unnormalised) output
        y = np.dot(wh_y, h) + by
        # probabilities for next chars
        p = np.exp(y) / np.sum(np.exp(y))
        # sample the next char index from that distribution
        ch = np.random.choice(range(vocab_sz), p=p.ravel())
        # one-hot vector for the predicted char, fed back in at the next step
        x = np.zeros((vocab_sz, 1))
        x[ch] = 1
        # add it to the list
        gen_char.append(ch)
    txt = ''.join(int_to_char[ch] for ch in gen_char)
    print('----\n %s \n----' % (txt, ))
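
# Optional variant (not in the original paste): the same sampler with a
# softmax "temperature" knob. Temperatures below 1.0 sharpen the distribution
# (safer, more repetitive text); above 1.0 flatten it (more surprising text).
# With temperature=1.0 this behaves like sample() above.
def sample_with_temperature(h, seed_char, n, temperature=1.0):
    x = np.zeros((vocab_sz, 1))
    x[seed_char] = 1
    gen_char = []
    for t in range(n):
        h = np.tanh(np.dot(wx_h, x) + np.dot(wh_h, h) + bh)
        y = (np.dot(wh_y, h) + by) / temperature  # scale the logits before the softmax
        p = np.exp(y) / np.sum(np.exp(y))
        ch = np.random.choice(range(vocab_sz), p=p.ravel())
        x = np.zeros((vocab_sz, 1))
        x[ch] = 1
        gen_char.append(ch)
    print('----\n %s \n----' % ''.join(int_to_char[c] for c in gen_char))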
hprev = np.zeros((hidden_sz, 1))  # reset RNN memory
# predict the next 200 characters given 'a' (untrained weights, so the output is gibberish)
sample(hprev, char_to_int['a'], 200)

# quick look at how one training window is encoded
p = 0
inputs = [char_to_int[ch] for ch in text[p:p+seq_len]]
print("inputs", inputs)
targets = [char_to_int[ch] for ch in text[p+1:p+seq_len+1]]
print("targets", targets)
n, p = 0, 0
mwx_h, mwh_h, mwh_y = np.zeros_like(wx_h), np.zeros_like(wh_h), np.zeros_like(wh_y)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)  # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_sz)*seq_len  # loss at iteration 0
while n <= 1000*100:
    # prepare inputs (we're sweeping from left to right in steps seq_len long);
    # check "How to feed the loss function" to see how this part works
    if p+seq_len+1 >= len(text) or n == 0:
        hprev = np.zeros((hidden_sz, 1))  # reset RNN memory
        p = 0  # go back to the start of the text
    inputs = [char_to_int[ch] for ch in text[p:p+seq_len]]
    targets = [char_to_int[ch] for ch in text[p+1:p+seq_len+1]]
    # forward seq_len characters through the net and fetch the gradients
    loss, dwx_h, dwh_h, dwh_y, dbh, dby, hprev = Loss(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    # sample from the model now and then
    if n % 1000 == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))  # print progress
        sample(hprev, inputs[0], 200)
    # perform parameter update with Adagrad
    for param, dparam, mem in zip([wx_h, wh_h, wh_y, bh, by],
                                  [dwx_h, dwh_h, dwh_y, dbh, dby],
                                  [mwx_h, mwh_h, mwh_y, mbh, mby]):
        mem += dparam * dparam
        param += -learn_rt * dparam / np.sqrt(mem + 1e-8)  # Adagrad update
    p += seq_len  # move the text pointer
    n += 1  # iteration counter
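
# Optional (not in the original paste): a minimal sketch for saving the learned
# weights with NumPy once training finishes, so sampling can be re-run later
# without retraining. The file name is arbitrary; chars is saved too because
# the char<->int mapping depends on the set() ordering of this particular run.
np.savez('char_rnn_weights.npz', wx_h=wx_h, wh_h=wh_h, wh_y=wh_y, bh=bh, by=by,
         chars=np.array(chars))
# To reload in a later session:
# ckpt = np.load('char_rnn_weights.npz')
# wx_h, wh_h, wh_y, bh, by = ckpt['wx_h'], ckpt['wh_h'], ckpt['wh_y'], ckpt['bh'], ckpt['by']
# chars = ckpt['chars'].tolist()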