# -*- coding: utf-8 -*-
"""
Created on Fri Jun 29 00:14:24 2018
@author: Mario
"""
import numpy as np

text = open('Hemingway.txt', 'r').read()
chars = list(set(text))
text_sz, vocab_sz = len(text), len(chars)
#print('text has %d chars, %d unique' % (text_sz, vocab_sz))

char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
#print(char_to_int, '\n\n', int_to_char)

# example: one-hot encode the character 'a'
vector_for_a = np.zeros((vocab_sz, 1))
vector_for_a[char_to_int['a']] = 1  # set the position of 'a' to 1
#print(vector_for_a)
# hyperparameters
hidden_sz = 100
seq_len = 25
learn_rt = 1e-1

# model parameters (the output dimension comes first because features live in columns)
wx_h = np.random.randn(hidden_sz, vocab_sz) * .01   # input-to-hidden, shape (hidden_sz, vocab_sz), values from a standard normal
wh_h = np.random.randn(hidden_sz, hidden_sz) * .01  # hidden-to-hidden (recurrent) matrix, shape (hidden_sz, hidden_sz)
wh_y = np.random.randn(vocab_sz, hidden_sz) * .01   # hidden-to-output, shape (vocab_sz, hidden_sz), values from a standard normal
bh = np.zeros((hidden_sz, 1))  # hidden bias
by = np.zeros((vocab_sz, 1))   # output bias
def Loss(inputs, targets, prev_h):
    x, h, y, p = {}, {}, {}, {}  # inputs, hidden states, outputs, probabilities per char
    h[-1] = np.copy(prev_h)  # np.copy() creates a separate array
    loss = 0
    # feedforward
    for t in range(len(inputs)):
        x[t] = np.zeros((vocab_sz, 1))  # set x at step t to a zero vector
        x[t][inputs[t]] = 1  # mark a 1 wherever the char goes (one-hot encoding)
        h[t] = np.tanh(np.dot(wx_h, x[t]) + np.dot(wh_h, h[t-1]) + bh)  # hidden state
        y[t] = np.dot(wh_y, h[t]) + by  # unnormalized log probabilities for next chars
        p[t] = np.exp(y[t]) / np.sum(np.exp(y[t]))  # probabilities for next chars
        loss += -np.log(p[t][targets[t], 0])  # softmax (cross-entropy) loss
    # basically, if a parameter changes during training, we need its gradient
    dwx_h, dwh_h, dwh_y = np.zeros_like(wx_h), np.zeros_like(wh_h), np.zeros_like(wh_y)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(h[0])  # gradient flowing back from the next time step
    # backpropagation through time
    for t in reversed(range(len(inputs))):
        dy = np.copy(p[t])  # probabilities at step t
        # derive our first gradient
        dy[targets[t]] -= 1  # backprop into y
        dwh_y += np.dot(dy, h[t].T)  # from y back to the hidden layer
        # the derivative of the output bias is the shift in y
        dby += dy
        # backpropagate!
        dh = np.dot(wh_y.T, dy) + dhnext  # backprop into h
        dhraw = (1 - h[t] * h[t]) * dh  # backprop through the tanh nonlinearity
        dbh += dhraw  # gradient of the hidden bias
        dwx_h += np.dot(dhraw, x[t].T)  # gradient of the input-to-hidden weights
        dwh_h += np.dot(dhraw, h[t-1].T)  # gradient of the hidden-to-hidden weights
        dhnext = np.dot(wh_h.T, dhraw)
    for dparam in [dwx_h, dwh_h, dwh_y, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return loss, dwx_h, dwh_h, dwh_y, dbh, dby, h[len(inputs)-1]
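
# A minimal numerical gradient check (not part of the original paste): it
# perturbs a handful of random entries in each parameter and compares the
# analytic gradients from Loss() against centered finite differences, using
# the globals wx_h, wh_h, wh_y, bh, by defined above. Relative errors around
# 1e-7..1e-4 are expected; entries whose analytic gradient was clipped at +-5
# will disagree. Call it manually, e.g. grad_check(inputs, targets, hprev),
# once inputs/targets/hprev exist further down.
def grad_check(inputs, targets, prev_h, num_checks=5, delta=1e-5):
    _, dwx_h, dwh_h, dwh_y, dbh, dby, _ = Loss(inputs, targets, prev_h)
    for param, dparam, name in zip([wx_h, wh_h, wh_y, bh, by],
                                   [dwx_h, dwh_h, dwh_y, dbh, dby],
                                   ['wx_h', 'wh_h', 'wh_y', 'bh', 'by']):
        for _ in range(num_checks):
            ix = np.random.randint(param.size)
            old_val = param.flat[ix]
            param.flat[ix] = old_val + delta
            loss_plus = Loss(inputs, targets, prev_h)[0]
            param.flat[ix] = old_val - delta
            loss_minus = Loss(inputs, targets, prev_h)[0]
            param.flat[ix] = old_val  # restore the original value
            grad_numerical = (loss_plus - loss_minus) / (2 * delta)
            grad_analytic = dparam.flat[ix]
            denom = abs(grad_numerical) + abs(grad_analytic)
            rel_error = abs(grad_analytic - grad_numerical) / denom if denom > 0 else 0.0
            print('%s: analytic %e, numerical %e, relative error %e'
                  % (name, grad_analytic, grad_numerical, rel_error))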
def sample(h, seed_char, n):
    # one-hot vector for our seed char
    x = np.zeros((vocab_sz, 1))
    x[seed_char] = 1
    # list to store the generated chars
    gen_char = []
    # for as many characters as we want to generate
    for t in range(n):
        h = np.tanh(np.dot(wx_h, x) + np.dot(wh_h, h) + bh)
        # compute the (unnormalised) output
        y = np.dot(wh_y, h) + by
        # probabilities for next chars
        p = np.exp(y) / np.sum(np.exp(y))
        # sample the next char index from that distribution
        ch = np.random.choice(range(vocab_sz), p=p.ravel())
        # one-hot vector for the predicted char, fed back in at the next step
        x = np.zeros((vocab_sz, 1))
        x[ch] = 1
        # add it to the list
        gen_char.append(ch)
    txt = ''.join(int_to_char[ch] for ch in gen_char)
    print('----\n %s \n----' % (txt, ))
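
# Optional variant (not in the original paste): the same sampler with a
# softmax "temperature" knob. Temperatures below 1.0 sharpen the distribution
# (safer, more repetitive text); above 1.0 flatten it (more surprising text).
# With temperature=1.0 this behaves like sample() above.
def sample_with_temperature(h, seed_char, n, temperature=1.0):
    x = np.zeros((vocab_sz, 1))
    x[seed_char] = 1
    gen_char = []
    for t in range(n):
        h = np.tanh(np.dot(wx_h, x) + np.dot(wh_h, h) + bh)
        y = (np.dot(wh_y, h) + by) / temperature  # scale the logits before the softmax
        p = np.exp(y) / np.sum(np.exp(y))
        ch = np.random.choice(range(vocab_sz), p=p.ravel())
        x = np.zeros((vocab_sz, 1))
        x[ch] = 1
        gen_char.append(ch)
    print('----\n %s \n----' % ''.join(int_to_char[c] for c in gen_char))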
hprev = np.zeros((hidden_sz, 1))  # reset RNN memory
# predict the next 200 characters given 'a' (untrained weights, so the output is gibberish)
sample(hprev, char_to_int['a'], 200)

# quick look at how one training window is encoded
p = 0
inputs = [char_to_int[ch] for ch in text[p:p+seq_len]]
print("inputs", inputs)
targets = [char_to_int[ch] for ch in text[p+1:p+seq_len+1]]
print("targets", targets)
n, p = 0, 0
mwx_h, mwh_h, mwh_y = np.zeros_like(wx_h), np.zeros_like(wh_h), np.zeros_like(wh_y)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)  # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_sz)*seq_len  # loss at iteration 0
while n <= 1000*100:
    # prepare inputs (we're sweeping from left to right in steps seq_len long);
    # check "How to feed the loss function" to see how this part works
    if p+seq_len+1 >= len(text) or n == 0:
        hprev = np.zeros((hidden_sz, 1))  # reset RNN memory
        p = 0  # go back to the start of the text
    inputs = [char_to_int[ch] for ch in text[p:p+seq_len]]
    targets = [char_to_int[ch] for ch in text[p+1:p+seq_len+1]]
    # forward seq_len characters through the net and fetch the gradients
    loss, dwx_h, dwh_h, dwh_y, dbh, dby, hprev = Loss(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    # sample from the model now and then
    if n % 1000 == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))  # print progress
        sample(hprev, inputs[0], 200)
    # perform parameter update with Adagrad
    for param, dparam, mem in zip([wx_h, wh_h, wh_y, bh, by],
                                  [dwx_h, dwh_h, dwh_y, dbh, dby],
                                  [mwx_h, mwh_h, mwh_y, mbh, mby]):
        mem += dparam * dparam
        param += -learn_rt * dparam / np.sqrt(mem + 1e-8)  # Adagrad update
    p += seq_len  # move the text pointer
    n += 1  # iteration counter
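
# Optional (not in the original paste): a minimal sketch for saving the learned
# weights with NumPy once training finishes, so sampling can be re-run later
# without retraining. The file name is arbitrary; chars is saved too because
# the char<->int mapping depends on the set() ordering of this particular run.
np.savez('char_rnn_weights.npz', wx_h=wx_h, wh_h=wh_h, wh_y=wh_y, bh=bh, by=by,
         chars=np.array(chars))
# To reload in a later session:
# ckpt = np.load('char_rnn_weights.npz')
# wx_h, wh_h, wh_y, bh, by = ckpt['wx_h'], ckpt['wh_h'], ckpt['wh_y'], ckpt['bh'], ckpt['by']
# chars = ckpt['chars'].tolist()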