import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class CharaEmbedLayer(nn.Module):
    """Character-level CNN embedding layer built from width-3/4/5 convolutions."""

    def __init__(self, inverted_vocab, n_chara=128, n_dim=30):
        super(CharaEmbedLayer, self).__init__()
        self.inverted_vocab = inverted_vocab
        self.inverted_vocab[0] = ''  # id 0 is the padding token -> empty string
        self.n_chara = n_chara       # size of the character vocabulary (ASCII by default)
        self.n_dim = n_dim           # dimensionality of each character embedding
        self.chara_embed = nn.Embedding(n_chara, n_dim, padding_idx=0)
        # One single-channel 1-D convolution per filter width; the padding keeps
        # the output at least as long as the input, so max-pooling is always defined.
        self.conv_5 = nn.Conv1d(1, 1, 5, padding=4)
        self.conv_4 = nn.Conv1d(1, 1, 4, padding=3)
        self.conv_3 = nn.Conv1d(1, 1, 3, padding=2)
        self.fc = nn.Linear(3 * n_dim, n_dim)

    def forward(self, input):
        """
        Input format:
            size: batch_size * length (nested lists of token ids)
        Output format:
            size: batch_size * length * n_dim
        """
        batch_size = len(input)
        seq_length = len(input[0])

        # Look up the surface string of every token id.
        tokens = [[self.inverted_vocab[token] for token in line] for line in input]

        # Length (in characters) of the longest token in the batch.
        len_max = -1
        for line in tokens:
            row_max = max([len(token) for token in line])
            if row_max > len_max:
                len_max = row_max

        # Turn every token into a zero-padded list of character codes.
        input_array = []
        for line in tokens:
            row_array = []
            for token in line:
                token_array = []
                for chara in token:
                    token_array.append(ord(chara))
                token_array.extend([0] * (len_max - len(token_array)))
                row_array.append(token_array)
            input_array.append(row_array)

        # input_var: batch_size x length x max_chara_len
        input_var = Variable(torch.LongTensor(input_array).cuda())
        # input_emb: (batch_size * length) x max_chara_len x n_dim
        input_emb = self.chara_embed(input_var.view(-1, len_max))

        # Run each embedding dimension through a single-channel 1-D convolution over
        # the character axis, max-pool over positions, and restore the batch shape.
        conv_in = input_emb.transpose(-1, -2).contiguous().view(-1, 1, len_max)
        out_5 = self.conv_5(conv_in).max(dim=-1)[0].view(batch_size, seq_length, self.n_dim)
        out_4 = self.conv_4(conv_in).max(dim=-1)[0].view(batch_size, seq_length, self.n_dim)
        out_3 = self.conv_3(conv_in).max(dim=-1)[0].view(batch_size, seq_length, self.n_dim)

        # Concatenate the three filter widths and project back down to n_dim.
        out = self.fc(torch.cat([out_3, out_4, out_5], dim=-1))
        return out

if __name__ == '__main__':
    chara_embed = CharaEmbedLayer({1: 'what', 2: 'the', 3: 'fuck'}).cuda()
    print(chara_embed([[1, 0, 0], [2, 1, 0], [3, 2, 1]]))
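    # Added shape-check sketch (not part of the original paste): with the default
    # n_dim=30 and the 3x3 grid of token ids above, forward() should return a
    # (batch_size, seq_length, n_dim) = (3, 3, 30) tensor. Assumes a CUDA device
    # is available, since forward() hard-codes .cuda().
    out = chara_embed([[1, 0, 0], [2, 1, 0], [3, 2, 1]])
    assert out.size() == (3, 3, 30)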