Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def prepare_data(seqs, labels):
    """Pad a batch of sequences into one integer matrix plus a mask.

    Each sequence is written into its own column of ``x`` and padded with
    trailing zeros up to the length of the longest sequence in the batch.
    The result is laid out as ``(maxlen, n_samples)``: positions within a
    sequence run along axis 0 and samples along axis 1.

    Args:
        seqs: A list of sequences; each sequence is a list of integer ids.
        labels: Passed through and returned unchanged.

    Returns:
        A tuple ``(x, x_mask, labels)`` where ``x`` is an ``int64`` array of
        shape ``(maxlen, n_samples)`` and ``x_mask`` is an array of the same
        shape with dtype ``theano.config.floatX`` holding 1 where ``x`` is
        non-zero and 0 elsewhere.

    Note:
        The mask is derived from the values in ``x``, so an id of 0 inside a
        sequence is masked out exactly like padding. This assumes 0 is
        reserved as the padding id -- confirm against the vocabulary in use.
    """
    lengths = [len(s) for s in seqs]
    n_samples = len(seqs)
    # An empty batch yields (0, 0) arrays instead of numpy.max([]) raising.
    maxlen = max(lengths) if lengths else 0

    x = numpy.zeros((maxlen, n_samples), dtype='int64')
    for idx, s in enumerate(seqs):
        x[:lengths[idx], idx] = s

    # Handy trick for building the mask: every cell still equal to 0 is
    # treated as padding. It must run only after all columns are filled;
    # doing it inside the loop would zero the mask of columns not yet written.
    x_mask = (x != 0).astype(theano.config.floatX)
    return x, x_mask, labels
Add Comment
Please, Sign In to add comment