Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.46 KB | None | 0 0
  1. """Byte pair encoding utilities"""
  2. import os
  3. import sentencepiece as spm
  4.  
  5. class Encoder:
  6.     def __init__(self, filename):
  7.         self.sp = spm.SentencePieceProcessor()
  8.         self.sp.Load(filename)
  9.  
  10.     def encode(self, text):
  11.         return self.sp.EncodeAsIds(text)
  12.  
  13.     def decode(self, tokens):
  14.         return self.sp.DecodeIds(tokens.tolist())
  15.  
  16. def get_encoder(model_name):
  17.     return Encoder(os.path.join('models', model_name, 'sp.model'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement