Advertisement
Guest User

Untitled

a guest
Sep 20th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.41 KB | None | 0 0
  class FastAiRobertaTokenizer(BaseTokenizer):
      """Wrap a pretrained RoBERTa tokenizer behind the ``BaseTokenizer`` interface.

      Text is split by the wrapped tokenizer, truncated to leave room for two
      boundary markers, and returned between ``<s>`` and ``</s>``.
      """

      def __init__(self, tokenizer: RobertaTokenizer, max_seq_len: int = 128, **kwargs):
          """Store the wrapped tokenizer and the sequence-length limit.

          Args:
              tokenizer: Object whose ``tokenize(text)`` method splits the text.
              max_seq_len: Upper bound on the number of tokens returned by
                  ``tokenizer()``, counting the two boundary markers. Values
                  below 2 still produce the two markers and nothing else.
              **kwargs: Accepted for call compatibility and ignored.
          """
          # NOTE(review): the base-class initializer is not invoked here;
          # confirm BaseTokenizer needs no setup of its own.
          self._pretrained_tokenizer = tokenizer
          self.max_seq_len = max_seq_len

      def __call__(self, *args, **kwargs):
          """Return this instance unchanged, ignoring every argument.

          Presumably this lets a configured instance be passed where a
          tokenizer factory is expected -- confirm against the caller.
          """
          return self

      def tokenizer(self, t: str) -> List[str]:
          """Tokenize ``t`` and wrap the result in ``<s>`` / ``</s>`` markers.

          Args:
              t: Text to tokenize.

          Returns:
              ``["<s>", *tokens, "</s>"]`` where ``tokens`` holds at most
              ``max_seq_len - 2`` items (never fewer than zero).
          """
          # Two positions are reserved for the boundary markers. Clamp at zero:
          # a negative slice stop would count from the END of the token list
          # and keep nearly everything instead of truncating.
          budget = max(self.max_seq_len - 2, 0)
          pieces = self._pretrained_tokenizer.tokenize(t)[:budget]
          return ["<s>", *pieces, "</s>"]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement