Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class FastAiRobertaTokenizer(BaseTokenizer):
    """Adapter exposing a pretrained RobertaTokenizer through fastai's
    BaseTokenizer interface.

    Tokenized output is wrapped in RoBERTa's ``<s>`` / ``</s>`` special
    tokens and truncated so the result never exceeds ``max_seq_len`` tokens.
    """

    def __init__(self, tokenizer: RobertaTokenizer, max_seq_len: int = 128, **kwargs):
        # Hold on to the pretrained tokenizer and the length budget.
        # NOTE(review): extra **kwargs are accepted but ignored — presumably
        # to tolerate whatever the fastai tokenizer machinery passes through;
        # confirm against the caller.
        self._pretrained_tokenizer = tokenizer
        self.max_seq_len = max_seq_len

    def __call__(self, *args, **kwargs):
        # Calling the instance yields the instance itself, so this object can
        # stand in wherever a tokenizer factory is expected.
        return self

    def tokenizer(self, t: str) -> List[str]:
        """Tokenize ``t``, truncate, and add BOS/EOS markers."""
        # Two slots of the budget are reserved for the <s> and </s> tokens.
        budget = self.max_seq_len - 2
        pieces = self._pretrained_tokenizer.tokenize(t)[:budget]
        return ["<s>"] + pieces + ["</s>"]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement