Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- LA X ]
# Welcome to PyTorch-Transformers (formerly known as pytorch-pretrained-bert)!
#
# Quickstart: load each supported architecture with its tokenizer, encode a
# sentence into hidden states, then show the optional outputs, TorchScript
# tracing and save/re-load round trip.
import torch
from pytorch_transformers import *

# Simple and standard API for 6 transformer architectures & 27 pretrained model weights.
# Each entry is (model class, tokenizer class, pretrained weights shortcut).
MODELS = [
    (BertModel, BertTokenizer, 'bert-base-uncased'),
    (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),
    (GPT2Model, GPT2Tokenizer, 'gpt2'),
    (TransfoXLModel, TransfoXLTokenizer, 'transfo-xl-wt103'),
    (XLNetModel, XLNetTokenizer, 'xlnet-base-cased'),
    (XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024'),
]

# Let's encode some text in a sequence of hidden-states using each model:
for model_class, tokenizer_class, pretrained_weights in MODELS:
    # Load pretrained model/tokenizer
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = model_class.from_pretrained(pretrained_weights)

    # Encode text; the extra list level makes the ids a batch of one sequence.
    input_ids = torch.tensor([tokenizer.encode("Here is some text to encode")])
    last_hidden_states = model(input_ids)[0]  # Models outputs are now tuples

# NOTE(review): the pasted text lost its indentation, so the end of the loop
# body is assumed to be here. If so, the steps below reuse `model_class`,
# `tokenizer` and `pretrained_weights` from the final loop iteration - confirm
# against the original snippet.

# Models can return full list of hidden-states & attentions weights at each layer
model = model_class.from_pretrained(
    pretrained_weights,
    output_hidden_states=True,
    output_attentions=True,
)
input_ids = torch.tensor([tokenizer.encode("Let's see hidden-states and attentions")])
# With both flags enabled, the last two elements of the output tuple are read
# as the per-layer hidden states and the attention weights.
all_hidden_states, all_attentions = model(input_ids)[-2:]

# Models are compatible with Torchscript
model = model_class.from_pretrained(pretrained_weights, torchscript=True)
traced_model = torch.jit.trace(model, (input_ids,))

# Simple serialization for models and tokenizers
model.save_pretrained('./directory/to/save/')  # save
model = model_class.from_pretrained('./directory/to/save/')  # re-load

# SOTA examples for GLUE, SQUAD, text generation...
Advertisement
Add Comment
Please, Sign In to add comment