Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <bound method Module.__dir__ of DataParallel(
- (module): BertForMultiLabelSequenceClassification(
- (bert): BertModel(
- (embeddings): BertEmbeddings(
- (word_embeddings): Embedding(30522, 768)
- (position_embeddings): Embedding(512, 768)
- (token_type_embeddings): Embedding(2, 768)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- (encoder): BertEncoder(
- (layer): ModuleList(
- (0): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (1): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (2): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (3): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (4): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (5): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (6): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (7): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (8): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (9): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (10): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (11): BertLayer(
- (attention): BertAttention(
- (self): BertSelfAttention(
- (query): Linear(in_features=768, out_features=768, bias=True)
- (key): Linear(in_features=768, out_features=768, bias=True)
- (value): Linear(in_features=768, out_features=768, bias=True)
- (dropout): Dropout(p=0.1)
- )
- (output): BertSelfOutput(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- (intermediate): BertIntermediate(
- (dense): Linear(in_features=768, out_features=3072, bias=True)
- )
- (output): BertOutput(
- (dense): Linear(in_features=3072, out_features=768, bias=True)
- (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
- (dropout): Dropout(p=0.1)
- )
- )
- )
- )
- (pooler): BertPooler(
- (dense): Linear(in_features=768, out_features=768, bias=True)
- (activation): Tanh()
- )
- )
- (dropout): Dropout(p=0.1)
- (classifier): Linear(in_features=768, out_features=50, bias=True)
- )
- )>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement