Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.90 KB | None | 0 0
  1. <bound method Module.__dir__ of DataParallel(
  2. (module): BertForMultiLabelSequenceClassification(
  3. (bert): BertModel(
  4. (embeddings): BertEmbeddings(
  5. (word_embeddings): Embedding(30522, 768)
  6. (position_embeddings): Embedding(512, 768)
  7. (token_type_embeddings): Embedding(2, 768)
  8. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  9. (dropout): Dropout(p=0.1)
  10. )
  11. (encoder): BertEncoder(
  12. (layer): ModuleList(
  13. (0): BertLayer(
  14. (attention): BertAttention(
  15. (self): BertSelfAttention(
  16. (query): Linear(in_features=768, out_features=768, bias=True)
  17. (key): Linear(in_features=768, out_features=768, bias=True)
  18. (value): Linear(in_features=768, out_features=768, bias=True)
  19. (dropout): Dropout(p=0.1)
  20. )
  21. (output): BertSelfOutput(
  22. (dense): Linear(in_features=768, out_features=768, bias=True)
  23. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  24. (dropout): Dropout(p=0.1)
  25. )
  26. )
  27. (intermediate): BertIntermediate(
  28. (dense): Linear(in_features=768, out_features=3072, bias=True)
  29. )
  30. (output): BertOutput(
  31. (dense): Linear(in_features=3072, out_features=768, bias=True)
  32. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  33. (dropout): Dropout(p=0.1)
  34. )
  35. )
  36. (1): BertLayer(
  37. (attention): BertAttention(
  38. (self): BertSelfAttention(
  39. (query): Linear(in_features=768, out_features=768, bias=True)
  40. (key): Linear(in_features=768, out_features=768, bias=True)
  41. (value): Linear(in_features=768, out_features=768, bias=True)
  42. (dropout): Dropout(p=0.1)
  43. )
  44. (output): BertSelfOutput(
  45. (dense): Linear(in_features=768, out_features=768, bias=True)
  46. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  47. (dropout): Dropout(p=0.1)
  48. )
  49. )
  50. (intermediate): BertIntermediate(
  51. (dense): Linear(in_features=768, out_features=3072, bias=True)
  52. )
  53. (output): BertOutput(
  54. (dense): Linear(in_features=3072, out_features=768, bias=True)
  55. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  56. (dropout): Dropout(p=0.1)
  57. )
  58. )
  59. (2): BertLayer(
  60. (attention): BertAttention(
  61. (self): BertSelfAttention(
  62. (query): Linear(in_features=768, out_features=768, bias=True)
  63. (key): Linear(in_features=768, out_features=768, bias=True)
  64. (value): Linear(in_features=768, out_features=768, bias=True)
  65. (dropout): Dropout(p=0.1)
  66. )
  67. (output): BertSelfOutput(
  68. (dense): Linear(in_features=768, out_features=768, bias=True)
  69. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  70. (dropout): Dropout(p=0.1)
  71. )
  72. )
  73. (intermediate): BertIntermediate(
  74. (dense): Linear(in_features=768, out_features=3072, bias=True)
  75. )
  76. (output): BertOutput(
  77. (dense): Linear(in_features=3072, out_features=768, bias=True)
  78. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  79. (dropout): Dropout(p=0.1)
  80. )
  81. )
  82. (3): BertLayer(
  83. (attention): BertAttention(
  84. (self): BertSelfAttention(
  85. (query): Linear(in_features=768, out_features=768, bias=True)
  86. (key): Linear(in_features=768, out_features=768, bias=True)
  87. (value): Linear(in_features=768, out_features=768, bias=True)
  88. (dropout): Dropout(p=0.1)
  89. )
  90. (output): BertSelfOutput(
  91. (dense): Linear(in_features=768, out_features=768, bias=True)
  92. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  93. (dropout): Dropout(p=0.1)
  94. )
  95. )
  96. (intermediate): BertIntermediate(
  97. (dense): Linear(in_features=768, out_features=3072, bias=True)
  98. )
  99. (output): BertOutput(
  100. (dense): Linear(in_features=3072, out_features=768, bias=True)
  101. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  102. (dropout): Dropout(p=0.1)
  103. )
  104. )
  105. (4): BertLayer(
  106. (attention): BertAttention(
  107. (self): BertSelfAttention(
  108. (query): Linear(in_features=768, out_features=768, bias=True)
  109. (key): Linear(in_features=768, out_features=768, bias=True)
  110. (value): Linear(in_features=768, out_features=768, bias=True)
  111. (dropout): Dropout(p=0.1)
  112. )
  113. (output): BertSelfOutput(
  114. (dense): Linear(in_features=768, out_features=768, bias=True)
  115. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  116. (dropout): Dropout(p=0.1)
  117. )
  118. )
  119. (intermediate): BertIntermediate(
  120. (dense): Linear(in_features=768, out_features=3072, bias=True)
  121. )
  122. (output): BertOutput(
  123. (dense): Linear(in_features=3072, out_features=768, bias=True)
  124. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  125. (dropout): Dropout(p=0.1)
  126. )
  127. )
  128. (5): BertLayer(
  129. (attention): BertAttention(
  130. (self): BertSelfAttention(
  131. (query): Linear(in_features=768, out_features=768, bias=True)
  132. (key): Linear(in_features=768, out_features=768, bias=True)
  133. (value): Linear(in_features=768, out_features=768, bias=True)
  134. (dropout): Dropout(p=0.1)
  135. )
  136. (output): BertSelfOutput(
  137. (dense): Linear(in_features=768, out_features=768, bias=True)
  138. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  139. (dropout): Dropout(p=0.1)
  140. )
  141. )
  142. (intermediate): BertIntermediate(
  143. (dense): Linear(in_features=768, out_features=3072, bias=True)
  144. )
  145. (output): BertOutput(
  146. (dense): Linear(in_features=3072, out_features=768, bias=True)
  147. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  148. (dropout): Dropout(p=0.1)
  149. )
  150. )
  151. (6): BertLayer(
  152. (attention): BertAttention(
  153. (self): BertSelfAttention(
  154. (query): Linear(in_features=768, out_features=768, bias=True)
  155. (key): Linear(in_features=768, out_features=768, bias=True)
  156. (value): Linear(in_features=768, out_features=768, bias=True)
  157. (dropout): Dropout(p=0.1)
  158. )
  159. (output): BertSelfOutput(
  160. (dense): Linear(in_features=768, out_features=768, bias=True)
  161. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  162. (dropout): Dropout(p=0.1)
  163. )
  164. )
  165. (intermediate): BertIntermediate(
  166. (dense): Linear(in_features=768, out_features=3072, bias=True)
  167. )
  168. (output): BertOutput(
  169. (dense): Linear(in_features=3072, out_features=768, bias=True)
  170. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  171. (dropout): Dropout(p=0.1)
  172. )
  173. )
  174. (7): BertLayer(
  175. (attention): BertAttention(
  176. (self): BertSelfAttention(
  177. (query): Linear(in_features=768, out_features=768, bias=True)
  178. (key): Linear(in_features=768, out_features=768, bias=True)
  179. (value): Linear(in_features=768, out_features=768, bias=True)
  180. (dropout): Dropout(p=0.1)
  181. )
  182. (output): BertSelfOutput(
  183. (dense): Linear(in_features=768, out_features=768, bias=True)
  184. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  185. (dropout): Dropout(p=0.1)
  186. )
  187. )
  188. (intermediate): BertIntermediate(
  189. (dense): Linear(in_features=768, out_features=3072, bias=True)
  190. )
  191. (output): BertOutput(
  192. (dense): Linear(in_features=3072, out_features=768, bias=True)
  193. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  194. (dropout): Dropout(p=0.1)
  195. )
  196. )
  197. (8): BertLayer(
  198. (attention): BertAttention(
  199. (self): BertSelfAttention(
  200. (query): Linear(in_features=768, out_features=768, bias=True)
  201. (key): Linear(in_features=768, out_features=768, bias=True)
  202. (value): Linear(in_features=768, out_features=768, bias=True)
  203. (dropout): Dropout(p=0.1)
  204. )
  205. (output): BertSelfOutput(
  206. (dense): Linear(in_features=768, out_features=768, bias=True)
  207. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  208. (dropout): Dropout(p=0.1)
  209. )
  210. )
  211. (intermediate): BertIntermediate(
  212. (dense): Linear(in_features=768, out_features=3072, bias=True)
  213. )
  214. (output): BertOutput(
  215. (dense): Linear(in_features=3072, out_features=768, bias=True)
  216. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  217. (dropout): Dropout(p=0.1)
  218. )
  219. )
  220. (9): BertLayer(
  221. (attention): BertAttention(
  222. (self): BertSelfAttention(
  223. (query): Linear(in_features=768, out_features=768, bias=True)
  224. (key): Linear(in_features=768, out_features=768, bias=True)
  225. (value): Linear(in_features=768, out_features=768, bias=True)
  226. (dropout): Dropout(p=0.1)
  227. )
  228. (output): BertSelfOutput(
  229. (dense): Linear(in_features=768, out_features=768, bias=True)
  230. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  231. (dropout): Dropout(p=0.1)
  232. )
  233. )
  234. (intermediate): BertIntermediate(
  235. (dense): Linear(in_features=768, out_features=3072, bias=True)
  236. )
  237. (output): BertOutput(
  238. (dense): Linear(in_features=3072, out_features=768, bias=True)
  239. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  240. (dropout): Dropout(p=0.1)
  241. )
  242. )
  243. (10): BertLayer(
  244. (attention): BertAttention(
  245. (self): BertSelfAttention(
  246. (query): Linear(in_features=768, out_features=768, bias=True)
  247. (key): Linear(in_features=768, out_features=768, bias=True)
  248. (value): Linear(in_features=768, out_features=768, bias=True)
  249. (dropout): Dropout(p=0.1)
  250. )
  251. (output): BertSelfOutput(
  252. (dense): Linear(in_features=768, out_features=768, bias=True)
  253. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  254. (dropout): Dropout(p=0.1)
  255. )
  256. )
  257. (intermediate): BertIntermediate(
  258. (dense): Linear(in_features=768, out_features=3072, bias=True)
  259. )
  260. (output): BertOutput(
  261. (dense): Linear(in_features=3072, out_features=768, bias=True)
  262. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  263. (dropout): Dropout(p=0.1)
  264. )
  265. )
  266. (11): BertLayer(
  267. (attention): BertAttention(
  268. (self): BertSelfAttention(
  269. (query): Linear(in_features=768, out_features=768, bias=True)
  270. (key): Linear(in_features=768, out_features=768, bias=True)
  271. (value): Linear(in_features=768, out_features=768, bias=True)
  272. (dropout): Dropout(p=0.1)
  273. )
  274. (output): BertSelfOutput(
  275. (dense): Linear(in_features=768, out_features=768, bias=True)
  276. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  277. (dropout): Dropout(p=0.1)
  278. )
  279. )
  280. (intermediate): BertIntermediate(
  281. (dense): Linear(in_features=768, out_features=3072, bias=True)
  282. )
  283. (output): BertOutput(
  284. (dense): Linear(in_features=3072, out_features=768, bias=True)
  285. (LayerNorm): FusedLayerNorm(torch.Size([768]), eps=1e-12, elementwise_affine=True)
  286. (dropout): Dropout(p=0.1)
  287. )
  288. )
  289. )
  290. )
  291. (pooler): BertPooler(
  292. (dense): Linear(in_features=768, out_features=768, bias=True)
  293. (activation): Tanh()
  294. )
  295. )
  296. (dropout): Dropout(p=0.1)
  297. (classifier): Linear(in_features=768, out_features=50, bias=True)
  298. )
  299. )>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement