Guest User

without_atomic_load.txt

a guest
Oct 5th, 2018
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.77 KB | None | 0 0
  1. Dump of assembler code for function bucket_enqueue_single:
  2. /export/dpdk.org/build/include/rte_lcore.h:
  3. 72 rte_lcore_id(void)
  4. 73 {
  5. 74 return RTE_PER_LCORE(_lcore_id);
  6. 0x000000000068d1a0 <+0>: 02 08 40 f9 ldr x2, [x0, #16]
  7. 0x000000000068d1a4 <+4>: 44 d0 3b d5 mrs x4, tpidr_el0
  8. 0x000000000068d1a8 <+8>: 43 26 00 90 adrp x3, 0xb55000
  9. 0x000000000068d1ac <+12>: 63 f4 45 f9 ldr x3, [x3, #3048]
  10. 0x000000000068d1b0 <+16>: 22 00 02 8a and x2, x1, x2
  11. 0x000000000068d1b4 <+20>: 84 68 63 b8 ldr w4, [x4, x3]
  12.  
  13. /export/dpdk.org/drivers/mempool/bucket/rte_mempool_bucket.c:
  14. 111 if (likely(hdr->lcore_id == lcore_id)) {
  15. 0x000000000068d1b8 <+24>: 43 00 40 b9 ldr w3, [x2]
  16. 0x000000000068d1bc <+28>: 7f 00 04 6b cmp w3, w4
  17. 0x000000000068d1c0 <+32>: 81 02 00 54 b.ne 0x68d210 <bucket_enqueue_single+112> // b.any
  18.  
  19. 112 if (hdr->fill_cnt < bd->obj_per_bucket - 1) {
  20. 0x000000000068d1c4 <+36>: 01 08 40 b9 ldr w1, [x0, #8]
  21. 0x000000000068d1c8 <+40>: 44 10 40 39 ldrb w4, [x2, #4]
  22. 0x000000000068d1cc <+44>: 21 04 00 51 sub w1, w1, #0x1
  23. 0x000000000068d1d0 <+48>: 9f 00 01 6b cmp w4, w1
  24. 0x000000000068d1d4 <+52>: a2 00 00 54 b.cs 0x68d1e8 <bucket_enqueue_single+72> // b.hs, b.nlast
  25.  
  26. 113 hdr->fill_cnt++;
  27. 0x000000000068d1d8 <+56>: 84 04 00 11 add w4, w4, #0x1
  28. 0x000000000068d1dc <+60>: 00 00 80 52 mov w0, #0x0 // #0
  29. 0x000000000068d1e0 <+64>: 44 10 00 39 strb w4, [x2, #4]
  30. 0x000000000068d1e4 <+68>: c0 03 5f d6 ret
  31.  
  32. 114 } else {
  33. 115 hdr->fill_cnt = 0;
  34. 0x000000000068d1e8 <+72>: 03 4c 23 8b add x3, x0, w3, uxtw #3
  35. 0x000000000068d1ec <+76>: 5f 10 00 39 strb wzr, [x2, #4]
  36.  
  37. 116 /* Stack is big enough to put all buckets */
  38. 117 bucket_stack_push(bd->buckets[lcore_id], hdr);
  39. 0x000000000068d1f0 <+80>: 00 00 80 52 mov w0, #0x0 // #0
  40. 0x000000000068d1f4 <+84>: 63 10 40 f9 ldr x3, [x3, #32]
  41.  
  42. 82 stack->objects[stack->top++] = obj;
  43. 0x000000000068d1f8 <+88>: 61 00 40 b9 ldr w1, [x3]
  44. 0x000000000068d1fc <+92>: 24 04 00 11 add w4, w1, #0x1
  45. 0x000000000068d200 <+96>: 64 00 00 b9 str w4, [x3]
  46. 0x000000000068d204 <+100>: 61 4c 21 8b add x1, x3, w1, uxtw #3
  47. 0x000000000068d208 <+104>: 22 04 00 f9 str x2, [x1, #8]
  48. 0x000000000068d20c <+108>: c0 03 5f d6 ret
  49.  
  50. 118 }
  51. 119 } else if (hdr->lcore_id != LCORE_ID_ANY) {
  52. 0x000000000068d210 <+112>: 7f 04 00 31 cmn w3, #0x1
  53. 0x000000000068d214 <+116>: 01 06 00 54 b.ne 0x68d2d4 <bucket_enqueue_single+308> // b.any
  54.  
  55. 121 bd->adoption_buffer_rings[hdr->lcore_id];
  56. 122
  57. 123 rc = rte_ring_enqueue(adopt_ring, obj);
  58. 124 /* Ring is big enough to put all objects */
  59. 125 RTE_ASSERT(rc == 0);
  60. 126 } else if (hdr->fill_cnt < bd->obj_per_bucket - 1) {
  61. 0x000000000068d218 <+120>: 03 08 40 b9 ldr w3, [x0, #8]
  62. 0x000000000068d21c <+124>: 41 10 40 39 ldrb w1, [x2, #4]
  63. 0x000000000068d220 <+128>: 63 04 00 51 sub w3, w3, #0x1
  64. 0x000000000068d224 <+132>: 3f 00 03 6b cmp w1, w3
  65. 0x000000000068d228 <+136>: a2 00 00 54 b.cs 0x68d23c <bucket_enqueue_single+156> // b.hs, b.nlast
  66.  
  67. 127 hdr->fill_cnt++;
  68. 0x000000000068d22c <+140>: 21 04 00 11 add w1, w1, #0x1
  69. 0x000000000068d230 <+144>: 00 00 80 52 mov w0, #0x0 // #0
  70. 0x000000000068d234 <+148>: 41 10 00 39 strb w1, [x2, #4]
  71. 0x000000000068d238 <+152>: c0 03 5f d6 ret
  72.  
  73. 128 } else {
  74. 129 hdr->fill_cnt = 0;
  75. 0x000000000068d23c <+156>: 5f 10 00 39 strb wzr, [x2, #4]
  76.  
  77. 130 rc = rte_ring_enqueue(bd->shared_bucket_ring, hdr);
  78. 0x000000000068d240 <+160>: 00 0c 40 f9 ldr x0, [x0, #24]
  79.  
  80. /export/dpdk.org/build/include/rte_ring.h:
  81. 532 return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS;
  82. 0x000000000068d244 <+164>: 04 00 04 91 add x4, x0, #0x100
  83. 0x000000000068d248 <+168>: 03 38 40 b9 ldr w3, [x0, #56]
  84. 0x000000000068d24c <+172>: 05 08 41 b9 ldr w5, [x0, #264]
  85.  
  86. /export/dpdk.org/build/include/rte_ring_c11_mem.h:
  87. 63 *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
  88. 0x000000000068d250 <+176>: 81 fc df 88 ldar w1, [x4]
  89. 0x000000000068d254 <+180>: 25 0a 00 34 cbz w5, 0x68d398 <bucket_enqueue_single+504>
  90.  
  91. 64 do {
  92. 65 /* Reset n to the initial burst count */
  93. 66 n = max;
  94. 67
  95. 68 /* load-acquire synchronize with store-release of ht->tail
  96. 69 * in update_tail.
  97. 70 */
  98. 71 const uint32_t cons_tail = __atomic_load_n(&r->cons.tail,
  99. 0x000000000068d258 <+184>: 06 10 08 91 add x6, x0, #0x204
  100. 0x000000000068d25c <+188>: c6 fc df 88 ldar w6, [x6]
  101.  
  102. 72 __ATOMIC_ACQUIRE);
  103. 73
  104. 74 /* The subtraction is done between two unsigned 32bits value
  105. 75 * (the result is always modulo 32 bits even if we have
  106. 76 * *old_head > cons_tail). So 'free_entries' is always between 0
  107. 77 * and capacity (which is < size).
  108. 78 */
  109. 79 *free_entries = (capacity + cons_tail - *old_head);
  110. 80
  111. 81 /* check that we have enough room in ring */
  112. 82 if (unlikely(n > *free_entries))
  113. 0x000000000068d260 <+192>: 63 00 06 0b add w3, w3, w6
  114. 0x000000000068d264 <+196>: 3f 00 03 6b cmp w1, w3
  115. 0x000000000068d268 <+200>: 40 09 00 54 b.eq 0x68d390 <bucket_enqueue_single+496> // b.none
  116.  
  117. 83 n = (behavior == RTE_RING_QUEUE_FIXED) ?
  118. 84 0 : *free_entries;
  119. 85
  120. 86 if (n == 0)
  121. 87 return 0;
  122. 88
  123. 89 *new_head = *old_head + n;
  124. 0x000000000068d26c <+204>: 26 04 00 11 add w6, w1, #0x1
  125.  
  126. 90 if (is_sp)
  127. 91 r->prod.head = *new_head, success = 1;
  128. 0x000000000068d270 <+208>: 06 00 01 b9 str w6, [x0, #256]
  129.  
  130. /export/dpdk.org/build/include/rte_ring.h:
  131. 349 ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
  132. 0x000000000068d274 <+212>: 07 0c 46 29 ldp w7, w3, [x0, #48]
  133. 0x000000000068d278 <+216>: 08 00 0c 91 add x8, x0, #0x300
  134. 0x000000000068d27c <+220>: 23 00 03 0a and w3, w1, w3
  135. 0x000000000068d280 <+224>: 69 04 00 11 add w9, w3, #0x1
  136. 0x000000000068d284 <+228>: ff 00 09 6b cmp w7, w9
  137. 0x000000000068d288 <+232>: 49 01 00 54 b.ls 0x68d2b0 <bucket_enqueue_single+272> // b.plast
  138. 0x000000000068d28c <+236>: 02 59 23 f8 str x2, [x8, w3, uxtw #3]
  139.  
  140. /export/dpdk.org/build/include/rte_ring_c11_mem.h:
  141. 23 if (!single)
  142. 0x000000000068d290 <+240>: 85 00 00 35 cbnz w5, 0x68d2a0 <bucket_enqueue_single+256>
  143. 0x000000000068d294 <+244>: 82 04 40 b9 ldr w2, [x4, #4]
  144. 0x000000000068d298 <+248>: 5f 00 01 6b cmp w2, w1
  145. 0x000000000068d29c <+252>: 21 01 00 54 b.ne 0x68d2c0 <bucket_enqueue_single+288> // b.any
  146.  
  147. 24 while (unlikely(ht->tail != old_val))
  148. 25 rte_pause();
  149. 26
  150. 27 __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE);
  151. 0x000000000068d2a0 <+256>: 00 10 04 91 add x0, x0, #0x104
  152. 0x000000000068d2a4 <+260>: 06 fc 9f 88 stlr w6, [x0]
  153.  
  154. /export/dpdk.org/build/include/rte_ring.h:
  155. 355 return n;
  156. 0x000000000068d2a8 <+264>: 00 00 80 52 mov w0, #0x0 // #0
  157. 0x000000000068d2ac <+268>: c0 03 5f d6 ret
  158. 0x000000000068d2b0 <+272>: ff 00 03 6b cmp w7, w3
  159. 0x000000000068d2b4 <+276>: c8 fe ff 54 b.hi 0x68d28c <bucket_enqueue_single+236> // b.pmore
  160.  
  161. 349 ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
  162. 0x000000000068d2b8 <+280>: 02 80 01 f9 str x2, [x0, #768]
  163. 0x000000000068d2bc <+284>: f5 ff ff 17 b 0x68d290 <bucket_enqueue_single+240>
  164.  
  165. /export/dpdk.org/build/include/rte_pause_64.h:
  166. 17 asm volatile("yield" ::: "memory");
  167. 0x000000000068d2c0 <+288>: 3f 20 03 d5 yield
  168. 0x000000000068d2c4 <+292>: 82 04 40 b9 ldr w2, [x4, #4]
  169. 0x000000000068d2c8 <+296>: 3f 00 02 6b cmp w1, w2
  170. 0x000000000068d2cc <+300>: a0 fe ff 54 b.eq 0x68d2a0 <bucket_enqueue_single+256> // b.none
  171. 0x000000000068d2d0 <+304>: fc ff ff 17 b 0x68d2c0 <bucket_enqueue_single+288>
  172. 0x000000000068d2d4 <+308>: 03 4c 23 8b add x3, x0, w3, uxtw #3
  173. 0x000000000068d2d8 <+312>: 60 10 42 f9 ldr x0, [x3, #1056]
  174. 0x000000000068d2dc <+316>: 04 00 04 91 add x4, x0, #0x100
  175. 0x000000000068d2e0 <+320>: 03 38 40 b9 ldr w3, [x0, #56]
  176. 0x000000000068d2e4 <+324>: 05 08 41 b9 ldr w5, [x0, #264]
  177. 0x000000000068d2e8 <+328>: 82 fc df 88 ldar w2, [x4]
  178. 0x000000000068d2ec <+332>: 07 10 08 91 add x7, x0, #0x204
  179. 0x000000000068d2f0 <+336>: 65 03 00 34 cbz w5, 0x68d35c <bucket_enqueue_single+444>
  180. 0x000000000068d2f4 <+340>: e6 fc df 88 ldar w6, [x7]
  181. 0x000000000068d2f8 <+344>: 63 00 06 0b add w3, w3, w6
  182. 0x000000000068d2fc <+348>: 5f 00 03 6b cmp w2, w3
  183. 0x000000000068d300 <+352>: 80 04 00 54 b.eq 0x68d390 <bucket_enqueue_single+496> // b.none
  184. 0x000000000068d304 <+356>: 46 04 00 11 add w6, w2, #0x1
  185. 0x000000000068d308 <+360>: 06 00 01 b9 str w6, [x0, #256]
  186. 0x000000000068d30c <+364>: 07 0c 46 29 ldp w7, w3, [x0, #48]
  187. 0x000000000068d310 <+368>: 09 00 0c 91 add x9, x0, #0x300
  188. 0x000000000068d314 <+372>: 43 00 03 0a and w3, w2, w3
  189. 0x000000000068d318 <+376>: 68 04 00 11 add w8, w3, #0x1
  190. 0x000000000068d31c <+380>: ff 00 08 6b cmp w7, w8
  191. 0x000000000068d320 <+384>: 69 01 00 54 b.ls 0x68d34c <bucket_enqueue_single+428> // b.plast
  192. 0x000000000068d324 <+388>: 21 59 23 f8 str x1, [x9, w3, uxtw #3]
  193. 0x000000000068d328 <+392>: c5 fb ff 35 cbnz w5, 0x68d2a0 <bucket_enqueue_single+256>
  194. 0x000000000068d32c <+396>: 81 04 40 b9 ldr w1, [x4, #4]
  195. 0x000000000068d330 <+400>: 5f 00 01 6b cmp w2, w1
  196. 0x000000000068d334 <+404>: 60 fb ff 54 b.eq 0x68d2a0 <bucket_enqueue_single+256> // b.none
  197. 0x000000000068d338 <+408>: 3f 20 03 d5 yield
  198. 0x000000000068d33c <+412>: 81 04 40 b9 ldr w1, [x4, #4]
  199. 0x000000000068d340 <+416>: 3f 00 02 6b cmp w1, w2
  200. 0x000000000068d344 <+420>: e0 fa ff 54 b.eq 0x68d2a0 <bucket_enqueue_single+256> // b.none
  201. 0x000000000068d348 <+424>: fc ff ff 17 b 0x68d338 <bucket_enqueue_single+408>
  202. 0x000000000068d34c <+428>: ff 00 03 6b cmp w7, w3
  203. 0x000000000068d350 <+432>: a8 fe ff 54 b.hi 0x68d324 <bucket_enqueue_single+388> // b.pmore
  204. 0x000000000068d354 <+436>: 01 80 01 f9 str x1, [x0, #768]
  205. 0x000000000068d358 <+440>: f4 ff ff 17 b 0x68d328 <bucket_enqueue_single+392>
  206. 0x000000000068d35c <+444>: e8 fc df 88 ldar w8, [x7]
  207. 0x000000000068d360 <+448>: 68 00 08 0b add w8, w3, w8
  208. 0x000000000068d364 <+452>: 46 04 00 11 add w6, w2, #0x1
  209. 0x000000000068d368 <+456>: 5f 00 08 6b cmp w2, w8
  210. 0x000000000068d36c <+460>: 20 01 00 54 b.eq 0x68d390 <bucket_enqueue_single+496> // b.none
  211. 0x000000000068d370 <+464>: 88 fc 5f 88 ldaxr w8, [x4]
  212. 0x000000000068d374 <+468>: 1f 01 02 6b cmp w8, w2
  213. 0x000000000068d378 <+472>: 61 00 00 54 b.ne 0x68d384 <bucket_enqueue_single+484> // b.any
  214. 0x000000000068d37c <+476>: 86 7c 09 88 stxr w9, w6, [x4]
  215. 0x000000000068d380 <+480>: 89 ff ff 35 cbnz w9, 0x68d370 <bucket_enqueue_single+464>
  216. 0x000000000068d384 <+484>: e2 03 08 2a mov w2, w8
  217. 0x000000000068d388 <+488>: 20 fc ff 54 b.eq 0x68d30c <bucket_enqueue_single+364> // b.none
  218. 0x000000000068d38c <+492>: f4 ff ff 17 b 0x68d35c <bucket_enqueue_single+444>
  219. 0x000000000068d390 <+496>: 00 0d 80 12 mov w0, #0xffffff97 // #-105
  220. 0x000000000068d394 <+500>: c0 03 5f d6 ret
  221. 0x000000000068d398 <+504>: 08 10 08 91 add x8, x0, #0x204
  222. 0x000000000068d39c <+508>: 07 fd df 88 ldar w7, [x8]
  223. 0x000000000068d3a0 <+512>: 67 00 07 0b add w7, w3, w7
  224. 0x000000000068d3a4 <+516>: 26 04 00 11 add w6, w1, #0x1
  225. 0x000000000068d3a8 <+520>: ff 00 01 6b cmp w7, w1
  226. 0x000000000068d3ac <+524>: 20 ff ff 54 b.eq 0x68d390 <bucket_enqueue_single+496> // b.none
  227. 0x000000000068d3b0 <+528>: 87 fc 5f 88 ldaxr w7, [x4]
  228. 0x000000000068d3b4 <+532>: ff 00 01 6b cmp w7, w1
  229. 0x000000000068d3b8 <+536>: 61 00 00 54 b.ne 0x68d3c4 <bucket_enqueue_single+548> // b.any
  230. 0x000000000068d3bc <+540>: 86 7c 09 88 stxr w9, w6, [x4]
  231. 0x000000000068d3c0 <+544>: 89 ff ff 35 cbnz w9, 0x68d3b0 <bucket_enqueue_single+528>
  232. 0x000000000068d3c4 <+548>: e1 03 07 2a mov w1, w7
  233. 0x000000000068d3c8 <+552>: 60 f5 ff 54 b.eq 0x68d274 <bucket_enqueue_single+212> // b.none
  234. 0x000000000068d3cc <+556>: f4 ff ff 17 b 0x68d39c <bucket_enqueue_single+508>
  235. End of assembler dump.
Add Comment
Please, Sign In to add comment