Guest User

with_atomic_load.txt

a guest
Oct 5th, 2018
139
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.05 KB | None | 0 0
  1. Dump of assembler code for function bucket_enqueue_single:
  2. /export/dpdk.org/build/include/rte_lcore.h:
  3. 72 rte_lcore_id(void)
  4. 73 {
  5. 74 return RTE_PER_LCORE(_lcore_id);
  6. 0x000000000068cdc0 <+0>: 02 08 40 f9 ldr x2, [x0, #16]
  7. 0x000000000068cdc4 <+4>: 44 d0 3b d5 mrs x4, tpidr_el0
  8. 0x000000000068cdc8 <+8>: 43 26 00 90 adrp x3, 0xb54000
  9. 0x000000000068cdcc <+12>: 63 f4 45 f9 ldr x3, [x3, #3048]
  10. 0x000000000068cdd0 <+16>: 22 00 02 8a and x2, x1, x2
  11. 0x000000000068cdd4 <+20>: 84 68 63 b8 ldr w4, [x4, x3]
  12.  
  13. /export/dpdk.org/drivers/mempool/bucket/rte_mempool_bucket.c:
  14. 111 if (likely(hdr->lcore_id == lcore_id)) {
  15. 0x000000000068cdd8 <+24>: 43 00 40 b9 ldr w3, [x2]
  16. 0x000000000068cddc <+28>: 7f 00 04 6b cmp w3, w4
  17. 0x000000000068cde0 <+32>: 81 02 00 54 b.ne 0x68ce30 <bucket_enqueue_single+112> // b.any
  18.  
  19. 112 if (hdr->fill_cnt < bd->obj_per_bucket - 1) {
  20. 0x000000000068cde4 <+36>: 01 08 40 b9 ldr w1, [x0, #8]
  21. 0x000000000068cde8 <+40>: 44 10 40 39 ldrb w4, [x2, #4]
  22. 0x000000000068cdec <+44>: 21 04 00 51 sub w1, w1, #0x1
  23. 0x000000000068cdf0 <+48>: 9f 00 01 6b cmp w4, w1
  24. 0x000000000068cdf4 <+52>: a2 00 00 54 b.cs 0x68ce08 <bucket_enqueue_single+72> // b.hs, b.nlast
  25.  
  26. 113 hdr->fill_cnt++;
  27. 0x000000000068cdf8 <+56>: 84 04 00 11 add w4, w4, #0x1
  28. 0x000000000068cdfc <+60>: 00 00 80 52 mov w0, #0x0 // #0
  29. 0x000000000068ce00 <+64>: 44 10 00 39 strb w4, [x2, #4]
  30. 0x000000000068ce04 <+68>: c0 03 5f d6 ret
  31.  
  32. 114 } else {
  33. 115 hdr->fill_cnt = 0;
  34. 0x000000000068ce08 <+72>: 03 4c 23 8b add x3, x0, w3, uxtw #3
  35. 0x000000000068ce0c <+76>: 5f 10 00 39 strb wzr, [x2, #4]
  36.  
  37. 116 /* Stack is big enough to put all buckets */
  38. 117 bucket_stack_push(bd->buckets[lcore_id], hdr);
  39. 0x000000000068ce10 <+80>: 00 00 80 52 mov w0, #0x0 // #0
  40. 0x000000000068ce14 <+84>: 63 10 40 f9 ldr x3, [x3, #32]
  41.  
  42. 82 stack->objects[stack->top++] = obj;
  43. 0x000000000068ce18 <+88>: 61 00 40 b9 ldr w1, [x3]
  44. 0x000000000068ce1c <+92>: 24 04 00 11 add w4, w1, #0x1
  45. 0x000000000068ce20 <+96>: 64 00 00 b9 str w4, [x3]
  46. 0x000000000068ce24 <+100>: 61 4c 21 8b add x1, x3, w1, uxtw #3
  47. 0x000000000068ce28 <+104>: 22 04 00 f9 str x2, [x1, #8]
  48. 0x000000000068ce2c <+108>: c0 03 5f d6 ret
  49.  
  50. 118 }
  51. 119 } else if (hdr->lcore_id != LCORE_ID_ANY) {
  52. 0x000000000068ce30 <+112>: 7f 04 00 31 cmn w3, #0x1
  53. 0x000000000068ce34 <+116>: 61 05 00 54 b.ne 0x68cee0 <bucket_enqueue_single+288> // b.any
  54.  
  55. 121 bd->adoption_buffer_rings[hdr->lcore_id];
  56. 122
  57. 123 rc = rte_ring_enqueue(adopt_ring, obj);
  58. 124 /* Ring is big enough to put all objects */
  59. 125 RTE_ASSERT(rc == 0);
  60. 126 } else if (hdr->fill_cnt < bd->obj_per_bucket - 1) {
  61. 0x000000000068ce38 <+120>: 03 08 40 b9 ldr w3, [x0, #8]
  62. 0x000000000068ce3c <+124>: 41 10 40 39 ldrb w1, [x2, #4]
  63. 0x000000000068ce40 <+128>: 63 04 00 51 sub w3, w3, #0x1
  64. 0x000000000068ce44 <+132>: 3f 00 03 6b cmp w1, w3
  65. 0x000000000068ce48 <+136>: a2 00 00 54 b.cs 0x68ce5c <bucket_enqueue_single+156> // b.hs, b.nlast
  66.  
  67. 127 hdr->fill_cnt++;
  68. 0x000000000068ce4c <+140>: 21 04 00 11 add w1, w1, #0x1
  69. 0x000000000068ce50 <+144>: 00 00 80 52 mov w0, #0x0 // #0
  70. 0x000000000068ce54 <+148>: 41 10 00 39 strb w1, [x2, #4]
  71. 0x000000000068ce58 <+152>: c0 03 5f d6 ret
  72.  
  73. 128 } else {
  74. 129 hdr->fill_cnt = 0;
  75. 0x000000000068ce5c <+156>: 5f 10 00 39 strb wzr, [x2, #4]
  76.  
  77. 130 rc = rte_ring_enqueue(bd->shared_bucket_ring, hdr);
  78. 0x000000000068ce60 <+160>: 00 0c 40 f9 ldr x0, [x0, #24]
  79.  
  80. /export/dpdk.org/build/include/rte_ring.h:
  81. 532 return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS;
  82. 0x000000000068ce64 <+164>: 05 00 04 91 add x5, x0, #0x100
  83. 0x000000000068ce68 <+168>: 03 38 40 b9 ldr w3, [x0, #56]
  84. 0x000000000068ce6c <+172>: 04 08 41 b9 ldr w4, [x0, #264]
  85.  
  86. /export/dpdk.org/build/include/rte_ring_c11_mem.h:
  87. 64 *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
  88. 0x000000000068ce70 <+176>: a1 fc df 88 ldar w1, [x5]
  89. 0x000000000068ce74 <+180>: a4 09 00 34 cbz w4, 0x68cfa8 <bucket_enqueue_single+488>
  90.  
  91. 65 do {
  92. 66 /* Reset n to the initial burst count */
  93. 67 n = max;
  94. 68
  95. 69 /* load-acquire synchronize with store-release of ht->tail
  96. 70 * in update_tail.
  97. 71 */
  98. 72 const uint32_t cons_tail = __atomic_load_n(&r->cons.tail,
  99. 0x000000000068ce78 <+184>: 05 10 08 91 add x5, x0, #0x204
  100. 0x000000000068ce7c <+188>: a5 fc df 88 ldar w5, [x5]
  101.  
  102. 73 __ATOMIC_ACQUIRE);
  103. 74
  104. 75 /* The subtraction is done between two unsigned 32bits value
  105. 76 * (the result is always modulo 32 bits even if we have
  106. 77 * *old_head > cons_tail). So 'free_entries' is always between 0
  107. 78 * and capacity (which is < size).
  108. 79 */
  109. 80 *free_entries = (capacity + cons_tail - *old_head);
  110. 81
  111. 82 /* check that we have enough room in ring */
  112. 83 if (unlikely(n > *free_entries))
  113. 0x000000000068ce80 <+192>: 63 00 05 0b add w3, w3, w5
  114. 0x000000000068ce84 <+196>: 3f 00 03 6b cmp w1, w3
  115. 0x000000000068ce88 <+200>: c0 08 00 54 b.eq 0x68cfa0 <bucket_enqueue_single+480> // b.none
  116.  
  117. 84 n = (behavior == RTE_RING_QUEUE_FIXED) ?
  118. 85 0 : *free_entries;
  119. 86
  120. 87 if (n == 0)
  121. 88 return 0;
  122. 89
  123. 90 *new_head = *old_head + n;
  124. 0x000000000068ce8c <+204>: 26 04 00 11 add w6, w1, #0x1
  125.  
  126. 91 if (is_sp)
  127. 92 r->prod.head = *new_head, success = 1;
  128. 0x000000000068ce90 <+208>: 06 00 01 b9 str w6, [x0, #256]
  129.  
  130. /export/dpdk.org/build/include/rte_ring.h:
  131. 349 ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
  132. 0x000000000068ce94 <+212>: 05 0c 46 29 ldp w5, w3, [x0, #48]
  133. 0x000000000068ce98 <+216>: 07 00 0c 91 add x7, x0, #0x300
  134. 0x000000000068ce9c <+220>: 23 00 03 0a and w3, w1, w3
  135. 0x000000000068cea0 <+224>: 68 04 00 11 add w8, w3, #0x1
  136. 0x000000000068cea4 <+228>: bf 00 08 6b cmp w5, w8
  137. 0x000000000068cea8 <+232>: 49 01 00 54 b.ls 0x68ced0 <bucket_enqueue_single+272> // b.plast
  138. 0x000000000068ceac <+236>: e2 58 23 f8 str x2, [x7, w3, uxtw #3]
  139.  
  140. /export/dpdk.org/build/include/rte_ring_c11_mem.h:
  141. 23 if (!single)
  142. 0x000000000068ceb0 <+240>: 00 10 04 91 add x0, x0, #0x104
  143. 0x000000000068ceb4 <+244>: 84 00 00 35 cbnz w4, 0x68cec4 <bucket_enqueue_single+260>
  144. 0x000000000068ceb8 <+248>: 02 00 40 b9 ldr w2, [x0]
  145. 0x000000000068cebc <+252>: 3f 00 02 6b cmp w1, w2
  146. 0x000000000068cec0 <+256>: 01 09 00 54 b.ne 0x68cfe0 <bucket_enqueue_single+544> // b.any
  147.  
  148. 24 while (unlikely(old_val != __atomic_load_n(&ht->tail,
  149. 25 __ATOMIC_RELAXED)))
  150. 26 rte_pause();
  151. 27
  152. 28 __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE);
  153. 0x000000000068cec4 <+260>: 06 fc 9f 88 stlr w6, [x0]
  154.  
  155. /export/dpdk.org/build/include/rte_ring.h:
  156. 355 return n;
  157. 0x000000000068cec8 <+264>: 00 00 80 52 mov w0, #0x0 // #0
  158. 0x000000000068cecc <+268>: c0 03 5f d6 ret
  159. 0x000000000068ced0 <+272>: bf 00 03 6b cmp w5, w3
  160. 0x000000000068ced4 <+276>: c8 fe ff 54 b.hi 0x68ceac <bucket_enqueue_single+236> // b.pmore
  161.  
  162. 349 ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
  163. 0x000000000068ced8 <+280>: 02 80 01 f9 str x2, [x0, #768]
  164. 0x000000000068cedc <+284>: f5 ff ff 17 b 0x68ceb0 <bucket_enqueue_single+240>
  165.  
  166. /export/dpdk.org/drivers/mempool/bucket/rte_mempool_bucket.c:
  167. 120 struct rte_ring *adopt_ring =
  168. 0x000000000068cee0 <+288>: 03 4c 23 8b add x3, x0, w3, uxtw #3
  169. 0x000000000068cee4 <+292>: 60 10 42 f9 ldr x0, [x3, #1056]
  170.  
  171. /export/dpdk.org/build/include/rte_ring.h:
  172. 532 return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS;
  173. 0x000000000068cee8 <+296>: 05 00 04 91 add x5, x0, #0x100
  174. 0x000000000068ceec <+300>: 03 38 40 b9 ldr w3, [x0, #56]
  175. 0x000000000068cef0 <+304>: 04 08 41 b9 ldr w4, [x0, #264]
  176.  
  177. /export/dpdk.org/build/include/rte_ring_c11_mem.h:
  178. 64 *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
  179. 0x000000000068cef4 <+308>: a2 fc df 88 ldar w2, [x5]
  180. 0x000000000068cef8 <+312>: 07 10 08 91 add x7, x0, #0x204
  181. 0x000000000068cefc <+316>: 84 03 00 34 cbz w4, 0x68cf6c <bucket_enqueue_single+428>
  182.  
  183. 65 do {
  184. 66 /* Reset n to the initial burst count */
  185. 67 n = max;
  186. 68
  187. 69 /* load-acquire synchronize with store-release of ht->tail
  188. 70 * in update_tail.
  189. 71 */
  190. 72 const uint32_t cons_tail = __atomic_load_n(&r->cons.tail,
  191. 0x000000000068cf00 <+320>: e5 fc df 88 ldar w5, [x7]
  192.  
  193. 73 __ATOMIC_ACQUIRE);
  194. 74
  195. 75 /* The subtraction is done between two unsigned 32bits value
  196. 76 * (the result is always modulo 32 bits even if we have
  197. 77 * *old_head > cons_tail). So 'free_entries' is always between 0
  198. 78 * and capacity (which is < size).
  199. 79 */
  200. 80 *free_entries = (capacity + cons_tail - *old_head);
  201. 81
  202. 82 /* check that we have enough room in ring */
  203. 83 if (unlikely(n > *free_entries))
  204. 0x000000000068cf04 <+324>: 63 00 05 0b add w3, w3, w5
  205. 0x000000000068cf08 <+328>: 5f 00 03 6b cmp w2, w3
  206. 0x000000000068cf0c <+332>: a0 04 00 54 b.eq 0x68cfa0 <bucket_enqueue_single+480> // b.none
  207.  
  208. 84 n = (behavior == RTE_RING_QUEUE_FIXED) ?
  209. 85 0 : *free_entries;
  210. 86
  211. 87 if (n == 0)
  212. 88 return 0;
  213. 89
  214. 90 *new_head = *old_head + n;
  215. 0x000000000068cf10 <+336>: 46 04 00 11 add w6, w2, #0x1
  216.  
  217. 91 if (is_sp)
  218. 92 r->prod.head = *new_head, success = 1;
  219. 0x000000000068cf14 <+340>: 06 00 01 b9 str w6, [x0, #256]
  220.  
  221. /export/dpdk.org/build/include/rte_ring.h:
  222. 349 ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
  223. 0x000000000068cf18 <+344>: 05 0c 46 29 ldp w5, w3, [x0, #48]
  224. 0x000000000068cf1c <+348>: 08 00 0c 91 add x8, x0, #0x300
  225. 0x000000000068cf20 <+352>: 43 00 03 0a and w3, w2, w3
  226. 0x000000000068cf24 <+356>: 67 04 00 11 add w7, w3, #0x1
  227. 0x000000000068cf28 <+360>: bf 00 07 6b cmp w5, w7
  228. 0x000000000068cf2c <+364>: 89 01 00 54 b.ls 0x68cf5c <bucket_enqueue_single+412> // b.plast
  229. 0x000000000068cf30 <+368>: 01 59 23 f8 str x1, [x8, w3, uxtw #3]
  230.  
  231. /export/dpdk.org/build/include/rte_ring_c11_mem.h:
  232. 23 if (!single)
  233. 0x000000000068cf34 <+372>: 00 10 04 91 add x0, x0, #0x104
  234. 0x000000000068cf38 <+376>: 64 fc ff 35 cbnz w4, 0x68cec4 <bucket_enqueue_single+260>
  235. 0x000000000068cf3c <+380>: 01 00 40 b9 ldr w1, [x0]
  236. 0x000000000068cf40 <+384>: 3f 00 02 6b cmp w1, w2
  237. 0x000000000068cf44 <+388>: 00 fc ff 54 b.eq 0x68cec4 <bucket_enqueue_single+260> // b.none
  238.  
  239. /export/dpdk.org/build/include/rte_pause_64.h:
  240. 17 asm volatile("yield" ::: "memory");
  241. 0x000000000068cf48 <+392>: 3f 20 03 d5 yield
  242. 0x000000000068cf4c <+396>: 01 00 40 b9 ldr w1, [x0]
  243. 0x000000000068cf50 <+400>: 3f 00 02 6b cmp w1, w2
  244. 0x000000000068cf54 <+404>: 80 fb ff 54 b.eq 0x68cec4 <bucket_enqueue_single+260> // b.none
  245. 0x000000000068cf58 <+408>: fc ff ff 17 b 0x68cf48 <bucket_enqueue_single+392>
  246. 0x000000000068cf5c <+412>: bf 00 03 6b cmp w5, w3
  247. 0x000000000068cf60 <+416>: 88 fe ff 54 b.hi 0x68cf30 <bucket_enqueue_single+368> // b.pmore
  248. 0x000000000068cf64 <+420>: 01 80 01 f9 str x1, [x0, #768]
  249. 0x000000000068cf68 <+424>: f3 ff ff 17 b 0x68cf34 <bucket_enqueue_single+372>
  250. 0x000000000068cf6c <+428>: e8 fc df 88 ldar w8, [x7]
  251. 0x000000000068cf70 <+432>: 68 00 08 0b add w8, w3, w8
  252. 0x000000000068cf74 <+436>: 46 04 00 11 add w6, w2, #0x1
  253. 0x000000000068cf78 <+440>: 5f 00 08 6b cmp w2, w8
  254. 0x000000000068cf7c <+444>: 20 01 00 54 b.eq 0x68cfa0 <bucket_enqueue_single+480> // b.none
  255. 0x000000000068cf80 <+448>: a8 fc 5f 88 ldaxr w8, [x5]
  256. 0x000000000068cf84 <+452>: 1f 01 02 6b cmp w8, w2
  257. 0x000000000068cf88 <+456>: 61 00 00 54 b.ne 0x68cf94 <bucket_enqueue_single+468> // b.any
  258. 0x000000000068cf8c <+460>: a6 7c 09 88 stxr w9, w6, [x5]
  259. 0x000000000068cf90 <+464>: 89 ff ff 35 cbnz w9, 0x68cf80 <bucket_enqueue_single+448>
  260. 0x000000000068cf94 <+468>: e2 03 08 2a mov w2, w8
  261. 0x000000000068cf98 <+472>: 00 fc ff 54 b.eq 0x68cf18 <bucket_enqueue_single+344> // b.none
  262. 0x000000000068cf9c <+476>: f4 ff ff 17 b 0x68cf6c <bucket_enqueue_single+428>
  263. 0x000000000068cfa0 <+480>: 00 0d 80 12 mov w0, #0xffffff97 // #-105
  264. 0x000000000068cfa4 <+484>: c0 03 5f d6 ret
  265. 0x000000000068cfa8 <+488>: 08 10 08 91 add x8, x0, #0x204
  266. 0x000000000068cfac <+492>: 07 fd df 88 ldar w7, [x8]
  267. 0x000000000068cfb0 <+496>: 67 00 07 0b add w7, w3, w7
  268. 0x000000000068cfb4 <+500>: 26 04 00 11 add w6, w1, #0x1
  269. 0x000000000068cfb8 <+504>: 3f 00 07 6b cmp w1, w7
  270. 0x000000000068cfbc <+508>: 20 ff ff 54 b.eq 0x68cfa0 <bucket_enqueue_single+480> // b.none
  271. 0x000000000068cfc0 <+512>: a7 fc 5f 88 ldaxr w7, [x5]
  272. 0x000000000068cfc4 <+516>: ff 00 01 6b cmp w7, w1
  273. 0x000000000068cfc8 <+520>: 61 00 00 54 b.ne 0x68cfd4 <bucket_enqueue_single+532> // b.any
  274. 0x000000000068cfcc <+524>: a6 7c 09 88 stxr w9, w6, [x5]
  275. 0x000000000068cfd0 <+528>: 89 ff ff 35 cbnz w9, 0x68cfc0 <bucket_enqueue_single+512>
  276. 0x000000000068cfd4 <+532>: e1 03 07 2a mov w1, w7
  277. 0x000000000068cfd8 <+536>: e0 f5 ff 54 b.eq 0x68ce94 <bucket_enqueue_single+212> // b.none
  278. 0x000000000068cfdc <+540>: f4 ff ff 17 b 0x68cfac <bucket_enqueue_single+492>
  279. 0x000000000068cfe0 <+544>: 3f 20 03 d5 yield
  280. 0x000000000068cfe4 <+548>: b5 ff ff 17 b 0x68ceb8 <bucket_enqueue_single+248>
  281. End of assembler dump.
Add Comment
Please, Sign In to add comment