Advertisement
funny_falcon

test_rwspin.c

Jun 16th, 2025
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 6.59 KB | Software | 0 0
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <assert.h>
  4. #include <stdbool.h>
  5. #include <stdatomic.h>
  6. #include <stdint.h>
  7. #include <threads.h>
  8. #include <immintrin.h>
  9. #include <unistd.h>
  10.  
/*
 * Reader/writer spinlock word.
 * Bit 0 (RW_SPIN_EX_LOCK) marks an exclusive (writer) holder; the remaining
 * bits count shared (reader) holders in units of RW_SPIN_SH_MARK (2).
 */
typedef atomic_uint_least32_t RWSpin;
  12.  
  13. static inline void
  14. spin_delay(uint32_t *delay)
  15. {
  16.     ++(*delay);
  17.     if (*delay == 1000)
  18.     {
  19.         usleep(1000);
  20.         *delay = 0;
  21.     }
  22.     else if (*delay % 100 == 0)
  23.         thrd_yield();
  24.     else //if (*delay % 4 == 0)
  25.         _mm_pause();
  26.     atomic_signal_fence(memory_order_relaxed);
  27. }
  28.  
  29.  
enum
{
    RW_SPIN_EX_LOCK = 1,  /* low bit: exclusive (writer) lock held */
    RW_SPIN_SH_MARK = 2,  /* per-reader increment added to the lock word */
};

/* Contended path for RWSpinLockEx(); `val` is the last observed lock word. */
static void RWSpinLockEx_slowpath(RWSpin *spin, uint32_t val);
  37.  
  38. static inline void
  39. RWSpinLockEx(RWSpin *spin)
  40. {
  41.     uint32_t old = atomic_load_explicit(spin, memory_order_relaxed);
  42.    
  43.     if (old ||
  44.         !atomic_compare_exchange_strong(spin, &old, RW_SPIN_EX_LOCK))
  45.         RWSpinLockEx_slowpath(spin, old);
  46. }
  47.  
  48. static void
  49. RWSpinLockEx_slowpath(RWSpin *spin, uint32_t val)
  50. {
  51.     uint32_t delay = 0;
  52.     for (;;)
  53.     {
  54.         if (val == 0)
  55.         {
  56.             if (atomic_compare_exchange_strong(spin, &val, RW_SPIN_EX_LOCK))
  57.                 break;
  58.         }
  59.         else
  60.         {
  61.             spin_delay(&delay);
  62.             val = atomic_load_explicit(spin, memory_order_relaxed);
  63.         }
  64.     }
  65. }
  66.  
  67. static inline void
  68. RWSpinUnlockEx(RWSpin *spin)
  69. {
  70.     atomic_fetch_sub(spin, RW_SPIN_EX_LOCK);
  71. }
  72.  
/* Contended path for RWSpinLockSh(); `val` is the pre-increment lock word. */
static void RWSpinLockSh_slowpath(RWSpin *spin, uint32_t val);
  74.  
  75. static inline void
  76. RWSpinLockSh(RWSpin *spin)
  77. {
  78.     uint32_t val = atomic_fetch_add(spin, RW_SPIN_SH_MARK);
  79.  
  80.     if (val & RW_SPIN_EX_LOCK)
  81.         RWSpinLockSh_slowpath(spin, val);
  82. }
  83.  
  84. static void
  85. RWSpinLockSh_slowpath(RWSpin *spin, uint32_t val)
  86. {
  87.     uint32_t delay = 0;
  88.  
  89.     while (val & RW_SPIN_EX_LOCK)
  90.     {
  91.         spin_delay(&delay);
  92.         val = atomic_load_explicit(spin, memory_order_relaxed);
  93.     }
  94.  
  95.     atomic_thread_fence(memory_order_seq_cst);
  96. }
  97.  
  98. static inline void
  99. RWSpinUnlockSh(RWSpin *spin)
  100. {
  101.     atomic_fetch_sub(spin, RW_SPIN_SH_MARK);
  102. }
  103.  
  104. static inline void
  105. FlagAcquire(atomic_bool *flg)
  106. {
  107.     uint32_t delay = 0;
  108.     while (atomic_exchange_explicit(flg, 1, memory_order_acquire))
  109.     {
  110.         while (atomic_load_explicit(flg, memory_order_relaxed))
  111.             spin_delay(&delay);
  112.     }
  113. }
  114.  
  115. static inline void
  116. FlagRelease(atomic_bool *flg)
  117. {
  118.     atomic_store_explicit(flg, 0, memory_order_relaxed);
  119. }
  120.  
/*
 * Sequence-lock style optimistic lock: the 64-bit word is a version
 * counter.  An odd value (bit 0 set via fetch_or) means a writer is
 * active; the writer's release increments the word back to even, which
 * invalidates any concurrent optimistic readers.
 */
typedef atomic_uint_least64_t RWOptLock;

/* Per-attempt reader state; see RWOptReadDone() and RWOptReadLoop(). */
typedef struct {
    RWOptLock *lock;  /* lock the read section runs under */
    uint64_t v;       /* version observed before the read section */
    uint32_t delay;   /* backoff state for spin_delay() */
    bool check;       /* false on the first pass, true on validation passes */
} RWOptLockRead;
  128.  
  129. static inline void
  130. RWOptAcquire(RWOptLock *lock)
  131. {
  132.     uint32_t delay = 0;
  133.     uint64_t val = atomic_fetch_or(lock, 1);
  134.     if ((val & 1) == 0)
  135.         return;
  136.     do
  137.     {
  138.         spin_delay(&delay);
  139.         val = atomic_load_explicit(lock, memory_order_relaxed);
  140.     } while((val & 1) || (atomic_fetch_or(lock, 1) & 1));
  141. }
  142.  
  143. static inline void
  144. RWOptRelease(RWOptLock *lock)
  145. {
  146.     atomic_fetch_add(lock, 1);
  147. }
  148.  
  149. static inline bool
  150. RWOptReadDone( RWOptLockRead *rd)
  151. {
  152.     uint64_t val;
  153.     if (rd->check)
  154.     {
  155.         atomic_thread_fence(memory_order_seq_cst);
  156.         val = atomic_load_explicit(rd->lock, memory_order_relaxed);
  157.         if (val == rd->v)
  158.             return true;
  159.         rd->v = val;
  160.     }
  161.     else
  162.     {
  163.         rd->check = true;
  164.         val = atomic_load_explicit(rd->lock, memory_order_relaxed);
  165.     }
  166.     val = rd->v;
  167.     while (val & 1)
  168.     {
  169.         spin_delay(&rd->delay);
  170.         val = atomic_load_explicit(rd->lock, memory_order_relaxed);
  171.     }
  172.     rd->v = val;
  173.     atomic_thread_fence(memory_order_seq_cst);
  174.     return false;
  175. }
  176.  
/* Run the loop body as an optimistic read section, repeating it until it completes without a concurrent writer invalidating the version. */
#define RWOptReadLoop(opt) for (RWOptLockRead _rd_ = (RWOptLockRead){.lock = (opt)}; !RWOptReadDone(&_rd_);)
  178.  
/* Shared state mutated and verified by the worker threads. */
typedef struct {
    RWSpin spin;          /* reader/writer spinlock under test */
    atomic_bool spin_flg; /* test-and-set flag under test */
    RWOptLock opt;        /* optimistic lock under test */
    char pad[128];        /* keep the counters off the locks' cache line */
    uint32_t iters;       /* iterations per thread */
    uint32_t every;       /* every `every`-th iteration takes the write path */
    uint64_t a;           /* invariant under the lock: a == b || a == b + 1 */
    uint64_t b;
} TestVal;
  189.  
  190. static int
  191. test_func_ex(void *arg)
  192. {
  193.     TestVal *val = arg;
  194.     uint32_t delay = 0;
  195.     bool less;
  196.     uint64_t sa, sb;
  197.  
  198.     for (unsigned i = 0; i < val->iters; i++)
  199.     {
  200.         if (i % val->every == 0)
  201.         {
  202.             FlagAcquire(&val->spin_flg);
  203.             assert(val->a == val->b || val->a == val->b + 1);
  204.             less = val->a <= val->b;
  205.             atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
  206.             if (less)
  207.                 val->a++;
  208.             else
  209.                 val->b++;
  210.             FlagRelease(&val->spin_flg);
  211.         }
  212.         else
  213.         {
  214.             FlagAcquire(&val->spin_flg);
  215.             sa = val->a;
  216.             atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
  217.             sb = val->b;
  218.             FlagRelease(&val->spin_flg);
  219.             assert(sa == sb || sa == sb + 1);
  220.         }
  221.     }
  222.     return 0;
  223. }
  224.  
  225. static int
  226. test_func_rwspin(void *arg)
  227. {
  228.     TestVal *val = arg;
  229.     uint32_t delay = 0;
  230.     bool less;
  231.     uint64_t sa, sb;
  232.  
  233.     for (unsigned i = 0; i < val->iters; i++)
  234.     {
  235.         //if (i % 1000 == 0)
  236.             //fprintf(stderr, "%i\n", i);
  237.         if (i % val->every == 0)
  238.         {
  239.             RWSpinLockEx(&val->spin);
  240.             assert(val->a == val->b || val->a == val->b + 1);
  241.             less = val->a <= val->b;
  242.             atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
  243.             if (less)
  244.                 val->a++;
  245.             else
  246.                 val->b++;
  247.             RWSpinUnlockEx(&val->spin);
  248.         }
  249.         else
  250.         {
  251.             RWSpinLockSh(&val->spin);
  252.             sa = val->a;
  253.             atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
  254.             sb = val->b;
  255.             RWSpinUnlockSh(&val->spin);
  256.             assert(sa == sb || sa == sb + 1);
  257.         }
  258.     }
  259.     return 0;
  260. }
  261.  
  262. static int
  263. test_func_rwopt(void *arg)
  264. {
  265.     TestVal *val = arg;
  266.     uint32_t delay = 0;
  267.     bool less;
  268.     uint64_t sa, sb;
  269.  
  270.     for (unsigned i = 0; i < val->iters; i++)
  271.     {
  272.         if (i % val->every == 0)
  273.         {
  274.             RWOptAcquire(&val->opt);
  275.             assert(val->a == val->b || val->a == val->b + 1);
  276.             less = val->a <= val->b;
  277.             atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
  278.             if (less)
  279.                 val->a++;
  280.             else
  281.                 val->b++;
  282.             RWOptRelease(&val->opt);
  283.         }
  284.         else
  285.         {
  286.             RWOptReadLoop(&val->opt)
  287.             {
  288.                 sa = val->a;
  289.                 atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
  290.                 sb = val->b;
  291.             }
  292.             assert(sa == sb || sa == sb + 1);
  293.         }
  294.     }
  295.     return 0;
  296. }
  297.  
  298. int main(int argc, char **argv)
  299. {
  300.     long niters = argc > 1 ? atol(argv[1]) : 10000;
  301.     long every = argc > 2 ? atol(argv[2]) : 8;
  302.     long kind = argc > 3 ? atol(argv[3]) : 1;
  303.     long nthreads = argc > 4 ? atol(argv[4]) : 8;
  304.  
  305.     TestVal val = {.spin = 0, .spin_flg = false,
  306.         .every = every, .iters = niters};
  307.  
  308.     thrd_start_t func = kind == 0 ? test_func_rwspin :
  309.                         kind == 1 ? test_func_ex :
  310.                         kind == 2 ? test_func_rwopt : NULL;
  311.                                  
  312.     thrd_t *threads = calloc(nthreads, sizeof(thrd_t));
  313.     assert(threads != NULL);
  314.  
  315.  
  316.     for (int i = 0; i < nthreads; i++)
  317.     {
  318.         int r = thrd_create(&threads[i], func, &val);
  319.         assert(r == thrd_success);
  320.     }
  321.     for (int i = 0; i < nthreads; i++)
  322.     {
  323.         int r = thrd_join(threads[i], NULL);
  324.         assert(r == thrd_success);
  325.     }
  326.     return 0;
  327. }
  328.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement