Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdlib.h>
- #include <stdio.h>
- #include <assert.h>
- #include <stdbool.h>
- #include <stdatomic.h>
- #include <stdint.h>
- #include <threads.h>
- #include <immintrin.h>
- #include <unistd.h>
/*
 * Lock word of the reader/writer spin lock: a 32-bit (at least) atomic
 * unsigned integer combining the RW_SPIN_* flag values.
 */
typedef _Atomic uint_least32_t RWSpin;
/*
 * One step of a spin-wait back-off.
 *
 * *delay is a caller-owned counter that is incremented on every call:
 *   - on the 1000th call the thread sleeps for one millisecond and the
 *     counter is reset to 0;
 *   - on every other multiple of 100 the thread yields its time slice;
 *   - otherwise a CPU pause instruction is issued.
 *
 * The sleep uses C11 thrd_sleep() (from <threads.h>) rather than the
 * obsolescent POSIX usleep(), which is not declared under strict -std=c11.
 */
static inline void
spin_delay(uint32_t *delay)
{
    ++(*delay);
    if (*delay == 1000)
    {
        /* 1 ms == 1,000,000 ns */
        thrd_sleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL);
        *delay = 0;
    }
    else if (*delay % 100 == 0)
    {
        thrd_yield();
    }
    else
    {
        _mm_pause();
    }

    /*
     * Compiler-only barrier.  A signal fence with memory_order_relaxed has
     * no effect at all, so seq_cst is required to actually constrain the
     * compiler; no machine instruction is emitted for a signal fence.
     */
    atomic_signal_fence(memory_order_seq_cst);
}
/* Values combined in an RWSpin lock word. */
enum
{
    RW_SPIN_EX_LOCK = 1 << 0,   /* bit 0: an exclusive holder is present */
    RW_SPIN_SH_MARK = 1 << 1    /* amount added to the word per shared holder */
};
- static void RWSpinLockEx_slowpath(RWSpin *spin, uint32_t val);
- static inline void
- RWSpinLockEx(RWSpin *spin)
- {
- uint32_t old = atomic_load_explicit(spin, memory_order_relaxed);
- if (old ||
- !atomic_compare_exchange_strong(spin, &old, RW_SPIN_EX_LOCK))
- RWSpinLockEx_slowpath(spin, old);
- }
- static void
- RWSpinLockEx_slowpath(RWSpin *spin, uint32_t val)
- {
- uint32_t delay = 0;
- for (;;)
- {
- if (val == 0)
- {
- if (atomic_compare_exchange_strong(spin, &val, RW_SPIN_EX_LOCK))
- break;
- }
- else
- {
- spin_delay(&delay);
- val = atomic_load_explicit(spin, memory_order_relaxed);
- }
- }
- }
- static inline void
- RWSpinUnlockEx(RWSpin *spin)
- {
- atomic_fetch_sub(spin, RW_SPIN_EX_LOCK);
- }
- static void RWSpinLockSh_slowpath(RWSpin *spin, uint32_t val);
- static inline void
- RWSpinLockSh(RWSpin *spin)
- {
- uint32_t val = atomic_fetch_add(spin, RW_SPIN_SH_MARK);
- if (val & RW_SPIN_EX_LOCK)
- RWSpinLockSh_slowpath(spin, val);
- }
- static void
- RWSpinLockSh_slowpath(RWSpin *spin, uint32_t val)
- {
- uint32_t delay = 0;
- while (val & RW_SPIN_EX_LOCK)
- {
- spin_delay(&delay);
- val = atomic_load_explicit(spin, memory_order_relaxed);
- }
- atomic_thread_fence(memory_order_seq_cst);
- }
- static inline void
- RWSpinUnlockSh(RWSpin *spin)
- {
- atomic_fetch_sub(spin, RW_SPIN_SH_MARK);
- }
/*
 * Acquire a boolean spin flag.
 *
 * Atomically exchanges the flag with 1 (acquire ordering); a previous value
 * of 0 means the caller now owns it.  Otherwise the flag is polled with
 * relaxed loads, backing off via spin_delay(), until it reads as clear, and
 * the exchange is attempted again.
 */
static inline void
FlagAcquire(atomic_bool *flg)
{
    uint32_t backoff = 0;

    for (;;)
    {
        if (!atomic_exchange_explicit(flg, 1, memory_order_acquire))
            return;

        while (atomic_load_explicit(flg, memory_order_relaxed))
            spin_delay(&backoff);
    }
}
/*
 * Release a boolean spin flag taken with FlagAcquire().
 *
 * The store uses release ordering so that every write made while the flag
 * was held becomes visible before the flag is seen as clear.  It pairs with
 * the acquire exchange in FlagAcquire().  A relaxed store would let the
 * compiler or CPU move critical-section writes past the unlock.
 */
static inline void
FlagRelease(atomic_bool *flg)
{
    atomic_store_explicit(flg, false, memory_order_release);
}
/*
 * Word of the optimistic lock: a 64-bit (at least) atomic unsigned integer.
 * Writers set bit 0 while they hold it and add 1 on release.
 */
typedef _Atomic uint_least64_t RWOptLock;

/* Per-read-attempt state consumed by RWOptReadDone(). */
typedef struct
{
    RWOptLock  *lock;       /* lock word being read under */
    uint64_t    v;          /* lock value recorded for the current pass */
    uint32_t    delay;      /* back-off counter passed to spin_delay() */
    bool        check;      /* false until the first RWOptReadDone() call */
} RWOptLockRead;
- static inline void
- RWOptAcquire(RWOptLock *lock)
- {
- uint32_t delay = 0;
- uint64_t val = atomic_fetch_or(lock, 1);
- if ((val & 1) == 0)
- return;
- do
- {
- spin_delay(&delay);
- val = atomic_load_explicit(lock, memory_order_relaxed);
- } while((val & 1) || (atomic_fetch_or(lock, 1) & 1));
- }
- static inline void
- RWOptRelease(RWOptLock *lock)
- {
- atomic_fetch_add(lock, 1);
- }
- static inline bool
- RWOptReadDone( RWOptLockRead *rd)
- {
- uint64_t val;
- if (rd->check)
- {
- atomic_thread_fence(memory_order_seq_cst);
- val = atomic_load_explicit(rd->lock, memory_order_relaxed);
- if (val == rd->v)
- return true;
- rd->v = val;
- }
- else
- {
- rd->check = true;
- val = atomic_load_explicit(rd->lock, memory_order_relaxed);
- }
- val = rd->v;
- while (val & 1)
- {
- spin_delay(&rd->delay);
- val = atomic_load_explicit(rd->lock, memory_order_relaxed);
- }
- rd->v = val;
- atomic_thread_fence(memory_order_seq_cst);
- return false;
- }
/*
 * Statement prefix that repeats the following statement/block until
 * RWOptReadDone() accepts a pass.  The body runs at least once because the
 * first RWOptReadDone() call always returns false.  All members of the
 * hidden state other than .lock start out zero.
 */
#define RWOptReadLoop(opt)                              \
    for (RWOptLockRead _rd_ = { .lock = (opt) };        \
         !RWOptReadDone(&_rd_);                         \
         )
- typedef struct {
- RWSpin spin;
- atomic_bool spin_flg;
- RWOptLock opt;
- char pad[128];
- uint32_t iters;
- uint32_t every;
- uint64_t a;
- uint64_t b;
- } TestVal;
- static int
- test_func_ex(void *arg)
- {
- TestVal *val = arg;
- uint32_t delay = 0;
- bool less;
- uint64_t sa, sb;
- for (unsigned i = 0; i < val->iters; i++)
- {
- if (i % val->every == 0)
- {
- FlagAcquire(&val->spin_flg);
- assert(val->a == val->b || val->a == val->b + 1);
- less = val->a <= val->b;
- atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
- if (less)
- val->a++;
- else
- val->b++;
- FlagRelease(&val->spin_flg);
- }
- else
- {
- FlagAcquire(&val->spin_flg);
- sa = val->a;
- atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
- sb = val->b;
- FlagRelease(&val->spin_flg);
- assert(sa == sb || sa == sb + 1);
- }
- }
- return 0;
- }
- static int
- test_func_rwspin(void *arg)
- {
- TestVal *val = arg;
- uint32_t delay = 0;
- bool less;
- uint64_t sa, sb;
- for (unsigned i = 0; i < val->iters; i++)
- {
- //if (i % 1000 == 0)
- //fprintf(stderr, "%i\n", i);
- if (i % val->every == 0)
- {
- RWSpinLockEx(&val->spin);
- assert(val->a == val->b || val->a == val->b + 1);
- less = val->a <= val->b;
- atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
- if (less)
- val->a++;
- else
- val->b++;
- RWSpinUnlockEx(&val->spin);
- }
- else
- {
- RWSpinLockSh(&val->spin);
- sa = val->a;
- atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
- sb = val->b;
- RWSpinUnlockSh(&val->spin);
- assert(sa == sb || sa == sb + 1);
- }
- }
- return 0;
- }
- static int
- test_func_rwopt(void *arg)
- {
- TestVal *val = arg;
- uint32_t delay = 0;
- bool less;
- uint64_t sa, sb;
- for (unsigned i = 0; i < val->iters; i++)
- {
- if (i % val->every == 0)
- {
- RWOptAcquire(&val->opt);
- assert(val->a == val->b || val->a == val->b + 1);
- less = val->a <= val->b;
- atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
- if (less)
- val->a++;
- else
- val->b++;
- RWOptRelease(&val->opt);
- }
- else
- {
- RWOptReadLoop(&val->opt)
- {
- sa = val->a;
- atomic_signal_fence(memory_order_relaxed); /* acts as compiler barrier */
- sb = val->b;
- }
- assert(sa == sb || sa == sb + 1);
- }
- }
- return 0;
- }
- int main(int argc, char **argv)
- {
- long niters = argc > 1 ? atol(argv[1]) : 10000;
- long every = argc > 2 ? atol(argv[2]) : 8;
- long kind = argc > 3 ? atol(argv[3]) : 1;
- long nthreads = argc > 4 ? atol(argv[4]) : 8;
- TestVal val = {.spin = 0, .spin_flg = false,
- .every = every, .iters = niters};
- thrd_start_t func = kind == 0 ? test_func_rwspin :
- kind == 1 ? test_func_ex :
- kind == 2 ? test_func_rwopt : NULL;
- thrd_t *threads = calloc(nthreads, sizeof(thrd_t));
- assert(threads != NULL);
- for (int i = 0; i < nthreads; i++)
- {
- int r = thrd_create(&threads[i], func, &val);
- assert(r == thrd_success);
- }
- for (int i = 0; i < nthreads; i++)
- {
- int r = thrd_join(threads[i], NULL);
- assert(r == thrd_success);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement