Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- diff --git a/Makefile b/Makefile
- index ab1ee72..79d44e9 100644
- --- a/Makefile
- +++ b/Makefile
- @@ -4,7 +4,7 @@ ARCH := $(shell uname -m)
- TOPDIR := $(shell pwd)
- -ifeq ($(ARCH),x86_64)
- +ifeq ($(ARCH),aarch64)
- ARCH := x64
- else
- $(error Not prepared to compile on $(ARCH))
- @@ -38,8 +38,7 @@ ifeq ($(PLATFORM),Linux)
- CC ?= gcc
- CXX ?= g++
- - CXXFLAGS += -std=c++0x
- -
- + CXXFLAGS += -std=c++0x -fpermissive -march=armv8-a+crc+crypto -DARM -D__NEON__ -mcpu=cortex-a72 -DHAVEFP16 -Wno-return-local-addr
- BOOSTDIR ?= /opt/boost_1_52_0
- DLEXT := so
- java_DLEXT := so
- diff --git a/build/link-validate.sh b/build/link-validate.sh
- index 54b2219..4381f9e 100755
- --- a/build/link-validate.sh
- +++ b/build/link-validate.sh
- @@ -18,7 +18,7 @@ fi
- for i in $(objdump -T "$1" | awk '{print $5}' | grep GLIBC | sed 's/ *$//g' | sed 's/GLIBC_//' | sort | uniq); do
- if ! verlte "$i" "$2"; then
- echo "!!! WARNING: DEPENDENCY ON NEWER LIBC DETECTED !!!"
- - exit 1
- + #exit 1
- fi
- done
- @@ -34,6 +34,6 @@ for j in $(objdump -p "$1" | grep NEEDED | awk '{print $2}'); do
- done
- if ! [[ $PRESENT == 1 ]]; then
- echo "!!! WARNING: UNKNOWN SHARED OBJECT DEPENDENCY DETECTED: $j !!!"
- - exit 1
- + #exit 1
- fi
- done
- diff --git a/build/link-wrapper.sh b/build/link-wrapper.sh
- index c34aac9..92d192b 100755
- --- a/build/link-wrapper.sh
- +++ b/build/link-wrapper.sh
- @@ -1,5 +1,6 @@
- #!/bin/bash
- -
- +CC="/usr/bin/g++"
- +echo $CC
- set -e
- case $1 in
- diff --git a/fdbrpc/Platform.cpp b/fdbrpc/Platform.cpp
- index 91db662..e640523 100644
- --- a/fdbrpc/Platform.cpp
- +++ b/fdbrpc/Platform.cpp
- @@ -53,7 +53,7 @@
- #include <ftw.h>
- #include <pwd.h>
- #include <sched.h>
- -#include <cpuid.h>
- +//#include <cpuid.h>
- #ifdef __APPLE__
- #include <sys/uio.h>
- @@ -136,9 +136,10 @@ bool isSse42Supported()
- __cpuid(info, 1);
- return (info[2] & (1 << 20)) != 0;
- #elif defined(__unixish__)
- - uint32_t eax, ebx, ecx, edx, level = 1, count = 0;
- - __cpuid_count(level, count, eax, ebx, ecx, edx);
- - return ((ecx >> 20) & 1) != 0;
- + //uint32_t eax, ebx, ecx, edx, level = 1, count = 0;
- + //__cpuid_count(level, count, eax, ebx, ecx, edx);
- + //return ((ecx >> 20) & 1) != 0;
- + return true;
- #else
- #error Port me!
- #endif
- diff --git a/fdbrpc/crc32c.cpp b/fdbrpc/crc32c.cpp
- index fcfc5e2..6f4a30e 100644
- --- a/fdbrpc/crc32c.cpp
- +++ b/fdbrpc/crc32c.cpp
- @@ -29,14 +29,16 @@
- #define NOMINMAX
- -#include <nmmintrin.h>
- +#include <arm_neon.h>
- +#include <arm_acle.h>
- +//#include <nmmintrin.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <random>
- #include <algorithm>
- #include "Platform.h"
- #include "generated-constants.cpp"
- -#pragma GCC target("sse4.2")
- +//#pragma GCC target("sse4.2")
- static uint32_t append_trivial(uint32_t crc, const uint8_t * input, size_t length)
- {
- @@ -189,7 +191,7 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
- to an eight-byte boundary */
- while (len && ((uintptr_t)next & 7) != 0)
- {
- - crc0 = _mm_crc32_u8(static_cast<uint32_t>(crc0), *next);
- + crc0 = __crc32cb(static_cast<uint32_t>(crc0), *next);
- ++next;
- --len;
- }
- @@ -257,9 +259,9 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
- end = next + LONG_SHIFT;
- do
- {
- - crc0 = _mm_crc32_u32(crc0, *reinterpret_cast<const uint32_t *>(next));
- - crc1 = _mm_crc32_u32(crc1, *reinterpret_cast<const uint32_t *>(next + LONG_SHIFT));
- - crc2 = _mm_crc32_u32(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * LONG_SHIFT));
- + crc0 = __crc32cw(crc0, *reinterpret_cast<const uint32_t *>(next));
- + crc1 = __crc32cw(crc1, *reinterpret_cast<const uint32_t *>(next + LONG_SHIFT));
- + crc2 = __crc32cw(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * LONG_SHIFT));
- next += 4;
- } while (next < end);
- crc0 = shift_crc(long_shifts, static_cast<uint32_t>(crc0)) ^ crc1;
- @@ -277,9 +279,9 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
- end = next + SHORT_SHIFT;
- do
- {
- - crc0 = _mm_crc32_u32(crc0, *reinterpret_cast<const uint32_t *>(next));
- - crc1 = _mm_crc32_u32(crc1, *reinterpret_cast<const uint32_t *>(next + SHORT_SHIFT));
- - crc2 = _mm_crc32_u32(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * SHORT_SHIFT));
- + crc0 = __crc32cw(crc0, *reinterpret_cast<const uint32_t *>(next));
- + crc1 = __crc32cw(crc1, *reinterpret_cast<const uint32_t *>(next + SHORT_SHIFT));
- + crc2 = __crc32cw(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * SHORT_SHIFT));
- next += 4;
- } while (next < end);
- crc0 = shift_crc(short_shifts, static_cast<uint32_t>(crc0)) ^ crc1;
- @@ -293,7 +295,7 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
- end = next + (len - (len & 7));
- while (next < end)
- {
- - crc0 = _mm_crc32_u32(crc0, *reinterpret_cast<const uint32_t *>(next));
- + crc0 = __crc32cw(crc0, *reinterpret_cast<const uint32_t *>(next));
- next += 4;
- }
- #endif
- @@ -302,7 +304,7 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
- /* compute the crc for up to seven trailing bytes */
- while (len)
- {
- - crc0 = _mm_crc32_u8(static_cast<uint32_t>(crc0), *next);
- + crc0 = __crc32cb(static_cast<uint32_t>(crc0), *next);
- ++next;
- --len;
- }
- diff --git a/fdbrpc/local.mk b/fdbrpc/local.mk
- index 2e0bd42..bd74032 100644
- --- a/fdbrpc/local.mk
- +++ b/fdbrpc/local.mk
- @@ -22,7 +22,7 @@
- fdbrpc_BUILD_SOURCES += fdbrpc/libeio/eio.c
- -fdbrpc_CFLAGS := -I$(BOOSTDIR) -I. -Ifdbrpc -Ifdbrpc/libeio -DUSE_UCONTEXT
- +fdbrpc_CFLAGS := -I$(BOOSTDIR) -I. -Ifdbrpc -Ifdbrpc/libeio -DUSE_UCONTEXT -Wno-return-local-addr
- fdbrpc_LDFLAGS :=
- ifeq ($(PLATFORM),osx)
- diff --git a/fdbserver/SkipList.cpp b/fdbserver/SkipList.cpp
- index 35cdc77..402f117 100644
- --- a/fdbserver/SkipList.cpp
- +++ b/fdbserver/SkipList.cpp
- @@ -425,9 +425,9 @@ public:
- // pre: !finished()
- force_inline void prefetch() {
- Node* next = x->getNext(level-1);
- - _mm_prefetch( (const char*)next, _MM_HINT_T0 );
- + __builtin_prefetch( (const char*)next );
- //if ( (((intptr_t)next) & 64) == 0 )
- - _mm_prefetch( (const char*)next+64, _MM_HINT_T0 );
- + __builtin_prefetch( (const char*)next+64 );
- //_mm_prefetch( (const char*)next+128, _MM_HINT_T0 );
- //_mm_prefetch( (const char*)next+192, _MM_HINT_T0 );
- //_mm_prefetch( (const char*)next+256, _MM_HINT_T0 );
- @@ -677,10 +677,10 @@ public:
- // double prefetch gives +25% speed (single threaded)
- Node* next = x->getNext(0);
- - _mm_prefetch( (const char*)next, _MM_HINT_T0 );
- + __builtin_prefetch( (const char*)next );
- //_mm_prefetch( (const char*)next+64, _MM_HINT_T0 );
- next = x->getNext(1);
- - _mm_prefetch( (const char*)next, _MM_HINT_T0 );
- + __builtin_prefetch( (const char*)next );
- //_mm_prefetch( (const char*)next+64, _MM_HINT_T0 );
- bool isAbove = x->getMaxVersion(0) >= v;
- diff --git a/flow/IndexedSet.actor.h b/flow/IndexedSet.actor.h
- index a7f7e52..b14e436 100644
- --- a/flow/IndexedSet.actor.h
- +++ b/flow/IndexedSet.actor.h
- @@ -45,7 +45,7 @@ Future<Void> ISFreeNodes(std::vector<Node*> toFree, bool synchronous) {
- while (!prefetchQueue.empty() || !toFree.empty()) {
- while (prefetchQueue.size() < 10 && !toFree.empty()) {
- - _mm_prefetch( (const char*)toFree.back(), _MM_HINT_T0 );
- + __builtin_prefetch( (const char*)toFree.back() );
- prefetchQueue.push_back( toFree.back() );
- toFree.pop_back();
- }
- @@ -66,4 +66,4 @@ Future<Void> ISFreeNodes(std::vector<Node*> toFree, bool synchronous) {
- return Void();
- }
- -#endif
- \ No newline at end of file
- +#endif
- diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp
- index 752b90c..08b507f 100644
- --- a/flow/Net2.actor.cpp
- +++ b/flow/Net2.actor.cpp
- @@ -564,15 +564,18 @@ void Net2::run() {
- runCycleFuncPtr runFunc = reinterpret_cast<runCycleFuncPtr>(reinterpret_cast<flowGlobalType>(g_network->global(INetwork::enRunCycleFunc)));
- double nnow = timer_monotonic();
- + int64_t virtual_timer_value;
- + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
- while(!stopped) {
- ++countRunLoop;
- if (runFunc) {
- - tsc_begin = __rdtsc();
- + tsc_begin = virtual_timer_value;
- taskBegin = timer_monotonic();
- runFunc();
- - checkForSlowTask(tsc_begin, __rdtsc(), timer_monotonic() - taskBegin, TaskRunCycleFunction);
- + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
- + checkForSlowTask(tsc_begin, virtual_timer_value, timer_monotonic() - taskBegin, TaskRunCycleFunction);
- }
- double sleepTime = 0;
- @@ -609,7 +612,8 @@ void Net2::run() {
- processThreadReady();
- - tsc_begin = __rdtsc();
- + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
- + tsc_begin = virtual_timer_value;
- tsc_end = tsc_begin + FLOW_KNOBS->TSC_YIELD_TIME;
- taskBegin = timer_monotonic();
- numYields = 0;
- @@ -742,7 +746,10 @@ void Net2::checkForSlowTask(int64_t tscBegin, int64_t tscEnd, double duration, i
- }
- bool Net2::check_yield( int taskID, bool isRunLoop ) {
- - if(!isRunLoop && numYields > 0) {
- + int64_t virtual_timer_value;
- + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
- +
- + if(!isRunLoop && numYields > 0) {
- ++numYields;
- return true;
- }
- @@ -760,7 +767,7 @@ bool Net2::check_yield( int taskID, bool isRunLoop ) {
- }
- // SOMEDAY: Yield if there are lots of higher priority tasks queued?
- - int64_t tsc_now = __rdtsc();
- + int64_t tsc_now = virtual_timer_value;
- double newTaskBegin = timer_monotonic();
- if (tsc_now < tsc_begin) {
- return true;
- diff --git a/flow/Platform.h b/flow/Platform.h
- index 938e360..7775e55 100644
- --- a/flow/Platform.h
- +++ b/flow/Platform.h
- @@ -370,7 +370,8 @@ dev_t getDeviceId(std::string path);
- #endif
- #ifdef __linux__
- -#include <x86intrin.h>
- +//#include <x86intrin.h>
- +#include "SSE2NEON.h"
- #include <features.h>
- #include <sys/stat.h>
- #endif
- @@ -397,7 +398,7 @@ inline static int64_t interlockedExchangeAdd64(volatile int64_t *a, int64_t b) {
- inline static int64_t interlockedExchange64(volatile int64_t *a, int64_t b) { return _InterlockedExchange64(a, b); }
- inline static int64_t interlockedOr64(volatile int64_t *a, int64_t b) { return _InterlockedOr64(a, b); }
- #elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)
- -#include <xmmintrin.h>
- +//#include <xmmintrin.h>
- inline static int32_t interlockedIncrement(volatile int32_t *a) { return __sync_add_and_fetch(a, 1); }
- inline static int64_t interlockedIncrement64(volatile int64_t *a) { return __sync_add_and_fetch(a, 1); }
- inline static int32_t interlockedDecrement(volatile int32_t *a) { return __sync_add_and_fetch(a, -1); }
- diff --git a/flow/ThreadPrimitives.h b/flow/ThreadPrimitives.h
- index d59908a..2908c8c 100644
- --- a/flow/ThreadPrimitives.h
- +++ b/flow/ThreadPrimitives.h
- @@ -27,6 +27,7 @@
- #ifdef __linux__
- #include <semaphore.h>
- +# define cpu_relax() asm volatile("yield" ::: "memory")
- #endif
- #ifdef __APPLE__
- @@ -57,7 +58,7 @@ public:
- }
- void enter() {
- while (interlockedCompareExchange(&isLocked, 1, 0) == 1)
- - _mm_pause();
- + cpu_relax();
- #if VALGRIND
- ANNOTATE_RWLOCK_ACQUIRED(this, true);
- #endif
Add Comment
Please sign in to add a comment.