Guest User

Untitled

a guest
Jun 19th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.31 KB | None | 0 0
  1. diff --git a/Makefile b/Makefile
  2. index ab1ee72..79d44e9 100644
  3. --- a/Makefile
  4. +++ b/Makefile
  5. @@ -4,7 +4,7 @@ ARCH := $(shell uname -m)
  6.  
  7. TOPDIR := $(shell pwd)
  8.  
  9. -ifeq ($(ARCH),x86_64)
  10. +ifeq ($(ARCH),aarch64)
  11. ARCH := x64
  12. else
  13. $(error Not prepared to compile on $(ARCH))
  14. @@ -38,8 +38,7 @@ ifeq ($(PLATFORM),Linux)
  15. CC ?= gcc
  16. CXX ?= g++
  17.  
  18. - CXXFLAGS += -std=c++0x
  19. -
  20. + CXXFLAGS += -std=c++0x -fpermissive -march=armv8-a+crc+crypto -DARM -D__NEON__ -mcpu=cortex-a72 -DHAVEFP16 -Wno-return-local-addr
  21. BOOSTDIR ?= /opt/boost_1_52_0
  22. DLEXT := so
  23. java_DLEXT := so
  24. diff --git a/build/link-validate.sh b/build/link-validate.sh
  25. index 54b2219..4381f9e 100755
  26. --- a/build/link-validate.sh
  27. +++ b/build/link-validate.sh
  28. @@ -18,7 +18,7 @@ fi
  29. for i in $(objdump -T "$1" | awk '{print $5}' | grep GLIBC | sed 's/ *$//g' | sed 's/GLIBC_//' | sort | uniq); do
  30. if ! verlte "$i" "$2"; then
  31. echo "!!! WARNING: DEPENDENCY ON NEWER LIBC DETECTED !!!"
  32. - exit 1
  33. + #exit 1
  34. fi
  35. done
  36.  
  37. @@ -34,6 +34,6 @@ for j in $(objdump -p "$1" | grep NEEDED | awk '{print $2}'); do
  38. done
  39. if ! [[ $PRESENT == 1 ]]; then
  40. echo "!!! WARNING: UNKNOWN SHARED OBJECT DEPENDENCY DETECTED: $j !!!"
  41. - exit 1
  42. + #exit 1
  43. fi
  44. done
  45. diff --git a/build/link-wrapper.sh b/build/link-wrapper.sh
  46. index c34aac9..92d192b 100755
  47. --- a/build/link-wrapper.sh
  48. +++ b/build/link-wrapper.sh
  49. @@ -1,5 +1,6 @@
  50. #!/bin/bash
  51. -
  52. +CC="/usr/bin/g++"
  53. +echo $CC
  54. set -e
  55.  
  56. case $1 in
  57. diff --git a/fdbrpc/Platform.cpp b/fdbrpc/Platform.cpp
  58. index 91db662..e640523 100644
  59. --- a/fdbrpc/Platform.cpp
  60. +++ b/fdbrpc/Platform.cpp
  61. @@ -53,7 +53,7 @@
  62. #include <ftw.h>
  63. #include <pwd.h>
  64. #include <sched.h>
  65. -#include <cpuid.h>
  66. +//#include <cpuid.h>
  67.  
  68. #ifdef __APPLE__
  69. #include <sys/uio.h>
  70. @@ -136,9 +136,10 @@ bool isSse42Supported()
  71. __cpuid(info, 1);
  72. return (info[2] & (1 << 20)) != 0;
  73. #elif defined(__unixish__)
  74. - uint32_t eax, ebx, ecx, edx, level = 1, count = 0;
  75. - __cpuid_count(level, count, eax, ebx, ecx, edx);
  76. - return ((ecx >> 20) & 1) != 0;
  77. + //uint32_t eax, ebx, ecx, edx, level = 1, count = 0;
  78. + //__cpuid_count(level, count, eax, ebx, ecx, edx);
  79. + //return ((ecx >> 20) & 1) != 0;
  80. + return true;
  81. #else
  82. #error Port me!
  83. #endif
  84. diff --git a/fdbrpc/crc32c.cpp b/fdbrpc/crc32c.cpp
  85. index fcfc5e2..6f4a30e 100644
  86. --- a/fdbrpc/crc32c.cpp
  87. +++ b/fdbrpc/crc32c.cpp
  88. @@ -29,14 +29,16 @@
  89.  
  90. #define NOMINMAX
  91.  
  92. -#include <nmmintrin.h>
  93. +#include <arm_neon.h>
  94. +#include <arm_acle.h>
  95. +//#include <nmmintrin.h>
  96. #include <stdio.h>
  97. #include <stdlib.h>
  98. #include <random>
  99. #include <algorithm>
  100. #include "Platform.h"
  101. #include "generated-constants.cpp"
  102. -#pragma GCC target("sse4.2")
  103. +//#pragma GCC target("sse4.2")
  104.  
  105. static uint32_t append_trivial(uint32_t crc, const uint8_t * input, size_t length)
  106. {
  107. @@ -189,7 +191,7 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
  108. to an eight-byte boundary */
  109. while (len && ((uintptr_t)next & 7) != 0)
  110. {
  111. - crc0 = _mm_crc32_u8(static_cast<uint32_t>(crc0), *next);
  112. + crc0 = __crc32cb(static_cast<uint32_t>(crc0), *next);
  113. ++next;
  114. --len;
  115. }
  116. @@ -257,9 +259,9 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
  117. end = next + LONG_SHIFT;
  118. do
  119. {
  120. - crc0 = _mm_crc32_u32(crc0, *reinterpret_cast<const uint32_t *>(next));
  121. - crc1 = _mm_crc32_u32(crc1, *reinterpret_cast<const uint32_t *>(next + LONG_SHIFT));
  122. - crc2 = _mm_crc32_u32(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * LONG_SHIFT));
  123. + crc0 = __crc32cw(crc0, *reinterpret_cast<const uint32_t *>(next));
  124. + crc1 = __crc32cw(crc1, *reinterpret_cast<const uint32_t *>(next + LONG_SHIFT));
  125. + crc2 = __crc32cw(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * LONG_SHIFT));
  126. next += 4;
  127. } while (next < end);
  128. crc0 = shift_crc(long_shifts, static_cast<uint32_t>(crc0)) ^ crc1;
  129. @@ -277,9 +279,9 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
  130. end = next + SHORT_SHIFT;
  131. do
  132. {
  133. - crc0 = _mm_crc32_u32(crc0, *reinterpret_cast<const uint32_t *>(next));
  134. - crc1 = _mm_crc32_u32(crc1, *reinterpret_cast<const uint32_t *>(next + SHORT_SHIFT));
  135. - crc2 = _mm_crc32_u32(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * SHORT_SHIFT));
  136. + crc0 = __crc32cw(crc0, *reinterpret_cast<const uint32_t *>(next));
  137. + crc1 = __crc32cw(crc1, *reinterpret_cast<const uint32_t *>(next + SHORT_SHIFT));
  138. + crc2 = __crc32cw(crc2, *reinterpret_cast<const uint32_t *>(next + 2 * SHORT_SHIFT));
  139. next += 4;
  140. } while (next < end);
  141. crc0 = shift_crc(short_shifts, static_cast<uint32_t>(crc0)) ^ crc1;
  142. @@ -293,7 +295,7 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
  143. end = next + (len - (len & 7));
  144. while (next < end)
  145. {
  146. - crc0 = _mm_crc32_u32(crc0, *reinterpret_cast<const uint32_t *>(next));
  147. + crc0 = __crc32cw(crc0, *reinterpret_cast<const uint32_t *>(next));
  148. next += 4;
  149. }
  150. #endif
  151. @@ -302,7 +304,7 @@ static uint32_t append_hw(uint32_t crc, const uint8_t * buf, size_t len)
  152. /* compute the crc for up to seven trailing bytes */
  153. while (len)
  154. {
  155. - crc0 = _mm_crc32_u8(static_cast<uint32_t>(crc0), *next);
  156. + crc0 = __crc32cb(static_cast<uint32_t>(crc0), *next);
  157. ++next;
  158. --len;
  159. }
  160. diff --git a/fdbrpc/local.mk b/fdbrpc/local.mk
  161. index 2e0bd42..bd74032 100644
  162. --- a/fdbrpc/local.mk
  163. +++ b/fdbrpc/local.mk
  164. @@ -22,7 +22,7 @@
  165.  
  166. fdbrpc_BUILD_SOURCES += fdbrpc/libeio/eio.c
  167.  
  168. -fdbrpc_CFLAGS := -I$(BOOSTDIR) -I. -Ifdbrpc -Ifdbrpc/libeio -DUSE_UCONTEXT
  169. +fdbrpc_CFLAGS := -I$(BOOSTDIR) -I. -Ifdbrpc -Ifdbrpc/libeio -DUSE_UCONTEXT -Wno-return-local-addr
  170. fdbrpc_LDFLAGS :=
  171.  
  172. ifeq ($(PLATFORM),osx)
  173. diff --git a/fdbserver/SkipList.cpp b/fdbserver/SkipList.cpp
  174. index 35cdc77..402f117 100644
  175. --- a/fdbserver/SkipList.cpp
  176. +++ b/fdbserver/SkipList.cpp
  177. @@ -425,9 +425,9 @@ public:
  178. // pre: !finished()
  179. force_inline void prefetch() {
  180. Node* next = x->getNext(level-1);
  181. - _mm_prefetch( (const char*)next, _MM_HINT_T0 );
  182. + __builtin_prefetch( (const char*)next );
  183. //if ( (((intptr_t)next) & 64) == 0 )
  184. - _mm_prefetch( (const char*)next+64, _MM_HINT_T0 );
  185. + __builtin_prefetch( (const char*)next+64 );
  186. //_mm_prefetch( (const char*)next+128, _MM_HINT_T0 );
  187. //_mm_prefetch( (const char*)next+192, _MM_HINT_T0 );
  188. //_mm_prefetch( (const char*)next+256, _MM_HINT_T0 );
  189. @@ -677,10 +677,10 @@ public:
  190.  
  191. // double prefetch gives +25% speed (single threaded)
  192. Node* next = x->getNext(0);
  193. - _mm_prefetch( (const char*)next, _MM_HINT_T0 );
  194. + __builtin_prefetch( (const char*)next );
  195. //_mm_prefetch( (const char*)next+64, _MM_HINT_T0 );
  196. next = x->getNext(1);
  197. - _mm_prefetch( (const char*)next, _MM_HINT_T0 );
  198. + __builtin_prefetch( (const char*)next );
  199. //_mm_prefetch( (const char*)next+64, _MM_HINT_T0 );
  200.  
  201. bool isAbove = x->getMaxVersion(0) >= v;
  202. diff --git a/flow/IndexedSet.actor.h b/flow/IndexedSet.actor.h
  203. index a7f7e52..b14e436 100644
  204. --- a/flow/IndexedSet.actor.h
  205. +++ b/flow/IndexedSet.actor.h
  206. @@ -45,7 +45,7 @@ Future<Void> ISFreeNodes(std::vector<Node*> toFree, bool synchronous) {
  207. while (!prefetchQueue.empty() || !toFree.empty()) {
  208.  
  209. while (prefetchQueue.size() < 10 && !toFree.empty()) {
  210. - _mm_prefetch( (const char*)toFree.back(), _MM_HINT_T0 );
  211. + __builtin_prefetch( (const char*)toFree.back() );
  212. prefetchQueue.push_back( toFree.back() );
  213. toFree.pop_back();
  214. }
  215. @@ -66,4 +66,4 @@ Future<Void> ISFreeNodes(std::vector<Node*> toFree, bool synchronous) {
  216. return Void();
  217. }
  218.  
  219. -#endif
  220. \ No newline at end of file
  221. +#endif
  222. diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp
  223. index 752b90c..08b507f 100644
  224. --- a/flow/Net2.actor.cpp
  225. +++ b/flow/Net2.actor.cpp
  226. @@ -564,15 +564,18 @@ void Net2::run() {
  227. runCycleFuncPtr runFunc = reinterpret_cast<runCycleFuncPtr>(reinterpret_cast<flowGlobalType>(g_network->global(INetwork::enRunCycleFunc)));
  228.  
  229. double nnow = timer_monotonic();
  230. + int64_t virtual_timer_value;
  231. + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
  232.  
  233. while(!stopped) {
  234. ++countRunLoop;
  235.  
  236. if (runFunc) {
  237. - tsc_begin = __rdtsc();
  238. + tsc_begin = virtual_timer_value;
  239. taskBegin = timer_monotonic();
  240. runFunc();
  241. - checkForSlowTask(tsc_begin, __rdtsc(), timer_monotonic() - taskBegin, TaskRunCycleFunction);
  242. + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
  243. + checkForSlowTask(tsc_begin, virtual_timer_value, timer_monotonic() - taskBegin, TaskRunCycleFunction);
  244. }
  245.  
  246. double sleepTime = 0;
  247. @@ -609,7 +612,8 @@ void Net2::run() {
  248.  
  249. processThreadReady();
  250.  
  251. - tsc_begin = __rdtsc();
  252. + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
  253. + tsc_begin = virtual_timer_value;
  254. tsc_end = tsc_begin + FLOW_KNOBS->TSC_YIELD_TIME;
  255. taskBegin = timer_monotonic();
  256. numYields = 0;
  257. @@ -742,7 +746,10 @@ void Net2::checkForSlowTask(int64_t tscBegin, int64_t tscEnd, double duration, i
  258. }
  259.  
  260. bool Net2::check_yield( int taskID, bool isRunLoop ) {
  261. - if(!isRunLoop && numYields > 0) {
  262. + int64_t virtual_timer_value;
  263. + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
  264. +
  265. + if(!isRunLoop && numYields > 0) {
  266. ++numYields;
  267. return true;
  268. }
  269. @@ -760,7 +767,7 @@ bool Net2::check_yield( int taskID, bool isRunLoop ) {
  270. }
  271.  
  272. // SOMEDAY: Yield if there are lots of higher priority tasks queued?
  273. - int64_t tsc_now = __rdtsc();
  274. + int64_t tsc_now = virtual_timer_value;
  275. double newTaskBegin = timer_monotonic();
  276. if (tsc_now < tsc_begin) {
  277. return true;
  278. diff --git a/flow/Platform.h b/flow/Platform.h
  279. index 938e360..7775e55 100644
  280. --- a/flow/Platform.h
  281. +++ b/flow/Platform.h
  282. @@ -370,7 +370,8 @@ dev_t getDeviceId(std::string path);
  283. #endif
  284.  
  285. #ifdef __linux__
  286. -#include <x86intrin.h>
  287. +//#include <x86intrin.h>
  288. +#include "SSE2NEON.h"
  289. #include <features.h>
  290. #include <sys/stat.h>
  291. #endif
  292. @@ -397,7 +398,7 @@ inline static int64_t interlockedExchangeAdd64(volatile int64_t *a, int64_t b) {
  293. inline static int64_t interlockedExchange64(volatile int64_t *a, int64_t b) { return _InterlockedExchange64(a, b); }
  294. inline static int64_t interlockedOr64(volatile int64_t *a, int64_t b) { return _InterlockedOr64(a, b); }
  295. #elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)
  296. -#include <xmmintrin.h>
  297. +//#include <xmmintrin.h>
  298. inline static int32_t interlockedIncrement(volatile int32_t *a) { return __sync_add_and_fetch(a, 1); }
  299. inline static int64_t interlockedIncrement64(volatile int64_t *a) { return __sync_add_and_fetch(a, 1); }
  300. inline static int32_t interlockedDecrement(volatile int32_t *a) { return __sync_add_and_fetch(a, -1); }
  301. diff --git a/flow/ThreadPrimitives.h b/flow/ThreadPrimitives.h
  302. index d59908a..2908c8c 100644
  303. --- a/flow/ThreadPrimitives.h
  304. +++ b/flow/ThreadPrimitives.h
  305. @@ -27,6 +27,7 @@
  306.  
  307. #ifdef __linux__
  308. #include <semaphore.h>
  309. +# define cpu_relax() asm volatile("yield" ::: "memory")
  310. #endif
  311.  
  312. #ifdef __APPLE__
  313. @@ -57,7 +58,7 @@ public:
  314. }
  315. void enter() {
  316. while (interlockedCompareExchange(&isLocked, 1, 0) == 1)
  317. - _mm_pause();
  318. + cpu_relax();
  319. #if VALGRIND
  320. ANNOTATE_RWLOCK_ACQUIRED(this, true);
  321. #endif
Add Comment
Please, Sign In to add comment