Advertisement
Guest User

stockfish optimizations

a guest
May 21st, 2018
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.74 KB | None | 0 0
  1. Using FishBench.exe (https://github.com/zardav/FishBench)
  2.  
  3. Results for 123 tests for each version:
  4.  
  5. Base Test Diff
  6. Mean 1191850 1371328 -179478
  7. StDev 78998 79664 15694
  8.  
  9. p-value: 1
  10. speedup: 0,151
  11.  
  12. ----------------------------------------
  13.  
  14. Without really knowing what I was doing :o, I looked through the gcc documentation to find some exotic optimizations:
  15.  
  16. mk.sh:
  17. ------
  18.  
  19. #!/bin/sh
  20. make -C src clean profile-build ARCH=x86-64-modern COMP=gcc \
  21. MYFLAGS="-pipe -Ofast -fomit-frame-pointer \
  22. -fira-loop-pressure -fira-region=all \
  23. -funsafe-loop-optimizations -fmodulo-sched -fsched-pressure \
  24. -fsched-spec-load-dangerous -fsched-stalled-insns=0 \
  25. -fsched2-use-superblocks -floop-nest-optimize \
  26. -fipa-profile -fipa-pta -fgraphite-identity -msse4a -m3dnowa -mpopcnt \
  27. -fmodulo-sched-allow-regmoves -fno-reschedule-modulo-scheduled-loops \
  28. --param max-modulo-backtrack-attempts=123456 \
  29. --param graphite-max-nb-scop-params=0 \
  30. --param graphite-max-bbs-per-function=0 -finline-limit=12345678" \
  31. native=yes lto=yes -j
  32. # sfdir points to the appropriate Arena-subdirectory
  33. # '/cygdrive/c/Program\ Files\ \(x86\)/Arena/Engines/Stockfish/'
  34. test -f src/stockfish.exe && install -s src/stockfish.exe sfdir/stockfish-gcc.exe
  35. # rm -f src/stockfish.exe
  36.  
  37. ----------------------------------------
  38.  
  39. Makefile:
  40. ---------
  41.  
  42. # Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  43. # Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
  44. # Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  45. # Copyright (C) 2015-2018 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  46. #
  47. # Stockfish is free software: you can redistribute it and/or modify
  48. # it under the terms of the GNU General Public License as published by
  49. # the Free Software Foundation, either version 3 of the License, or
  50. # (at your option) any later version.
  51. #
  52. # Stockfish is distributed in the hope that it will be useful,
  53. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  54. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  55. # GNU General Public License for more details.
  56. #
  57. # You should have received a copy of the GNU General Public License
  58. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  59.  
  60.  
  61. ### ==========================================================================
  62. ### Section 1. General Configuration
  63. ### ==========================================================================
  64.  
  65. ### Executable name
  66. ifeq ($(COMP),mingw)
  67. EXE = stockfish.exe
  68. else
  69. EXE = stockfish
  70. endif
  71.  
  72. ### Installation dir definitions
  73. PREFIX = /usr/local
  74. BINDIR = $(PREFIX)/bin
  75.  
  76. ### Built-in benchmark for pgo-builds
  77. PGOBENCH = ./$(EXE) bench
  78.  
  79. ### Object files
  80. OBJS = benchmark.o bitbase.o bitboard.o endgame.o evaluate.o main.o \
  81. material.o misc.o movegen.o movepick.o pawns.o position.o psqt.o \
  82. search.o thread.o timeman.o tt.o uci.o ucioption.o syzygy/tbprobe.o
  83.  
  84. ### Establish the operating system name (uname is run once, at parse time)
  85. KERNEL := $(shell uname -s)
  86. ifeq ($(KERNEL),Linux)
  87. OS := $(shell uname -o)
  88. endif
  89.  
  90. ### ==========================================================================
  91. ### Section 2. High-level Configuration
  92. ### ==========================================================================
  93. #
  94. # flag --- Comp switch --- Description
  95. # ----------------------------------------------------------------------------
  96. #
  97. # debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode
  98. # sanitize = undefined/thread/no (-fsanitize )
  99. # --- ( undefined ) --- enable undefined behavior checks
  100. # --- ( thread ) --- enable threading error checks
  101. # optimize = yes/no --- (-Ofast etc.) --- Enable/Disable optimizations
  102. # arch = (name) --- (-arch) --- Target architecture
  103. # bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
  104. # native = yes/no --- (-march=native -mtune=native)
  105. # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction
  106. # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction
  107. # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
  108. # pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
  109. #
  110. # Note that Makefile is space sensitive, so when adding new architectures
  111. # or modifying existing flags, you have to make sure there are no extra spaces
  112. # at the end of the line for flag values.
  113.  
  114. ### 2.1. General and architecture defaults
  115. optimize = yes
  116. debug = no
  117. sanitize = no
  118. bits = 32
  119. prefetch = no
  120. popcnt = no
  121. sse = no
  122. pext = no
  123. native = no
  124. lto=no
  125.  
  126. ### 2.2 Architecture specific
  127.  
  128. ifeq ($(ARCH),general-32)
  129. arch = any
  130. endif
  131.  
  132. ifeq ($(ARCH),x86-32-old)
  133. arch = i386
  134. endif
  135.  
  136. ifeq ($(ARCH),x86-32)
  137. arch = i386
  138. prefetch = yes
  139. sse = yes
  140. endif
  141.  
  142. ifeq ($(ARCH),general-64)
  143. arch = any
  144. bits = 64
  145. endif
  146.  
  147. ifeq ($(ARCH),x86-64)
  148. arch = x86_64
  149. bits = 64
  150. prefetch = yes
  151. sse = yes
  152. endif
  153.  
  154. ifeq ($(ARCH),x86-64-modern)
  155. arch = x86_64
  156. bits = 64
  157. prefetch = yes
  158. popcnt = yes
  159. sse = yes
  160. endif
  161.  
  162. ifeq ($(ARCH),x86-64-bmi2)
  163. arch = x86_64
  164. bits = 64
  165. prefetch = yes
  166. popcnt = yes
  167. sse = yes
  168. pext = yes
  169. endif
  170.  
  171. ifeq ($(ARCH),armv7)
  172. arch = armv7
  173. prefetch = yes
  174. endif
  175.  
  176. ifeq ($(ARCH),ppc-32)
  177. arch = ppc
  178. endif
  179.  
  180. ifeq ($(ARCH),ppc-64)
  181. arch = ppc64
  182. bits = 64
  183. endif
  184.  
  185.  
  186. ### ==========================================================================
  187. ### Section 3. Low-level configuration
  188. ### ==========================================================================
  189.  
  190. ### 3.1 Selecting compiler (default = gcc)
  191.  
  192. CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++11 $(EXTRACXXFLAGS)
  193. DEPENDFLAGS += -std=c++11
  194. LDFLAGS += $(EXTRALDFLAGS)
  195.  
  196. ifeq ($(COMP),)
  197. COMP=gcc
  198. endif
  199.  
  200. ifeq ($(COMP),gcc)
  201. comp=gcc
  202. CXX=g++
  203. CXXFLAGS += -pedantic -Wextra -Wshadow
  204.  
  205. ifeq ($(ARCH),armv7)
  206. ifeq ($(OS),Android)
  207. CXXFLAGS += -m$(bits)
  208. LDFLAGS += -m$(bits)
  209. endif
  210. else
  211. CXXFLAGS += -m$(bits)
  212. LDFLAGS += -m$(bits)
  213. endif
  214.  
  215. ifneq ($(KERNEL),Darwin)
  216. LDFLAGS += -Wl,--no-as-needed
  217. endif
  218. endif
  219.  
  220. ifeq ($(COMP),mingw)
  221. comp=mingw
  222.  
  223. ifeq ($(KERNEL),Linux)
  224. ifeq ($(bits),64)
  225. ifeq ($(shell which x86_64-w64-mingw32-c++-posix),)
  226. CXX=x86_64-w64-mingw32-c++
  227. else
  228. CXX=x86_64-w64-mingw32-c++-posix
  229. endif
  230. else
  231. ifeq ($(shell which i686-w64-mingw32-c++-posix),)
  232. CXX=i686-w64-mingw32-c++
  233. else
  234. CXX=i686-w64-mingw32-c++-posix
  235. endif
  236. endif
  237. else
  238. CXX=g++
  239. endif
  240.  
  241. CXXFLAGS += -Wextra -Wshadow
  242. LDFLAGS += -static
  243. endif
  244.  
  245. ifeq ($(COMP),icc)
  246. comp=icc
  247. CXX=icpc
  248. CXXFLAGS += -diag-disable 1476,10120 -Wcheck -Wabi -Wdeprecated -strict-ansi
  249. endif
  250.  
  251. ifeq ($(COMP),clang)
  252. comp=clang
  253. CXX=clang++
  254. CXXFLAGS += -pedantic -Wextra -Wshadow
  255.  
  256. ifneq ($(KERNEL),Darwin)
  257. ifneq ($(KERNEL),OpenBSD)
  258. LDFLAGS += -latomic
  259. endif
  260. endif
  261.  
  262. ifeq ($(ARCH),armv7)
  263. ifeq ($(OS),Android)
  264. CXXFLAGS += -m$(bits)
  265. LDFLAGS += -m$(bits)
  266. endif
  267. else
  268. CXXFLAGS += -m$(bits)
  269. LDFLAGS += -m$(bits)
  270. endif
  271. endif
  272.  
  273. ifeq ($(comp),icc)
  274. profile_make = icc-profile-make
  275. profile_use = icc-profile-use
  276. else
  277. ifeq ($(comp),clang)
  278. profile_make = clang-profile-make
  279. profile_use = clang-profile-use
  280. else
  281. profile_make = gcc-profile-make
  282. profile_use = gcc-profile-use
  283. endif
  284. endif
  285.  
  286. ifeq ($(KERNEL),Darwin)
  287. CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.9
  288. LDFLAGS += -arch $(arch) -mmacosx-version-min=10.9
  289. endif
  290.  
  291. ### Travis CI script uses COMPILER to overwrite CXX
  292. ifdef COMPILER
  293. COMPCXX=$(COMPILER)
  294. endif
  295.  
  296. ### Allow overwriting CXX from command line
  297. ifdef COMPCXX
  298. CXX=$(COMPCXX)
  299. endif
  300.  
  301. ### On mingw use Windows threads, otherwise POSIX
  302. ifneq ($(comp),mingw)
  303. # On Android Bionic's C library comes with its own pthread implementation bundled in
  304. ifneq ($(OS),Android)
  305. # Haiku has pthreads in its libroot, so only link it in on other platforms
  306. ifneq ($(KERNEL),Haiku)
  307. LDFLAGS += -lpthread
  308. endif
  309. endif
  310. endif
  311.  
  312. ### 3.2.1 Debugging
  313. ifeq ($(debug),no)
  314. CXXFLAGS += -DNDEBUG
  315. else
  316. CXXFLAGS += -g
  317. endif
  318.  
  319. ### 3.2.2 Debugging with undefined behavior sanitizers
  320. ifneq ($(sanitize),no)
  321. CXXFLAGS += -g3 -fsanitize=$(sanitize) -fuse-ld=gold
  322. LDFLAGS += -fsanitize=$(sanitize) -fuse-ld=gold
  323. endif
  324.  
  325. ### 3.3 Optimization
  326. ifeq ($(optimize),yes)
  327.  
  328. CXXFLAGS += -Ofast
  329.  
  330. ifeq ($(comp),gcc)
  331. ifeq ($(OS), Android)
  332. CXXFLAGS += -fno-gcse -mthumb -march=armv7-a -mfloat-abi=softfp
  333. endif
  334. endif
  335.  
  336. ifeq ($(comp),$(filter $(comp),gcc clang icc))
  337. ifeq ($(KERNEL),Darwin)
  338. CXXFLAGS += -mdynamic-no-pic
  339. endif
  340. endif
  341. endif
  342.  
  343. ### 3.4 Bits
  344. ifeq ($(bits),64)
  345. CXXFLAGS += -DIS_64BIT
  346. endif
  347.  
  348. ### 3.5 prefetch
  349. ifeq ($(prefetch),yes)
  350. ifeq ($(sse),yes)
  351. CXXFLAGS += -msse
  352. DEPENDFLAGS += -msse
  353. endif
  354. else
  355. CXXFLAGS += -DNO_PREFETCH
  356. endif
  357.  
  358. ### 3.6 popcnt
  359. ifeq ($(popcnt),yes)
  360. ifeq ($(comp),icc)
  361. CXXFLAGS += -msse3 -DUSE_POPCNT
  362. else
  363. CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT
  364. endif
  365. endif
  366.  
  367. ### 3.7 pext
  368. ifeq ($(pext),yes)
  369. CXXFLAGS += -DUSE_PEXT
  370. ifeq ($(comp),$(filter $(comp),gcc clang mingw))
  371. CXXFLAGS += -mbmi2
  372. endif
  373. endif
  374.  
  375. ### 3.8 Link Time Optimization (enabled with lto=yes). It works since gcc 4.5
  376. ### but not on mingw under Windows. Compile and link time options are mixed
  377. ### here because the lto link phase needs access to the optimization flags.
  378. ifeq ($(optimize),yes)
  379.  
  380. ifeq ($(debug), no)
  381. ifeq ($(comp),$(filter $(comp),gcc clang))
  382. ifeq ($(lto),yes)
  383. CXXFLAGS += -flto
  384. LDFLAGS += -flto
  385. endif
  386. endif
  387.  
  388. ifeq ($(comp),mingw)
  389. ifeq ($(KERNEL),Linux)
  390. ifeq ($(lto),yes)
  391. CXXFLAGS += -flto
  392. LDFLAGS += -flto
  393. endif
  394. endif
  395. endif
  396. ifeq ($(native),yes)
  397. CXXFLAGS += -march=native -mtune=native
  398. LDFLAGS += -march=native -mtune=native
  399. endif
  400. ifneq ($(MYFLAGS),)
  401. CXXFLAGS +=$(MYFLAGS)
  402. LDFLAGS += $(MYFLAGS)
  403. endif
  404. endif
  405. endif
  406.  
  407. ### 3.9 Android 5 can only run position independent executables. Note that this
  408. ### breaks Android 4.0 and earlier.
  409. ifeq ($(OS), Android)
  410. CXXFLAGS += -fPIE
  411. LDFLAGS += -fPIE -pie
  412. endif
  413.  
  414.  
  415. ### ==========================================================================
  416. ### Section 4. Public targets
  417. ### ==========================================================================
  418.  
  419. help:
  420. @echo ""
  421. @echo "To compile stockfish, type: "
  422. @echo ""
  423. @echo "make target ARCH=arch [COMP=compiler] [COMPCXX=cxx]"
  424. @echo ""
  425. @echo "Supported targets:"
  426. @echo ""
  427. @echo "build > Standard build"
  428. @echo "profile-build > PGO build"
  429. @echo "strip > Strip executable"
  430. @echo "install > Install executable"
  431. @echo "clean > Clean up"
  432. @echo ""
  433. @echo "Supported archs:"
  434. @echo ""
  435. @echo "x86-64 > x86 64-bit"
  436. @echo "x86-64-modern > x86 64-bit with popcnt support"
  437. @echo "x86-64-bmi2 > x86 64-bit with pext support"
  438. @echo "x86-32 > x86 32-bit with SSE support"
  439. @echo "x86-32-old > x86 32-bit fall back for old hardware"
  440. @echo "ppc-64 > PPC 64-bit"
  441. @echo "ppc-32 > PPC 32-bit"
  442. @echo "armv7 > ARMv7 32-bit"
  443. @echo "general-64 > unspecified 64-bit"
  444. @echo "general-32 > unspecified 32-bit"
  445. @echo ""
  446. @echo "Supported compilers:"
  447. @echo ""
  448. @echo "gcc > Gnu compiler (default)"
  449. @echo "mingw > Gnu compiler with MinGW under Windows"
  450. @echo "clang > LLVM Clang compiler"
  451. @echo "icc > Intel compiler"
  452. @echo ""
  453. @echo "Simple examples. If you don't know what to do, you likely want to run: "
  454. @echo ""
  455. @echo "make build ARCH=x86-64 (This is for 64-bit systems)"
  456. @echo "make build ARCH=x86-32 (This is for 32-bit systems)"
  457. @echo ""
  458. @echo "Advanced examples, for experienced users: "
  459. @echo ""
  460. @echo "make build ARCH=x86-64 COMP=clang"
  461. @echo "make profile-build ARCH=x86-64-modern COMP=gcc COMPCXX=g++-4.8"
  462. @echo ""
  463.  
  464.  
  465. # Targets that are commands rather than files (each listed once; 'default' and 'all' included)
  466. .PHONY: help build profile-build strip install clean objclean profileclean default all \
  467. config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make clang-profile-use clang-profile-make
  468.  
  469. build: config-sanity
  470. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
  471.  
  472. profile-build: config-sanity objclean profileclean
  473. @echo ""
  474. @echo "Step 1/4. Building instrumented executable ..."
  475. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
  476. @echo ""
  477. @echo "Step 2/4. Running benchmark for pgo-build ..."
  478. $(PGOBENCH) > /dev/null
  479. @echo ""
  480. @echo "Step 3/4. Building optimized executable ..."
  481. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
  482. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use)
  483. @echo ""
  484. @echo "Step 4/4. Deleting profile data ..."
  485. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
  486.  
  487. strip:
  488. strip $(EXE)
  489.  
  490. install:
  491. -mkdir -p -m 755 $(BINDIR)
  492. -cp $(EXE) $(BINDIR)
  493. -strip $(BINDIR)/$(EXE)
  494.  
  495. #clean all
  496. clean: objclean profileclean
  497. @rm -f .depend *~ core
  498.  
  499. # clean binaries and objects
  500. objclean:
  501. @rm -f $(EXE) $(EXE).exe *.o ./syzygy/*.o
  502.  
  503. # clean auxiliary profiling files
  504. profileclean:
  505. @rm -rf profdir
  506. @rm -f bench.txt *.gcda ./syzygy/*.gcda *.gcno ./syzygy/*.gcno
  507. @rm -f stockfish.profdata *.profraw
  508.  
  509. # 'default' is an alias for 'help': depend on the target instead of running a shell command named help
  510. default: help
  511.  
  512. ### ==========================================================================
  513. ### Section 5. Private targets
  514. ### ==========================================================================
  515.  
  516. all: $(EXE) .depend
  517.  
  518. config-sanity:
  519. @echo ""
  520. @echo "Config:"
  521. @echo "debug: '$(debug)'"
  522. @echo "sanitize: '$(sanitize)'"
  523. @echo "optimize: '$(optimize)'"
  524. @echo "arch: '$(arch)'"
  525. @echo "bits: '$(bits)'"
  526. @echo "kernel: '$(KERNEL)'"
  527. @echo "os: '$(OS)'"
  528. @echo "prefetch: '$(prefetch)'"
  529. @echo "popcnt: '$(popcnt)'"
  530. @echo "sse: '$(sse)'"
  531. @echo "pext: '$(pext)'"
  532. @echo ""
  533. @echo "Flags:"
  534. @echo "CXX: $(CXX)"
  535. @echo "CXXFLAGS: $(CXXFLAGS)"
  536. @echo "LDFLAGS: $(LDFLAGS)"
  537. @echo ""
  538. @echo "Testing config sanity. If this fails, try 'make help' ..."
  539. @echo ""
  540. @test "$(debug)" = "yes" || test "$(debug)" = "no"
  541. @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "no"
  542. @test "$(optimize)" = "yes" || test "$(optimize)" = "no"
  543. @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
  544. test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "armv7"
  545. @test "$(bits)" = "32" || test "$(bits)" = "64"
  546. @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
  547. @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
  548. @test "$(sse)" = "yes" || test "$(sse)" = "no"
  549. @test "$(pext)" = "yes" || test "$(pext)" = "no"
  550. @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
  551.  
  552. $(EXE): $(OBJS)
  553. $(CXX) -o $@ $(OBJS) $(LDFLAGS)
  554.  
  555. clang-profile-make:
  556. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
  557. EXTRACXXFLAGS='-fprofile-instr-generate ' \
  558. EXTRALDFLAGS=' -fprofile-instr-generate' \
  559. all
  560.  
  561. clang-profile-use:
  562. llvm-profdata merge -output=stockfish.profdata *.profraw
  563. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
  564. EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
  565. EXTRALDFLAGS='-fprofile-use ' \
  566. all
  567.  
  568. gcc-profile-make:
  569. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
  570. EXTRACXXFLAGS='-fprofile-generate' \
  571. EXTRALDFLAGS='-lgcov' \
  572. all
  573.  
  574. gcc-profile-use:
  575. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
  576. EXTRACXXFLAGS='-fprofile-use -fno-peel-loops -fno-tracer' \
  577. EXTRALDFLAGS='-lgcov' \
  578. all
  579.  
  580. icc-profile-make:
  581. @mkdir -p profdir
  582. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
  583. EXTRACXXFLAGS='-prof-gen=srcpos -prof_dir ./profdir' \
  584. all
  585.  
  586. icc-profile-use:
  587. $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
  588. EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
  589. all
  590.  
  591. .depend:
  592. -@$(CXX) $(DEPENDFLAGS) -MM $(OBJS:.o=.cpp) > $@ 2> /dev/null
  593.  
  594. -include .depend
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement