Advertisement
Guest User

asdasdasd1

a guest
Jan 22nd, 2020
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.80 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": null,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "using BenchmarkTools"
  10. ]
  11. },
  12. {
  13. "cell_type": "markdown",
  14. "metadata": {},
  15. "source": [
  16. "Original code:\n",
  17. "```C\n",
  18. "double mat[N][N], s[N][N], val;\n",
  19. "int i, j, v[N];\n",
  20. "//\n",
  21. "// ... v[] and s[][] may be assumed to contain valid data\n",
  22. "//\n",
  23. "for(i=0; i<N ; ++i) {\n",
  24. " for(j=0; j<N; ++j) {\n",
  25. " val = (double)(v[i] % 256);\n",
  26. " mat[j][i] = s[j][i]*(sin(val)*sin(val)-cos(val)*cos(val));\n",
  27. " }\n",
  28. "}\n",
  29. "```"
  30. ]
  31. },
  32. {
  33. "cell_type": "code",
  34. "execution_count": 50,
  35. "metadata": {},
  36. "outputs": [
  37. {
  38. "data": {
  39. "text/plain": [
  40. "work (generic function with 2 methods)"
  41. ]
  42. },
  43. "execution_count": 50,
  44. "metadata": {},
  45. "output_type": "execute_result"
  46. }
  47. ],
  48. "source": [
  49. "function work(mat, s, v, N)\n",
  50. " val = 0.0\n",
  51. " @inbounds for i in 1:N\n",
  52. " for j in 1:N\n",
  53. " val = mod(v[i],256);\n",
  54. " mat[i,j] = s[i,j]*(sin(val)*sin(val)-cos(val)*cos(val));\n",
  55. " end\n",
  56. " end;\n",
  57. "end"
  58. ]
  59. },
  60. {
  61. "cell_type": "code",
  62. "execution_count": 21,
  63. "metadata": {},
  64. "outputs": [
  65. {
  66. "data": {
  67. "text/plain": [
  68. "\u001b[32m\u001b[1mTest Passed\u001b[22m\u001b[39m"
  69. ]
  70. },
  71. "execution_count": 21,
  72. "metadata": {},
  73. "output_type": "execute_result"
  74. }
  75. ],
  76. "source": [
  77. "using Test\n",
  78. "x = rand()\n",
  79. "@test 1-2*cos(x)*cos(x) ≈ sin(x)*sin(x)-cos(x)*cos(x)\n",
  80. "@test -cos(2*x) ≈ sin(x)*sin(x)-cos(x)*cos(x)"
  81. ]
  82. },
  83. {
  84. "cell_type": "code",
  85. "execution_count": 51,
  86. "metadata": {},
  87. "outputs": [
  88. {
  89. "data": {
  90. "text/plain": [
  91. "opt1 (generic function with 2 methods)"
  92. ]
  93. },
  94. "execution_count": 51,
  95. "metadata": {},
  96. "output_type": "execute_result"
  97. }
  98. ],
  99. "source": [
  100. "# pulling out + analytical opt\n",
  101. "function opt1(mat, s, v, N)\n",
  102. " val = 0.0\n",
  103. " @inbounds for i in 1:N\n",
  104. " val = mod(v[i],256);\n",
  105. " val = -cos(2*val)\n",
  106. " for j in 1:N\n",
  107. " mat[i,j] = s[i,j]*val;\n",
  108. " end\n",
  109. " end;\n",
  110. " mat\n",
  111. "end"
  112. ]
  113. },
  114. {
  115. "cell_type": "code",
  116. "execution_count": 52,
  117. "metadata": {},
  118. "outputs": [
  119. {
  120. "data": {
  121. "text/plain": [
  122. "opt12 (generic function with 2 methods)"
  123. ]
  124. },
  125. "execution_count": 52,
  126. "metadata": {},
  127. "output_type": "execute_result"
  128. }
  129. ],
  130. "source": [
  131. "# pulling out + analytical opt + allocate values\n",
  132. "function opt12(mat, s, v, N)\n",
  133. " val = Vector{Float64}(undef, length(v))\n",
  134. " @inbounds for i in eachindex(val)\n",
  135. " val[i] = -cos(2*mod(v[i],256));\n",
  136. " end\n",
  137. " \n",
  138. " @inbounds for i in 1:N\n",
  139. " for j in 1:N\n",
  140. " mat[i,j] = s[i,j]*val[i];\n",
  141. " end\n",
  142. " end;\n",
  143. " mat\n",
  144. "end"
  145. ]
  146. },
  147. {
  148. "cell_type": "code",
  149. "execution_count": 55,
  150. "metadata": {},
  151. "outputs": [
  152. {
  153. "data": {
  154. "text/plain": [
  155. "opt2 (generic function with 2 methods)"
  156. ]
  157. },
  158. "execution_count": 55,
  159. "metadata": {},
  160. "output_type": "execute_result"
  161. }
  162. ],
  163. "source": [
  164. "# reordering loops\n",
  165. "function opt2(mat, s, v, N)\n",
  166. " val = 0.0\n",
  167. " @inbounds for j in 1:N\n",
  168. " for i in 1:N\n",
  169. " val = mod(v[i],256);\n",
  170. " val = -cos(2*val)\n",
  171. " mat[i,j] = s[i,j]*val;\n",
  172. " end\n",
  173. " end;\n",
  174. "end"
  175. ]
  176. },
  177. {
  178. "cell_type": "code",
  179. "execution_count": 56,
  180. "metadata": {},
  181. "outputs": [
  182. {
  183. "data": {
  184. "text/plain": [
  185. "opt22 (generic function with 1 method)"
  186. ]
  187. },
  188. "execution_count": 56,
  189. "metadata": {},
  190. "output_type": "execute_result"
  191. }
  192. ],
  193. "source": [
  194. "# reordering loops + allocate values\n",
  195. "function opt22(mat, s, v, N)\n",
  196. " val = Vector{Float64}(undef, length(v))\n",
  197. " @inbounds for i in eachindex(val)\n",
  198. " val[i] = -cos(2*mod(v[i],256));\n",
  199. " end\n",
  200. " \n",
  201. " @inbounds for j in 1:N\n",
  202. " for i in 1:N\n",
  203. " mat[i,j] = s[i,j]*val[i];\n",
  204. " end\n",
  205. " end;\n",
  206. "end"
  207. ]
  208. },
  209. {
  210. "cell_type": "code",
  211. "execution_count": 63,
  212. "metadata": {},
  213. "outputs": [
  214. {
  215. "data": {
  216. "text/plain": [
  217. "8"
  218. ]
  219. },
  220. "execution_count": 63,
  221. "metadata": {},
  222. "output_type": "execute_result"
  223. }
  224. ],
  225. "source": [
  226. "using Hwloc\n",
  227. "Hwloc.num_physical_cores()"
  228. ]
  229. },
  230. {
  231. "cell_type": "code",
  232. "execution_count": 68,
  233. "metadata": {},
  234. "outputs": [
  235. {
  236. "data": {
  237. "text/plain": [
  238. "opt22_threaded (generic function with 1 method)"
  239. ]
  240. },
  241. "execution_count": 68,
  242. "metadata": {},
  243. "output_type": "execute_result"
  244. }
  245. ],
  246. "source": [
  247. "function opt22_threaded(mat, s, v, N)\n",
  248. " val = Vector{Float64}(undef, length(v))\n",
  249. " @inbounds for i in eachindex(val)\n",
  250. " val[i] = -cos(2*mod(v[i],256));\n",
  251. " end\n",
  252. " \n",
  253. " @inbounds Threads.@threads for j in 1:N\n",
  254. " for i in 1:N\n",
  255. " mat[i,j] = s[i,j]*val[i];\n",
  256. " end\n",
  257. " end;\n",
  258. " mat\n",
  259. "end"
  260. ]
  261. },
  262. {
  263. "cell_type": "code",
  264. "execution_count": 88,
  265. "metadata": {},
  266. "outputs": [],
  267. "source": [
  268. "N = 4000\n",
  269. "mat = zeros(N,N)\n",
  270. "s = rand(N,N)\n",
  271. "v = rand(Int, N);"
  272. ]
  273. },
  274. {
  275. "cell_type": "code",
  276. "execution_count": 92,
  277. "metadata": {},
  278. "outputs": [],
  279. "source": [
  280. "opt22_threaded(mat, s, v, N);"
  281. ]
  282. },
  283. {
  284. "cell_type": "code",
  285. "execution_count": 57,
  286. "metadata": {},
  287. "outputs": [
  288. {
  289. "name": "stdout",
  290. "output_type": "stream",
  291. "text": [
  292. "Performance: 9.096081484617212 MIt/s\n"
  293. ]
  294. }
  295. ],
  296. "source": [
  297. "runtime = @belapsed work($mat, $s, $v, $N);\n",
  298. "perf = N*N*1e-6/runtime # MIt/s\n",
  299. "println(\"Performance: $perf MIt/s\")"
  300. ]
  301. },
  302. {
  303. "cell_type": "code",
  304. "execution_count": 58,
  305. "metadata": {},
  306. "outputs": [
  307. {
  308. "name": "stdout",
  309. "output_type": "stream",
  310. "text": [
  311. "Performance: 19.456335886711514 MIt/s\n"
  312. ]
  313. }
  314. ],
  315. "source": [
  316. "runtime = @belapsed opt1($mat, $s, $v, $N);\n",
  317. "perf = N*N*1e-6/runtime # MIt/s\n",
  318. "println(\"Performance: $perf MIt/s\")"
  319. ]
  320. },
  321. {
  322. "cell_type": "code",
  323. "execution_count": 59,
  324. "metadata": {},
  325. "outputs": [
  326. {
  327. "name": "stdout",
  328. "output_type": "stream",
  329. "text": [
  330. "Performance: 19.358628902845535 MIt/s\n"
  331. ]
  332. }
  333. ],
  334. "source": [
  335. "runtime = @belapsed opt12($mat, $s, $v, $N);\n",
  336. "perf = N*N*1e-6/runtime # MIt/s\n",
  337. "println(\"Performance: $perf MIt/s\")"
  338. ]
  339. },
  340. {
  341. "cell_type": "code",
  342. "execution_count": 60,
  343. "metadata": {},
  344. "outputs": [
  345. {
  346. "name": "stdout",
  347. "output_type": "stream",
  348. "text": [
  349. "Performance: 65.3802668739134 MIt/s\n"
  350. ]
  351. }
  352. ],
  353. "source": [
  354. "runtime = @belapsed opt2($mat, $s, $v, $N);\n",
  355. "perf = N*N*1e-6/runtime # MIt/s\n",
  356. "println(\"Performance: $perf MIt/s\")"
  357. ]
  358. },
  359. {
  360. "cell_type": "code",
  361. "execution_count": 61,
  362. "metadata": {},
  363. "outputs": [
  364. {
  365. "name": "stdout",
  366. "output_type": "stream",
  367. "text": [
  368. "Performance: 658.4475132762567 MIt/s\n"
  369. ]
  370. }
  371. ],
  372. "source": [
  373. "runtime = @belapsed opt22($mat, $s, $v, $N);\n",
  374. "perf = N*N*1e-6/runtime # MIt/s\n",
  375. "println(\"Performance: $perf MIt/s\")"
  376. ]
  377. },
  378. {
  379. "cell_type": "code",
  380. "execution_count": 91,
  381. "metadata": {},
  382. "outputs": [
  383. {
  384. "data": {
  385. "text/plain": [
  386. "73.11111111111111"
  387. ]
  388. },
  389. "execution_count": 91,
  390. "metadata": {},
  391. "output_type": "execute_result"
  392. }
  393. ],
  394. "source": [
  395. "658/9"
  396. ]
  397. },
  398. {
  399. "cell_type": "code",
  400. "execution_count": 93,
  401. "metadata": {},
  402. "outputs": [
  403. {
  404. "data": {
  405. "text/plain": [
  406. "86"
  407. ]
  408. },
  409. "execution_count": 93,
  410. "metadata": {},
  411. "output_type": "execute_result"
  412. }
  413. ],
  414. "source": [
  415. "emmy = 86"
  416. ]
  417. },
  418. {
  419. "cell_type": "code",
  420. "execution_count": null,
  421. "metadata": {},
  422. "outputs": [],
  423. "source": [
  424. "emmy_opt = 600"
  425. ]
  426. },
  427. {
  428. "cell_type": "code",
  429. "execution_count": null,
  430. "metadata": {},
  431. "outputs": [],
  432. "source": [
  433. "emmy_opt_threaded = 1.8 * 1e9"
  434. ]
  435. },
  436. {
  437. "cell_type": "markdown",
  438. "metadata": {},
  439. "source": [
  440. "# Multiple `N`"
  441. ]
  442. },
  443. {
  444. "cell_type": "code",
  445. "execution_count": 18,
  446. "metadata": {},
  447. "outputs": [],
  448. "source": [
  449. "using Plots\n",
  450. "\n",
  451. "runtime = @belapsed work($mat, $s, $v);\n",
  452. "perf = N*N*1e-6/runtime # MIt/s\n",
  453. "println(\"Performance: $perf MIt/s\")"
  454. ]
  455. }
  456. ],
  457. "metadata": {
  458. "kernelspec": {
  459. "display_name": "Julia 1.3.1",
  460. "language": "julia",
  461. "name": "julia-1.3"
  462. },
  463. "language_info": {
  464. "file_extension": ".jl",
  465. "mimetype": "application/julia",
  466. "name": "julia",
  467. "version": "1.3.1"
  468. }
  469. },
  470. "nbformat": 4,
  471. "nbformat_minor": 2
  472. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement