Advertisement
Guest User

Awk vs. grep (grep is too slow?)

a guest
Jul 13th, 2011
171
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.44 KB | None | 0 0
  1. # Links
  2. Script: http://pastebin.com/PJ2VfqN4
  3. Result: http://tinypic.com/r/303a4iv/7
  4.  
  5. --
  6.  
  7. # environment
  8. GNU Awk 3.1.8
  9. GNU grep 2.6.3
  10.  
  11. $ cat /proc/cpuinfo
  12. processor : 0
  13. vendor_id : GenuineIntel
  14. cpu family : 6
  15. model : 23
  16. model name : Celeron(R) Dual-Core CPU T3000 @ 1.80GHz
  17. stepping : 10
  18. cpu MHz : 1795
  19. cache size : 1024 KB
  20. fpu : yes
  21. fpu_exception : yes
  22. cpuid level : 13
  23. wp : yes
  24. flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe pni dtes64 monitor ds_cpl tm2 ssse3 cx16 xtpr pdcm xsave osxsave lahf_lm
  25. TLB size : 0 4K pages
  26. clflush size : 64
  27. cache_alignment : 64
  28. address sizes : 36 bits physical, 48 bits virtual
  29. power management:
  30.  
  31. processor : 1
  32. vendor_id : GenuineIntel
  33. cpu family : 6
  34. model : 23
  35. model name : Celeron(R) Dual-Core CPU T3000 @ 1.80GHz
  36. stepping : 10
  37. cpu MHz : 1795
  38. cache size : 1024 KB
  39. fpu : yes
  40. fpu_exception : yes
  41. cpuid level : 13
  42. wp : yes
  43. flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe pni dtes64 monitor ds_cpl tm2 ssse3 cx16 xtpr pdcm xsave osxsave lahf_lm
  44. TLB size : 0 4K pages
  45. clflush size : 64
  46. cache_alignment : 64
  47. address sizes : 36 bits physical, 48 bits virtual
  48. power management:
  49.  
  50. --
  51.  
  52. # make src.txt (all *.c files from the kernel tarball, concatenated into one file)
  53. wget http://www.kernel.org/pub/linux/kernel/v2.6/linux-2.6.39.1.tar.bz2
  54. tar xf linux-2.6.39.1.tar.bz2 --wildcards '*.c' -O >src.txt
  55.  
  56. --
  57.  
  58. ## merge time.txt with cmd.txt (output: ttime.txt)
  59. awk '/./ { print; next };
  60. { print; if (!("" p)) getline p < "time.txt";
  61. print p; while ((getline q < "time.txt") > 0) { if (p == q) break; print q }
  62. print }' <cmd.txt >ttime.txt
  63.  
  64. ## convert ttime.txt into gp.dat for gnuplot
  65. awk '/^# result$/ { b = !b }; !b { next };
  66. $1 == "time" { t[0] = t[1]; if (/awk/) { sub(/^.*}; \//, "\"/"); sub(/ { .*$/, "\"") }
  67. else gsub(/^.*\$I | >\/.*$/, "\""); gsub(/\\/, "&&"); t[1] = $0; next };
  68. /^user/ { split($2, u, "m"); s[0] = s[1]; s[1] = 60 * u[1] + u[2]; next };
  69. /^for/ { if (t[0]) { p(c, s); print ORS } sub(/[^0-9]+/, ""); print "#", +$0 };
  70. /^[0-9]+$/ { if (/^0$/) p("\"Lines\"", t); else p(c, s); c = $0 };
  71. END { p(c, s) };
  72. function p(x, y) { print x, y[0], y[1] }' ttime.txt >gp.dat
  73.  
  74. --
  75.  
  76. # measure execution time
  77. for ((N=3000000,I=0; I<=100; I+=20)); do echo $I 1>&2; time head -n$N src.txt |awk 'BEGIN { L = '$I'; b = L + 1; n = L + 2; i = j = 0; s = 2; }; /regex/ { if (L && s == 1) print "--"; while (i != j) { print a[i++]; if (i == b) i = 0 } n = s = 0 }; n > L { a[j++] = $0; if (j == b) j = 0; if (i == j) { i++; if (i == b) i = 0; if (!s) s++ } next }; n++ <= L' >/dev/null; time head -n$N src.txt |grep -C $I 'regex' >/dev/null; done 2>>time.txt
  78.  
  79. --
  80.  
  81. # test script
  82. for ((N=3000000,I=0; I<=100; I+=50)); do
  83. echo $I 1>&2
  84. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; /regex/ { (snip.) }; n++ <= L' >/dev/null
  85. time head -n$N src.txt |grep -C $I 'regex' >/dev/null
  86. done 2>time.txt
  87.  
  88. --
  89.  
  90. # result
  91.  
  92. for ((N=3000000,I=0; I<=100; I+=20)); do
  93. echo $I 1>&2
  94. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; /test/ { (snip.) }; n++ <= L' >/dev/null
  95. time head -n$N src.txt |grep -C $I test >/dev/null
  96. done 2>time.txt
  97.  
  98. 0
  99.  
  100. real 0m8.646s
  101. user 0m8.673s
  102. sys 0m0.482s
  103.  
  104. real 0m0.790s
  105. user 0m0.607s
  106. sys 0m0.450s
  107. 20
  108.  
  109. real 0m9.295s
  110. user 0m9.266s
  111. sys 0m0.451s
  112.  
  113. real 0m0.778s
  114. user 0m0.685s
  115. sys 0m0.373s
  116. 40
  117.  
  118. real 0m9.533s
  119. user 0m9.873s
  120. sys 0m0.483s
  121.  
  122. real 0m0.765s
  123. user 0m0.685s
  124. sys 0m0.388s
  125. 60
  126.  
  127. real 0m9.635s
  128. user 0m9.717s
  129. sys 0m0.497s
  130.  
  131. real 0m0.768s
  132. user 0m0.733s
  133. sys 0m0.419s
  134. 80
  135.  
  136. real 0m9.696s
  137. user 0m9.780s
  138. sys 0m0.341s
  139.  
  140. real 0m0.793s
  141. user 0m0.747s
  142. sys 0m0.435s
  143. 100
  144.  
  145. real 0m9.766s
  146. user 0m9.765s
  147. sys 0m0.498s
  148.  
  149. real 0m0.791s
  150. user 0m0.654s
  151. sys 0m0.544s
  152.  
  153. for ((N=3000000,I=0; I<=100; I+=20)); do
  154. echo $I 1>&2
  155. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; /test.*test/ { (snip.) }; n++ <= L' >/dev/null
  156. time head -n$N src.txt |grep -C $I 'test.*test' >/dev/null
  157. done 2>>time.txt
  158.  
  159. 0
  160.  
  161. real 0m8.549s
  162. user 0m8.517s
  163. sys 0m0.482s
  164.  
  165. real 0m0.851s
  166. user 0m0.794s
  167. sys 0m0.419s
  168. 20
  169.  
  170. real 0m9.341s
  171. user 0m9.125s
  172. sys 0m0.357s
  173.  
  174. real 0m0.925s
  175. user 0m0.920s
  176. sys 0m0.497s
  177. 40
  178.  
  179. real 0m9.610s
  180. user 0m9.594s
  181. sys 0m0.497s
  182.  
  183. real 0m0.843s
  184. user 0m0.810s
  185. sys 0m0.466s
  186. 60
  187.  
  188. real 0m9.671s
  189. user 0m9.608s
  190. sys 0m0.357s
  191.  
  192. real 0m0.858s
  193. user 0m0.669s
  194. sys 0m0.560s
  195. 80
  196.  
  197. real 0m9.707s
  198. user 0m9.842s
  199. sys 0m0.451s
  200.  
  201. real 0m0.856s
  202. user 0m0.779s
  203. sys 0m0.466s
  204. 100
  205.  
  206. real 0m9.774s
  207. user 0m9.826s
  208. sys 0m0.561s
  209.  
  210. real 0m0.846s
  211. user 0m0.763s
  212. sys 0m0.373s
  213.  
  214. for ((N=30000,I=0; I<=100; I+=20)); do
  215. echo $I 1>&2
  216. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; /./ { (snip.) }; n++ <= L' >/dev/null
  217. time head -n$N src.txt |grep -C $I . >/dev/null
  218. done 2>>time.txt
  219.  
  220. 0
  221.  
  222. real 0m0.168s
  223. user 0m0.077s
  224. sys 0m0.046s
  225.  
  226. real 0m50.856s
  227. user 0m50.169s
  228. sys 0m0.092s
  229. 20
  230.  
  231. real 0m0.171s
  232. user 0m0.046s
  233. sys 0m0.061s
  234.  
  235. real 0m50.712s
  236. user 0m49.998s
  237. sys 0m0.076s
  238. 40
  239.  
  240. real 0m0.190s
  241. user 0m0.077s
  242. sys 0m0.046s
  243.  
  244. real 0m50.489s
  245. user 0m50.216s
  246. sys 0m0.030s
  247. 60
  248.  
  249. real 0m0.181s
  250. user 0m0.078s
  251. sys 0m0.046s
  252.  
  253. real 0m51.034s
  254. user 0m50.294s
  255. sys 0m0.045s
  256. 80
  257.  
  258. real 0m0.162s
  259. user 0m0.062s
  260. sys 0m0.031s
  261.  
  262. real 0m50.549s
  263. user 0m50.076s
  264. sys 0m0.045s
  265. 100
  266.  
  267. real 0m0.171s
  268. user 0m0.061s
  269. sys 0m0.062s
  270.  
  271. real 0m50.535s
  272. user 0m50.091s
  273. sys 0m0.030s
  274.  
  275. for ((N=30000,I=0; I<=100; I+=20)); do
  276. echo $I 1>&2
  277. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; /.*/ { (snip.) }; n++ <= L' >/dev/null
  278. time head -n$N src.txt |grep -C $I '.*' >/dev/null
  279. done 2>>time.txt
  280.  
  281. 0
  282.  
  283. real 0m0.153s
  284. user 0m0.077s
  285. sys 0m0.076s
  286.  
  287. real 0m59.338s
  288. user 0m58.703s
  289. sys 0m0.045s
  290. 20
  291.  
  292. real 0m0.164s
  293. user 0m0.077s
  294. sys 0m0.045s
  295.  
  296. real 0m59.392s
  297. user 0m58.608s
  298. sys 0m0.030s
  299. 40
  300.  
  301. real 0m0.165s
  302. user 0m0.077s
  303. sys 0m0.062s
  304.  
  305. real 1m1.078s
  306. user 0m59.139s
  307. sys 0m0.030s
  308. 60
  309.  
  310. real 0m0.174s
  311. user 0m0.078s
  312. sys 0m0.061s
  313.  
  314. real 0m59.714s
  315. user 0m58.952s
  316. sys 0m0.076s
  317. 80
  318.  
  319. real 0m0.164s
  320. user 0m0.046s
  321. sys 0m0.060s
  322.  
  323. real 0m59.242s
  324. user 0m58.796s
  325. sys 0m0.092s
  326. 100
  327.  
  328. real 0m0.164s
  329. user 0m0.046s
  330. sys 0m0.061s
  331.  
  332. real 1m1.563s
  333. user 0m59.249s
  334. sys 0m0.107s
  335.  
  336. for ((N=3000000,I=0; I<=100; I+=20)); do
  337. echo $I 1>&2
  338. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; / / { (snip.) }; n++ <= L' >/dev/null
  339. time head -n$N src.txt |grep -C $I ' ' >/dev/null
  340. done 2>>time.txt
  341.  
  342. 0
  343.  
  344. real 0m9.239s
  345. user 0m7.206s
  346. sys 0m0.529s
  347.  
  348. real 0m2.661s
  349. user 0m2.277s
  350. sys 0m0.606s
  351. 20
  352.  
  353. real 0m6.663s
  354. user 0m5.786s
  355. sys 0m0.591s
  356.  
  357. real 0m3.483s
  358. user 0m3.025s
  359. sys 0m0.451s
  360. 40
  361.  
  362. real 0m6.886s
  363. user 0m6.051s
  364. sys 0m0.482s
  365.  
  366. real 0m3.457s
  367. user 0m2.901s
  368. sys 0m0.546s
  369. 60
  370.  
  371. real 0m6.546s
  372. user 0m5.864s
  373. sys 0m0.623s
  374.  
  375. real 0m3.809s
  376. user 0m3.306s
  377. sys 0m0.264s
  378. 80
  379.  
  380. real 0m6.744s
  381. user 0m6.006s
  382. sys 0m0.684s
  383.  
  384. real 0m4.726s
  385. user 0m3.072s
  386. sys 0m0.560s
  387. 100
  388.  
  389. real 0m6.587s
  390. user 0m5.911s
  391. sys 0m0.561s
  392.  
  393. real 0m4.014s
  394. user 0m3.447s
  395. sys 0m0.357s
  396.  
  397. for ((N=3000000,I=0; I<=100; I+=20)); do
  398. echo $I 1>&2
  399. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; / ./ { (snip.) }; n++ <= L' >/dev/null
  400. time head -n$N src.txt |grep -C $I ' .' >/dev/null
  401. done 2>>time.txt
  402.  
  403. 0
  404.  
  405. real 0m7.628s
  406. user 0m7.066s
  407. sys 0m0.561s
  408.  
  409. real 0m9.883s
  410. user 0m8.938s
  411. sys 0m0.544s
  412. 20
  413.  
  414. real 0m6.658s
  415. user 0m5.803s
  416. sys 0m0.684s
  417.  
  418. real 0m10.163s
  419. user 0m9.531s
  420. sys 0m0.482s
  421. 40
  422.  
  423. real 0m7.104s
  424. user 0m6.097s
  425. sys 0m0.514s
  426.  
  427. real 0m10.673s
  428. user 0m9.858s
  429. sys 0m0.669s
  430. 60
  431.  
  432. real 0m7.071s
  433. user 0m5.989s
  434. sys 0m0.466s
  435.  
  436. real 0m10.524s
  437. user 0m9.702s
  438. sys 0m0.654s
  439. 80
  440.  
  441. real 0m6.198s
  442. user 0m5.756s
  443. sys 0m0.559s
  444.  
  445. real 0m10.446s
  446. user 0m9.936s
  447. sys 0m0.512s
  448. 100
  449.  
  450. real 0m6.310s
  451. user 0m5.926s
  452. sys 0m0.466s
  453.  
  454. real 0m10.653s
  455. user 0m9.858s
  456. sys 0m0.434s
  457.  
  458. for ((N=3000000,I=0; I<=100; I+=20)); do
  459. echo $I 1>&2
  460. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; / ?[-+=*/~|^&] ?/ { (snip.) }; n++ <= L' >/dev/null
  461. time head -n$N src.txt |grep -C $I ' \?[-+=*/~|^&] \?' >/dev/null
  462. done 2>>time.txt
  463.  
  464. 0
  465.  
  466. real 0m7.522s
  467. user 0m7.581s
  468. sys 0m0.451s
  469.  
  470. real 0m1.472s
  471. user 0m1.480s
  472. sys 0m0.435s
  473. 20
  474.  
  475. real 0m5.794s
  476. user 0m5.879s
  477. sys 0m0.467s
  478.  
  479. real 0m2.237s
  480. user 0m2.324s
  481. sys 0m0.357s
  482. 40
  483.  
  484. real 0m5.886s
  485. user 0m5.896s
  486. sys 0m0.514s
  487.  
  488. real 0m2.328s
  489. user 0m2.433s
  490. sys 0m0.420s
  491. 60
  492.  
  493. real 0m5.898s
  494. user 0m5.833s
  495. sys 0m0.528s
  496.  
  497. real 0m2.394s
  498. user 0m2.402s
  499. sys 0m0.483s
  500. 80
  501.  
  502. real 0m6.228s
  503. user 0m6.037s
  504. sys 0m0.420s
  505.  
  506. real 0m2.345s
  507. user 0m2.293s
  508. sys 0m0.623s
  509. 100
  510.  
  511. real 0m5.733s
  512. user 0m5.740s
  513. sys 0m0.529s
  514.  
  515. real 0m2.380s
  516. user 0m2.323s
  517. sys 0m0.590s
  518.  
  519. for ((N=3000000,I=0; I<=100; I+=20)); do
  520. echo $I 1>&2
  521. time head -n$N src.txt |awk 'BEGIN { L = '$I'; (snip.) }; / [_A-Za-z][^ ]/ { (snip.) }; n++ <= L' >/dev/null
  522. time head -n$N src.txt |grep -C $I ' [_A-Za-z][^ ]' >/dev/null
  523. done 2>>time.txt
  524.  
  525. 0
  526.  
  527. real 0m20.183s
  528. user 0m19.390s
  529. sys 0m0.482s
  530.  
  531. real 0m12.193s
  532. user 0m11.917s
  533. sys 0m0.560s
  534. 20
  535.  
  536. real 0m18.113s
  537. user 0m17.331s
  538. sys 0m0.575s
  539.  
  540. real 0m13.256s
  541. user 0m12.900s
  542. sys 0m0.513s
  543. 40
  544.  
  545. real 0m18.065s
  546. user 0m17.362s
  547. sys 0m0.405s
  548.  
  549. real 0m13.086s
  550. user 0m12.853s
  551. sys 0m0.389s
  552. 60
  553.  
  554. real 0m17.816s
  555. user 0m17.315s
  556. sys 0m0.482s
  557.  
  558. real 0m13.046s
  559. user 0m12.838s
  560. sys 0m0.669s
  561. 80
  562.  
  563. real 0m17.557s
  564. user 0m17.283s
  565. sys 0m0.514s
  566.  
  567. real 0m13.439s
  568. user 0m12.900s
  569. sys 0m0.419s
  570. 100
  571.  
  572. real 0m17.735s
  573. user 0m17.378s
  574. sys 0m0.466s
  575.  
  576. real 0m12.999s
  577. user 0m12.947s
  578. sys 0m0.466s
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement