Advertisement
Guest User

Untitled

a guest
Jul 1st, 2016
78
0
Never
Not a member of Pastebin yet? Sign up — it unlocks many useful features!
text 3.01 KB | None | 0 0
  1. .text
  2. .global bubble
  3. bubble:
  4. movsx %edi,%rdi
  5. cmp $0x20, %rdi
  6. jg bubble_simd
  7. bubble_serial:
  8. lea -4(%rsi,%rdi,4), %rdi
  9. #store end pointer in rdi
  10. #rdx = 4* (number of elements-1) + start address
  11.  
  12. 0:
  13. mov %rsi, %rax #store temp pointer in rax
  14. #will be destructively changining it
  15. mov %rax, %rdx #clear rdx flag. flag for having swapped
  16. movl (%rax), %r9d #load current value to compare with other values
  17. 1:
  18. movl 4(%rax), %ecx #load next value to compare
  19. cmp %ecx, %r9d #compare and branch as appropriate
  20. mov %ecx, %r8d #mov the smaller of the two values into r8
  21. cmovng %r9d, %r8d
  22. cmovg %rax, %rdx #store position of last swap
  23. cmovng %ecx, %r9d #mov the bigger into b
  24.  
  25. movl %r8d, (%rax) #move smaller into prevoios position in array
  26. add $4, %rax #increment pointer
  27. cmp %rdi, %rax #if we aren't at the end continue
  28. jl 1b
  29.  
  30. movl %r9d, (%rax) #move whats left into array
  31. mov %rdx, %rdi #stop at last swap
  32. cmp %rdi, %rsi #If we've made a swap then redo loop
  33. jl 0b
  34. ret
  35.  
  36.  
  37. bubble_simd:
  38. push %rdi #save rdi/rsi as we will destructively change them
  39. push %rsi #but still need them later
  40.  
  41. mov %rsi, %rax #align rdi to 16 bytes
  42. lea -0x10(%rax,%rdi,4), %rdi
  43. and $-0x10,%rdi
  44.  
  45. and $0xF, %rax #align rsi to 16 bytes
  46. mov $0x10, %rdx
  47. sub %rax, %rdx
  48. add %rax, %rsi
  49.  
  50.  
  51. 0:
  52. mov %rsi, %rax #start sort from beginning
  53. mov %rsi, %rdx #reset last swap position
  54. 1:
  55. vmovdqa (%rax), %xmm0 #load first set into xmm0
  56. vmovdqa 0x10(%rax), %xmm1 #load second set into xmm1
  57.  
  58. vpcmpgtd %xmm1, %xmm0, %xmm2 #compare them and store mask in xmm2
  59. vptest %xmm2,%xmm2 #if all 0 no swap has occured
  60. cmovnz %rax, %rdx
  61.  
  62. vpand %xmm0, %xmm2, %xmm3 #move greter into xmm3 and xmm5
  63. vpand %xmm1, %xmm2, %xmm4 #move lesser into xmm4 and xmm6
  64.  
  65. vpandn %xmm0, %xmm2, %xmm5
  66. vpandn %xmm1, %xmm2, %xmm6
  67.  
  68. vpaddd %xmm5, %xmm3, %xmm0 #combine the two halves together
  69. vpaddd %xmm6, %xmm4, %xmm1
  70.  
  71. vmovdqa %xmm0, 0x10(%rax) #store back into memory
  72. vmovdqa %xmm1, (%rax)
  73.  
  74. add $0x10, %rax #increment point by 4 dwords
  75. cmp %rdi, %rax #loop if not at end
  76. jl 1b
  77.  
  78. mov %rdx,%rdi #see if we made a swap
  79. cmp %rdi,%rsi #loop if we did
  80. jl 0b
  81.  
  82. pop %rsi #restore rsi and rdi
  83. pop %rdi
  84. jmp bubble_serial #use non parraleel version to finish sorting
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement