Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ----------------------------------------------------------------------
# bubble -- in-place ascending sort of an array of signed 32-bit integers.
#
# Syntax: GNU as, AT&T operand order.
# Presumed C prototype: void bubble(int n, int *a);
#   NOTE(review): inferred from register use only (count in %edi, pointer
#   in %rsi, i.e. System V AMD64 argument order) -- confirm against caller.
#
# In:    %edi = element count (signed 32-bit; sign-extended below)
#        %rsi = address of the first element
# Out:   nothing in registers; the array is sorted in place
# Clobb: rax, rcx, rdx, rdi, r8, r9, flags, plus anything bubble_simd
#        touches when the count exceeds 0x20
# Stack: unused by this routine
#
# Counts above 0x20 are routed to bubble_simd, which is defined elsewhere
# in this file and finishes by jumping back to bubble_serial.
# ----------------------------------------------------------------------
        .text
        .global bubble
bubble:
        movslq  %edi, %rdi              # n: sign-extend 32 -> 64 bits
        cmp     $0x20, %rdi
        jg      bubble_simd             # more than 32 elements: vector pre-pass first

# bubble_serial -- scalar bubble sort.
# In: %rdi = element count (64-bit signed), %rsi = array start.
# The running maximum is carried in r9d, so each inner step performs one
# load and one store.  The scan limit shrinks to the position of the last
# exchange after every pass.
bubble_serial:
        cmp     $2, %rdi                # 0 or 1 element (or a negative count):
        jl      .Lbubble_done           # nothing to sort, and a[1] must not be touched
        lea     -4(%rsi,%rdi,4), %rdi   # rdi = address of the last element (scan limit)
0:
        mov     %rsi, %rax              # rax = cursor, restarted at the array start
        mov     %rax, %rdx              # rdx = last exchange position; start means "none"
        movl    (%rax), %r9d            # r9d = carried value (largest seen so far)
1:
        movl    4(%rax), %ecx           # ecx = next element
        cmp     %ecx, %r9d              # flags = carried - next (signed); mov/cmov keep them
        mov     %ecx, %r8d              # r8d = min(carried, next) ...
        cmovng  %r9d, %r8d              # ... carried wins when carried <= next
        cmovg   %rax, %rdx              # carried > next: an exchange happened here
        cmovng  %ecx, %r9d              # r9d = max(carried, next)
        movl    %r8d, (%rax)            # the smaller value settles at the cursor
        add     $4, %rax
        cmp     %rdi, %rax              # addresses: compare unsigned
        jb      1b
        movl    %r9d, (%rax)            # carried maximum lands at the scan limit
        mov     %rdx, %rdi              # everything past the last exchange is in place
        cmp     %rdi, %rsi
        jb      0b                      # another pass only if an exchange occurred past the start
.Lbubble_done:
        ret
# ----------------------------------------------------------------------
# bubble_simd -- vector pre-pass for large arrays (GNU as, AT&T syntax).
#
# Entered by a jump from bubble when the element count exceeds 0x20.
# In:    %rdi = element count (64-bit signed), %rsi = array start
# Out:   %rdi and %rsi unchanged; control transfers to bubble_serial,
#        which completes the sort
# Clobb: rax, rcx, rdx, r8, xmm0-xmm4, flags
# Needs: AVX (VEX-encoded 128-bit integer instructions)
#
# The 16-byte-aligned interior of the array is treated as a sequence of
# 4-lane vectors and bubble-sorted lane-wise: adjacent vectors are
# replaced by their per-lane signed minimum and maximum.  This orders
# each lane's column of elements (stride of four) but not the array as a
# whole, and it never touches the unaligned head and tail, so the scalar
# pass must still run afterwards.
# ----------------------------------------------------------------------
bubble_simd:
        lea     15(%rsi), %r8
        and     $-16, %r8               # r8 = first 16-byte-aligned address >= array start
        lea     -16(%rsi,%rdi,4), %rcx
        and     $-16, %rcx              # rcx = scan limit: the vector at rcx still ends inside the array
        cmp     %rcx, %r8
        jae     bubble_serial           # no aligned vector pair available: scalar sort only
0:
        mov     %r8, %rax               # rax = cursor, restarted at the aligned start
        mov     %r8, %rdx               # rdx = last exchange position; start means "none"
1:
        vmovdqa  (%rax), %xmm0          # lower-address vector
        vmovdqa  16(%rax), %xmm1        # higher-address vector
        vpcmpgtd %xmm1, %xmm0, %xmm2    # lane mask: lower > higher (signed), i.e. out of order
        vptest   %xmm2, %xmm2           # ZF = 1 when no lane is out of order
        cmovnz   %rax, %rdx             # remember where the last exchange happened
        vpminsd  %xmm1, %xmm0, %xmm3    # per-lane smaller values
        vpmaxsd  %xmm1, %xmm0, %xmm4    # per-lane larger values
        vmovdqa  %xmm3, (%rax)          # smaller values go to the lower address
        vmovdqa  %xmm4, 16(%rax)        # larger values go to the higher address
        add      $16, %rax              # advance by one vector (4 dwords)
        cmp      %rcx, %rax             # addresses: compare unsigned
        jb       1b
        mov      %rdx, %rcx             # vectors past the last exchange are already ordered
        cmp      %rcx, %r8
        jb       0b                     # another pass only if an exchange occurred past the start
        jmp      bubble_serial          # tail-jump: the scalar pass finishes the sort
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement