Guest User

Untitled

a guest
Feb 3rd, 2013
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  ;-----------------------------------------------------------------------
  ; FDM_unaligned
  ;   Runs rcx time steps of a 3-point stencil over one row of doubles,
  ;   ping-ponging between two buffers (source and destination are
  ;   swapped after every step).
  ;
  ; ABI:   presumably Microsoft x64 (arguments arrive in rcx/rdx/r8 and
  ;        xmm6+ are treated as callee-saved) -- confirm against caller.
  ; In:    rcx = number of time steps; 0 returns immediately
  ;        rdx = alt, source row of the first step
  ;        r8  = neu, destination row of the first step
  ;        Both rows must be 16-byte aligned (movapd is used on them) and
  ;        hold FDM_LAST_OFF/8 + 1 doubles.
  ; Per step (half/quarter are 0.5/0.25 according to the original notes):
  ;        neu[0]   = alt[1] * c3
  ;        neu[i]   = quarter*(alt[i-1] + alt[i+1]) + half*alt[i]
  ;        neu[N-1] = alt[N-2] + c2
  ; Out:   no return value. The row written by the last step is the
  ;        original neu after an odd number of steps, the original alt
  ;        after an even number.
  ; Clobb: rax, rcx, rdx, r8, r9, r10, xmm0-xmm5, flags
  ; Uses:  reg6/reg7/reg12/reg13/reg14 (16-byte save slots) and
  ;        c2/c3/half/quarter (16-byte constants), all defined elsewhere.
  ; NOTE(review): the save slots are static memory, so this routine is not
  ;        reentrant, and no unwind information is emitted -- confirm that
  ;        is acceptable for the callers.
  ;-----------------------------------------------------------------------
  FDM_LAST_OFF equ 7984               ; byte offset of the last element (N*8-8)

  FDM_unaligned proc
      ; Zero steps requested: without this guard the "sub rcx,1 / jnz"
      ; at the bottom would wrap around and iterate 2^64-1 more times.
      test    rcx, rcx
      jz      NoSteps

      ; preserve callee-saved registers
      movapd  reg6,  xmm6
      movapd  reg7,  xmm7
      movapd  reg12, xmm12
      movapd  reg13, xmm13
      movapd  reg14, xmm14

      ; load constants
      movapd  xmm5,  c2               ; only the low lane is used (addsd)
      movapd  xmm14, c3               ; only the low lane is used (mulsd)
      movapd  xmm13, half
      movapd  xmm12, quarter

      mov     r9,  rdx                ; r9  = source row of this step
      mov     r10, r8                 ; r10 = destination row of this step
      lea     rax, [rdx + FDM_LAST_OFF] ; rax = inner-loop end address

  TimeLoop:
      ; ---- first two elements ------------------------------------------
      movapd  xmm0, [rdx]             ; alt[0,1]
      movupd  xmm1, [rdx+8]           ; alt[1,2], reused by the inner loop

      movsd   xmm7, xmm1              ; xmm7.lo = alt[1]
      mulsd   xmm7, xmm14             ; neu[0] = alt[1] * c3

      movhlps xmm6, xmm1              ; xmm6.lo = alt[2]
      addsd   xmm6, xmm0              ; alt[0] + alt[2]
      mulsd   xmm6, xmm12             ; quarter * (alt[0] + alt[2])

      movsd   xmm0, xmm1              ; xmm0.lo now holds alt[1]
      mulsd   xmm0, xmm13             ; half * alt[1]
      addsd   xmm0, xmm6              ; = neu[1]

      movlhps xmm7, xmm0              ; pack neu[0] (lo) and neu[1] (hi)
      movapd  xmmword ptr [r8], xmm7

      add     rdx, 16
      add     r8,  16

      ; ---- interior, four elements per pass -----------------------------
      ; Invariant on entry: rdx -> alt[i], r8 -> neu[i], and xmm1 holds
      ; alt[i-1, i] (the unaligned pair loaded by the previous pass).
      ; The loop exits on equality only, so FDM_LAST_OFF - 16 must be a
      ; multiple of 32.
  InnerLoop:
      movupd  xmm2, [rdx+8]           ; alt[i+1, i+2]
      movapd  xmm4, [rdx+16]          ; alt[i+2, i+3]
      addpd   xmm1, xmm2              ; left + right neighbours of i, i+1
      mulpd   xmm1, xmm12

      movapd  xmm3, [rdx]             ; alt[i, i+1]
      mulpd   xmm3, xmm13
      addpd   xmm3, xmm1
      movapd  [r8], xmm3              ; neu[i, i+1]

      movupd  xmm1, [rdx+24]          ; alt[i+3, i+4], kept for next pass
      addpd   xmm2, xmm1              ; left + right neighbours of i+2, i+3
      mulpd   xmm2, xmm12

      mulpd   xmm4, xmm13
      addpd   xmm4, xmm2
      movapd  [r8+16], xmm4           ; neu[i+2, i+3]

      add     r8,  32
      add     rdx, 32
      cmp     rdx, rax
      jne     InnerLoop

      ; ---- last element -------------------------------------------------
      ; xmm1 still holds alt[N-2, N-1] from the final pass.
      movsd   xmm6, xmm1              ; alt[N-2]
      addsd   xmm6, xmm5              ; + c2
      movsd   qword ptr [r8], xmm6    ; neu[N-1]

      ; ---- swap rows for the next step -----------------------------------
      xchg    r9, r10
      mov     rdx, r9
      mov     r8,  r10
      lea     rax, [rdx + FDM_LAST_OFF]

      sub     rcx, 1
      jnz     TimeLoop

      ; restore callee-saved registers
      movapd  xmm6,  reg6
      movapd  xmm7,  reg7
      movapd  xmm12, reg12
      movapd  xmm13, reg13
      movapd  xmm14, reg14
  NoSteps:
      RET
  FDM_unaligned endp
Advertisement
Add Comment
Please, Sign In to add comment