Advertisement
Guest User

Untitled

a guest
Oct 20th, 2011
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.44 KB | None | 0 0
  /*
   * Dot product of two float arrays: returns a[0]*b[0] + ... + a[n-1]*b[n-1].
   *
   *   a, b  input arrays, each with at least n readable elements
   *   n     number of elements to multiply-accumulate (0 yields 0.0f)
   *
   * On 32-bit ARM builds with NEON enabled, whole blocks of 8 floats are
   * handled by the inline-assembly loop, which keeps two 4-lane partial sums
   * (q8 and q9) and reduces them at the end. Any remaining elements (n % 8),
   * and the entire input on other targets, are handled by a plain scalar loop.
   *
   * Because the NEON path sums in a different order than a sequential loop,
   * results may differ from a purely scalar sum in the last bits.
   */
  float
  dotprod_fff_armv7_a(const float *a, const float *b, size_t n)
  {
    float s = 0;
    size_t tail = n;  /* elements left for the scalar loop */

  #if defined(__arm__) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
    /* The asm loop consumes exactly 8 floats per pass and always runs at
     * least once, so hand it only a non-zero multiple of 8; otherwise it
     * would read past the end of both arrays. */
    tail = n % 8;
    size_t blocks = n - tail;

    if (blocks != 0) {
      asm ("vmov.f32 q8, #0.0 \n\t"               /* first accumulator = 0   */
           "vmov.f32 q9, #0.0 \n\t"               /* second accumulator = 0  */
           "1: \n\t"
           "subs %3, %3, #8 \n\t"                 /* 8 elements per pass     */
           "vld1.32 {d0,d1,d2,d3}, [%1]! \n\t"    /* q0,q1 <- a; a += 8      */
           "vld1.32 {d4,d5,d6,d7}, [%2]! \n\t"    /* q2,q3 <- b; b += 8      */
           "vmla.f32 q8, q0, q2 \n\t"
           "vmla.f32 q9, q1, q3 \n\t"
           "bgt 1b \n\t"
           "vadd.f32 q8, q8, q9 \n\t"             /* merge the accumulators  */
           "vpadd.f32 d0, d16, d17 \n\t"          /* 4 lanes -> 2 lanes      */
           "vadd.f32 %0, s0, s1 \n\t"             /* 2 lanes -> scalar       */
           : "=w"(s), "+r"(a), "+r"(b), "+r"(blocks)
           :: "cc",      /* subs updates the condition flags                 */
              "memory",  /* the arrays are read through %1 and %2            */
              "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
              "d16", "d17", "d18", "d19");
      /* a and b were post-incremented by the loads, so they now point at
       * the first element that has not been processed yet. */
    }
  #endif

    for (size_t i = 0; i < tail; ++i)
      s += a[i] * b[i];

    return s;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement