Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * Single-precision dot product: returns the sum of a[i] * b[i] for
 * i in [0, n).  Returns 0.0f when n is 0.
 *
 * a, b  input vectors; each must provide at least n readable floats.
 * n     number of elements to multiply and accumulate.
 *
 * On AArch32 builds with NEON enabled, whole groups of 8 elements are
 * processed by an inline-assembly loop that keeps two 4-lane accumulators
 * (q8 and q9).  Any remaining n % 8 elements are added by a scalar loop,
 * so the vector loop never loads past the end of either array.  On other
 * targets a plain C loop with the same 8-lane accumulation layout is used.
 */
float
dotprod_fff_armv7_a(const float *a, const float *b, size_t n)
{
    const size_t tail = n % 8;  /* elements left after whole groups of 8 */
    size_t vec_n = n - tail;    /* elements consumed by the 8-wide loop */
    size_t i;
    float s = 0.0f;

#if defined(__arm__) && (defined(__ARM_NEON) || defined(__ARM_NEON__))
    /*
     * The assembly loop tests its counter only after the first pass, so it
     * must not be entered with a zero count.
     */
    if (vec_n != 0) {
        asm ("vmov.f32 q8, #0.0 \n\t"              /* accumulator, lanes 0-3 */
             "vmov.f32 q9, #0.0 \n\t"              /* accumulator, lanes 4-7 */
             "1: \n\t"
             "subs %3, %3, #8 \n\t"                /* count -= 8, sets flags */
             "vld1.32 {d0,d1,d2,d3}, [%1]! \n\t"   /* q0,q1 = a[0..7]; a += 8 */
             "vld1.32 {d4,d5,d6,d7}, [%2]! \n\t"   /* q2,q3 = b[0..7]; b += 8 */
             "vmla.f32 q8, q0, q2 \n\t"
             "vmla.f32 q9, q1, q3 \n\t"
             "bgt 1b \n\t"                         /* loop while count > 0 */
             "vadd.f32 q8, q8, q9 \n\t"            /* fold 8 lanes into 4 */
             "vpadd.f32 d0, d16, d17 \n\t"         /* fold 4 lanes into 2 */
             "vadd.f32 %0, s0, s1 \n\t"            /* fold 2 lanes into 1 */
             : "=w"(s), "+r"(a), "+r"(b), "+r"(vec_n)
             :
             /*
              * "cc": subs rewrites the condition flags.
              * "memory": the loads go through pointer registers, which the
              * compiler cannot see as accesses to the pointed-to arrays.
              */
             : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
               "d16", "d17", "d18", "d19", "cc", "memory");
    }
#else
    {
        /* Portable fallback using the same lane layout and reduction order. */
        float lane[8] = { 0.0f };

        for (; vec_n != 0; vec_n -= 8) {
            for (i = 0; i < 8; i++)
                lane[i] += a[i] * b[i];
            a += 8;
            b += 8;
        }
        s = ((lane[0] + lane[4]) + (lane[1] + lane[5]))
          + ((lane[2] + lane[6]) + (lane[3] + lane[7]));
    }
#endif

    /* Both paths leave a and b pointing just past the last whole group. */
    for (i = 0; i < tail; i++)
        s += a[i] * b[i];

    return s;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement