Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
- index 654d9d5..f2dbcc4 100644
- --- a/gcc/config/arm/neon.md
- +++ b/gcc/config/arm/neon.md
- @@ -548,6 +548,33 @@
- (const_string "neon_mul_<V_elem_ch><q>")))]
- )
- +(define_expand "div<mode>3"
- + [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
- + (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
- + (match_operand:VCVTF 2 "s_register_operand" "w")))]
- + "TARGET_NEON && !optimize_size
- + && flag_unsafe_math_optimizations && flag_reciprocal_math"
- + { /* Expand operands[1] / operands[2] as operands[1] times a refined reciprocal of operands[2]. */
- + rtx rec = gen_reg_rtx (<MODE>mode);
- + rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
- +
- + /* Start from a reciprocal estimate of the divisor, operands[2]. */
- + emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
- +
- + /* Perform 2 iterations of the Newton-Raphson method: each one emits a vrecps step into vrecps_temp and multiplies it back into rec. */
- + for (int i = 0; i < 2; i++)
- + {
- + emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
- + emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
- + }
- +
- + /* We now have the reciprocal in rec; perform operands[0] = operands[1] * rec. */
- + emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
- + DONE;
- + }
- +)
- +
- +
- (define_insn "mul<mode>3add<mode>_neon"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
- diff --git a/gcc/testsuite/gcc.target/arm/vect-div-1.c b/gcc/testsuite/gcc.target/arm/vect-div-1.c
- new file mode 100644
- index 0000000..e562ef3
- --- /dev/null
- +++ b/gcc/testsuite/gcc.target/arm/vect-div-1.c
- @@ -0,0 +1,14 @@
- +/* { dg-do compile } */
- +/* { dg-require-effective-target arm_v8_neon_ok } */
- +/* { dg-options "-O2 -funsafe-math-optimizations -ftree-vectorize -fdump-tree-vect-all" } */
- +/* { dg-add-options arm_v8_neon } */
- +
- +void
- +foo (int len, float * __restrict p, float *__restrict x)
- +{
- + len = len & ~31; /* Clear the low five bits so the trip count is a multiple of 32. */
- + for (int i = 0; i < len; i++)
- + p[i] = p[i] / x[i]; /* Element-wise float division; the dump scan below expects this loop to be vectorized. */
- +}
- +
- +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
- diff --git a/gcc/testsuite/gcc.target/arm/vect-div-2.c b/gcc/testsuite/gcc.target/arm/vect-div-2.c
- new file mode 100644
- index 0000000..8e15d0a
- --- /dev/null
- +++ b/gcc/testsuite/gcc.target/arm/vect-div-2.c
- @@ -0,0 +1,14 @@
- +/* { dg-do compile } */
- +/* { dg-require-effective-target arm_v8_neon_ok } */
- +/* { dg-options "-O2 -funsafe-math-optimizations -fno-reciprocal-math -ftree-vectorize -fdump-tree-vect-all" } */
- +/* { dg-add-options arm_v8_neon } */
- +
- +void
- +foo (int len, float * __restrict p, float *__restrict x)
- +{
- + len = len & ~31; /* Clear the low five bits so the trip count is a multiple of 32. */
- + for (int i = 0; i < len; i++)
- + p[i] = p[i] / x[i]; /* Element-wise float division; with -fno-reciprocal-math the dump scan below expects no vectorized loop. */
- +}
- +
- +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement