Advertisement
prat3492

835-2.diff

Jul 30th, 2015
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 2.68 KB | None | 0 0
  1. diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
  2. index 654d9d5..f2dbcc4 100644
  3. --- a/gcc/config/arm/neon.md
  4. +++ b/gcc/config/arm/neon.md
  5. @@ -548,6 +548,33 @@
  6.                      (const_string "neon_mul_<V_elem_ch><q>")))]
  7.  )
  8.  
  9. +(define_expand "div<mode>3"
  10. +  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
  11. +        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
  12. +         (match_operand:VCVTF 2 "s_register_operand" "w")))]
  13. +  "TARGET_NEON && !optimize_size
  14. +   && flag_unsafe_math_optimizations && flag_reciprocal_math"
  15. +  {
  16. +    rtx rec = gen_reg_rtx (<MODE>mode);
  17. +    rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
  18. +
  19. +    /* Initial reciprocal estimate of the divisor (operands[2]) into rec.  */
  20. +    emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
  21. +
  22. +    /* Refine rec with 2 iterations of the Newton-Raphson method.  */
  23. +    for (int i = 0; i < 2; i++)
  24. +      {
  25. +   emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
  26. +   emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
  27. +      }
  28. +
  29. +    /* rec now holds the reciprocal: operands[0] = operands[1] * rec.  */
  30. +    emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
  31. +    DONE;
  32. +  }
  33. +)
  34. +
  35. +
  36.  (define_insn "mul<mode>3add<mode>_neon"
  37.    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
  38.          (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
  39. diff --git a/gcc/testsuite/gcc.target/arm/vect-div-1.c b/gcc/testsuite/gcc.target/arm/vect-div-1.c
  40. new file mode 100644
  41. index 0000000..e562ef3
  42. --- /dev/null
  43. +++ b/gcc/testsuite/gcc.target/arm/vect-div-1.c
  44. @@ -0,0 +1,14 @@
  45. +/* { dg-do compile } */
  46. +/* { dg-require-effective-target arm_v8_neon_ok } */
  47. +/* { dg-options "-O2 -funsafe-math-optimizations -ftree-vectorize -fdump-tree-vect-all" } */
  48. +/* { dg-add-options arm_v8_neon } */
  49. +
  50. +void
  51. +foo (int len, float * __restrict p, float *__restrict x)
  52. +{
  53. +  len = len & ~31;  /* Clear the low five bits of len.  */
  54. +  for (int i = 0; i < len; i++)
  55. +    p[i] = p[i] / x[i];
  56. +}
  57. +
  58. +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
  59. diff --git a/gcc/testsuite/gcc.target/arm/vect-div-2.c b/gcc/testsuite/gcc.target/arm/vect-div-2.c
  60. new file mode 100644
  61. index 0000000..8e15d0a
  62. --- /dev/null
  63. +++ b/gcc/testsuite/gcc.target/arm/vect-div-2.c
  64. @@ -0,0 +1,14 @@
  65. +/* { dg-do compile } */
  66. +/* { dg-require-effective-target arm_v8_neon_ok } */
  67. +/* { dg-options "-O2 -funsafe-math-optimizations -fno-reciprocal-math -ftree-vectorize -fdump-tree-vect-all" } */
  68. +/* { dg-add-options arm_v8_neon } */
  69. +
  70. +void
  71. +foo (int len, float * __restrict p, float *__restrict x)
  72. +{
  73. +  len = len & ~31;  /* Clear the low five bits of len.  */
  74. +  for (int i = 0; i < len; i++)
  75. +    p[i] = p[i] / x[i];
  76. +}
  77. +
  78. +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement