spu.md ("div<mode>3"): Convert into expander, move original insn and splitter content...
author Ulrich Weigand <uweigand@de.ibm.com>
Mon, 21 Jul 2008 17:15:22 +0000 (17:15 +0000)
committer Ulrich Weigand <uweigand@gcc.gnu.org>
Mon, 21 Jul 2008 17:15:22 +0000 (17:15 +0000)
* config/spu/spu.md ("div<mode>3"): Convert into expander, move
original insn and splitter contents into ...
("*div<mode>3_fast"): ... this new pattern.  Enable only if
flag_unsafe_math_optimizations.  Add dummy scratch register.
("*div<mode>3_adjusted"): New insn and splitter.  Enable only if
!flag_unsafe_math_optimizations.  Returns number with next
highest magnitude if this is still less or equal to the true
quotient in magnitude.

From-SVN: r138036

gcc/ChangeLog
gcc/config/spu/spu.md

index 02675347322909acce8d2d26582dc67de453d2a9..4838ebe83be6c7d94b2168c91af6f6f25ac06553 100644 (file)
@@ -1,3 +1,14 @@
+2008-07-21  Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>
+
+       * config/spu/spu.md ("div<mode>3"): Convert into expander, move
+       original insn and splitter contents into ...
+       ("*div<mode>3_fast"): ... this new pattern.  Enable only if
+       flag_unsafe_math_optimizations.  Add dummy scratch register.
+       ("*div<mode>3_adjusted"): New insn and splitter.  Enable only if
+       !flag_unsafe_math_optimizations.  Returns number with next
+       highest magnitude if this is still less or equal to the true
+       quotient in magnitude.
+
 2008-07-21  Rafael Avila de Espindola  <espindola@google.com>
 
        * Makefile.in: Replace toplev.h with TOPLEV_H.
index 6985a6836977b24836f90583315bea064d2a2787..c267efd29d1b47a783cc51cc7f52fdcff7f10477 100644 (file)
   [(set_attr "type" "multi0")
    (set_attr "length" "80")])
 
-(define_insn_and_split "div<mode>3"
+(define_expand "div<mode>3"
+  [(parallel
+    [(set (match_operand:VSF 0 "spu_reg_operand" "")   
+         (div:VSF (match_operand:VSF 1 "spu_reg_operand" "")
+                  (match_operand:VSF 2 "spu_reg_operand" "")))
+     (clobber (match_scratch:VSF 3 ""))
+     (clobber (match_scratch:VSF 4 ""))
+     (clobber (match_scratch:VSF 5 ""))])]
+  ""
+  "")
+
+(define_insn_and_split "*div<mode>3_fast"
   [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
        (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
                 (match_operand:VSF 2 "spu_reg_operand" "r")))
    (clobber (match_scratch:VSF 3 "=&r"))
-   (clobber (match_scratch:VSF 4 "=&r"))]
-  ""
+   (clobber (match_scratch:VSF 4 "=&r"))
+   (clobber (scratch:VSF))]
+  "flag_unsafe_math_optimizations"
   "#"
   "reload_completed"
   [(set (match_dup:VSF 0)
        (div:VSF (match_dup:VSF 1)
                 (match_dup:VSF 2)))
    (clobber (match_dup:VSF 3))
-   (clobber (match_dup:VSF 4))]
+   (clobber (match_dup:VSF 4))
+   (clobber (scratch:VSF))]
   {
     emit_insn (gen_frest_<mode>(operands[3], operands[2]));
     emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3]));
     DONE;
   })
 
+(define_insn_and_split "*div<mode>3_adjusted"
+  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+       (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+                (match_operand:VSF 2 "spu_reg_operand" "r")))
+   (clobber (match_scratch:VSF 3 "=&r"))
+   (clobber (match_scratch:VSF 4 "=&r"))
+   (clobber (match_scratch:VSF 5 "=&r"))]
+  "!flag_unsafe_math_optimizations"
+  "#"
+  "reload_completed"
+  [(set (match_dup:VSF 0)
+       (div:VSF (match_dup:VSF 1)
+                (match_dup:VSF 2)))
+   (clobber (match_dup:VSF 3))
+   (clobber (match_dup:VSF 4))
+   (clobber (match_dup:VSF 5))]
+  {
+    emit_insn (gen_frest_<mode> (operands[3], operands[2]));
+    emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3]));
+    emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3]));
+    emit_insn (gen_fnms_<mode> (operands[5], operands[4], operands[2], operands[1]));
+    emit_insn (gen_fma_<mode> (operands[3], operands[5], operands[3], operands[4]));
+
+   /* Due to truncation error, the quotient result may be low by 1 ulp.
+      Conditionally add one if the estimate is too small in magnitude.  */
+
+    emit_move_insn (gen_lowpart (<F2I>mode, operands[4]),
+                   spu_const (<F2I>mode, 0x80000000ULL));
+    emit_move_insn (gen_lowpart (<F2I>mode, operands[5]),
+                   spu_const (<F2I>mode, 0x3f800000ULL));
+    emit_insn (gen_selb (operands[5], operands[5], operands[1], operands[4]));
+
+    emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]),
+                             gen_lowpart (<F2I>mode, operands[3]),
+                             spu_const (<F2I>mode, 1)));
+    emit_insn (gen_fnms_<mode> (operands[0], operands[2], operands[4], operands[1]));
+    emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5]));
+    emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]),
+                             gen_lowpart (<F2I>mode, operands[0]),
+                             spu_const (<F2I>mode, -1)));
+    emit_insn (gen_selb (operands[0], operands[3], operands[4], operands[0]));
+    DONE;
+  })
+
 ;; Taken from STI's gcc
 ;; Does not correctly handle INF or NAN.
 (define_expand "divdf3"