From 655f2eb93e47f4996700fe6dd0524a151504144c Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@cygnus.com>
Date: Mon, 11 Sep 2000 14:15:50 -0700
Subject: [PATCH] ia64-protos.h (fr_nonimmediate_operand): Declare.

        * config/ia64/ia64-protos.h (fr_nonimmediate_operand): Declare.
        * config/ia64/ia64.c (fr_nonimmediate_operand): New.
        (ia64_override_options): Prevent optimizing division for both
        latency and throughput.
        (rtx_needs_barrier): Handle frcpa.
        * config/ia64/ia64.h (MASK_INLINE_DIV_LAT): New.
        (MASK_INLINE_DIV_THR, TARGET_INLINE_DIV_LAT): New.
        (TARGET_INLINE_DIV_THR, TARGET_INLINE_DIV): New.
        (TARGET_SWITCHES): Add -minline-divide-min-latency and
        -minline-divide-max-throughput.
        (PREDICATE_CODES): Update.
        * config/ia64/ia64.md (extendsidi2): Remove * from f case.
        (zero_extendsidi2): Likewise.  Fix typo in f case insn.
        (extendsfdf2): Add cases for gr<->fr and fr<->mem.
        (extendsftf2): Likewise.
        (extenddftf2): Likewise.
        (fix_trunctfdi2_alts): New.
        (fixuns_trunctfdi2_alts): New.
        (madd*4): Rename from madd*3.
        (divsi3, modsi3, udivsi3, umodsi3): New.
        (divsi3_internal): New.
        (divdi3, moddi3, udivdi3, umoddi3): New.
        (divdi3_internal_lat, divdi3_internal_thr): New.
        (multf3_alts, maddtf4_alts, nmaddtf4_alts): New.
        (recip_approx): New.

From-SVN: r36330
---
 gcc/ChangeLog                 |  28 ++
 gcc/config/ia64/ia64-protos.h |   1 +
 gcc/config/ia64/ia64.c        |  31 +++
 gcc/config/ia64/ia64.h        |  16 ++
 gcc/config/ia64/ia64.md       | 506 ++++++++++++++++++++++++++++++++--
 5 files changed, 552 insertions(+), 30 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1f902ca8529..31d94b4b8f5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2000-09-11  Richard Henderson  <rth@cygnus.com>
+
+	* config/ia64/ia64-protos.h (fr_nonimmediate_operand): Declare.
+	* config/ia64/ia64.c (fr_nonimmediate_operand): New.
+	(ia64_override_options): Prevent optimizing division for both
+	latency and throughput.
+	(rtx_needs_barrier): Handle frcpa.
+	* config/ia64/ia64.h (MASK_INLINE_DIV_LAT): New.
+	(MASK_INLINE_DIV_THR, TARGET_INLINE_DIV_LAT): New.
+	(TARGET_INLINE_DIV_THR, TARGET_INLINE_DIV): New.
+	(TARGET_SWITCHES): Add -minline-divide-min-latency and
+	-minline-divide-max-throughput.
+	(PREDICATE_CODES): Update.
+	* config/ia64/ia64.md (extendsidi2): Remove * from f case.
+	(zero_extendsidi2): Likewise.  Fix typo in f case insn.
+	(extendsfdf2): Add cases for gr<->fr and fr<->mem.
+	(extendsftf2): Likewise.
+	(extenddftf2): Likewise.
+	(fix_trunctfdi2_alts): New.
+	(fixuns_trunctfdi2_alts): New.
+	(madd*4): Rename from madd*3.
+	(divsi3, modsi3, udivsi3, umodsi3): New.
+	(divsi3_internal): New.
+	(divdi3, moddi3, udivdi3, umoddi3): New.
+	(divdi3_internal_lat, divdi3_internal_thr): New.
+	(multf3_alts, maddtf4_alts, nmaddtf4_alts): New.
+	(recip_approx): New.
+
 2000-09-11  Alexandre Oliva  <aoliva@redhat.com>
 
 	* print-rtl.c (debug_call_placeholder_verbose): New variable.
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 36a67eb26ac..d213b4180f8 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -38,6 +38,7 @@ extern int gr_register_operand PARAMS((rtx, enum machine_mode));
 extern int fr_register_operand PARAMS((rtx, enum machine_mode));
 extern int grfr_register_operand PARAMS((rtx, enum machine_mode));
 extern int gr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
+extern int fr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
 extern int grfr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
 extern int gr_reg_or_0_operand PARAMS((rtx, enum machine_mode));
 extern int gr_reg_or_5bit_operand PARAMS((rtx, enum machine_mode));
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 7b422351b96..57de975c870 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -397,6 +397,26 @@ gr_nonimmediate_operand (op, mode)
   return 1;
 }
 
+/* Return 1 if OP is a nonimmediate operand and not a hard non-FR register.  */
+
+int
+fr_nonimmediate_operand (op, mode)
+     rtx op;
+     enum machine_mode mode;
+{
+  if (! nonimmediate_operand (op, mode))
+    return 0;
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+  if (GET_CODE (op) == REG)
+    {
+      unsigned int regno = REGNO (op);
+      if (regno < FIRST_PSEUDO_REGISTER)
+	return FR_REGNO_P (regno);
+    }
+  return 1;
+}
+
 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
 
 int
@@ -3484,6 +3504,12 @@ ia64_override_options ()
   if (TARGET_AUTO_PIC)
     target_flags |= MASK_CONST_GP;
 
+  if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
+    {
+      warning ("cannot optimize division for both latency and throughput");
+      target_flags &= ~MASK_INLINE_DIV_THR;
+    }
+
   if (ia64_fixed_range_string)
     fix_range (ia64_fixed_range_string);
 
@@ -3971,6 +3997,11 @@ rtx_needs_barrier (x, flags, pred)
 	case 21: /* flushrs */
           break;
 
+	case 5: /* recip_approx */
+	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
+	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
+	  break;
+
         case 13: /* cmpxchg_acq */
 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index 86463b4e029..1f975f3913d 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -63,6 +63,10 @@ extern int target_flags;
 
 #define MASK_AUTO_PIC	0x00000200	/* generate automatically PIC */
 
+#define MASK_INLINE_DIV_LAT 0x00000400	/* inline div, min latency.  */
+
+#define MASK_INLINE_DIV_THR 0x00000800	/* inline div, max throughput.  */
+
 #define MASK_DWARF2_ASM 0x40000000	/* test dwarf2 line info via gas.  */
 
 #define TARGET_BIG_ENDIAN	(target_flags & MASK_BIG_ENDIAN)
@@ -85,6 +89,13 @@ extern int target_flags;
 
 #define TARGET_AUTO_PIC		(target_flags & MASK_AUTO_PIC)
 
+#define TARGET_INLINE_DIV_LAT	(target_flags & MASK_INLINE_DIV_LAT)
+
+#define TARGET_INLINE_DIV_THR	(target_flags & MASK_INLINE_DIV_THR)
+
+#define TARGET_INLINE_DIV \
+  (target_flags & (MASK_INLINE_DIV_LAT | MASK_INLINE_DIV_THR))
+
 #define TARGET_DWARF2_ASM	(target_flags & MASK_DWARF2_ASM)
 
 /* This macro defines names of command options to set and clear bits in
@@ -123,6 +134,10 @@ extern int target_flags;
       N_("gp is constant (but save/restore gp on indirect calls)") },	\
   { "auto-pic",		MASK_AUTO_PIC,					\
       N_("Generate self-relocatable code") },				\
+  { "inline-divide-min-latency", MASK_INLINE_DIV_LAT,			\
+      N_("Generate inline division, optimize for latency") },		\
+  { "inline-divide-max-throughput", MASK_INLINE_DIV_THR,		\
+      N_("Generate inline division, optimize for throughput") },	\
   { "dwarf2-asm", 	MASK_DWARF2_ASM,				\
       N_("Enable Dwarf 2 line debug info via GNU as")},			\
   { "no-dwarf2-asm", 	-MASK_DWARF2_ASM,				\
@@ -2646,6 +2661,7 @@ do {									\
 { "fr_register_operand", {SUBREG, REG}},				\
 { "grfr_register_operand", {SUBREG, REG}},				\
 { "gr_nonimmediate_operand", {SUBREG, REG, MEM}},			\
+{ "fr_nonimmediate_operand", {SUBREG, REG, MEM}},			\
 { "grfr_nonimmediate_operand", {SUBREG, REG, MEM}},			\
 { "gr_reg_or_0_operand", {SUBREG, REG, CONST_INT}},			\
 { "gr_reg_or_5bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}},	\
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index 129ce240316..8593bf62054 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -57,6 +57,7 @@
 ;;	2	gr_restore
 ;;	3	fr_spill
 ;;	4	fr_restore
+;;	5	recip_approx
 ;;	8	popcnt
 ;;	12	mf
 ;;	13	cmpxchg_acq
@@ -950,8 +951,8 @@
   [(set_attr "type" "I")])
 
 (define_insn "extendsidi2"
-  [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
-	(sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,*f")))]
+  [(set (match_operand:DI 0 "grfr_register_operand" "=r,?f")
+	(sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,f")))]
   ""
   "@
    sxt4 %0 = %1
@@ -979,14 +980,14 @@
   [(set_attr "type" "I,M")])
 
 (define_insn "zero_extendsidi2"
-  [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,*f")
+  [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,?f")
 	(zero_extend:DI
-	  (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,*f")))]
+	  (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,f")))]
   ""
   "@
    zxt4 %0 = %1
    ld4%O1 %0 = %1%P1
-   fsxt.r %0 = f1, %1%B0"
+   fmix.r %0 = f0, %1%B0"
   [(set_attr "type" "I,M,F")])
 
 ;; Convert between floating point types of different sizes.
@@ -997,34 +998,53 @@
 ;; would let combine merge the thing into adjacent insns.
 
 (define_insn_and_split "extendsfdf2"
-  [(set (match_operand:DF 0 "fr_register_operand" "=f,f")
-	(float_extend:DF (match_operand:SF 1 "fr_register_operand" "0,f")))]
+  [(set (match_operand:DF 0 "grfr_nonimmediate_operand" "=f,f,f,f,m,*r")
+	(float_extend:DF
+	  (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,m,*r,f,f")))]
   ""
-  "mov %0 = %1"
+  "@
+   mov %0 = %1
+   mov %0 = %1
+   ldfs %0 = %1%P1
+   setf.s %0 = %1
+   stfd %0 = %1%P0
+   getf.d %0 = %1"
   "reload_completed"
   [(set (match_dup 0) (float_extend:DF (match_dup 1)))]
   "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
-  [(set_attr "type" "F")])
+  [(set_attr "type" "F,F,M,M,M,M")])
 
 (define_insn_and_split "extendsftf2"
-  [(set (match_operand:TF 0 "fr_register_operand" "=f,f")
-	(float_extend:TF (match_operand:SF 1 "fr_register_operand" "0,f")))]
+  [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q")
+	(float_extend:TF
+	  (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))]
   ""
-  "mov %0 = %1"
+  "@
+   mov %0 = %1
+   mov %0 = %1
+   ldfs %0 = %1%P1
+   setf.s %0 = %1
+   stfe %0 = %1%P0"
   "reload_completed"
   [(set (match_dup 0) (float_extend:TF (match_dup 1)))]
   "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
-  [(set_attr "type" "F")])
+  [(set_attr "type" "F,F,M,M,M")])
 
 (define_insn_and_split "extenddftf2"
-  [(set (match_operand:TF 0 "fr_register_operand" "=f,f")
-	(float_extend:TF (match_operand:DF 1 "fr_register_operand" "0,f")))]
+  [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q")
+	(float_extend:TF
+	  (match_operand:DF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))]
   ""
-  "mov %0 = %1"
+  "@
+   mov %0 = %1
+   mov %0 = %1
+   ldfd %0 = %1%P1
+   setf.d %0 = %1
+   stfe %0 = %1%P0"
   "reload_completed"
   [(set (match_dup 0) (float_extend:TF (match_dup 1)))]
   "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
-  [(set_attr "type" "F")])
+  [(set_attr "type" "F,F,M,M,M")])
 
 (define_insn "truncdfsf2"
   [(set (match_operand:SF 0 "fr_register_operand" "=f")
@@ -1077,6 +1097,14 @@
   "fcvt.fx.trunc %0 = %1%B0"
   [(set_attr "type" "F")])
 
+(define_insn "fix_trunctfdi2_alts"
+  [(set (match_operand:DI 0 "fr_register_operand" "=f")
+	(fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
+   (use (match_operand:SI 2 "const_int_operand" ""))]
+  ""
+  "fcvt.fx.trunc.s%2 %0 = %1%B0"
+  [(set_attr "type" "F")])
+
 ;; Convert between unsigned integer types and floating point.
 
 (define_insn "floatunsdisf2"
@@ -1120,6 +1148,14 @@
   ""
   "fcvt.fxu.trunc %0 = %1%B0"
   [(set_attr "type" "F")])
+
+(define_insn "fixuns_trunctfdi2_alts"
+  [(set (match_operand:DI 0 "fr_register_operand" "=f")
+	(unsigned_fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
+   (use (match_operand:SI 2 "const_int_operand" ""))]
+  ""
+  "fcvt.fxu.trunc.s%2 %0 = %1%B0"
+  [(set_attr "type" "F")])
 
 ;; ::::::::::::::::::::
 ;; ::
@@ -1400,7 +1436,7 @@
   "xma.l %0 = %1, %2, f0%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*maddsi3"
+(define_insn "maddsi4"
   [(set (match_operand:SI 0 "fr_register_operand" "=f")
 	(plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
 			  (match_operand:SI 2 "grfr_register_operand" "f"))
@@ -1481,6 +1517,172 @@
   operands[3] = gen_reg_rtx (CCmode);
 }")
 
+(define_expand "divsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(div:SI (match_operand:SI 1 "general_operand" "")
+		(match_operand:SI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op1_tf, op2_tf, op0_tf, op0_di, twon34;
+  /* Inline signed SImode division, carried out in TFmode.  */
+  op0_tf = gen_reg_rtx (TFmode);
+  op0_di = gen_reg_rtx (DImode);
+  /* Convert the dividend to TFmode as a signed value.  */
+  if (CONSTANT_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+  op1_tf = gen_reg_rtx (TFmode);
+  expand_float (op1_tf, operands[1], 0);
+  /* Likewise for the divisor.  */
+  if (CONSTANT_P (operands[2]))
+    operands[2] = force_reg (SImode, operands[2]);
+  op2_tf = gen_reg_rtx (TFmode);
+  expand_float (op2_tf, operands[2], 0);
+
+  /* 2^-34: SFmode bit pattern 0x2e800000, widened to TFmode below.  */
+#if 0
+  twon34 = (CONST_DOUBLE_FROM_REAL_VALUE
+	    (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode));
+  twon34 = force_reg (TFmode, twon34);
+#else
+  twon34 = gen_reg_rtx (TFmode);
+  convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0);
+#endif
+  /* The TFmode quotient; twon34 is used by the divsi3_internal split.  */
+  emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34));
+  /* Truncate with the .s1 form of fcvt.fx.trunc, then take the low part.  */
+  emit_insn (gen_fix_trunctfdi2_alts (op0_di, op0_tf, const1_rtx));
+  emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+  DONE;
+}")
+
+(define_expand "modsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(mod:SI (match_operand:SI 1 "general_operand" "")
+		(match_operand:SI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op2_neg, op1_di, div;
+  /* Signed remainder is op1 + div * (-op2); start with the quotient.  */
+  div = gen_reg_rtx (SImode);
+  emit_insn (gen_divsi3 (div, operands[1], operands[2]));
+  /* Negate the divisor so that one multiply-add yields the remainder.  */
+  op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
+
+  /* Sign-extend the dividend to DImode.  This is a trick to reuse the
+     value that we're sure to have already copied to the FP regs.  */
+  op1_di = gen_reg_rtx (DImode);
+  convert_move (op1_di, operands[1], 0);
+  /* operands[0] = div * op2_neg + low SImode part of op1_di.  */
+  emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
+			  gen_lowpart (SImode, op1_di)));
+  DONE;
+}")
+
+(define_expand "udivsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(udiv:SI (match_operand:SI 1 "general_operand" "")
+		 (match_operand:SI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op1_tf, op2_tf, op0_tf, op0_di, twon34;
+  /* Inline unsigned SImode division, carried out in TFmode.  */
+  op0_tf = gen_reg_rtx (TFmode);
+  op0_di = gen_reg_rtx (DImode);
+  /* Convert the dividend to TFmode as an unsigned value.  */
+  if (CONSTANT_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+  op1_tf = gen_reg_rtx (TFmode);
+  expand_float (op1_tf, operands[1], 1);
+  /* Likewise for the divisor.  */
+  if (CONSTANT_P (operands[2]))
+    operands[2] = force_reg (SImode, operands[2]);
+  op2_tf = gen_reg_rtx (TFmode);
+  expand_float (op2_tf, operands[2], 1);
+
+  /* 2^-34: SFmode bit pattern 0x2e800000, widened to TFmode below.  */
+#if 0
+  twon34 = (CONST_DOUBLE_FROM_REAL_VALUE
+	    (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode));
+  twon34 = force_reg (TFmode, twon34);
+#else
+  twon34 = gen_reg_rtx (TFmode);
+  convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0);
+#endif
+  /* Same TFmode sequence as divsi3; only the conversions differ.  */
+  emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34));
+  /* Truncate with the .s1 form of fcvt.fxu.trunc, then take the low part.  */
+  emit_insn (gen_fixuns_trunctfdi2_alts (op0_di, op0_tf, const1_rtx));
+  emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+  DONE;
+}")
+
+(define_expand "umodsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(umod:SI (match_operand:SI 1 "general_operand" "")
+		 (match_operand:SI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op2_neg, op1_di, div;
+  /* Unsigned remainder is op1 + div * (-op2); start with the quotient.  */
+  div = gen_reg_rtx (SImode);
+  emit_insn (gen_udivsi3 (div, operands[1], operands[2]));
+  /* Negate the divisor so that one multiply-add yields the remainder.  */
+  op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
+
+  /* Zero-extend the dividend to DImode.  This is a trick to reuse the
+     value that we're sure to have already copied to the FP regs.  */
+  op1_di = gen_reg_rtx (DImode);
+  convert_move (op1_di, operands[1], 1);
+  /* operands[0] = div * op2_neg + low SImode part of op1_di.  */
+  emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
+			  gen_lowpart (SImode, op1_di)));
+  DONE;
+}")
+
+(define_insn_and_split "divsi3_internal"
+  [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+			  (match_operand:TF 2 "fr_register_operand" "f"))))
+   (clobber (match_scratch:TF 4 "=&f"))
+   (clobber (match_scratch:TF 5 "=&f"))
+   (clobber (match_scratch:CC 6 "=c"))
+   (use (match_operand:TF 3 "fr_register_operand" "f"))]
+  "TARGET_INLINE_DIV"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+	      (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+	      (use (const_int 1))])
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 5)
+		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+			      (match_dup 7)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 4)
+		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+			      (match_dup 4)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 5)
+		     (plus:TF (mult:TF (match_dup 5) (match_dup 5))
+			      (match_dup 3)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+			      (match_dup 4)))
+		(use (const_int 1))]))
+  ] 
+  "operands[7] = CONST1_RTX (TFmode);"
+  [(set_attr "predicable" "no")])
 
 ;; ::::::::::::::::::::
 ;; ::
@@ -1557,7 +1759,7 @@
 
 ;; ??? Maybe we should change how adds are canonicalized.
 
-(define_insn "*madddi3"
+(define_insn "madddi4"
   [(set (match_operand:DI 0 "fr_register_operand" "=f")
 	(plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
 			  (match_operand:DI 2 "grfr_register_operand" "f"))
@@ -1572,10 +1774,10 @@
 
 ;; We have to use nonmemory_operand for operand 4, to ensure that the
 ;; validate_changes call inside eliminate_regs will always succeed.  If it
-;; doesn't succeed, then this remain a madddi3 pattern, and will be reloaded
+;; doesn't succeed, then this remains a madddi4 pattern, and will be reloaded
 ;; incorrectly.
 
-(define_insn "*madddi3_elim"
+(define_insn "*madddi4_elim"
   [(set (match_operand:DI 0 "register_operand" "=&r")
 	(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
 				   (match_operand:DI 2 "register_operand" "f"))
@@ -1734,6 +1936,208 @@
   "popcnt %0 = %1"
   [(set_attr "type" "I")])
 
+(define_expand "divdi3"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(div:DI (match_operand:DI 1 "general_operand" "")
+		(match_operand:DI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op1_tf, op2_tf, op0_tf;
+  /* Inline signed DImode division, carried out in TFmode.  */
+  op0_tf = gen_reg_rtx (TFmode);
+  /* Convert the dividend to TFmode as a signed value.  */
+  if (CONSTANT_P (operands[1]))
+    operands[1] = force_reg (DImode, operands[1]);
+  op1_tf = gen_reg_rtx (TFmode);
+  expand_float (op1_tf, operands[1], 0);
+  /* Likewise for the divisor.  */
+  if (CONSTANT_P (operands[2]))
+    operands[2] = force_reg (DImode, operands[2]);
+  op2_tf = gen_reg_rtx (TFmode);
+  expand_float (op2_tf, operands[2], 0);
+  /* Pick the minimum-latency or the maximum-throughput sequence.  */
+  if (TARGET_INLINE_DIV_LAT)
+    emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
+  else
+    emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));
+  /* Truncate the quotient with the .s1 form of fcvt.fx.trunc.  */
+  emit_insn (gen_fix_trunctfdi2_alts (operands[0], op0_tf, const1_rtx));
+  DONE;
+}")
+
+(define_expand "moddi3"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(mod:DI (match_operand:DI 1 "general_operand" "")
+		(match_operand:DI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op2_neg, div;
+  /* Signed remainder is op1 + div * (-op2); start with the quotient.  */
+  div = gen_reg_rtx (DImode);
+  emit_insn (gen_divdi3 (div, operands[1], operands[2]));
+  /* Negate the divisor so that one multiply-add yields the remainder.  */
+  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
+  /* operands[0] = div * op2_neg + operands[1].  */
+  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
+  DONE;
+}")
+
+(define_expand "udivdi3"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(udiv:DI (match_operand:DI 1 "general_operand" "")
+		 (match_operand:DI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op1_tf, op2_tf, op0_tf;
+  /* Inline unsigned DImode division, carried out in TFmode.  */
+  op0_tf = gen_reg_rtx (TFmode);
+  /* Convert the dividend to TFmode as an unsigned value.  */
+  if (CONSTANT_P (operands[1]))
+    operands[1] = force_reg (DImode, operands[1]);
+  op1_tf = gen_reg_rtx (TFmode);
+  expand_float (op1_tf, operands[1], 1);
+  /* Likewise for the divisor.  */
+  if (CONSTANT_P (operands[2]))
+    operands[2] = force_reg (DImode, operands[2]);
+  op2_tf = gen_reg_rtx (TFmode);
+  expand_float (op2_tf, operands[2], 1);
+  /* Pick the minimum-latency or the maximum-throughput sequence.  */
+  if (TARGET_INLINE_DIV_LAT)
+    emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
+  else
+    emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));
+  /* Truncate the quotient with the .s1 form of fcvt.fxu.trunc.  */
+  emit_insn (gen_fixuns_trunctfdi2_alts (operands[0], op0_tf, const1_rtx));
+  DONE;
+}")
+
+(define_expand "umoddi3"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(umod:DI (match_operand:DI 1 "general_operand" "")
+		 (match_operand:DI 2 "general_operand" "")))]
+  "TARGET_INLINE_DIV"
+  "
+{
+  rtx op2_neg, div;
+  /* Unsigned remainder is op1 + div * (-op2); start with the quotient.  */
+  div = gen_reg_rtx (DImode);
+  emit_insn (gen_udivdi3 (div, operands[1], operands[2]));
+  /* Negate the divisor so that one multiply-add yields the remainder.  */
+  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
+  /* operands[0] = div * op2_neg + operands[1].  */
+  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
+  DONE;
+}")
+
+(define_insn_and_split "divdi3_internal_lat"
+  [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+			  (match_operand:TF 2 "fr_register_operand" "f"))))
+   (clobber (match_scratch:TF 3 "=&f"))
+   (clobber (match_scratch:TF 4 "=&f"))
+   (clobber (match_scratch:TF 5 "=&f"))
+   (clobber (match_scratch:CC 6 "=c"))]
+  "TARGET_INLINE_DIV_LAT"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+	      (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+	      (use (const_int 1))])
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 3)
+		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+			      (match_dup 7)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 5) (mult:TF (match_dup 3) (match_dup 3)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 4)
+		     (plus:TF (mult:TF (match_dup 3) (match_dup 4))
+			      (match_dup 4)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+			      (match_dup 0)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 3)
+		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+			      (match_dup 4)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 5) (match_dup 0))
+			      (match_dup 0)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 4)
+		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
+			      (match_dup 1)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 6) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
+			      (match_dup 3)))
+		(use (const_int 1))]))
+  ] 
+  "operands[7] = CONST1_RTX (TFmode);"
+  [(set_attr "predicable" "no")])
+
+(define_insn_and_split "divdi3_internal_thr"
+  [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+			  (match_operand:TF 2 "fr_register_operand" "f"))))
+   (clobber (match_scratch:TF 3 "=&f"))
+   (clobber (match_scratch:TF 4 "=f"))
+   (clobber (match_scratch:CC 5 "=c"))]
+  "TARGET_INLINE_DIV_THR"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+	      (set (match_dup 5) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+	      (use (const_int 1))])
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 3)
+		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+			      (match_dup 6)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+			      (match_dup 0)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 3) (mult:TF (match_dup 3) (match_dup 3)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+			      (match_dup 0)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 3) (mult:TF (match_dup 0) (match_dup 1)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 4)
+		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
+			      (match_dup 1)))
+		(use (const_int 1))]))
+   (cond_exec (ne (match_dup 5) (const_int 0))
+     (parallel [(set (match_dup 0)
+		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
+			      (match_dup 3)))
+		(use (const_int 1))]))
+  ] 
+  "operands[6] = CONST1_RTX (TFmode);"
+  [(set_attr "predicable" "no")])
 
 ;; ::::::::::::::::::::
 ;; ::
@@ -1802,7 +2206,7 @@
   "fmax %0 = %1, %F2%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*maddsf3"
+(define_insn "*maddsf4"
   [(set (match_operand:SF 0 "fr_register_operand" "=f")
 	(plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
 			  (match_operand:SF 2 "fr_register_operand" "f"))
@@ -1811,7 +2215,7 @@
   "fma.s %0 = %1, %2, %F3%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*msubsf3"
+(define_insn "*msubsf4"
   [(set (match_operand:SF 0 "fr_register_operand" "=f")
 	(minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
 			   (match_operand:SF 2 "fr_register_operand" "f"))
@@ -1830,7 +2234,7 @@
 
 ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
 
-(define_insn "*nmaddsf3"
+(define_insn "*nmaddsf4"
   [(set (match_operand:SF 0 "fr_register_operand" "=f")
 	(plus:SF (neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
 				  (match_operand:SF 2 "fr_register_operand" "f")))
@@ -1907,7 +2311,7 @@
   "fmax %0 = %1, %F2%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*madddf3"
+(define_insn "*madddf4"
   [(set (match_operand:DF 0 "fr_register_operand" "=f")
 	(plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
 			  (match_operand:DF 2 "fr_register_operand" "f"))
@@ -1916,7 +2320,7 @@
   "fma.d %0 = %1, %2, %F3%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*msubdf3"
+(define_insn "*msubdf4"
   [(set (match_operand:DF 0 "fr_register_operand" "=f")
 	(minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
 			   (match_operand:DF 2 "fr_register_operand" "f"))
@@ -1935,7 +2339,7 @@
 
 ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
 
-(define_insn "*nmadddf3"
+(define_insn "*nmadddf4"
   [(set (match_operand:DF 0 "fr_register_operand" "=f")
 	(plus:DF (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
 				  (match_operand:DF 2 "fr_register_operand" "f")))
@@ -1974,6 +2378,15 @@
   "fmpy %0 = %F1, %F2%B0"
   [(set_attr "type" "F")])
 
+(define_insn "*multf3_alts"
+  [(set (match_operand:TF 0 "fr_register_operand" "=f")
+	(mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+		 (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
+   (use (match_operand:SI 3 "const_int_operand" ""))]
+  ""
+  "fmpy.s%3 %0 = %F1, %F2%B0"
+  [(set_attr "type" "F")])
+
 (define_insn "abstf2"
   [(set (match_operand:TF 0 "fr_register_operand" "=f")
 	(abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))]
@@ -2011,7 +2424,7 @@
   "fmax %0 = %F1, %F2%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*maddtf3"
+(define_insn "*maddtf4"
   [(set (match_operand:TF 0 "fr_register_operand" "=f")
 	(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
 			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
@@ -2020,7 +2433,17 @@
   "fma %0 = %F1, %F2, %F3%B0"
   [(set_attr "type" "F")])
 
-(define_insn "*msubtf3"
+(define_insn "*maddtf4_alts"
+  [(set (match_operand:TF 0 "fr_register_operand" "=f")
+	(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
+		 (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))
+   (use (match_operand:SI 4 "const_int_operand" ""))]
+  ""
+  "fma.s%4 %0 = %F1, %F2, %F3%B0"
+  [(set_attr "type" "F")])
+
+(define_insn "*msubtf4"
   [(set (match_operand:TF 0 "fr_register_operand" "=f")
 	(minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
 			   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
@@ -2039,7 +2462,7 @@
 
 ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
 
-(define_insn "*nmaddtf3"
+(define_insn "*nmaddtf4"
   [(set (match_operand:TF 0 "fr_register_operand" "=f")
 	(plus:TF (neg:TF (mult:TF
 			  (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
@@ -2048,6 +2471,29 @@
   ""
   "fnma %0 = %F1, %F2, %F3%B0"
   [(set_attr "type" "F")])
+
+(define_insn "*nmaddtf4_alts"
+  [(set (match_operand:TF 0 "fr_register_operand" "=f")
+	(plus:TF (neg:TF (mult:TF
+			  (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
+		 (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))
+   (use (match_operand:SI 4 "const_int_operand" ""))]
+  ""
+  "fnma.s%4 %0 = %F1, %F2, %F3%B0"
+  [(set_attr "type" "F")])
+
+(define_insn "*recip_approx"
+  [(set (match_operand:TF 0 "fr_register_operand" "=f")
+	(div:TF (const_int 1)
+		(match_operand:TF 3 "fr_register_operand" "f")))
+   (set (match_operand:CC 1 "register_operand" "=c")
+	(unspec:CC [(match_operand:TF 2 "fr_register_operand" "f")
+		    (match_dup 3)] 5))
+   (use (match_operand:SI 4 "const_int_operand" ""))]
+  ""
+  "frcpa.s%4 %0, %1 = %2, %3"
+  [(set_attr "type" "F")])
 
 ;; ::::::::::::::::::::
 ;; ::
-- 
2.30.2