From 655f2eb93e47f4996700fe6dd0524a151504144c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 11 Sep 2000 14:15:50 -0700 Subject: [PATCH] ia64-protos.h (fr_nonimmediate_operand): Declare. * config/ia64/ia64-protos.h (fr_nonimmediate_operand): Declare. * config/ia64/ia64.c (fr_nonimmediate_operand): New. (ia64_override_options): Prevent optimizing division for both latency and throughput. (rtx_needs_barrier): Handle frcpa. * config/ia64/ia64.h (MASK_INLINE_DIV_LAT): New. (MASK_INLINE_DIV_THR, TARGET_INLINE_DIV_LAT): New. (TARGET_INLINE_DIV_THR, TARGET_INLINE_DIV): New. (TARGET_SWITCHES): Add -minline-divide-min-latency and -minline-divide-max-throughput. (PREDICATE_CODES): Update. * config/ia64/ia64.md (extendsidi2): Remove * from f case. (zero_extendsidi2): Likewise. Fix typo in f case insn. (extendsfdf2): Add cases for gr<->fr and fr<->mem. (extendsftf2): Likewise. (extenddftf2): Likewise. (fix_trunctfdi2_alts): New. (fixuns_trunctfdi2_alts): New. (madd*4): Rename from madd*3. (divsi3, modsi3, udivsi3, umodsi3): New. (divsi3_internal): New. (divdi3, moddi3, udivdi3, umoddi3): New. (divdi3_internal_lat, divdi3_internal_thr): New. (multf3_alts, maddtf4_alts, nmaddtf4_alts): New. (recip_approx): New. From-SVN: r36330 --- gcc/ChangeLog | 28 ++ gcc/config/ia64/ia64-protos.h | 1 + gcc/config/ia64/ia64.c | 31 +++ gcc/config/ia64/ia64.h | 16 ++ gcc/config/ia64/ia64.md | 506 ++++++++++++++++++++++++++++++++-- 5 files changed, 552 insertions(+), 30 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1f902ca8529..31d94b4b8f5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2000-09-11 Richard Henderson + + * config/ia64/ia64-protos.h (fr_nonimmediate_operand): Declare. + * config/ia64/ia64.c (fr_nonimmediate_operand): New. + (ia64_override_options): Prevent optimizing division for both + latency and throughput. + (rtx_needs_barrier): Handle frcpa. + * config/ia64/ia64.h (MASK_INLINE_DIV_LAT): New. 
+ (MASK_INLINE_DIV_THR, TARGET_INLINE_DIV_LAT): New. + (TARGET_INLINE_DIV_THR, TARGET_INLINE_DIV): New. + (TARGET_SWITCHES): Add -minline-divide-min-latency and + -minline-divide-max-throughput. + (PREDICATE_CODES): Update. + * config/ia64/ia64.md (extendsidi2): Remove * from f case. + (zero_extendsidi2): Likewise. Fix typo in f case insn. + (extendsfdf2): Add cases for gr<->fr and fr<->mem. + (extendsftf2): Likewise. + (extenddftf2): Likewise. + (fix_trunctfdi2_alts): New. + (fixuns_trunctfdi2_alts): New. + (madd*4): Rename from madd*3. + (divsi3, modsi3, udivsi3, umodsi3): New. + (divsi3_internal): New. + (divdi3, moddi3, udivdi3, umoddi3): New. + (divdi3_internal_lat, divdi3_internal_thr): New. + (multf3_alts, maddtf4_alts, nmaddtf4_alts): New. + (recip_approx): New. + 2000-09-11 Alexandre Oliva * print-rtl.c (debug_call_placeholder_verbose): New variable. diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 36a67eb26ac..d213b4180f8 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -38,6 +38,7 @@ extern int gr_register_operand PARAMS((rtx, enum machine_mode)); extern int fr_register_operand PARAMS((rtx, enum machine_mode)); extern int grfr_register_operand PARAMS((rtx, enum machine_mode)); extern int gr_nonimmediate_operand PARAMS((rtx, enum machine_mode)); +extern int fr_nonimmediate_operand PARAMS((rtx, enum machine_mode)); extern int grfr_nonimmediate_operand PARAMS((rtx, enum machine_mode)); extern int gr_reg_or_0_operand PARAMS((rtx, enum machine_mode)); extern int gr_reg_or_5bit_operand PARAMS((rtx, enum machine_mode)); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 7b422351b96..57de975c870 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -397,6 +397,26 @@ gr_nonimmediate_operand (op, mode) return 1; } +/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. 
*/ + +int +fr_nonimmediate_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! nonimmediate_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) == REG) + { + unsigned int regno = REGNO (op); + if (regno < FIRST_PSEUDO_REGISTER) + return FR_REGNO_P (regno); + } + return 1; +} + /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */ int @@ -3484,6 +3504,12 @@ ia64_override_options () if (TARGET_AUTO_PIC) target_flags |= MASK_CONST_GP; + if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR) + { + warning ("cannot optimize division for both latency and throughput"); + target_flags &= ~MASK_INLINE_DIV_THR; + } + if (ia64_fixed_range_string) fix_range (ia64_fixed_range_string); @@ -3971,6 +3997,11 @@ rtx_needs_barrier (x, flags, pred) case 21: /* flushrs */ break; + case 5: /* recip_approx */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + break; + case 13: /* cmpxchg_acq */ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index 86463b4e029..1f975f3913d 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -63,6 +63,10 @@ extern int target_flags; #define MASK_AUTO_PIC 0x00000200 /* generate automatically PIC */ +#define MASK_INLINE_DIV_LAT 0x00000400 /* inline div, min latency. */ + +#define MASK_INLINE_DIV_THR 0x00000800 /* inline div, max throughput. */ + #define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. 
*/ #define TARGET_BIG_ENDIAN (target_flags & MASK_BIG_ENDIAN) @@ -85,6 +89,13 @@ extern int target_flags; #define TARGET_AUTO_PIC (target_flags & MASK_AUTO_PIC) +#define TARGET_INLINE_DIV_LAT (target_flags & MASK_INLINE_DIV_LAT) + +#define TARGET_INLINE_DIV_THR (target_flags & MASK_INLINE_DIV_THR) + +#define TARGET_INLINE_DIV \ + (target_flags & (MASK_INLINE_DIV_LAT | MASK_INLINE_DIV_THR)) + #define TARGET_DWARF2_ASM (target_flags & MASK_DWARF2_ASM) /* This macro defines names of command options to set and clear bits in @@ -123,6 +134,10 @@ extern int target_flags; N_("gp is constant (but save/restore gp on indirect calls)") }, \ { "auto-pic", MASK_AUTO_PIC, \ N_("Generate self-relocatable code") }, \ + { "inline-divide-min-latency", MASK_INLINE_DIV_LAT, \ + N_("Generate inline division, optimize for latency") }, \ + { "inline-divide-max-throughput", MASK_INLINE_DIV_THR, \ + N_("Generate inline division, optimize for throughput") }, \ { "dwarf2-asm", MASK_DWARF2_ASM, \ N_("Enable Dwarf 2 line debug info via GNU as")}, \ { "no-dwarf2-asm", -MASK_DWARF2_ASM, \ @@ -2646,6 +2661,7 @@ do { \ { "fr_register_operand", {SUBREG, REG}}, \ { "grfr_register_operand", {SUBREG, REG}}, \ { "gr_nonimmediate_operand", {SUBREG, REG, MEM}}, \ +{ "fr_nonimmediate_operand", {SUBREG, REG, MEM}}, \ { "grfr_nonimmediate_operand", {SUBREG, REG, MEM}}, \ { "gr_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \ { "gr_reg_or_5bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md index 129ce240316..8593bf62054 100644 --- a/gcc/config/ia64/ia64.md +++ b/gcc/config/ia64/ia64.md @@ -57,6 +57,7 @@ ;; 2 gr_restore ;; 3 fr_spill ;; 4 fr_restore +;; 5 recip_approx ;; 8 popcnt ;; 12 mf ;; 13 cmpxchg_acq @@ -950,8 +951,8 @@ [(set_attr "type" "I")]) (define_insn "extendsidi2" - [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f") - (sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,*f")))] + [(set (match_operand:DI 0 
"grfr_register_operand" "=r,?f") + (sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,f")))] "" "@ sxt4 %0 = %1 @@ -979,14 +980,14 @@ [(set_attr "type" "I,M")]) (define_insn "zero_extendsidi2" - [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,*f") + [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,?f") (zero_extend:DI - (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,*f")))] + (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,f")))] "" "@ zxt4 %0 = %1 ld4%O1 %0 = %1%P1 - fsxt.r %0 = f1, %1%B0" + fmix.r %0 = f0, %1%B0" [(set_attr "type" "I,M,F")]) ;; Convert between floating point types of different sizes. @@ -997,34 +998,53 @@ ;; would let combine merge the thing into adjacent insns. (define_insn_and_split "extendsfdf2" - [(set (match_operand:DF 0 "fr_register_operand" "=f,f") - (float_extend:DF (match_operand:SF 1 "fr_register_operand" "0,f")))] + [(set (match_operand:DF 0 "grfr_nonimmediate_operand" "=f,f,f,f,m,*r") + (float_extend:DF + (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,m,*r,f,f")))] "" - "mov %0 = %1" + "@ + mov %0 = %1 + mov %0 = %1 + ldfs %0 = %1%P1 + setf.s %0 = %1 + stfd %0 = %1%P0 + getf.d %0 = %1" "reload_completed" [(set (match_dup 0) (float_extend:DF (match_dup 1)))] "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;" - [(set_attr "type" "F")]) + [(set_attr "type" "F,F,M,M,M,M")]) (define_insn_and_split "extendsftf2" - [(set (match_operand:TF 0 "fr_register_operand" "=f,f") - (float_extend:TF (match_operand:SF 1 "fr_register_operand" "0,f")))] + [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q") + (float_extend:TF + (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))] "" - "mov %0 = %1" + "@ + mov %0 = %1 + mov %0 = %1 + ldfs %0 = %1%P1 + setf.s %0 = %1 + stfe %0 = %1%P0" "reload_completed" [(set (match_dup 0) (float_extend:TF (match_dup 1)))] "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;" - [(set_attr "type" "F")]) + [(set_attr "type" 
"F,F,M,M,M")]) (define_insn_and_split "extenddftf2" - [(set (match_operand:TF 0 "fr_register_operand" "=f,f") - (float_extend:TF (match_operand:DF 1 "fr_register_operand" "0,f")))] + [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q") + (float_extend:TF + (match_operand:DF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))] "" - "mov %0 = %1" + "@ + mov %0 = %1 + mov %0 = %1 + ldfd %0 = %1%P1 + setf.d %0 = %1 + stfe %0 = %1%P0" "reload_completed" [(set (match_dup 0) (float_extend:TF (match_dup 1)))] "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;" - [(set_attr "type" "F")]) + [(set_attr "type" "F,F,M,M,M")]) (define_insn "truncdfsf2" [(set (match_operand:SF 0 "fr_register_operand" "=f") @@ -1077,6 +1097,14 @@ "fcvt.fx.trunc %0 = %1%B0" [(set_attr "type" "F")]) +(define_insn "fix_trunctfdi2_alts" + [(set (match_operand:DI 0 "fr_register_operand" "=f") + (fix:DI (match_operand:TF 1 "fr_register_operand" "f"))) + (use (match_operand:SI 2 "const_int_operand" ""))] + "" + "fcvt.fx.trunc.s%2 %0 = %1%B0" + [(set_attr "type" "F")]) + ;; Convert between unsigned integer types and floating point. 
(define_insn "floatunsdisf2" @@ -1120,6 +1148,14 @@ "" "fcvt.fxu.trunc %0 = %1%B0" [(set_attr "type" "F")]) + +(define_insn "fixuns_trunctfdi2_alts" + [(set (match_operand:DI 0 "fr_register_operand" "=f") + (unsigned_fix:DI (match_operand:TF 1 "fr_register_operand" "f"))) + (use (match_operand:SI 2 "const_int_operand" ""))] + "" + "fcvt.fxu.trunc.s%2 %0 = %1%B0" + [(set_attr "type" "F")]) ;; :::::::::::::::::::: ;; :: @@ -1400,7 +1436,7 @@ "xma.l %0 = %1, %2, f0%B0" [(set_attr "type" "F")]) -(define_insn "*maddsi3" +(define_insn "maddsi4" [(set (match_operand:SI 0 "fr_register_operand" "=f") (plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f") (match_operand:SI 2 "grfr_register_operand" "f")) @@ -1481,6 +1517,172 @@ operands[3] = gen_reg_rtx (CCmode); }") +(define_expand "divsi3" + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op1_tf, op2_tf, op0_tf, op0_di, twon34; + + op0_tf = gen_reg_rtx (TFmode); + op0_di = gen_reg_rtx (DImode); + + if (CONSTANT_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + op1_tf = gen_reg_rtx (TFmode); + expand_float (op1_tf, operands[1], 0); + + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + op2_tf = gen_reg_rtx (TFmode); + expand_float (op2_tf, operands[2], 0); + + /* 2^-34 */ +#if 0 + twon34 = (CONST_DOUBLE_FROM_REAL_VALUE + (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode)); + twon34 = force_reg (TFmode, twon34); +#else + twon34 = gen_reg_rtx (TFmode); + convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0); +#endif + + emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34)); + + emit_insn (gen_fix_trunctfdi2_alts (op0_di, op0_tf, const1_rtx)); + emit_move_insn (operands[0], gen_lowpart (SImode, op0_di)); + DONE; +}") + +(define_expand "modsi3" 
+ [(set (match_operand:SI 0 "register_operand" "") + (mod:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op2_neg, op1_di, div; + + div = gen_reg_rtx (SImode); + emit_insn (gen_divsi3 (div, operands[1], operands[2])); + + op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); + + /* This is a trick to get us to reuse the value that we're sure to + have already copied to the FP regs. */ + op1_di = gen_reg_rtx (DImode); + convert_move (op1_di, operands[1], 0); + + emit_insn (gen_maddsi4 (operands[0], div, op2_neg, + gen_lowpart (SImode, op1_di))); + DONE; +}") + +(define_expand "udivsi3" + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op1_tf, op2_tf, op0_tf, op0_di, twon34; + + op0_tf = gen_reg_rtx (TFmode); + op0_di = gen_reg_rtx (DImode); + + if (CONSTANT_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + op1_tf = gen_reg_rtx (TFmode); + expand_float (op1_tf, operands[1], 1); + + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + op2_tf = gen_reg_rtx (TFmode); + expand_float (op2_tf, operands[2], 1); + + /* 2^-34 */ +#if 0 + twon34 = (CONST_DOUBLE_FROM_REAL_VALUE + (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode)); + twon34 = force_reg (TFmode, twon34); +#else + twon34 = gen_reg_rtx (TFmode); + convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0); +#endif + + emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34)); + + emit_insn (gen_fixuns_trunctfdi2_alts (op0_di, op0_tf, const1_rtx)); + emit_move_insn (operands[0], gen_lowpart (SImode, op0_di)); + DONE; +}") + +(define_expand "umodsi3" + [(set (match_operand:SI 0 "register_operand" "") + (umod:SI (match_operand:SI 1 "general_operand" "") + 
(match_operand:SI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op2_neg, op1_di, div; + + div = gen_reg_rtx (SImode); + emit_insn (gen_udivsi3 (div, operands[1], operands[2])); + + op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); + + /* This is a trick to get us to reuse the value that we're sure to + have already copied to the FP regs. */ + op1_di = gen_reg_rtx (DImode); + convert_move (op1_di, operands[1], 1); + + emit_insn (gen_maddsi4 (operands[0], div, op2_neg, + gen_lowpart (SImode, op1_di))); + DONE; +}") + +(define_insn_and_split "divsi3_internal" + [(set (match_operand:TF 0 "fr_register_operand" "=&f") + (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f") + (match_operand:TF 2 "fr_register_operand" "f")))) + (clobber (match_scratch:TF 4 "=&f")) + (clobber (match_scratch:TF 5 "=&f")) + (clobber (match_scratch:CC 6 "=c")) + (use (match_operand:TF 3 "fr_register_operand" "f"))] + "TARGET_INLINE_DIV" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2))) + (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5)) + (use (const_int 1))]) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 5) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0))) + (match_dup 7))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (mult:TF (match_dup 5) (match_dup 4)) + (match_dup 4))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 5) + (plus:TF (mult:TF (match_dup 5) (match_dup 5)) + (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 5) (match_dup 4)) + (match_dup 4))) + (use (const_int 1))])) + ] + 
"operands[7] = CONST1_RTX (TFmode);" + [(set_attr "predicable" "no")]) ;; :::::::::::::::::::: ;; :: @@ -1557,7 +1759,7 @@ ;; ??? Maybe we should change how adds are canonicalized. -(define_insn "*madddi3" +(define_insn "madddi4" [(set (match_operand:DI 0 "fr_register_operand" "=f") (plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f") (match_operand:DI 2 "grfr_register_operand" "f")) @@ -1572,10 +1774,10 @@ ;; We have to use nonmemory_operand for operand 4, to ensure that the ;; validate_changes call inside eliminate_regs will always succeed. If it -;; doesn't succeed, then this remain a madddi3 pattern, and will be reloaded +;; doesn't succeed, then this remains a madddi4 pattern, and will be reloaded ;; incorrectly. -(define_insn "*madddi3_elim" +(define_insn "*madddi4_elim" [(set (match_operand:DI 0 "register_operand" "=&r") (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f") (match_operand:DI 2 "register_operand" "f")) @@ -1734,6 +1936,208 @@ "popcnt %0 = %1" [(set_attr "type" "I")]) +(define_expand "divdi3" + [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "general_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op1_tf, op2_tf, op0_tf; + + op0_tf = gen_reg_rtx (TFmode); + + if (CONSTANT_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + op1_tf = gen_reg_rtx (TFmode); + expand_float (op1_tf, operands[1], 0); + + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + op2_tf = gen_reg_rtx (TFmode); + expand_float (op2_tf, operands[2], 0); + + if (TARGET_INLINE_DIV_LAT) + emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf)); + else + emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf)); + + emit_insn (gen_fix_trunctfdi2_alts (operands[0], op0_tf, const1_rtx)); + DONE; +}") + +(define_expand "moddi3" + [(set (match_operand:DI 0 "register_operand" "") + (mod:DI (match_operand:DI 1 "general_operand" 
"") + (match_operand:DI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op2_neg, div; + + div = gen_reg_rtx (DImode); + emit_insn (gen_divdi3 (div, operands[1], operands[2])); + + op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0); + + emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1])); + DONE; +}") + +(define_expand "udivdi3" + [(set (match_operand:DI 0 "register_operand" "") + (udiv:DI (match_operand:DI 1 "general_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op1_tf, op2_tf, op0_tf; + + op0_tf = gen_reg_rtx (TFmode); + + if (CONSTANT_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + op1_tf = gen_reg_rtx (TFmode); + expand_float (op1_tf, operands[1], 1); + + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + op2_tf = gen_reg_rtx (TFmode); + expand_float (op2_tf, operands[2], 1); + + if (TARGET_INLINE_DIV_LAT) + emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf)); + else + emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf)); + + emit_insn (gen_fixuns_trunctfdi2_alts (operands[0], op0_tf, const1_rtx)); + DONE; +}") + +(define_expand "umoddi3" + [(set (match_operand:DI 0 "register_operand" "") + (umod:DI (match_operand:DI 1 "general_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "TARGET_INLINE_DIV" + " +{ + rtx op2_neg, div; + + div = gen_reg_rtx (DImode); + emit_insn (gen_udivdi3 (div, operands[1], operands[2])); + + op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0); + + emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1])); + DONE; +}") + +(define_insn_and_split "divdi3_internal_lat" + [(set (match_operand:TF 0 "fr_register_operand" "=&f") + (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f") + (match_operand:TF 2 "fr_register_operand" "f")))) + (clobber (match_scratch:TF 3 "=&f")) + (clobber (match_scratch:TF 4 "=&f")) + (clobber (match_scratch:TF 5 
"=&f")) + (clobber (match_scratch:CC 6 "=c"))] + "TARGET_INLINE_DIV_LAT" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2))) + (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5)) + (use (const_int 1))]) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 3) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0))) + (match_dup 7))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 5) (mult:TF (match_dup 3) (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (mult:TF (match_dup 3) (match_dup 4)) + (match_dup 4))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 3) (match_dup 0)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 3) + (plus:TF (mult:TF (match_dup 5) (match_dup 4)) + (match_dup 4))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 5) (match_dup 0)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3))) + (match_dup 1))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 6) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 4) (match_dup 0)) + (match_dup 3))) + (use (const_int 1))])) + ] + "operands[7] = CONST1_RTX (TFmode);" + [(set_attr "predicable" "no")]) + +(define_insn_and_split "divdi3_internal_thr" + [(set (match_operand:TF 0 "fr_register_operand" "=&f") + (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" 
"f") + (match_operand:TF 2 "fr_register_operand" "f")))) + (clobber (match_scratch:TF 3 "=&f")) + (clobber (match_scratch:TF 4 "=f")) + (clobber (match_scratch:CC 5 "=c"))] + "TARGET_INLINE_DIV_THR" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2))) + (set (match_dup 5) (unspec:CC [(match_dup 1) (match_dup 2)] 5)) + (use (const_int 1))]) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 3) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0))) + (match_dup 6))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 3) (match_dup 0)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 3) (mult:TF (match_dup 3) (match_dup 3))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 3) (match_dup 0)) + (match_dup 0))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 3) (mult:TF (match_dup 0) (match_dup 1))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 4) + (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3))) + (match_dup 1))) + (use (const_int 1))])) + (cond_exec (ne (match_dup 5) (const_int 0)) + (parallel [(set (match_dup 0) + (plus:TF (mult:TF (match_dup 4) (match_dup 0)) + (match_dup 3))) + (use (const_int 1))])) + ] + "operands[6] = CONST1_RTX (TFmode);" + [(set_attr "predicable" "no")]) ;; :::::::::::::::::::: ;; :: @@ -1802,7 +2206,7 @@ "fmax %0 = %1, %F2%B0" [(set_attr "type" "F")]) -(define_insn "*maddsf3" +(define_insn "*maddsf4" [(set (match_operand:SF 0 "fr_register_operand" "=f") (plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f") (match_operand:SF 2 "fr_register_operand" "f")) @@ -1811,7 +2215,7 @@ "fma.s %0 = %1, %2, %F3%B0" [(set_attr 
"type" "F")]) -(define_insn "*msubsf3" +(define_insn "*msubsf4" [(set (match_operand:SF 0 "fr_register_operand" "=f") (minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f") (match_operand:SF 2 "fr_register_operand" "f")) @@ -1830,7 +2234,7 @@ ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? -(define_insn "*nmaddsf3" +(define_insn "*nmaddsf4" [(set (match_operand:SF 0 "fr_register_operand" "=f") (plus:SF (neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f") (match_operand:SF 2 "fr_register_operand" "f"))) @@ -1907,7 +2311,7 @@ "fmax %0 = %1, %F2%B0" [(set_attr "type" "F")]) -(define_insn "*madddf3" +(define_insn "*madddf4" [(set (match_operand:DF 0 "fr_register_operand" "=f") (plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f") (match_operand:DF 2 "fr_register_operand" "f")) @@ -1916,7 +2320,7 @@ "fma.d %0 = %1, %2, %F3%B0" [(set_attr "type" "F")]) -(define_insn "*msubdf3" +(define_insn "*msubdf4" [(set (match_operand:DF 0 "fr_register_operand" "=f") (minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f") (match_operand:DF 2 "fr_register_operand" "f")) @@ -1935,7 +2339,7 @@ ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? 
-(define_insn "*nmadddf3" +(define_insn "*nmadddf4" [(set (match_operand:DF 0 "fr_register_operand" "=f") (plus:DF (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f") (match_operand:DF 2 "fr_register_operand" "f"))) @@ -1974,6 +2378,15 @@ "fmpy %0 = %F1, %F2%B0" [(set_attr "type" "F")]) +(define_insn "*multf3_alts" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (use (match_operand:SI 3 "const_int_operand" ""))] + "" + "fmpy.s%3 %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + (define_insn "abstf2" [(set (match_operand:TF 0 "fr_register_operand" "=f") (abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))] @@ -2011,7 +2424,7 @@ "fmax %0 = %F1, %F2%B0" [(set_attr "type" "F")]) -(define_insn "*maddtf3" +(define_insn "*maddtf4" [(set (match_operand:TF 0 "fr_register_operand" "=f") (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) @@ -2020,7 +2433,17 @@ "fma %0 = %F1, %F2, %F3%B0" [(set_attr "type" "F")]) -(define_insn "*msubtf3" +(define_insn "*maddtf4_alts" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))) + (use (match_operand:SI 4 "const_int_operand" ""))] + "" + "fma.s%4 %0 = %F1, %F2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*msubtf4" [(set (match_operand:TF 0 "fr_register_operand" "=f") (minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) @@ -2039,7 +2462,7 @@ ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))? 
-(define_insn "*nmaddtf3" +(define_insn "*nmaddtf4" [(set (match_operand:TF 0 "fr_register_operand" "=f") (plus:TF (neg:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") @@ -2048,6 +2471,29 @@ "" "fnma %0 = %F1, %F2, %F3%B0" [(set_attr "type" "F")]) + +(define_insn "*nmaddtf4_alts" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (plus:TF (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG"))) + (use (match_operand:SI 4 "const_int_operand" ""))] + "" + "fnma.s%4 %0 = %F1, %F2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*recip_approx" + [(set (match_operand:TF 0 "fr_register_operand" "=f") + (div:TF (const_int 1) + (match_operand:TF 3 "fr_register_operand" "f"))) + (set (match_operand:CC 1 "register_operand" "=c") + (unspec:CC [(match_operand:TF 2 "fr_register_operand" "f") + (match_dup 3)] 5)) + (use (match_operand:SI 4 "const_int_operand" ""))] + "" + "frcpa.s%4 %0, %1 = %2, %3" + [(set_attr "type" "F")]) ;; :::::::::::::::::::: ;; :: -- 2.30.2