From: Michael Meissner Date: Thu, 5 Jan 2017 00:43:53 +0000 (+0000) Subject: re PR target/71977 (powerpc64: Use VSR when operating on float and integer) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fba4b86109078adda686acc0102a923de709146d;p=gcc.git re PR target/71977 (powerpc64: Use VSR when operating on float and integer) [gcc] 2017-01-04 Michael Meissner PR target/71977 PR target/70568 PR target/78823 * config/rs6000/predicates.md (sf_subreg_operand): New predicate. (altivec_register_operand): Do not return true if the operand contains a SUBREG mixing SImode and SFmode. (vsx_register_operand): Likewise. (vsx_reg_sfsubreg_ok): New predicate. (vfloat_operand): Do not return true if the operand contains a SUBREG mixing SImode and SFmode. (vint_operand): Likewise. (vlogical_operand): Likewise. (gpc_reg_operand): Likewise. (int_reg_operand): Likewise. * config/rs6000/rs6000-protos.h (valid_sf_si_move): Add declaration. * config/rs6000/rs6000.c (valid_sf_si_move): New function to determine if a MOVSI or MOVSF operation contains SUBREGs that mix SImode and SFmode. (rs6000_emit_move_si_sf_subreg): New helper function. (rs6000_emit_move): Call rs6000_emit_move_si_sf_subreg to possbily fixup SUBREGs involving SImode and SFmode. * config/rs6000/vsx.md (SFBOOL_*): New constants that are operand numbers for the new peephole2 optimization. (peephole2 for SFmode unions): New peephole2 to optimize cases in the GLIBC math library that do AND/IOR/XOR operations on single precision floating point. * config/rs6000/rs6000.h (TARGET_NO_SF_SUBREG): New internal target macros to say whether we need to avoid SUBREGs mixing SImode and SFmode. (TARGET_ALLOW_SF_SUBREG): Likewise. * config/rs6000/rs6000.md (UNSPEC_SF_FROM_SI): New unspecs. (UNSPEC_SI_FROM_SF): Likewise. (iorxor): Change spacing. (and_ior_xor): New iterator for AND, IOR, and XOR. (movsi_from_sf): New insns for SImode/SFmode SUBREG support. (movdi_from_sf_zero_ext): Likewise. 
(mov_hardfloat, FMOVE32 iterator): Use register_operand instead of gpc_reg_operand. Add SImode/SFmode SUBREG support. (movsf_from_si): New insn for SImode/SFmode SUBREG support. (fma4): Use gpc_reg_operand instead of register_operand. (fms4): Likewise. (fnma4): Likewise. (fnms4): Likewise. (nfma4): Likewise. (nfms4): Likewise. [gcc/testsuite] 2017-01-04 Michael Meissner PR target/71977 PR target/70568 PR target/78823 * gcc.target/powerpc/pr71977-1.c: New tests to check whether on 64-bit VSX systems with direct move, whether we optimize common code sequences in the GLIBC math library for float math functions. * gcc.target/powerpc/pr71977-2.c: Likewise. From-SVN: r244084 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 45efbb47c86..e0a57f12768 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,52 @@ +2017-01-04 Michael Meissner + + PR target/71977 + PR target/70568 + PR target/78823 + * config/rs6000/predicates.md (sf_subreg_operand): New predicate. + (altivec_register_operand): Do not return true if the operand + contains a SUBREG mixing SImode and SFmode. + (vsx_register_operand): Likewise. + (vsx_reg_sfsubreg_ok): New predicate. + (vfloat_operand): Do not return true if the operand contains a + SUBREG mixing SImode and SFmode. + (vint_operand): Likewise. + (vlogical_operand): Likewise. + (gpc_reg_operand): Likewise. + (int_reg_operand): Likewise. + * config/rs6000/rs6000-protos.h (valid_sf_si_move): Add + declaration. + * config/rs6000/rs6000.c (valid_sf_si_move): New function to + determine if a MOVSI or MOVSF operation contains SUBREGs that mix + SImode and SFmode. + (rs6000_emit_move_si_sf_subreg): New helper function. + (rs6000_emit_move): Call rs6000_emit_move_si_sf_subreg to possbily + fixup SUBREGs involving SImode and SFmode. + * config/rs6000/vsx.md (SFBOOL_*): New constants that are operand + numbers for the new peephole2 optimization. 
+ (peephole2 for SFmode unions): New peephole2 to optimize cases in + the GLIBC math library that do AND/IOR/XOR operations on single + precision floating point. + * config/rs6000/rs6000.h (TARGET_NO_SF_SUBREG): New internal + target macros to say whether we need to avoid SUBREGs mixing + SImode and SFmode. + (TARGET_ALLOW_SF_SUBREG): Likewise. + * config/rs6000/rs6000.md (UNSPEC_SF_FROM_SI): New unspecs. + (UNSPEC_SI_FROM_SF): Likewise. + (iorxor): Change spacing. + (and_ior_xor): New iterator for AND, IOR, and XOR. + (movsi_from_sf): New insns for SImode/SFmode SUBREG support. + (movdi_from_sf_zero_ext): Likewise. + (mov_hardfloat, FMOVE32 iterator): Use register_operand + instead of gpc_reg_operand. Add SImode/SFmode SUBREG support. + (movsf_from_si): New insn for SImode/SFmode SUBREG support. + (fma4): Use gpc_reg_operand instead of register_operand. + (fms4): Likewise. + (fnma4): Likewise. + (fnms4): Likewise. + (nfma4): Likewise. + (nfms4): Likewise. + 2017-01-04 Marek Polacek PR c++/64767 diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index f79982f9029..3d69fc86dee 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -31,12 +31,47 @@ (match_test "REGNO (op) == CTR_REGNO || REGNO (op) > LAST_VIRTUAL_REGISTER"))) +;; Return 1 if op is a SUBREG that is used to look at a SFmode value as +;; and integer or vice versa. +;; +;; In the normal case where SFmode is in a floating point/vector register, it +;; is stored as a DFmode and has a different format. If we don't transform the +;; value, things that use logical operations on the values will get the wrong +;; value. +;; +;; If we don't have 64-bit and direct move, this conversion will be done by +;; store and load, instead of by fiddling with the bits within the register. 
+(define_predicate "sf_subreg_operand" + (match_code "subreg") +{ + rtx inner_reg = SUBREG_REG (op); + machine_mode inner_mode = GET_MODE (inner_reg); + + if (TARGET_ALLOW_SF_SUBREG || !REG_P (inner_reg)) + return 0; + + if ((mode == SFmode && GET_MODE_CLASS (inner_mode) == MODE_INT) + || (GET_MODE_CLASS (mode) == MODE_INT && inner_mode == SFmode)) + { + if (INT_REGNO_P (REGNO (inner_reg))) + return 0; + + return 1; + } + return 0; +}) + ;; Return 1 if op is an Altivec register. (define_predicate "altivec_register_operand" (match_operand 0 "register_operand") { if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } if (!REG_P (op)) return 0; @@ -50,6 +85,27 @@ ;; Return 1 if op is a VSX register. (define_predicate "vsx_register_operand" (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return VSX_REGNO_P (REGNO (op)); +}) + +;; Like vsx_register_operand, but allow SF SUBREGS +(define_predicate "vsx_reg_sfsubreg_ok" + (match_operand 0 "register_operand") { if (GET_CODE (op) == SUBREG) op = SUBREG_REG (op); @@ -69,7 +125,12 @@ (match_operand 0 "register_operand") { if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } if (!REG_P (op)) return 0; @@ -86,7 +147,12 @@ (match_operand 0 "register_operand") { if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } if (!REG_P (op)) return 0; @@ -103,7 +169,13 @@ (match_operand 0 "register_operand") { if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand 
(op, mode)) + return 0; + + op = SUBREG_REG (op); + } + if (!REG_P (op)) return 0; @@ -221,6 +293,9 @@ (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) ;; Return 1 if op is a register that is not special. +;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where +;; you need to be careful in moving a SFmode to SImode and vice versa due to +;; the fact that SFmode is represented as DFmode in the VSX registers. (define_predicate "gpc_reg_operand" (match_operand 0 "register_operand") { @@ -228,7 +303,12 @@ return 0; if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } if (!REG_P (op)) return 0; @@ -246,7 +326,8 @@ }) ;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't -;; allow floating point or vector registers. +;; allow floating point or vector registers. Since vector registers are not +;; allowed, we don't have to reject SFmode/SImode subregs. (define_predicate "int_reg_operand" (match_operand 0 "register_operand") { @@ -254,7 +335,12 @@ return 0; if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); + { + if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode)) + return 0; + + op = SUBREG_REG (op); + } if (!REG_P (op)) return 0; @@ -266,6 +352,8 @@ }) ;; Like int_reg_operand, but don't return true for pseudo registers +;; We don't have to check for SF SUBREGS because pseudo registers +;; are not allowed, and SF SUBREGs are ok within GPR registers. 
(define_predicate "int_reg_operand_not_pseudo" (match_operand 0 "register_operand") { diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 92e75a05376..c20d3b5271e 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -153,6 +153,7 @@ extern void rs6000_fatal_bad_address (rtx); extern rtx create_TOC_reference (rtx, rtx); extern void rs6000_split_multireg_move (rtx, rtx); extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode); +extern bool valid_sf_si_move (rtx, rtx, machine_mode); extern void rs6000_emit_move (rtx, rtx, machine_mode); extern rtx rs6000_secondary_memory_needed_rtx (machine_mode); extern machine_mode rs6000_secondary_memory_needed_mode (machine_mode); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 875015ce10e..fe858738d38 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -10402,6 +10402,78 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode) } } +/* Return whether a SFmode or SImode move can be done without converting one + mode to another. This arises when we have: + + (SUBREG:SF (REG:SI ...)) + (SUBREG:SI (REG:SF ...)) + + and one of the values is in a floating point/vector register, where SFmode + scalars are stored in DFmode format. */ + +bool +valid_sf_si_move (rtx dest, rtx src, machine_mode mode) +{ + if (TARGET_ALLOW_SF_SUBREG) + return true; + + if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT) + return true; + + if (!SUBREG_P (src) || !sf_subreg_operand (src, mode)) + return true; + + /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */ + if (SUBREG_P (dest)) + { + rtx dest_subreg = SUBREG_REG (dest); + rtx src_subreg = SUBREG_REG (src); + return GET_MODE (dest_subreg) == GET_MODE (src_subreg); + } + + return false; +} + + +/* Helper function to change moves with: + + (SUBREG:SF (REG:SI)) and + (SUBREG:SI (REG:SF)) + + into separate UNSPEC insns. 
In the PowerPC architecture, scalar SFmode + values are stored as DFmode values in the VSX registers. We need to convert + the bits before we can use a direct move or operate on the bits in the + vector register as an integer type. + + Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */ + +static bool +rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) +{ + if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed + && !lra_in_progress + && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) + && SUBREG_P (source) && sf_subreg_operand (source, mode)) + { + rtx inner_source = SUBREG_REG (source); + machine_mode inner_mode = GET_MODE (inner_source); + + if (mode == SImode && inner_mode == SFmode) + { + emit_insn (gen_movsi_from_sf (dest, inner_source)); + return true; + } + + if (mode == SFmode && inner_mode == SImode) + { + emit_insn (gen_movsf_from_si (dest, inner_source)); + return true; + } + } + + return false; +} + /* Emit a move from SOURCE to DEST in mode MODE. */ void rs6000_emit_move (rtx dest, rtx source, machine_mode mode) @@ -10432,6 +10504,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) gcc_unreachable (); } + /* See if we need to special case SImode/SFmode SUBREG moves. */ + if ((mode == SImode || mode == SFmode) && SUBREG_P (source) + && rs6000_emit_move_si_sf_subreg (dest, source, mode)) + return; + /* Check if GCC is setting up a block move that will end up using FP registers as temporaries. We must make sure this is acceptable. 
*/ if (GET_CODE (operands[0]) == MEM diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 4003730d9bd..0180e0c3dfd 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -608,6 +608,12 @@ extern int rs6000_vector_align[]; && TARGET_POWERPC64) #define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ && TARGET_UPPER_REGS_DI && TARGET_POWERPC64) + + +/* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI). */ +#define TARGET_NO_SF_SUBREG TARGET_DIRECT_MOVE_64BIT +#define TARGET_ALLOW_SF_SUBREG (!TARGET_DIRECT_MOVE_64BIT) + /* This wants to be set for p8 and newer. On p7, overlapping unaligned loads are slow. */ #define TARGET_EFFICIENT_OVERLAPPING_UNALIGNED TARGET_EFFICIENT_UNALIGNED_VSX diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 7e103b019f0..f7c1ab26a99 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -150,6 +150,8 @@ UNSPEC_IEEE128_CONVERT UNSPEC_SIGNBIT UNSPEC_DOLOOP + UNSPEC_SF_FROM_SI + UNSPEC_SI_FROM_SF ]) ;; @@ -561,7 +563,8 @@ (define_code_attr return_str [(return "") (simple_return "simple_")]) ; Logical operators. -(define_code_iterator iorxor [ior xor]) +(define_code_iterator iorxor [ior xor]) +(define_code_iterator and_ior_xor [and ior xor]) ; Signed/unsigned variants of ops. (define_code_iterator any_extend [sign_extend zero_extend]) @@ -6768,6 +6771,157 @@ [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*,fpstore,fpload") (set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4")]) +;; Like movsi, but adjust a SF value to be used in a SI context, i.e. +;; (set (reg:SI ...) (subreg:SI (reg:SF ...) 0)) +;; +;; Because SF values are actually stored as DF values within the vector +;; registers, we need to convert the value to the vector SF format when +;; we need to use the bits in a union or similar cases. We only need +;; to do this transformation when the value is a vector register. 
Loads, +;; stores, and transfers within GPRs are assumed to be safe. +;; +;; This is a more general case of reload_gpr_from_vsxsf. That insn must have +;; no alternatives, because the call is created as part of secondary_reload, +;; and operand #2's register class is used to allocate the temporary register. +;; This function is called before reload, and it creates the temporary as +;; needed. + +;; MR LWZ LFIWZX LXSIWZX STW +;; STFS STXSSP STXSSPX VSX->GPR MTVSRWZ +;; VSX->VSX + +(define_insn_and_split "movsi_from_sf" + [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" + "=r, r, ?*wI, ?*wH, m, + m, wY, Z, r, wIwH, + ?wK") + + (unspec:SI [(match_operand:SF 1 "input_operand" + "r, m, Z, Z, r, + f, wu, wu, wIwH, r, + wK")] + UNSPEC_SI_FROM_SF)) + + (clobber (match_scratch:V4SF 2 + "=X, X, X, X, X, + X, X, X, wa, X, + wa"))] + + "TARGET_NO_SF_SUBREG + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SFmode))" + "@ + mr %0,%1 + lwz%U1%X1 %0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + stw%U0%X0 %1,%0 + stfs%U0%X0 %1,%0 + stxssp %1,%0 + stxsspx %x1,%y0 + # + mtvsrwz %x0,%1 + #" + "&& reload_completed + && register_operand (operands[0], SImode) + && vsx_reg_sfsubreg_ok (operands[1], SFmode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_di = gen_rtx_REG (DImode, REGNO (op0)); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + + if (int_reg_operand (op0, SImode)) + { + emit_insn (gen_p8_mfvsrd_4_disf (op0_di, op2)); + emit_insn (gen_lshrdi3 (op0_di, op0_di, GEN_INT (32))); + } + else + { + rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (op1)); + rtx byte_off = VECTOR_ELT_ORDER_BIG ? 
const0_rtx : GEN_INT (12); + emit_insn (gen_vextract4b (op0_di, op1_v16qi, byte_off)); + } + + DONE; +} + [(set_attr "type" + "*, load, fpload, fpload, store, + fpstore, fpstore, fpstore, mftgpr, mffgpr, + veclogical") + + (set_attr "length" + "4, 4, 4, 4, 4, + 4, 4, 4, 12, 4, + 8")]) + +;; movsi_from_sf with zero extension +;; +;; RLDICL LWZ LFIWZX LXSIWZX VSX->GPR +;; MTVSRWZ VSX->VSX + +(define_insn_and_split "*movdi_from_sf_zero_ext" + [(set (match_operand:DI 0 "gpc_reg_operand" + "=r, r, ?*wI, ?*wH, r, + wIwH, ?wK") + + (zero_extend:DI + (unspec:SI [(match_operand:SF 1 "input_operand" + "r, m, Z, Z, wIwH, + r, wK")] + UNSPEC_SI_FROM_SF))) + + (clobber (match_scratch:V4SF 2 + "=X, X, X, X, wa, + X, wa"))] + + "TARGET_DIRECT_MOVE_64BIT + && (register_operand (operands[0], DImode) + || register_operand (operands[1], SImode))" + "@ + rldicl %0,%1,0,32 + lwz%U1%X1 %0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + # + mtvsrwz %x0,%1 + #" + "&& reload_completed + && vsx_reg_sfsubreg_ok (operands[1], SFmode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + + if (int_reg_operand (op0, DImode)) + { + emit_insn (gen_p8_mfvsrd_4_disf (op0, op2)); + emit_insn (gen_lshrdi3 (op0, op0, GEN_INT (32))); + } + else + { + rtx op0_si = gen_rtx_REG (SImode, REGNO (op0)); + rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (op1)); + rtx byte_off = VECTOR_ELT_ORDER_BIG ? const0_rtx : GEN_INT (12); + emit_insn (gen_vextract4b (op0_si, op1_v16qi, byte_off)); + } + + DONE; +} + [(set_attr "type" + "*, load, fpload, fpload, mftgpr, + mffgpr, veclogical") + + (set_attr "length" + "4, 4, 4, 4, 12, + 4, 8")]) + ;; Split a load of a large constant into the appropriate two-insn ;; sequence. 
@@ -6977,9 +7131,11 @@ "m, , , Z, r, , , , , , r, , f, , r, r, *h, 0"))] - "(gpc_reg_operand (operands[0], mode) - || gpc_reg_operand (operands[1], mode)) - && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" + "(register_operand (operands[0], mode) + || register_operand (operands[1], mode)) + && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && (TARGET_ALLOW_SF_SUBREG + || valid_sf_si_move (operands[0], operands[1], mode))" "@ lwz%U1%X1 %0,%1 @@ -7021,6 +7177,75 @@ [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*") (set_attr "length" "4,4,4,4,4,4,4,4,8,4")]) +;; Like movsf, but adjust a SI value to be used in a SF context, i.e. +;; (set (reg:SF ...) (subreg:SF (reg:SI ...) 0)) +;; +;; Because SF values are actually stored as DF values within the vector +;; registers, we need to convert the value to the vector SF format when +;; we need to use the bits in a union or similar cases. We only need +;; to do this transformation when the value is a vector register. Loads, +;; stores, and transfers within GPRs are assumed to be safe. +;; +;; This is a more general case of reload_vsx_from_gprsf. That insn must have +;; no alternatives, because the call is created as part of secondary_reload, +;; and operand #2's register class is used to allocate the temporary register. +;; This function is called before reload, and it creates the temporary as +;; needed. 
+ +;; LWZ LFS LXSSP LXSSPX STW STFIWX +;; STXSIWX GPR->VSX VSX->GPR GPR->GPR +(define_insn_and_split "movsf_from_si" + [(set (match_operand:SF 0 "rs6000_nonimmediate_operand" + "=!r, f, wb, wu, m, Z, + Z, wy, ?r, !r") + + (unspec:SF [(match_operand:SI 1 "input_operand" + "m, m, wY, Z, r, f, + wu, r, wy, r")] + UNSPEC_SF_FROM_SI)) + + (clobber (match_scratch:DI 2 + "=X, X, X, X, X, X, + X, r, X, X"))] + + "TARGET_NO_SF_SUBREG + && (register_operand (operands[0], SFmode) + || register_operand (operands[1], SImode))" + "@ + lwz%U1%X1 %0,%1 + lfs%U1%X1 %0,%1 + lxssp %0,%1 + lxsspx %x0,%y1 + stw%U0%X0 %1,%0 + stfiwx %1,%y0 + stxsiwx %x1,%y0 + # + mfvsrwz %0,%x1 + mr %0,%1" + + "&& reload_completed + && vsx_reg_sfsubreg_ok (operands[0], SFmode) + && int_reg_operand_not_pseudo (operands[1], SImode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op1_di = gen_rtx_REG (DImode, REGNO (op1)); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_insn (gen_p8_mtvsrd_sf (op0, op2)); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" + "4, 4, 4, 4, 4, 4, + 4, 12, 4, 4") + (set_attr "type" + "load, fpload, fpload, fpload, store, fpstore, + fpstore, vecfloat, mffgpr, *")]) + ;; Move 64-bit binary/decimal floating point (define_expand "mov" @@ -13231,11 +13456,11 @@ ;; Note that the conditions for expansion are in the FMA_F iterator. (define_expand "fma4" - [(set (match_operand:FMA_F 0 "register_operand" "") + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") (fma:FMA_F - (match_operand:FMA_F 1 "register_operand" "") - (match_operand:FMA_F 2 "register_operand" "") - (match_operand:FMA_F 3 "register_operand" "")))] + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (match_operand:FMA_F 3 "gpc_reg_operand" "")))] "" "") @@ -13255,11 +13480,11 @@ ; Altivec only has fma and nfms. 
(define_expand "fms4" - [(set (match_operand:FMA_F 0 "register_operand" "") + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") (fma:FMA_F - (match_operand:FMA_F 1 "register_operand" "") - (match_operand:FMA_F 2 "register_operand" "") - (neg:FMA_F (match_operand:FMA_F 3 "register_operand" ""))))] + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (neg:FMA_F (match_operand:FMA_F 3 "gpc_reg_operand" ""))))] "!VECTOR_UNIT_ALTIVEC_P (mode)" "") @@ -13279,34 +13504,34 @@ ;; If signed zeros are ignored, -(a * b - c) = -a * b + c. (define_expand "fnma4" - [(set (match_operand:FMA_F 0 "register_operand" "") + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") (neg:FMA_F (fma:FMA_F - (match_operand:FMA_F 1 "register_operand" "") - (match_operand:FMA_F 2 "register_operand" "") - (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))] + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (neg:FMA_F (match_operand:FMA_F 3 "gpc_reg_operand" "")))))] "!HONOR_SIGNED_ZEROS (mode)" "") ;; If signed zeros are ignored, -(a * b + c) = -a * b - c. (define_expand "fnms4" - [(set (match_operand:FMA_F 0 "register_operand" "") + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") (neg:FMA_F (fma:FMA_F - (match_operand:FMA_F 1 "register_operand" "") - (match_operand:FMA_F 2 "register_operand" "") - (match_operand:FMA_F 3 "register_operand" ""))))] + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (match_operand:FMA_F 3 "gpc_reg_operand" ""))))] "!HONOR_SIGNED_ZEROS (mode) && !VECTOR_UNIT_ALTIVEC_P (mode)" "") ; Not an official optab name, but used from builtins. 
(define_expand "nfma4" - [(set (match_operand:FMA_F 0 "register_operand" "") + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") (neg:FMA_F (fma:FMA_F - (match_operand:FMA_F 1 "register_operand" "") - (match_operand:FMA_F 2 "register_operand" "") - (match_operand:FMA_F 3 "register_operand" ""))))] + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (match_operand:FMA_F 3 "gpc_reg_operand" ""))))] "!VECTOR_UNIT_ALTIVEC_P (mode)" "") @@ -13327,12 +13552,12 @@ ; Not an official optab name, but used from builtins. (define_expand "nfms4" - [(set (match_operand:FMA_F 0 "register_operand" "") + [(set (match_operand:FMA_F 0 "gpc_reg_operand" "") (neg:FMA_F (fma:FMA_F - (match_operand:FMA_F 1 "register_operand" "") - (match_operand:FMA_F 2 "register_operand" "") - (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))] + (match_operand:FMA_F 1 "gpc_reg_operand" "") + (match_operand:FMA_F 2 "gpc_reg_operand" "") + (neg:FMA_F (match_operand:FMA_F 3 "gpc_reg_operand" "")))))] "" "") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 6264e6c7206..e054f5c7caa 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3897,3 +3897,149 @@ "TARGET_P9_VECTOR" "xxinsertw %x0,%x1,%3" [(set_attr "type" "vecperm")]) + + + +;; Operand numbers for the following peephole2 +(define_constants + [(SFBOOL_TMP_GPR 0) ;; GPR temporary + (SFBOOL_TMP_VSX 1) ;; vector temporary + (SFBOOL_MFVSR_D 2) ;; move to gpr dest + (SFBOOL_MFVSR_A 3) ;; move to gpr src + (SFBOOL_BOOL_D 4) ;; and/ior/xor dest + (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1 + (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2 + (SFBOOL_SHL_D 7) ;; shift left dest + (SFBOOL_SHL_A 8) ;; shift left arg + (SFBOOL_MTVSR_D 9) ;; move to vector dest + (SFBOOL_BOOL_A_DI 10) ;; SFBOOL_BOOL_A1/A2 as DImode + (SFBOOL_TMP_VSX_DI 11) ;; SFBOOL_TMP_VSX as DImode + (SFBOOL_MTVSR_D_V4SF 12)]) ;; SFBOOL_MTVSR_D as V4SFmode + +;; Attempt to optimize some common GLIBC operations 
using logical operations to +;; pick apart SFmode operations. For example, there is code from e_powf.c +;; after macro expansion that looks like: +;; +;; typedef union { +;; float value; +;; uint32_t word; +;; } ieee_float_shape_type; +;; +;; float t1; +;; int32_t is; +;; +;; do { +;; ieee_float_shape_type gf_u; +;; gf_u.value = (t1); +;; (is) = gf_u.word; +;; } while (0); +;; +;; do { +;; ieee_float_shape_type sf_u; +;; sf_u.word = (is & 0xfffff000); +;; (t1) = sf_u.value; +;; } while (0); +;; +;; +;; This would result in two direct move operations (convert to memory format, +;; direct move to GPR, do the AND operation, direct move to VSX, convert to +;; scalar format). With this peephole, we eliminate the direct move to the +;; GPR, and instead move the integer mask value to the vector register after a +;; shift and do the VSX logical operation. + +;; The insns for dealing with SFmode in GPR registers looks like: +;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN)) +;; +;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX)) +;; +;; (set (reg:DI reg3) (lshiftrt:DI (reg:DI reg3) (const_int 32))) +;; +;; (set (reg:DI reg5) (and:DI (reg:DI reg3) (reg:DI reg4))) +;; +;; (set (reg:DI reg6) (ashift:DI (reg:DI reg5) (const_int 32))) +;; +;; (set (reg:SF reg7) (unspec:SF [(reg:DI reg6)] UNSPEC_P8V_MTVSRD)) +;; +;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg7)] UNSPEC_VSX_CVSPDPN)) + +(define_peephole2 + [(match_scratch:DI SFBOOL_TMP_GPR "r") + (match_scratch:V4SF SFBOOL_TMP_VSX "wa") + + ;; MFVSRD + (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand") + (unspec:DI [(match_operand:V4SF SFBOOL_MFVSR_A "vsx_register_operand")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + + ;; SRDI + (set (match_dup SFBOOL_MFVSR_D) + (lshiftrt:DI (match_dup SFBOOL_MFVSR_D) + (const_int 32))) + + ;; AND/IOR/XOR operation on int + (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand") + (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand") + 
(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand"))) + + ;; SLDI + (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand") + (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand") + (const_int 32))) + + ;; MTVSRD + (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand") + (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))] + + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE + /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO + to compare registers, when the mode is different. */ + && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]) + && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D]) + && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D]) + && (REG_P (operands[SFBOOL_BOOL_A2]) + || CONST_INT_P (operands[SFBOOL_BOOL_A2])) + && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D]) + || peep2_reg_dead_p (3, operands[SFBOOL_MFVSR_D])) + && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1]) + || (REG_P (operands[SFBOOL_BOOL_A2]) + && REGNO (operands[SFBOOL_MFVSR_D]) + == REGNO (operands[SFBOOL_BOOL_A2]))) + && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A]) + && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D]) + || peep2_reg_dead_p (4, operands[SFBOOL_BOOL_D])) + && peep2_reg_dead_p (5, operands[SFBOOL_SHL_D])" + [(set (match_dup SFBOOL_TMP_GPR) + (ashift:DI (match_dup SFBOOL_BOOL_A_DI) + (const_int 32))) + + (set (match_dup SFBOOL_TMP_VSX_DI) + (match_dup SFBOOL_TMP_GPR)) + + (set (match_dup SFBOOL_MTVSR_D_V4SF) + (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A) + (match_dup SFBOOL_TMP_VSX)))] +{ + rtx bool_a1 = operands[SFBOOL_BOOL_A1]; + rtx bool_a2 = operands[SFBOOL_BOOL_A2]; + int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]); + int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]); + int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]); + + if (CONST_INT_P (bool_a2)) + { + rtx tmp_gpr = 
operands[SFBOOL_TMP_GPR]; + emit_move_insn (tmp_gpr, bool_a2); + operands[SFBOOL_BOOL_A_DI] = tmp_gpr; + } + else + { + int regno_bool_a1 = REGNO (bool_a1); + int regno_bool_a2 = REGNO (bool_a2); + int regno_bool_a = (regno_mfvsr_d == regno_bool_a1 + ? regno_bool_a2 : regno_bool_a1); + operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a); + } + + operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx); + operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d); +}) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9b44a4a5843..90a5c1099b9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2016-12-29 Michael Meissner + + PR target/71977 + PR target/70568 + PR target/78823 + * gcc.target/powerpc/pr71977-1.c: New tests to check whether on + 64-bit VSX systems with direct move, whether we optimize common + code sequences in the GLIBC math library for float math functions. + * gcc.target/powerpc/pr71977-2.c: Likewise. 
+ 2017-01-04 Marek Polacek PR c++/64767 diff --git a/gcc/testsuite/gcc.target/powerpc/pr71977-1.c b/gcc/testsuite/gcc.target/powerpc/pr71977-1.c new file mode 100644 index 00000000000..c4413b8747a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr71977-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +#include + +typedef union +{ + float value; + uint32_t word; +} ieee_float_shape_type; + +float +mask_and_float_var (float f, uint32_t mask) +{ + ieee_float_shape_type u; + + u.value = f; + u.word &= mask; + + return u.value; +} + +/* { dg-final { scan-assembler "\[ \t\]xxland " } } */ +/* { dg-final { scan-assembler-not "\[ \t\]and " } } */ +/* { dg-final { scan-assembler-not "\[ \t\]mfvsrd " } } */ +/* { dg-final { scan-assembler-not "\[ \t\]stxv" } } */ +/* { dg-final { scan-assembler-not "\[ \t\]lxv" } } */ +/* { dg-final { scan-assembler-not "\[ \t\]srdi " } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr71977-2.c b/gcc/testsuite/gcc.target/powerpc/pr71977-2.c new file mode 100644 index 00000000000..8ec1b6126ad --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr71977-2.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +#include + +typedef union +{ + float value; + uint32_t word; +} ieee_float_shape_type; + +float +mask_and_float_sign (float f) +{ + ieee_float_shape_type u; + + u.value = f; + u.word &= 0x80000000; + + return u.value; +} + +/* { dg-final { scan-assembler "\[ 
\t\]xxland " } } */ +/* { dg-final { scan-assembler-not "\[ \t\]and " } } */ +/* { dg-final { scan-assembler-not "\[ \t\]mfvsrd " } } */ +/* { dg-final { scan-assembler-not "\[ \t\]stxv" } } */ +/* { dg-final { scan-assembler-not "\[ \t\]lxv" } } */ +/* { dg-final { scan-assembler-not "\[ \t\]srdi " } } */