From 9fede15c4d5f7873ed906eb8cddee7cb35d2cec4 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Mon, 23 Jul 2018 13:27:38 +0200 Subject: [PATCH] rs6000: Improve vsx_init_v4si This changes vsx_init_v4si to be an expander. That way, no special cases are needed anymore for special arguments: the normal RTL passes can deal with it. * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust. * config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete. * config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force the elements into a register. (rs6000_split_v4si_init_di_reg): Delete. (rs6000_split_v4si_init): Delete. * config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT. (vsx_init_v4si): Rewrite as a define_expand. From-SVN: r262930 --- gcc/ChangeLog | 11 ++++ gcc/config/rs6000/rs6000-p8swap.c | 1 - gcc/config/rs6000/rs6000-protos.h | 1 - gcc/config/rs6000/rs6000.c | 92 +------------------------------ gcc/config/rs6000/vsx.md | 49 ++++++++++------ 5 files changed, 45 insertions(+), 109 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7dc5e0bfdd1..1bf8b46fb18 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2018-07-23 Segher Boessenkool + + * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust. + * config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete. + * config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force + the elements into a register. + (rs6000_split_v4si_init_di_reg): Delete. + (rs6000_split_v4si_init): Delete. + * config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT. + (vsx_init_v4si): Rewrite as a define_expand. 
+ 2018-07-23 Segher Boessenkool * config/rs6000/rs6000.md (splitters for rldimi and rlwimi with the diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index 071bc0c187d..f32db38b3fe 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -772,7 +772,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VSX_EXTRACT: case UNSPEC_VSX_SET: case UNSPEC_VSX_SLDWI: - case UNSPEC_VSX_VEC_INIT: case UNSPEC_VSX_VSLO: case UNSPEC_VUNPACK_HI_SIGN: case UNSPEC_VUNPACK_HI_SIGN_DIRECT: diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 714b8a8f43a..fc45aa51af9 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -61,7 +61,6 @@ extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); -extern void rs6000_split_v4si_init (rtx []); extern void altivec_expand_vec_perm_le (rtx op[4]); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index caa35e01787..2b736d756c3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6857,11 +6857,7 @@ rs6000_expand_vector_init (rtx target, rtx vals) size_t i; for (i = 0; i < 4; i++) - { - elements[i] = XVECEXP (vals, 0, i); - if (!CONST_INT_P (elements[i]) && !REG_P (elements[i])) - elements[i] = copy_to_mode_reg (SImode, elements[i]); - } + elements[i] = force_reg (SImode, XVECEXP (vals, 0, i)); emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1], elements[2], elements[3])); @@ -7568,92 +7564,6 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, gcc_unreachable (); } -/* Helper function for rs6000_split_v4si_init 
to build up a DImode value from - two SImode values. */ - -static void -rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp) -{ - const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff); - - if (CONST_INT_P (si1) && CONST_INT_P (si2)) - { - unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32; - unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit; - - emit_move_insn (dest, GEN_INT (const1 | const2)); - return; - } - - /* Put si1 into upper 32-bits of dest. */ - if (CONST_INT_P (si1)) - emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32)); - else - { - /* Generate RLDIC. */ - rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1)); - rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32)); - rtx mask_rtx = GEN_INT (mask_32bit << 32); - rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx); - gcc_assert (!reg_overlap_mentioned_p (dest, si1)); - emit_insn (gen_rtx_SET (dest, and_rtx)); - } - - /* Put si2 into the temporary. */ - gcc_assert (!reg_overlap_mentioned_p (dest, tmp)); - if (CONST_INT_P (si2)) - emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit)); - else - emit_insn (gen_zero_extendsidi2 (tmp, si2)); - - /* Combine the two parts. */ - emit_insn (gen_iordi3 (dest, dest, tmp)); - return; -} - -/* Split a V4SI initialization. */ - -void -rs6000_split_v4si_init (rtx operands[]) -{ - rtx dest = operands[0]; - - /* Destination is a GPR, build up the two DImode parts in place. */ - if (REG_P (dest) || SUBREG_P (dest)) - { - int d_regno = regno_or_subregno (dest); - rtx scalar1 = operands[1]; - rtx scalar2 = operands[2]; - rtx scalar3 = operands[3]; - rtx scalar4 = operands[4]; - rtx tmp1 = operands[5]; - rtx tmp2 = operands[6]; - - /* Even though we only need one temporary (plus the destination, which - has an early clobber constraint, try to use two temporaries, one for - each double word created. 
That way the 2nd insn scheduling pass can - rearrange things so the two parts are done in parallel. */ - if (BYTES_BIG_ENDIAN) - { - rtx di_lo = gen_rtx_REG (DImode, d_regno); - rtx di_hi = gen_rtx_REG (DImode, d_regno + 1); - rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1); - rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2); - } - else - { - rtx di_lo = gen_rtx_REG (DImode, d_regno + 1); - rtx di_hi = gen_rtx_REG (DImode, d_regno); - rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1); - rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2); - } - return; - } - - else - gcc_unreachable (); -} - /* Return alignment of TYPE. Existing alignment is ALIGN. HOW selects whether the alignment is abi mandated, optional, or both abi and optional alignment. */ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 432aa1e3754..de2fa7815a3 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -388,7 +388,6 @@ UNSPEC_VSX_VXSIG UNSPEC_VSX_VIEXP UNSPEC_VSX_VTSTDC - UNSPEC_VSX_VEC_INIT UNSPEC_VSX_VSIGNED2 UNSPEC_LXVL @@ -2946,23 +2945,41 @@ } [(set_attr "type" "vecperm")]) -;; V4SImode initialization splitter -(define_insn_and_split "vsx_init_v4si" - [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r") - (unspec:V4SI - [(match_operand:SI 1 "reg_or_cint_operand" "rn") - (match_operand:SI 2 "reg_or_cint_operand" "rn") - (match_operand:SI 3 "reg_or_cint_operand" "rn") - (match_operand:SI 4 "reg_or_cint_operand" "rn")] - UNSPEC_VSX_VEC_INIT)) - (clobber (match_scratch:DI 5 "=&r")) - (clobber (match_scratch:DI 6 "=&r"))] +;; Concatenate 4 SImode elements into a V4SImode reg. 
+(define_expand "vsx_init_v4si"
+  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
+   (use (match_operand:SI 1 "gpc_reg_operand"))
+   (use (match_operand:SI 2 "gpc_reg_operand"))
+   (use (match_operand:SI 3 "gpc_reg_operand"))
+   (use (match_operand:SI 4 "gpc_reg_operand"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
 {
-  rs6000_split_v4si_init (operands);
+  rtx a = gen_reg_rtx (DImode);  /* a..d: one fresh DImode pseudo per input element.  */
+  rtx b = gen_reg_rtx (DImode);
+  rtx c = gen_reg_rtx (DImode);
+  rtx d = gen_reg_rtx (DImode);
+  emit_insn (gen_zero_extendsidi2 (a, operands[1]));  /* Zero-extend each SImode operand to DImode.  */
+  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
+  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
+  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
+  if (!BYTES_BIG_ENDIAN)  /* Not big-endian: exchange the members of each pair,  */
+    {
+      std::swap (a, b);  /* so operand 2 (not 1) is the one shifted below,  */
+      std::swap (c, d);  /* and likewise operand 4 (not 3).  */
+    }
+
+  rtx aa = gen_reg_rtx (DImode);  /* aa, cc: first member of each pair, shifted.  */
+  rtx ab = gen_reg_rtx (DImode);  /* ab, cd: each pair packed into one DImode value.  */
+  rtx cc = gen_reg_rtx (DImode);
+  rtx cd = gen_reg_rtx (DImode);
+  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));  /* Move a and c into bits 32..63 ...  */
+  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
+  emit_insn (gen_iordi3 (ab, aa, b));  /* ... and OR b and d into the low 32 bits.  */
+  emit_insn (gen_iordi3 (cd, cc, d));
+
+  rtx abcd = gen_reg_rtx (V2DImode);  /* The two packed doublewords as one V2DI value.  */
+  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
+  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));  /* Copy to operand 0, viewing the V2DI as V4SI.  */
   DONE;
 })
-- 
2.30.2