From e3a8ef8ef2ac077b393687624b09db6ab31c7746 Mon Sep 17 00:00:00 2001 From: Xionghu Luo Date: Thu, 21 Jan 2021 21:03:28 -0600 Subject: [PATCH] rs6000: Enable vec_insert for P8 with rs6000_expand_vector_set_var_p8 [PR98093] Support P8 variable vec_insert and Update testcases' instruction count. gcc/ChangeLog: 2021-01-22 Xionghu Luo PR target/98093 * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later platforms. * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update to call different path for P8 and P9. (rs6000_expand_vector_set_var_p9): New function. (rs6000_expand_vector_set_var_p8): New function. gcc/testsuite/ChangeLog: 2021-01-22 Xionghu Luo * gcc.target/powerpc/pr79251.p8.c: New test. * gcc.target/powerpc/fold-vec-insert-char-p8.c: Adjust instruction counts. * gcc.target/powerpc/fold-vec-insert-char-p9.c: Likewise. * gcc.target/powerpc/fold-vec-insert-double.c: Likewise. * gcc.target/powerpc/fold-vec-insert-float-p8.c: Likewise. * gcc.target/powerpc/fold-vec-insert-float-p9.c: Likewise. * gcc.target/powerpc/fold-vec-insert-int-p8.c: Likewise. * gcc.target/powerpc/fold-vec-insert-int-p9.c: Likewise. * gcc.target/powerpc/fold-vec-insert-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-insert-short-p8.c: Likewise. * gcc.target/powerpc/fold-vec-insert-short-p9.c: Likewise. * gcc.target/powerpc/vsx-builtin-7.c: Likewise. 
--- gcc/config/rs6000/rs6000-c.c | 27 +++- gcc/config/rs6000/rs6000.c | 117 +++++++++++++++++- .../powerpc/fold-vec-insert-char-p8.c | 11 +- .../powerpc/fold-vec-insert-char-p9.c | 12 +- .../powerpc/fold-vec-insert-double.c | 11 +- .../powerpc/fold-vec-insert-float-p8.c | 6 +- .../powerpc/fold-vec-insert-float-p9.c | 10 +- .../powerpc/fold-vec-insert-int-p8.c | 9 +- .../powerpc/fold-vec-insert-int-p9.c | 11 +- .../powerpc/fold-vec-insert-longlong.c | 10 +- .../powerpc/fold-vec-insert-short-p8.c | 9 +- .../powerpc/fold-vec-insert-short-p9.c | 8 +- gcc/testsuite/gcc.target/powerpc/pr79251.p8.c | 17 +++ .../gcc.target/powerpc/vsx-builtin-7.c | 2 +- 14 files changed, 206 insertions(+), 54 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 4a237ae322a..f6ee1e61b56 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, SET_EXPR_LOCATION (stmt, loc); stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); } - stmt = build_array_ref (loc, stmt, arg2); - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, - convert (TREE_TYPE (stmt), arg0)); - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + + if (TARGET_P8_VECTOR) + { + stmt = build_array_ref (loc, stmt, arg2); + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, + convert (TREE_TYPE (stmt), arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + } + else + { + tree arg1_inner_type; + tree innerptrtype; + arg1_inner_type = TREE_TYPE (arg1_type); + innerptrtype = build_pointer_type (arg1_inner_type); + + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); + stmt = convert (innerptrtype, stmt); + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); + stmt = build_indirect_ref (loc, stmt, RO_NULL); + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, + convert (TREE_TYPE (stmt), 
arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + } return stmt; } diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 68832a38952..f5565a1a253 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -7090,10 +7090,10 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx) } /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX - is variable and also counts by vector element size. */ + is variable and also counts by vector element size for p9 and above. */ void -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) { machine_mode mode = GET_MODE (target); @@ -7136,6 +7136,119 @@ rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) emit_insn (perml); } +/* Insert VAL into element IDX of TARGET on P8.  VAL has the size of one vector + element (1 to 4 bytes here); IDX is a variable SImode element index.  */ + +void +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) +{ + machine_mode mode = GET_MODE (target); + + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); + + gcc_assert (GET_MODE (idx) == E_SImode); + + machine_mode inner_mode = GET_MODE (val); + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); + + rtx tmp = gen_reg_rtx (GET_MODE (idx)); + int width = GET_MODE_SIZE (inner_mode); + + gcc_assert (width >= 1 && width <= 4); + + if (!BYTES_BIG_ENDIAN) + { + /* tmp = idx * width.  */ + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); + /* tmp = tmp + 8.  */ + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); + } + else + { + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); /* tmp = idx * width.  */ + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); /* tmp = (24 - width) - tmp.  */ + } + + /* lxv vs33, mask. + DImode: 0xffffffffffffffff0000000000000000 (unused here: width <= 4) + SImode: 0x00000000ffffffff0000000000000000 + HImode: 0x000000000000ffff0000000000000000 + QImode: 0x00000000000000ff0000000000000000. 
*/ + rtx mask = gen_reg_rtx (V16QImode); + rtx mask_v2di = gen_reg_rtx (V2DImode); + rtvec v = rtvec_alloc (2); + if (!BYTES_BIG_ENDIAN) + { + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); + } + else + { + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); + } + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v))); + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0); + emit_insn (gen_rtx_SET (mask, sub_mask)); + + /* mtvsrd[wz] f0,tmp_val.  VAL is brought into an SImode register first.  */ + rtx tmp_val = gen_reg_rtx (SImode); + if (inner_mode == E_SFmode) + emit_insn (gen_movsi_from_sf (tmp_val, val)); + else + tmp_val = force_reg (SImode, val); + + rtx val_v16qi = gen_reg_rtx (V16QImode); + rtx val_v2di = gen_reg_rtx (V2DImode); + rtvec vec_val = rtvec_alloc (2); + if (!BYTES_BIG_ENDIAN) + { + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); + RTVEC_ELT (vec_val, 1) = tmp_val; + } + else + { + RTVEC_ELT (vec_val, 0) = tmp_val; + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); + } + emit_insn ( + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val))); + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); + + /* lvsl 13,0,tmp.  TMP is the value computed from IDX above, widened to DImode.  */ + tmp = convert_modes (DImode, SImode, tmp, 1); + rtx pcv = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); + + /* vperm 1,1,1,13. */ + /* vperm 0,0,0,13. */ + rtx val_perm = gen_reg_rtx (V16QImode); + rtx mask_perm = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv)); + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); + + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); + + /* xxsel 34,34,32,33. 
*/ + emit_insn ( + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm)); +} + +/* Insert VAL into variable element IDX of TARGET.  Use the P9 expander for + TARGET_P9_VECTOR or 8-byte elements, and the P8 expander otherwise.  */ + +void +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) + rs6000_expand_vector_set_var_p9 (target, val, idx); + else + rs6000_expand_vector_set_var_p8 (target, val, idx); +} + /* Extract field ELT from VEC into TARGET. */ void diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c index 39fd4df6d42..8f94e8c80b8 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p8.c @@ -44,15 +44,16 @@ vector unsigned char testuu_cst (unsigned char x, vector unsigned char v) return vec_insert (x, v, 12); } -/* one store per _var test */ -/* { dg-final { scan-assembler-times {\mstvx\M|\mstxvw4x\M} 4 } } */ +/* no store per _var test */ +/* { dg-final { scan-assembler-times {\mstvx\M|\mstxvw4x\M} 0 } } */ /* one store-byte per test */ -/* { dg-final { scan-assembler-times {\mstb\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mstb\M} 4 } } */ /* one load per test */ -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 { target le } } } */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 4 { target be } } } */ /* one lvebx per _cst test.*/ /* { dg-final { scan-assembler-times {\mlvebx\M} 4 } } */ /* one vperm per _cst test.*/ -/* { dg-final { scan-assembler-times {\mvperm\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvperm\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c index ae1daada4c0..35ae420dba0 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c @@ -44,13 +44,13 @@ vector unsigned char testuu_cst (unsigned char x, vector unsigned char v) return vec_insert (x, v, 12); } -/* load immediate, add, store, stb, load variable test. */ -/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mstb\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mlvebx\M|\mlxv\M|\mlvx\M} 4 { target lp64} } } */ +/* no store per _var test. */ +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mstb\M} 0 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mlvebx\M|\mlxv\M|\mlvx\M} 0 { target lp64} } } */ /* an insert and a move per constant test. */ -/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mvinsertb\M} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mvinsertb\M} 8 { target lp64 } } } */ /* -m32 codegen. 
*/ /* { dg-final { scan-assembler-times {\mrlwinm\M} 4 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c index 120579cba23..5afadc9aa91 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c @@ -23,7 +23,12 @@ testd_cst (double d, vector double vd) /* { dg-final { scan-assembler {\mxxpermdi\M} } } */ /* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 } } */ + +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 1 { target { ! has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 1 { target { ! has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 { target { ! 
has_arch_pwr8 } } } } */ + +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 0 { target { has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 0 { target { has_arch_pwr8 } } } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target { has_arch_pwr8 } } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c index 76039bc45f5..834f1d39579 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c @@ -19,12 +19,12 @@ testf_cst (float f, vector float vf) return vec_insert (f, vf, 12); } -/* { dg-final { scan-assembler-times {\mstvx\M|\mstxv\M|\mstxvd2x\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstvx\M|\mstxv\M|\mstxvd2x\M} 0 } } */ /* cst tests has stfs instead of stfsx. */ -/* { dg-final { scan-assembler-times {\mstfs\M|\mstfsx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstfs\M|\mstfsx\M} 1 } } */ /* { dg-final { scan-assembler-times {\mlvx\M|\mlxv\M|\mlxvd2x\M|\mlxvw4x\M} 2 } } */ /* cst test has a lvewx,vperm combo */ /* { dg-final { scan-assembler-times {\mlvewx\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mvperm\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvperm\M} 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c index 3819fe67375..ba41330d835 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c @@ -20,13 +20,13 @@ testf_cst (float f, vector float vf) } /* var test has a load and store. 
*/ -/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 1 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mstfsx\M} 1 { target lp64} } } */ +/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 0 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mstfsx\M} 0 { target lp64} } } */ /* cst test have a xscvdpspn,xxextractuw,xxinsertw combo */ -/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mxxextractuw\M} 1 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mxxinsertw\M} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mxxextractuw\M} 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mxxinsertw\M} 2 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mstfs\M} 2 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {\mlxv\M} 2 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c index 0f2bdd71d60..37502417c7f 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p8.c @@ -49,9 +49,10 @@ testui2_cst(unsigned int x, vector unsigned int v) } /* Each test has lvx (8). cst tests have additional lvewx. (4) */ -/* var tests have both stwx (4) and stvx (4). cst tests have stw (4).*/ -/* { dg-final { scan-assembler-times {\mstvx\M|\mstwx\M|\mstw\M|\mstxvw4x\M} 12 } } */ -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 } } */ +/* var tests have no stwx and stvx. 
cst tests have stw (4).*/ +/* { dg-final { scan-assembler-times {\mstvx\M|\mstwx\M|\mstw\M|\mstxvw4x\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 8 { target le } } } */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvw4x\M} 4 { target be } } } */ /* { dg-final { scan-assembler-times {\mlvewx\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mvperm\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvperm\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c index a851fd6b8dc..01d4eee81fb 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c @@ -49,14 +49,13 @@ testui2_cst(unsigned int x, vector unsigned int v) } -/* load immediate, add, store, stb, load variable test. */ -/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mstwx\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mstwx\M} 0 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 0 { target lp64 } } } */ /* an insert and a move per constant test. 
*/ -/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mxxinsertw\M} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mxxinsertw\M} 8 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mstw\M} 8 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {\mlxv\M} 8 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c index e9698986788..aa52efe13a6 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-longlong.c @@ -60,13 +60,9 @@ testul2_cst(unsigned long long x, vector unsigned long long v) /* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 4 } } */ -/* The number of addi instructions decreases on newer systems. Measured as 8 on - power7 and power8 targets, and drops to 4 on power9 targets that use the - newer stxv,lxv instructions. For this test ensure we get at least one. 
*/ -/* { dg-final { scan-assembler {\maddi\M} } } */ -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mstdx\M} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstvx\M|\mstxv\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mstdx\M} 0 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mstw\M} 8 { target ilp32 } } } */ -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c index 0a73db7120c..8dc98f78ad4 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p8.c @@ -48,10 +48,11 @@ testus2_cst(unsigned short x, vector unsigned short v) return vec_insert(x, v, 12); } -/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 8 } } */ -/* stores.. 2 each per variable tests, 1 each per cst test. */ -/* { dg-final { scan-assembler-times {\msthx\M|\mstvx\M|\msth\M|\mstxvw4x\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 8 { target le } } } */ +/* { dg-final { scan-assembler-times {\mlhz\M|\mlvx\M|\mlxv\M|\mlxvw4x\M} 4 { target be } } } */ +/* stores.. 0 per variable tests, 1 each per cst test. 
*/ +/* { dg-final { scan-assembler-times {\msthx\M|\mstvx\M|\msth\M|\mstxvw4x\M} 4 } } */ /* { dg-final { scan-assembler-times {\mlvehx\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mvperm\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvperm\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c index 0f3504258aa..55778bda3a5 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c @@ -48,11 +48,11 @@ testus2_cst(unsigned short x, vector unsigned short v) return vec_insert(x, v, 12); } -/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mvinserth\M} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\mvinserth\M} 8 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 4 { target lp64 }} } */ +/* { dg-final { scan-assembler-times {\mstxv\M|\mstvx\M} 0 } } */ +/* { dg-final { scan-assembler-times {\mlxv\M|\mlvx\M} 0 { target lp64 }} } */ /* -m32 uses sth/lvehx as part of the sequence. 
*/ /* { dg-final { scan-assembler-times {\msth\M} 8 { target ilp32 }} } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c new file mode 100644 index 00000000000..06da47b7758 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ + +#include <stddef.h> +#include <altivec.h> +#include "pr79251.h" + +TEST_VEC_INSERT_ALL (test) + +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c index 0780b01ffab..6fffb7eb098 100644 --- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c +++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c @@ -193,7 +193,7 @@ vector unsigned __int128 splat_uint128 (unsigned __int128 x) { return vec_splats /* { dg-final { scan-assembler-times {\mrldic\M} 0 { target { be && ilp32 } } } } */ /* { dg-final { scan-assembler-times {\mrldic\M} 64 { target { be && lp64 } } } } */ /* { dg-final { scan-assembler-times {\mrldic\M} 64 { target le } } } */ -/* { dg-final { scan-assembler-times "xxpermdi" 4 { target be } } } */ +/* { dg-final { scan-assembler-times "xxpermdi" 11 { target be } } } */ /* { dg-final { scan-assembler-times "xxpermdi" 6 { target le } } } */ /* { dg-final { scan-assembler-times "vspltisb" 2 } } */ /* { dg-final { scan-assembler-times "vspltish" 2 } } */ -- 2.30.2