From baca1278a30eeb2bfca4c6aa0b7fbd9a1882a860 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 16 Jul 2015 00:20:17 +0200 Subject: [PATCH] re PR target/66866 (incorrect load address on manual vector shuffle) PR target/66866 * config/i386/i386-protos.h (ix86_expand_pextr): New prototype. * config/i386/i386.c (ix86_expand_pextr): New function. (ix86_expand_pinsr): Handle V1TI and TI modes. Call ix86_expand_pextr for non-lowpart subregs. * config/i386/i386.md (extzv): Expand with ix86_expand_pextr. (insv): Use SWI248 mode iterator. (insv_1): Ditto. testsuite/ChangeLog: PR target/66866 * g++.dg/pr66866.C: New test. From-SVN: r225852 --- gcc/ChangeLog | 11 +++ gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.c | 140 ++++++++++++++++++++++++++++++--- gcc/config/i386/i386.md | 19 +++-- gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/g++.dg/pr66866.C | 29 +++++++ 6 files changed, 187 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/g++.dg/pr66866.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9c1e3cf8219..6f5459d4797 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2015-07-16 Uros Bizjak + + PR target/66866 + * config/i386/i386-protos.h (ix86_expand_pextr): New prototype. + * config/i386/i386.c (ix86_expand_pextr): New function. + (ix86_expand_pinsr): Handle V1TI and TI modes. Call ix86_expand_pextr + for non-lowpart subregs. + * config/i386/i386.md (extzv): Expand with ix86_expand_pextr. + (insv): Use SWI248 mode iterator. + (insv_1): Ditto. 
+ 2015-07-15 Aditya Kumar Sebastian Pop diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 0224c362e97..6a17ef40a2e 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -223,6 +223,7 @@ extern void ix86_expand_vector_extract (bool, rtx, rtx, int); extern void ix86_expand_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx); extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned); +extern bool ix86_expand_pextr (rtx *); extern bool ix86_expand_pinsr (rtx *); extern void ix86_expand_mul_widen_evenodd (rtx, rtx, rtx, bool, bool); extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 493e68672b9..55e1e2db349 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -50509,6 +50509,105 @@ ix86_expand_sse2_abs (rtx target, rtx input) emit_move_insn (target, x); } +/* Expand an extract from a vector register through pextr insn. + Return true if successful. */ + +bool +ix86_expand_pextr (rtx *operands) +{ + rtx dst = operands[0]; + rtx src = operands[1]; + + unsigned int size = INTVAL (operands[2]); + unsigned int pos = INTVAL (operands[3]); + + if (GET_CODE (dst) == SUBREG) + { + /* Reject non-lowpart subregs. 
*/ + if (SUBREG_BYTE (dst) > 0) + return false; + dst = SUBREG_REG (dst); + } + + if (GET_CODE (src) == SUBREG) + { + pos += SUBREG_BYTE (src) * BITS_PER_UNIT; + src = SUBREG_REG (src); + } + + switch (GET_MODE (src)) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V1TImode: + case TImode: + { + machine_mode srcmode, dstmode; + rtx d, pat; + + dstmode = mode_for_size (size, MODE_INT, 0); + + switch (dstmode) + { + case QImode: + if (!TARGET_SSE4_1) + return false; + srcmode = V16QImode; + break; + + case HImode: + if (!TARGET_SSE2) + return false; + srcmode = V8HImode; + break; + + case SImode: + if (!TARGET_SSE4_1) + return false; + srcmode = V4SImode; + break; + + case DImode: + gcc_assert (TARGET_64BIT); + if (!TARGET_SSE4_1) + return false; + srcmode = V2DImode; + break; + + default: + return false; + } + + if (GET_MODE (dst) == dstmode) + d = dst; + else + d = gen_reg_rtx (dstmode); + + /* Construct insn pattern. */ + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size))); + pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat); + + /* Let the rtl optimizers know about the zero extension performed. */ + if (dstmode == QImode || dstmode == HImode) + { + pat = gen_rtx_ZERO_EXTEND (SImode, pat); + d = gen_lowpart (SImode, d); + } + + emit_insn (gen_rtx_SET (d, pat)); + + if (d != dst) + emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); + return true; + } + + default: + return false; + } +} + /* Expand an insert into a vector register through pinsr insn. Return true if successful. 
*/ @@ -50527,18 +50626,18 @@ ix86_expand_pinsr (rtx *operands) dst = SUBREG_REG (dst); } - if (GET_CODE (src) == SUBREG) - src = SUBREG_REG (src); - switch (GET_MODE (dst)) { case V16QImode: case V8HImode: case V4SImode: case V2DImode: + case V1TImode: + case TImode: { machine_mode srcmode, dstmode; rtx (*pinsr)(rtx, rtx, rtx, rtx); + rtx d; srcmode = mode_for_size (size, MODE_INT, 0); @@ -50577,15 +50676,36 @@ ix86_expand_pinsr (rtx *operands) return false; } - rtx d = dst; - if (GET_MODE (dst) != dstmode) - d = gen_reg_rtx (dstmode); - src = gen_lowpart (srcmode, src); + if (GET_CODE (src) == SUBREG) + { + unsigned int srcpos = SUBREG_BYTE (src); - pos /= size; + if (srcpos > 0) + { + rtx extr_ops[4]; + + extr_ops[0] = gen_reg_rtx (srcmode); + extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src)); + extr_ops[2] = GEN_INT (size); + extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT); + + if (!ix86_expand_pextr (extr_ops)) + return false; + + src = extr_ops[0]; + } + else + src = gen_lowpart (srcmode, SUBREG_REG (src)); + } + + if (GET_MODE (dst) == dstmode) + d = dst; + else + d = gen_reg_rtx (dstmode); - emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src, - GEN_INT (1 << pos))); + emit_insn (pinsr (d, gen_lowpart (dstmode, dst), + gen_lowpart (srcmode, src), + GEN_INT (1 << (pos / size)))); if (d != dst) emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); return true; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b7b795ed267..2b3b6e98f65 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2734,6 +2734,9 @@ (match_operand:SI 3 "const_int_operand")))] "" { + if (ix86_expand_pextr (operands)) + DONE; + /* Handle extractions from %ah et al. 
*/ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) FAIL; @@ -2781,10 +2784,10 @@ (const_string "QI")))]) (define_expand "insv" - [(set (zero_extract:SWI48 (match_operand:SWI48 0 "register_operand") - (match_operand:SI 1 "const_int_operand") - (match_operand:SI 2 "const_int_operand")) - (match_operand:SWI48 3 "register_operand"))] + [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand") + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand")) + (match_operand:SWI248 3 "register_operand"))] "" { rtx dst; @@ -2811,10 +2814,10 @@ }) (define_insn "insv_1" - [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "+Q,Q") - (const_int 8) - (const_int 8)) - (match_operand:SWI48 1 "general_x64nomem_operand" "Qn,m"))] + [(set (zero_extract:SWI248 (match_operand 0 "ext_register_operand" "+Q,Q") + (const_int 8) + (const_int 8)) + (match_operand:SWI248 1 "general_x64nomem_operand" "Qn,m"))] "" { if (CONST_INT_P (operands[1])) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 84ee7177913..a3ae9d5b39c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-07-16 Uros Bizjak + + PR target/66866 + * g++.dg/pr66866.C: New test. + 2015-07-15 Bill Schmidt * gcc.target/powerpc/vec-cmp-sel.c: New test. 
diff --git a/gcc/testsuite/g++.dg/pr66866.C b/gcc/testsuite/g++.dg/pr66866.C new file mode 100644 index 00000000000..72ff3b192a8 --- /dev/null +++ b/gcc/testsuite/g++.dg/pr66866.C @@ -0,0 +1,29 @@ +// { dg-do run { target i?86-*-* x86_64-*-* } } +// { dg-require-effective-target sse2_runtime } +// { dg-options "-O -msse2" } + +extern "C" void abort (void); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef short A __attribute__((__may_alias__)); + +__m128i __attribute__((noinline)) +shuf(const __m128i v) +{ + __m128i r; + + reinterpret_cast<A *>(&r)[5] = reinterpret_cast<const A *>(&v)[4]; + return r; +} + +int main() +{ + __attribute__((aligned(16))) short mem[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + + *reinterpret_cast<__m128i *>(mem) = shuf (*reinterpret_cast<__m128i *>(mem)); + + if (mem[5] != 4) + abort (); + + return 0; +} -- 2.30.2