From: Uros Bizjak Date: Wed, 20 Apr 2011 19:58:23 +0000 (+0200) Subject: re PR target/48678 (unable to find a register to spill in class ‘GENERAL_REGS’) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3b84d61f446425ee51e583b1cf1540b7a5fe636e;p=gcc.git re PR target/48678 (unable to find a register to spill in class ‘GENERAL_REGS’) PR target/48678 * config/i386/i386.md (insv): Change operand 0 constraint to "register_operand". Change operand 1 and 2 constraint to "const_int_operand". Expand to pinsr{b,w,d,q} * when appropriate. * config/i386/sse.md (sse4_1_pinsrb): Export. (sse2_pinsrw): Ditto. (sse4_1_pinsrd): Ditto. (sse4_1_pinsrq): Ditto. * config/i386/i386-protos.h (ix86_expand_pinsr): Add prototype. * config/i386/i386.c (ix86_expand_pinsr): New. testsuite/ChangeLog: PR target/48678 * gcc.target/i386/sse2-pinsrw.c: New test. * gcc.target/i386/avx-vpinsrw.c: Ditto. * gcc.target/i386/sse4_1-insvqi.c: Ditto. * gcc.target/i386/sse2-insvhi.c: Ditto. * gcc.target/i386/sse4_1-insvsi.c: Ditto. * gcc.target/i386/sse4_1-insvdi.c: Ditto. From-SVN: r172792 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 85ec2e60747..cc9014cde34 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2011-04-20 Uros Bizjak + + PR target/48678 + * config/i386/i386.md (insv): Change operand 0 constraint to + "register_operand". Change operand 1 and 2 constraint to + "const_int_operand". Expand to pinsr{b,w,d,q} * when appropriate. + * config/i386/sse.md (sse4_1_pinsrb): Export. + (sse2_pinsrw): Ditto. + (sse4_1_pinsrd): Ditto. + (sse4_1_pinsrq): Ditto. + * config/i386/i386-protos.h (ix86_expand_pinsr): Add prototype. + * config/i386/i386.c (ix86_expand_pinsr): New. + 2011-04-20 Easwaran Raman * cfgexpand.c (add_alias_set_conflicts): Add conflicts with a variable @@ -72,8 +85,7 @@ 2011-04-20 Kai Tietz * fold-const.c (fold_binary_loc): Add handling for - (X & ~Y) | (~X & Y) and (X && !Y) | (!X && Y) optimization - to (X ^ Y). 
+ (X & ~Y) | (~X & Y) and (X && !Y) | (!X && Y) optimization to (X ^ Y). 2011-04-20 Andrew Stubbs @@ -246,11 +258,10 @@ 2011-04-19 Rainer Orth * doc/install.texi (Configuration, --enable-threads): Remove mach. - Add lynx, mipssde. - Sort table. + Add lynx, mipssde. Sort table. 2011-04-19 Xinliang David Li - + * ipa-cp.c (ipcp_update_profiling): Assert that scale_completement is not negative. diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index c651687cff4..d434d758794 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -203,6 +203,7 @@ extern void ix86_expand_vector_extract (bool, rtx, rtx, int); extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned); +extern bool ix86_expand_pinsr (rtx *); /* In i386-c.c */ extern void ix86_target_macros (void); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 80cb104b06e..7cb7c2db017 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -34105,6 +34105,88 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) /* ... or we use the special-case patterns. */ expand_vec_perm_even_odd_1 (&d, odd); } + +/* Expand an insert into a vector register through pinsr insn. + Return true if successful. 
*/ + +bool +ix86_expand_pinsr (rtx *operands) +{ + rtx dst = operands[0]; + rtx src = operands[3]; + + unsigned int size = INTVAL (operands[1]); + unsigned int pos = INTVAL (operands[2]); + + if (GET_CODE (dst) == SUBREG) + { + pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; + dst = SUBREG_REG (dst); + } + + if (GET_CODE (src) == SUBREG) + src = SUBREG_REG (src); + + switch (GET_MODE (dst)) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + { + enum machine_mode srcmode, dstmode; + rtx (*pinsr)(rtx, rtx, rtx, rtx); + + srcmode = mode_for_size (size, MODE_INT, 0); + + switch (srcmode) + { + case QImode: + if (!TARGET_SSE4_1) + return false; + dstmode = V16QImode; + pinsr = gen_sse4_1_pinsrb; + break; + + case HImode: + if (!TARGET_SSE2) + return false; + dstmode = V8HImode; + pinsr = gen_sse2_pinsrw; + break; + + case SImode: + if (!TARGET_SSE4_1) + return false; + dstmode = V4SImode; + pinsr = gen_sse4_1_pinsrd; + break; + + case DImode: + gcc_assert (TARGET_64BIT); + if (!TARGET_SSE4_1) + return false; + dstmode = V2DImode; + pinsr = gen_sse4_1_pinsrq; + break; + + default: + return false; + } + + dst = gen_lowpart (dstmode, dst); + src = gen_lowpart (srcmode, src); + + pos /= size; + + emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos))); + return true; + } + + default: + return false; + } +} /* This function returns the calling abi specific va_list type node. It returns the FNDECL specific va_list type. 
*/ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 38bdcd30c10..a6956b8a316 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10393,14 +10393,17 @@ }) (define_expand "insv" - [(set (zero_extract (match_operand 0 "ext_register_operand" "") - (match_operand 1 "const8_operand" "") - (match_operand 2 "const8_operand" "")) + [(set (zero_extract (match_operand 0 "register_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) (match_operand 3 "register_operand" ""))] "" { rtx (*gen_mov_insv_1) (rtx, rtx); + if (ix86_expand_pinsr (operands)) + DONE; + /* Handle insertions to %ah et al. */ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) FAIL; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 63da5dfe955..498f9b90401 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6051,7 +6051,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "*sse4_1_pinsrb" +(define_insn "sse4_1_pinsrb" [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") (vec_merge:V16QI (vec_duplicate:V16QI @@ -6083,7 +6083,7 @@ (set_attr "prefix" "orig,orig,vex,vex") (set_attr "mode" "TI")]) -(define_insn "*sse2_pinsrw" +(define_insn "sse2_pinsrw" [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") (vec_merge:V8HI (vec_duplicate:V8HI @@ -6117,7 +6117,7 @@ (set_attr "mode" "TI")]) ;; It must come before sse2_loadld since it is preferred. 
-(define_insn "*sse4_1_pinsrd" +(define_insn "sse4_1_pinsrd" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_merge:V4SI (vec_duplicate:V4SI @@ -6145,7 +6145,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "*sse4_1_pinsrq" +(define_insn "sse4_1_pinsrq" [(set (match_operand:V2DI 0 "register_operand" "=x,x") (vec_merge:V2DI (vec_duplicate:V2DI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c16c9bf6574..becce40e7f5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2011-04-20 Uros Bizjak + + PR target/48678 + * gcc.target/i386/sse2-pinsrw.c: New test. + * gcc.target/i386/avx-vpinsrw.c: Ditto. + * gcc.target/i386/sse4_1-insvqi.c: Ditto. + * gcc.target/i386/sse2-insvhi.c: Ditto. + * gcc.target/i386/sse4_1-insvsi.c: Ditto. + * gcc.target/i386/sse4_1-insvdi.c: Ditto. + 2011-04-20 Jason Merrill * g++.dg/template/const4.C: New. diff --git a/gcc/testsuite/gcc.target/i386/avx-vpinsrw-1.c b/gcc/testsuite/gcc.target/i386/avx-vpinsrw-1.c new file mode 100644 index 00000000000..5e1a7cb91c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpinsrw-1.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse2-pinsrw.c" diff --git a/gcc/testsuite/gcc.target/i386/sse2-insvhi.c b/gcc/testsuite/gcc.target/i386/sse2-insvhi.c new file mode 100644 index 00000000000..03a287042f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-insvhi.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-options "-O2 -msse2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +#include <emmintrin.h> +#include <string.h> + +typedef short T __attribute__((may_alias)); +struct S { __m128i d; }; + +__m128i +__attribute__((noinline)) +foo (__m128i y, short x) +{ + struct S s; + + s.d = y; 
+ ((T *) &s.d)[1] = x; + return s.d; +} + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + unsigned short s[8]; + } res, val, tmp; + unsigned short ins[4] = { 3, 4, 5, 6 }; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + val.i[3] = 0xF5E5D5C5; + + res.x = foo (val.x, ins[3]); + + tmp.x = val.x; + tmp.s[1] = ins[3]; + if (memcmp (&tmp, &res, sizeof (tmp))) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-pinsrw.c b/gcc/testsuite/gcc.target/i386/sse2-pinsrw.c new file mode 100644 index 00000000000..16167437c6d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-pinsrw.c @@ -0,0 +1,86 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-options "-O2 -msse2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +#include <emmintrin.h> +#include <string.h> + +#define msk0 0x00 +#define msk1 0x01 +#define msk2 0x02 +#define msk3 0x03 +#define msk4 0x04 +#define msk5 0x05 +#define msk6 0x06 +#define msk7 0x07 + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + unsigned short s[8]; + } res [8], val, tmp; + int masks[8]; + unsigned short ins[4] = { 3, 4, 5, 6 }; + int i; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + val.i[3] = 0xF5E5D5C5; + + /* Check pinsrw imm8, r32, xmm. 
*/ + res[0].x = _mm_insert_epi16 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi16 (val.x, ins[0], msk1); + res[2].x = _mm_insert_epi16 (val.x, ins[0], msk2); + res[3].x = _mm_insert_epi16 (val.x, ins[0], msk3); + res[4].x = _mm_insert_epi16 (val.x, ins[0], msk4); + res[5].x = _mm_insert_epi16 (val.x, ins[0], msk5); + res[6].x = _mm_insert_epi16 (val.x, ins[0], msk6); + res[7].x = _mm_insert_epi16 (val.x, ins[0], msk7); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + masks[4] = msk4; + masks[5] = msk5; + masks[6] = msk6; + masks[7] = msk7; + + for (i = 0; i < 8; i++) + { + tmp.x = val.x; + tmp.s[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrw imm8, m16, xmm. */ + for (i = 0; i < 8; i++) + { + res[i].x = _mm_insert_epi16 (val.x, ins[i % 2], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 8; i++) + { + tmp.x = val.x; + tmp.s[masks[i]] = ins[i % 2]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insvdi.c b/gcc/testsuite/gcc.target/i386/sse4_1-insvdi.c new file mode 100644 index 00000000000..da090ba1582 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insvdi.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> +#include <string.h> + +typedef long T __attribute__((may_alias)); +struct S { __m128i d; }; + +__m128i +__attribute__((noinline)) +foo (__m128i y, long x) +{ + struct S s; + + s.d = y; + ((T *) &s.d)[1] = x; + return s.d; +} + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + unsigned long l[2]; + } res, val, tmp; + unsigned long ins[4] = { 3, 4, 5, 6 }; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + 
val.i[3] = 0xF5E5D5C5; + + res.x = foo (val.x, ins[3]); + + tmp.x = val.x; + tmp.l[1] = ins[3]; + if (memcmp (&tmp, &res, sizeof (tmp))) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insvqi.c b/gcc/testsuite/gcc.target/i386/sse4_1-insvqi.c new file mode 100644 index 00000000000..784201e2d94 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insvqi.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> +#include <string.h> + +typedef char T __attribute__((may_alias)); +struct S { __m128i d; }; + +__m128i +__attribute__((noinline)) +foo (__m128i y, char x) +{ + struct S s; + + s.d = y; + ((T *) &s.d)[1] = x; + return s.d; +} + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + unsigned char c[16]; + } res, val, tmp; + unsigned char ins[4] = { 3, 4, 5, 6 }; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + val.i[3] = 0xF5E5D5C5; + + res.x = foo (val.x, ins[3]); + + tmp.x = val.x; + tmp.c[1] = ins[3]; + if (memcmp (&tmp, &res, sizeof (tmp))) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insvsi.c b/gcc/testsuite/gcc.target/i386/sse4_1-insvsi.c new file mode 100644 index 00000000000..569b8f269c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insvsi.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> +#include <string.h> + +typedef int T __attribute__((may_alias)); +struct S { __m128i d; }; + +__m128i +__attribute__((noinline)) +foo (__m128i y, int x) +{ + struct S s; + + s.d = y; + ((T *) &s.d)[1] = x; + return s.d; +} + +static void +TEST (void) +{ + union + { + __m128i 
x; + unsigned int i[4]; + } res, val, tmp; + unsigned int ins[4] = { 3, 4, 5, 6 }; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + val.i[3] = 0xF5E5D5C5; + + res.x = foo (val.x, ins[3]); + + tmp.x = val.x; + tmp.i[1] = ins[3]; + if (memcmp (&tmp, &res, sizeof (tmp))) + abort (); +}