From 6784c6e03313eefa52ceb1a40776051ad1a720f1 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 15 May 2008 12:17:45 +0200 Subject: [PATCH] sse.md (*vec_concatv2sf_sse4_1): New insn pattern. * config/i386/sse.md (*vec_concatv2sf_sse4_1): New insn pattern. (*vec_concatv2si_sse4_1): Use vector_move_operand predicate for operand 2. Remove pinsr{q,d} with 0x0 immediate operand from insn alternatives. Add missing alternatives. (*vec_concatv2di_rex64_sse4_1): Likewise. (*vec_concatv2si_sse2): Use "x" register constraint instead of "Y2". (*vec_concatv2di_rex64_sse): Rename from *vec_concatv2di_rex64. Require TARGET_SSE. testsuite/ChangeLog: * gcc.target/i386/sse-set-ps-1.c: New. * gcc.target/i386/sse4_1-set-ps-1.c: Likewise. From-SVN: r135331 --- gcc/ChangeLog | 48 +++++++----- gcc/config/i386/sse.md | 77 +++++++++++++------ gcc/testsuite/ChangeLog | 7 +- gcc/testsuite/gcc.target/i386/sse-set-ps-1.c | 40 ++++++++++ .../gcc.target/i386/sse4_1-set-ps-1.c | 41 ++++++++++ 5 files changed, 167 insertions(+), 46 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/sse-set-ps-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 74c5765d266..a76ec9d81ef 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2008-05-15 Uros Bizjak + H.J. Lu + + * config/i386/sse.md (*vec_concatv2sf_sse4_1): New insn pattern. + (*vec_concatv2si_sse4_1): Use vector_move_operand predicate + for operand 2. Remove pinsr{q,d} with 0x0 immediate operand from + insn alternatives. Add missing alternatives. + (*vec_concatv2di_rex64_sse4_1): Likewise. + (*vec_concatv2si_sse2): Use "x" register constraint instead of "Y2". + (*vec_concatv2di_rex64_sse): Rename from *vec_concatv2di_rex64. + Require TARGET_SSE. + 2008-05-15 Richard Guenther PR tree-optimization/36009 @@ -71,8 +83,7 @@ (vrotl@var{m}3): Ditto. (vrotr@var{m}3): Ditto. - * config/i386/i386.md (PPERM_SRC): Move PPERM masks here from - i386.c. 
+ * config/i386/i386.md (PPERM_SRC): Move PPERM masks here from i386.c. (PPERM_INVERT): Ditto. (PPERM_REVERSE): Ditto. (PPERM_REV_INV): Ditto. @@ -122,8 +133,7 @@ * config/rs6000/rs6000.c (bdesc_2arg): Change the names of vector shift patterns. - * config/rs6000/altivec.md (vashl3): Rename from - ashl3. + * config/rs6000/altivec.md (vashl3): Rename from ashl3. (vlshr3): Rename from vlshr3. (vashr3): Rename from vashr3. (mulv4sf3): Change the names of vector shift patterns. @@ -133,8 +143,7 @@ * config/spu/spu.c (spu_initialize_trampoline): Rename vector shift insns. - * config/spu/spu-builtins.def (SI_SHLH): Rename vector shift - insns. + * config/spu/spu-builtins.def (SI_SHLH): Rename vector shift insns. (SI_SHLHI): Ditto. (SI_SHL): Ditto. (SI_SHLI): Ditto. @@ -176,9 +185,8 @@ 2008-05-14 Michael Meissner PR target/36224 - * config/i386/sse.md (vec_widen_smult_hi_v4si): Delete, using - unsigned multiply gives the wrong value when doing widening - multiplies. + * config/i386/sse.md (vec_widen_smult_hi_v4si): Delete, using unsigned + multiply gives the wrong value when doing widening multiplies. (vec_widen_smult_lo_v4si): Ditto. 2008-05-14 Kenneth Zadeck @@ -207,8 +215,7 @@ 2008-05-14 Adam Nemet * calls.c (emit_library_call_value_1): Restore code clearing - ECF_LIBCALL_BLOCK to ensure that we only call end_sequence - once. + ECF_LIBCALL_BLOCK to ensure that we only call end_sequence once. 2008-05-14 Olivier Hainque Nicolas Roche @@ -234,7 +241,7 @@ (emit_no_conflict_block): Removed. * optabls.h: (emit_no_conflict_block): Removed. * cse.c (cse_extended_basic_block): Remove search for - REG_NO_CONFLICT note. + REG_NO_CONFLICT note. * global.c: Removed incorrect comment added in revision 117. * expr.c (convert_move): Change call to emit_no_conflict_block to emit_insn. @@ -253,20 +260,19 @@ REG_NO_CONFLICT notes. * loop_invariant.c (find_invariant_insn): Removed REG_NO_CONFLICT case. - * combine.c (can_combine_p, distribute_notes): Removed REG_NO_CONFLICT - case. 
- * config/cris/cris.md (movdi pattern): Changed - emit_no_conflict_block to emit_insns. + * combine.c (can_combine_p, distribute_notes): Removed + REG_NO_CONFLICT case. + * config/cris/cris.md (movdi pattern): Changed emit_no_conflict_block + to emit_insns. * config/mn10300/mn10300.md (absdf2, negdf2 patterns): Ditto. * config/m68k/m68k.md (negdf2, negxf2, absdf2, absxf2 patterns): - Ditto. + Ditto. * reg-notes.def (NO_CONFLICT): Removed. 2008-05-14 David S. Miller * config/sparc/sparc.c (sparc_profile_hook): If - NO_PROFILE_COUNTERS, don't generate and pass a label - into mcount. + NO_PROFILE_COUNTERS, don't generate and pass a label into mcount. * config/sparc/linux.h (NO_PROFILE_COUNTERS): Define as 1. * config/sparc/linux64.h (NO_PROFILE_COUNTERS): Likewise. @@ -304,8 +310,8 @@ tree-ssanames.c. Convert to static inline. Call make_ssa_name_fn. * omp-low.c (expand_omp_parallel): * tree-flow-inline.h (redirect_edge_var_map_result): - * tree-ssa.c (init_tree_ssa): Add argument FN. Use it instead of cfun. - Update all users. + * tree-ssa.c (init_tree_ssa): Add argument FN. + Use it instead of cfun. Update all users. 2008-05-13 Tom Tromey diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 23a63d8c7ef..6e781c1c439 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2257,6 +2257,24 @@ [(set_attr "type" "sselog1") (set_attr "mode" "V4SF")]) +;; Although insertps takes register source, we prefer +;; unpcklps with register source since it is shorter. 
+(define_insn "*vec_concatv2sf_sse4_1"
+  [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y,*y")
+        (vec_concat:V2SF
+          (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0, m")
+          (match_operand:SF 2 "vector_move_operand" " x,m,C,*y, C")))]
+  "TARGET_SSE4_1"
+  "@
+   unpcklps\t{%2, %0|%0, %2}
+   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
+   movss\t{%1, %0|%0, %1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "prefix_extra" "*,1,*,*,*") ;; Only alternative 1 (insertps) is an SSE4.1 insn.
+   (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
 ;; ??? In theory we can match memory for the MMX alternative, but allowing
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
 ;; alternatives pretty much forces the MMX alternative to be chosen.
@@ -4801,25 +4819,29 @@
    (set_attr "mode" "TI,V4SF")])
 
 (define_insn "*vec_concatv2si_sse4_1"
-  [(set (match_operand:V2SI 0 "register_operand" "=x,x")
+  [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
         (vec_concat:V2SI
-          (match_operand:SI 1 "nonimmediate_operand" "0,rm")
-          (match_operand:SI 2 "nonimmediate_operand" "rm,0")))]
+          (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
+          (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
   "TARGET_SSE4_1"
   "@
-   pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
-   pinsrd\t{$0x0, %2, %0|%0, %2, 0x0}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+   pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "prefix_extra" "1,*,*,*,*")
+   (set_attr "mode" "TI,TI,TI,DI,DI")])
 
 ;; ??? In theory we can match memory for the MMX alternative, but allowing
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
 ;; alternatives pretty much forces the MMX alternative to be chosen.
(define_insn "*vec_concatv2si_sse2" - [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y") + [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y") (vec_concat:V2SI - (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm") - (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))] + (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm") + (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))] "TARGET_SSE2" "@ punpckldq\t{%2, %0|%0, %2} @@ -4856,18 +4878,6 @@ [(set_attr "type" "sselog,ssemov,ssemov") (set_attr "mode" "TI,V4SF,V2SF")]) -(define_insn "*vec_concatv2di_rex64_sse4_1" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (vec_concat:V2DI - (match_operand:DI 1 "nonimmediate_operand" "0,rm") - (match_operand:DI 2 "nonimmediate_operand" "rm,0")))] - "TARGET_64BIT && TARGET_SSE4_1" - "@ - pinsrq\t{$0x1, %2, %0|%0, %2, 0x1} - pinsrq\t{$0x0, %2, %0|%0, %2, 0x0}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - (define_insn "vec_concatv2di" [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x") (vec_concat:V2DI @@ -4884,12 +4894,31 @@ [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov") (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")]) -(define_insn "*vec_concatv2di_rex64" +(define_insn "*vec_concatv2di_rex64_sse4_1" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x") + (vec_concat:V2DI + (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m") + (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))] + "TARGET_64BIT && TARGET_SSE4_1" + "@ + pinsrq\t{$0x1, %2, %0|%0, %2, 0x1} + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + punpcklqdq\t{%2, %0|%0, %2} + movlhps\t{%2, %0|%0, %2} + movhps\t{%2, %0|%0, %2} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov") + (set_attr "prefix_extra" "1,*,*,*,*,*,*,*") + (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) + +(define_insn 
"*vec_concatv2di_rex64_sse" [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m") (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))] - "TARGET_64BIT" + "TARGET_64BIT && TARGET_SSE" "@ movq\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1} diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d8c7caedc5e..4c08485a0bd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,9 +1,14 @@ +2008-05-15 H.J. Lu + + * gcc.target/i386/sse-set-ps-1.c: New. + * gcc.target/i386/sse4_1-set-ps-1.c: Likewise. + 2008-05-15 Richard Guenther PR tree-optimization/36009 PR tree-optimization/36204 * gcc.dg/tree-ssa/ssa-lim-5.c: New testcase. - * gcc.dg/tree-ssa/ssa-lim-6.c: Likewise.. + * gcc.dg/tree-ssa/ssa-lim-6.c: Likewise. 2008-05-15 Richard Guenther diff --git a/gcc/testsuite/gcc.target/i386/sse-set-ps-1.c b/gcc/testsuite/gcc.target/i386/sse-set-ps-1.c new file mode 100644 index 00000000000..8232c7229c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-set-ps-1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse" } */ + +#include "sse-check.h" + +#ifdef DEBUG +#include +#endif + +#include + +static void +__attribute__((noinline)) +test (float *v) +{ + union + { + __m128 x; + float f[4]; + } u; + unsigned int i; + + u.x = _mm_set_ps (v[3], v[2], v[1], v[0]); + + for (i = 0; i < sizeof (v) / sizeof (v[0]); i++) + if (v[i] != u.f[i]) + { +#ifdef DEBUG + printf ("%i: %f != %f\n", i, v[i], u.f[i]); +#endif + abort (); + } +} + +static void +sse_test (void) +{ + float v[4] = { -3, 2, 1, 9 }; + test (v); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c new file mode 100644 index 00000000000..fe77d94ad7d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-set-ps-1.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 
-msse4.1" } */ + +#include "sse4_1-check.h" + +#ifdef DEBUG +#include +#endif + +#include + +static void +__attribute__((noinline)) +test (float *v) +{ + union + { + __m128 x; + float f[4]; + } u; + unsigned int i; + + u.x = _mm_set_ps (v[3], v[2], v[1], v[0]); + + for (i = 0; i < sizeof (v) / sizeof (v[0]); i++) + if (v[i] != u.f[i]) + { +#ifdef DEBUG + printf ("%i: %f != %f\n", i, v[i], u.f[i]); +#endif + abort (); + } +} + +static void +sse4_1_test (void) +{ + float v[4] = { -3, 2, 1, 9 }; + test (v); +} -- 2.30.2