From 32469ccc49bbecadceb571310c29b975be0286a5 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 17 Sep 2011 17:22:42 +0200 Subject: [PATCH] i386.c (ix86_build_const_vector): Handle V8SImode and V4DImode. * config/i386/i386.c (ix86_build_const_vector): Handle V8SImode and V4DImode. (ix86_build_signbit_mask): Likewise. (ix86_expand_int_vcond): Likewise. Handle V16HImode and V32QImode. (bdesc_args): Use CODE_FOR_{s,u}m{ax,in}v{32q,16h,8s}i3 instead of CODE_FOR_avx2_{s,u}m{ax,in}v{32q,16h,8s}i3. * config/i386/sse.md (avx2_<code><mode>3 umaxmin expand): Rename to... (<code><mode>3) ... this. (avx2_<code><mode>3 smaxmin expand): Rename to... (<code><mode>3) ... this. (smax<mode>3, smin<mode>3): Macroize using smaxmin code iterator. (smaxv2di3, sminv2di3): Macroize using smaxmin code iterator and VI8_AVX2 mode iterator. (umaxv2di3, uminv2di3): Macroize using umaxmin code iterator and VI8_AVX2 mode iterator. (vcond<V_256:mode><VI_256:mode>, vcondu<V_256:mode><VI_256:mode>): New expanders. From-SVN: r178929 --- gcc/ChangeLog | 22 +++++++ gcc/config/i386/i386.c | 43 ++++++++----- gcc/config/i386/sse.md | 139 ++++++++++++++++------------------------- 3 files changed, 105 insertions(+), 99 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3474e297a17..3498f979aff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2011-09-17 Jakub Jelinek + + * config/i386/i386.c (ix86_build_const_vector): Handle V8SImode + and V4DImode. + (ix86_build_signbit_mask): Likewise. + (ix86_expand_int_vcond): Likewise. Handle V16HImode and + V32QImode. + (bdesc_args): Use CODE_FOR_{s,u}m{ax,in}v{32q,16h,8s}i3 + instead of CODE_FOR_avx2_{s,u}m{ax,in}v{32q,16h,8s}i3. + * config/i386/sse.md (avx2_<code><mode>3 umaxmin expand): Rename + to... + (<code><mode>3) ... this. + (avx2_<code><mode>3 smaxmin expand): Rename to... + (<code><mode>3) ... this. + (smax<mode>3, smin<mode>3): Macroize using smaxmin code iterator. + (smaxv2di3, sminv2di3): Macroize using smaxmin code iterator and + VI8_AVX2 mode iterator. + (umaxv2di3, uminv2di3): Macroize using umaxmin code iterator and + VI8_AVX2 mode iterator. + (vcond<V_256:mode><VI_256:mode>, vcondu<V_256:mode><VI_256:mode>): + New expanders. 
+ 2011-09-17 Richard Sandiford * config/ia64/itanium2.md: Use match_test rather than eq/ne symbol_ref diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d6438394157..d4f589d8d19 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16946,7 +16946,9 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) switch (mode) { + case V8SImode: case V4SImode: + case V4DImode: case V2DImode: gcc_assert (vect); case V8SFmode: @@ -16987,6 +16989,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) /* Find the sign bit, sign extended to 2*HWI. */ switch (mode) { + case V8SImode: case V4SImode: case V8SFmode: case V4SFmode: @@ -16996,6 +16999,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) lo = 0x80000000, hi = lo < 0; break; + case V4DImode: case V2DImode: case V4DFmode: case V2DFmode: @@ -19107,17 +19111,26 @@ ix86_expand_int_vcond (rtx operands[]) switch (mode) { + case V8SImode: + case V4DImode: case V4SImode: case V2DImode: { rtx t1, t2, mask; rtx (*gen_sub3) (rtx, rtx, rtx); + switch (mode) + { + case V8SImode: gen_sub3 = gen_subv8si3; break; + case V4DImode: gen_sub3 = gen_subv4di3; break; + case V4SImode: gen_sub3 = gen_subv4si3; break; + case V2DImode: gen_sub3 = gen_subv2di3; break; + default: + gcc_unreachable (); + } /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */ mask = ix86_build_signbit_mask (mode, true, false); - gen_sub3 = (mode == V4SImode - ? gen_subv4si3 : gen_subv2di3); t1 = gen_reg_rtx (mode); emit_insn (gen_sub3 (t1, cop0, mask)); @@ -19130,6 +19143,8 @@ ix86_expand_int_vcond (rtx operands[]) } break; + case V32QImode: + case V16HImode: case V16QImode: case V8HImode: /* Perform a parallel unsigned saturating subtraction. 
*/ @@ -25723,18 +25738,18 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, - { 
OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, { OPTION_MASK_ISA_AVX2, 
CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI }, diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8d462472ec1..5d605bb6f5b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5806,7 +5806,7 @@ operands[1] = gen_lowpart (V1TImode, operands[1]); }) -(define_expand "avx2_3" +(define_expand "3" [(set (match_operand:VI124_256 0 "register_operand" "") (umaxmin:VI124_256 (match_operand:VI124_256 1 "nonimmediate_operand" "") @@ -5853,7 +5853,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_expand "avx2_3" +(define_expand "3" [(set (match_operand:VI124_256 0 "register_operand" "") (smaxmin:VI124_256 (match_operand:VI124_256 1 "nonimmediate_operand" "") @@ -5904,47 +5904,22 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_expand "smax3" - [(set (match_operand:VI14_128 0 "register_operand" "") - (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "") - (match_operand:VI14_128 2 "register_operand" "")))] - "TARGET_SSE2" -{ - if (TARGET_SSE4_1) - ix86_fixup_binary_operands_no_copy (SMAX, mode, operands); - else - { - rtx xops[6]; - bool ok; - - xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); - xops[4] = operands[1]; - xops[5] = operands[2]; - ok = ix86_expand_int_vcond (xops); - gcc_assert (ok); - DONE; - } -}) - -(define_expand "smin3" +(define_expand "3" [(set (match_operand:VI14_128 0 "register_operand" "") - (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "") - (match_operand:VI14_128 2 "register_operand" "")))] + (smaxmin:VI14_128 
(match_operand:VI14_128 1 "register_operand" "") + (match_operand:VI14_128 2 "register_operand" "")))] "TARGET_SSE2" { if (TARGET_SSE4_1) - ix86_fixup_binary_operands_no_copy (SMIN, mode, operands); + ix86_fixup_binary_operands_no_copy (, mode, operands); else { rtx xops[6]; bool ok; xops[0] = operands[0]; - xops[1] = operands[2]; - xops[2] = operands[1]; + xops[1] = operands[ == SMAX ? 1 : 2]; + xops[2] = operands[ == SMAX ? 2 : 1]; xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); xops[4] = operands[1]; xops[5] = operands[2]; @@ -5962,38 +5937,18 @@ "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (, V8HImode, operands);") -(define_expand "smaxv2di3" - [(set (match_operand:V2DI 0 "register_operand" "") - (smax:V2DI (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")))] - "TARGET_SSE4_2" -{ - rtx xops[6]; - bool ok; - - xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); - xops[4] = operands[1]; - xops[5] = operands[2]; - ok = ix86_expand_int_vcond (xops); - gcc_assert (ok); - DONE; -}) - -(define_expand "sminv2di3" - [(set (match_operand:V2DI 0 "register_operand" "") - (smin:V2DI (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")))] +(define_expand "3" + [(set (match_operand:VI8_AVX2 0 "register_operand" "") + (smaxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "") + (match_operand:VI8_AVX2 2 "register_operand" "")))] "TARGET_SSE4_2" { rtx xops[6]; bool ok; xops[0] = operands[0]; - xops[1] = operands[2]; - xops[2] = operands[1]; + xops[1] = operands[ == SMAX ? 1 : 2]; + xops[2] = operands[ == SMAX ? 
2 : 1]; xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); xops[4] = operands[1]; xops[5] = operands[2]; @@ -6110,38 +6065,18 @@ } }) -(define_expand "umaxv2di3" - [(set (match_operand:V2DI 0 "register_operand" "") - (umax:V2DI (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")))] - "TARGET_SSE4_2" -{ - rtx xops[6]; - bool ok; - - xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); - xops[4] = operands[1]; - xops[5] = operands[2]; - ok = ix86_expand_int_vcond (xops); - gcc_assert (ok); - DONE; -}) - -(define_expand "uminv2di3" - [(set (match_operand:V2DI 0 "register_operand" "") - (umin:V2DI (match_operand:V2DI 1 "register_operand" "") - (match_operand:V2DI 2 "register_operand" "")))] +(define_expand "3" + [(set (match_operand:VI8_AVX2 0 "register_operand" "") + (umaxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "") + (match_operand:VI8_AVX2 2 "register_operand" "")))] "TARGET_SSE4_2" { rtx xops[6]; bool ok; xops[0] = operands[0]; - xops[1] = operands[2]; - xops[2] = operands[1]; + xops[1] = operands[ == UMAX ? 1 : 2]; + xops[2] = operands[ == UMAX ? 
2 : 1]; xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); xops[4] = operands[1]; xops[5] = operands[2]; @@ -6265,6 +6200,23 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_expand "vcond" + [(set (match_operand:V_256 0 "register_operand" "") + (if_then_else:V_256 + (match_operator 3 "" + [(match_operand:VI_256 4 "nonimmediate_operand" "") + (match_operand:VI_256 5 "nonimmediate_operand" "")]) + (match_operand:V_256 1 "general_operand" "") + (match_operand:V_256 2 "general_operand" "")))] + "TARGET_AVX2 + && (GET_MODE_NUNITS (mode) + == GET_MODE_NUNITS (mode))" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcond" [(set (match_operand:V_128 0 "register_operand" "") (if_then_else:V_128 @@ -6297,6 +6249,23 @@ DONE; }) +(define_expand "vcondu" + [(set (match_operand:V_256 0 "register_operand" "") + (if_then_else:V_256 + (match_operator 3 "" + [(match_operand:VI_256 4 "nonimmediate_operand" "") + (match_operand:VI_256 5 "nonimmediate_operand" "")]) + (match_operand:V_256 1 "general_operand" "") + (match_operand:V_256 2 "general_operand" "")))] + "TARGET_AVX2 + && (GET_MODE_NUNITS (mode) + == GET_MODE_NUNITS (mode))" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcondu" [(set (match_operand:V_128 0 "register_operand" "") (if_then_else:V_128 -- 2.30.2