From 07c0852e00d2b163f52cdc47828b4277ca7e4889 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Wed, 6 Apr 2011 11:24:46 +0200
Subject: [PATCH] i386.md (attribute isa): New.

	* config/i386/i386.md (attribute isa): New.
	(attribute enabled): New.
	(setcc_<mode>_sse): Merge from *{avx,sse}_setcc<mode>.
	(*fop_<mode>_comm_mixed): Merge with *fop_<mode>_comm_mixed_avx.
	(*fop_<mode>_comm_sse): Merge with *fop_<mode>_comm_avx.
	(*fop_<mode>_1_mixed): Merge with *fop_<mode>_1_mixed_avx.
	(*fop_<mode>_1_sse): Merge with *fop_<mode>_1_avx.
	(<code><mode>3): Merge with *avx_<code><mode>3.
	(*ieee_smin<mode>3): Merge with *avx_ieee_smin<mode>3.
	(*ieee_smax<mode>3): Merge with *avx_ieee_smax<mode>3.

	* config/i386/sse.md (VF): New mode iterator.
	(VF1): Ditto.
	(VF2): Ditto.
	(VF_128): Ditto.
	(SSEMODEF4): Remove.
	(attribute sse): Handle V8SF and V4DF modes.
	(<code><mode>2): Use VF mode iterator.
	(*absneg<mode>2): Merge from *{avx,sse}_absneg<mode>2.  Use VF
	mode iterator.
	(<plusminus_insn><mode>3): Use VF mode iterator.
	(*<plusminus_insn><mode>3): Merge with *avx_<plusminus_insn><mode>3.
	Use VF mode iterator.
	(<sse>_vm<plusminus_insn><mode>3): Merge with
	*avx_vm<plusminus_insn><mode>3.  Use VF_128 mode iterator.
	(mul<mode>3): Use VF mode iterator.
	(*mul<mode>3): Merge with *avx_mul<mode>3.  Use VF mode iterator.
	(<sse>_vmmul<mode>3): Merge with *avx_vmmul<mode>3.  Use VF_128
	mode iterator.
	(div<mode>3): Merge from divv2df3 and divv4df3.
	(div<mode>3): Merge from divv4sf3 and divv8sf3.
	(<sse>_div<mode>3): Merge with *avx_div<mode>3.  Use VF mode iterator.
	(<sse>_vmdiv<mode>3): Merge with *avx_vmdiv<mode>3.  Use VF_128
	mode iterator.
	(<sse>_rcp<mode>2): Merge from avx_rcpv8sf2 and sse_rcpv4sf2.
	Use VF1 mode iterator.
	(sse_vmrcpv4sf2): Merge with *avx_vmrcpv4sf2.
	(sqrt<mode>2): New expander.
	(sqrt<mode>2): Merge from sqrtv4sf2 and sqrtv8sf2.
	(<sse>_sqrt<mode>2): Merge from avx_sqrtv8sf2, sse_sqrtv4sf2, sqrtv4df2
	and sqrtv2df2.  Use VF mode iterator.
	(<sse>_vmsqrt<mode>2): Merge with *avx_vmsqrt<mode>2.  Use VF_128
	mode iterator.
	(rsqrt<mode>2): Merge from rsqrtv4sf2 and rsqrtv8sf2.
	(<sse>_rsqrt<mode>2): Merge from avx_rsqrtv8sf2 and sse_rsqrtv4sf2.
	Use VF1 mode iterator.
	(sse_vmrsqrtv4sf2): Merge with *avx_vmrsqrtv4sf2.
	(<code><mode>3): Use VF mode iterator.
	(*<code><mode>3_finite): Merge with
	*avx_<code><mode>3_finite.  Use VF mode iterator.
	(*<code><mode>3): Merge with *avx_<code><mode>3.
	(<sse>_vm<code><mode>3): Merge with
	*avx_vm<code><mode>3.  Use VF_128 mode iterator.
	(*ieee_smin<mode>3): Merge with *avx_ieee_smin<mode>3.  Use VF
	mode iterator.
	(*ieee_smax<mode>3): Merge with *avx_ieee_smax<mode>3.  Use VF
	mode iterator.
	(sse3_addsubv2df3): Merge with *avx_addsubv2df3.
	(sse3_addsubv4sf3): Merge with *avx_addsubv4sf3.
	(sse3_h<plusminus_insn>v2df3): Merge with *avx_h<plusminus_insn>v2df3.
	(sse3_h<plusminus_insn>v4sf3): Merge with *avx_h<plusminus_insn>v4sf3.
	(avx_cmp<mode>3): Rename from avx_cmp<ssemodesuffix><mode>3.  Use
	VF mode iterator.
	(avx_vmcmp<mode>3): Rename from avx_cmp<ssescalarmodesuffix><mode>3.
	Use VF_128 mode iterator.
	(<sse>_maskcmp<mode>3): Merge with *avx_maskcmp<mode>3.  Use VF
	mode iterator.
	(<sse>_vmmaskcmp<mode>3): Merge with *avx_vmmaskcmp<mode>3.  Use
	VF_128 mode iterator.
	(vcond<mode>): Use VF mode iterator.
	* config/i386/predicates.md (sse_comparison_operator): Merge with
	avx_comparison_float_operator.  Do not declare as special_predicate.
	* config/i386/i386.c (struct builtin_description): Update for renamed
	compare patterns.
	(ix86_expand_args_builtin): Ditto.
	(ix86_expand_sse_compare_mask): Ditto.

From-SVN: r172028
---
 gcc/ChangeLog                 |   83 ++-
 gcc/config/i386/i386.c        |   34 +-
 gcc/config/i386/i386.md       |  238 +++----
 gcc/config/i386/predicates.md |   18 +-
 gcc/config/i386/sse.md        | 1111 ++++++++++++---------------------
 5 files changed, 572 insertions(+), 912 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 24207ac5910..74f210ffb19 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,10 +1,89 @@
+2011-04-06  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/i386.md (attribute isa): New.
+	(attribute enabled): New.
+	(setcc_<mode>_sse): Merge from *{avx,sse}_setcc<mode>.
+	(*fop_<mode>_comm_mixed): Merge with *fop_<mode>_comm_mixed_avx.
+	(*fop_<mode>_comm_sse): Merge with *fop_<mode>_comm_avx.
+	(*fop_<mode>_1_mixed): Merge with *fop_<mode>_1_mixed_avx.
+	(*fop_<mode>_1_sse): Merge with *fop_<mode>_1_avx.
+	(<code><mode>3): Merge with *avx_<code><mode>3.
+	(*ieee_smin<mode>3): Merge with *avx_ieee_smin<mode>3.
+	(*ieee_smax<mode>3): Merge with *avx_ieee_smax<mode>3.
+
+	* config/i386/sse.md (VF): New mode iterator.
+	(VF1): Ditto.
+	(VF2): Ditto.
+	(VF_128): Ditto.
+	(SSEMODEF4): Remove.
+	(attribute sse): Handle V8SF and V4DF modes.
+	(<code><mode>2): Use VF mode iterator.
+	(*absneg<mode>2): Merge from *{avx,sse}_absneg<mode>2.  Use VF
+	mode iterator.
+	(<plusminus_insn><mode>3): Use VF mode iterator.
+	(*<plusminus_insn><mode>3): Merge with *avx_<plusminus_insn><mode>3.
+	Use VF mode iterator.
+	(<sse>_vm<plusminus_insn><mode>3): Merge with
+	*avx_vm<plusminus_insn><mode>3.  Use VF_128 mode iterator.
+	(mul<mode>3): Use VF mode iterator.
+	(*mul<mode>3): Merge with *avx_mul<mode>3.  Use VF mode iterator.
+	(<sse>_vmmul<mode>3): Merge with *avx_vmmul<mode>3.  Use VF_128
+	mode iterator.
+	(div<mode>3): Merge from divv2df3 and divv4df3.
+	(div<mode>3): Merge from divv4sf3 and divv8sf3.
+	(<sse>_div<mode>3): Merge with *avx_div<mode>3.  Use VF mode iterator.
+	(<sse>_vmdiv<mode>3): Merge with *avx_vmdiv<mode>3.  Use VF_128
+	mode iterator.
+	(<sse>_rcp<mode>2): Merge from avx_rcpv8sf2 and sse_rcpv4sf2.
+	Use VF1 mode iterator.
+	(sse_vmrcpv4sf2): Merge with *avx_vmrcpv4sf2.
+	(sqrt<mode>2): New expander.
+	(sqrt<mode>2): Merge from sqrtv4sf2 and sqrtv8sf2.
+	(<sse>_sqrt<mode>2): Merge from avx_sqrtv8sf2, sse_sqrtv4sf2, sqrtv4df2
+	and sqrtv2df2.  Use VF mode iterator.
+	(<sse>_vmsqrt<mode>2): Merge with *avx_vmsqrt<mode>2.  Use VF_128
+	mode iterator.
+	(rsqrt<mode>2): Merge from rsqrtv4sf2 and rsqrtv8sf2.
+	(<sse>_rsqrt<mode>2): Merge from avx_rsqrtv8sf2 and sse_rsqrtv4sf2.
+	Use VF1 mode iterator.
+	(sse_vmrsqrtv4sf2): Merge with *avx_vmrsqrtv4sf2.
+	(<code><mode>3): Use VF mode iterator.
+	(*<code><mode>3_finite): Merge with
+	*avx_<code><mode>3_finite.  Use VF mode iterator.
+	(*<code><mode>3): Merge with *avx_<code><mode>3.
+	(<sse>_vm<code><mode>3): Merge with
+	*avx_vm<code><mode>3.  Use VF_128 mode iterator.
+	(*ieee_smin<mode>3): Merge with *avx_ieee_smin<mode>3.  Use VF
+	mode iterator.
+	(*ieee_smax<mode>3): Merge with *avx_ieee_smax<mode>3.  Use VF
+	mode iterator.
+	(sse3_addsubv2df3): Merge with *avx_addsubv2df3.
+	(sse3_addsubv4sf3): Merge with *avx_addsubv4sf3.
+	(sse3_h<plusminus_insn>v2df3): Merge with *avx_h<plusminus_insn>v2df3.
+	(sse3_h<plusminus_insn>v4sf3): Merge with *avx_h<plusminus_insn>v4sf3.
+	(avx_cmp<mode>3): Rename from avx_cmp<ssemodesuffix><mode>3.  Use
+	VF mode iterator.
+	(avx_vmcmp<mode>3): Rename from avx_cmp<ssescalarmodesuffix><mode>3.
+	Use VF_128 mode iterator.
+	(<sse>_maskcmp<mode>3): Merge with *avx_maskcmp<mode>3.  Use VF
+	mode iterator.
+	(<sse>_vmmaskcmp<mode>3): Merge with *avx_vmmaskcmp<mode>3.  Use
+	VF_128 mode iterator.
+	(vcond<mode>): Use VF mode iterator.
+	* config/i386/predicates.md (sse_comparison_operator): Merge with
+	avx_comparison_float_operator.  Do not declare as special_predicate.
+	* config/i386/i386.c (struct builtin_description): Update for renamed
+	compare patterns.
+	(ix86_expand_args_builtin): Ditto.
+	(ix86_expand_sse_compare_mask): Ditto.
+
 2011-04-06  Richard Guenther  <rguenther@suse.de>
 
 	* tree-inline.c (estimate_num_insns): For calls simply account
 	for all passed arguments and a used return value.
 
 2011-04-06  Richard Guenther  <rguenther@suse.de>
-
+
 	PR tree-optimization/47663
 	* cgraph.h (struct cgraph_edge): Add call_stmt_size and
 	call_stmt_time fields.
@@ -618,7 +697,7 @@
 	* config.gcc (*-*-mingw*): Allow as option the posix threading
 	model.
 
-	* config/i386/mingw32.h ( SPEC_PTHREAD1, SPEC_PTHREAD2):
+	* config/i386/mingw32.h (SPEC_PTHREAD1, SPEC_PTHREAD2):
 	New macros defined dependent to TARGET_USE_PTHREAD_BY_DEFAULT
 	definition.
 	(CPP_SPEC): Add pthread/no-pthread handling.
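The heart of the change is the new "isa"/"enabled" attribute pair added to i386.md below: a merged pattern lists the classic two-operand SSE alternative and the three-operand AVX alternative side by side, tags them "noavx,avx", and constraint matching simply skips any alternative whose "enabled" attribute evaluates to zero for the current target.  A minimal sketch of the mechanism, reusing the attribute definitions from the i386.md hunk together with a hypothetical scalar add pattern (illustrative only, not one of the patterns touched by this patch):

;; Per-alternative ISA classification (as added to i386.md below).
(define_attr "isa" "base,noavx,avx"
  (const_string "base"))

;; An alternative whose "enabled" attribute evaluates to 0 is ignored
;; during constraint matching and register allocation.
(define_attr "enabled" ""
  (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
	]
	(const_int 1)))

;; Hypothetical merged pattern: alternative 0 is the two-operand SSE
;; form, alternative 1 the VEX-encoded three-operand AVX form.
(define_insn "*addsf3_sse_avx_example"
  [(set (match_operand:SF 0 "register_operand" "=x,x")
	(plus:SF (match_operand:SF 1 "nonimmediate_operand" "%0,x")
		 (match_operand:SF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE_MATH && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   addss\t{%2, %0|%0, %2}
   vaddss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

This is why the merged patterns in the hunks that follow grow an extra "x"/"xm" alternative plus matching "isa" and "prefix" attribute lists instead of keeping a separate *avx_* define_insn.
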
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2b2823afd59..2e174710a75 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -25342,12 +25342,12 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT }, @@ -26994,12 +26994,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, error ("the last argument must be a 1-bit immediate"); return const0_rtx; - case CODE_FOR_avx_cmpsdv2df3: - case CODE_FOR_avx_cmpssv4sf3: - case CODE_FOR_avx_cmppdv2df3: - case CODE_FOR_avx_cmppsv4sf3: - case CODE_FOR_avx_cmppdv4df3: - case CODE_FOR_avx_cmppsv8sf3: + case CODE_FOR_avx_vmcmpv2df3: + case CODE_FOR_avx_vmcmpv4sf3: + case CODE_FOR_avx_cmpv2df3: + case CODE_FOR_avx_cmpv4sf3: + case CODE_FOR_avx_cmpv4df3: + case CODE_FOR_avx_cmpv8sf3: error ("the last argument must be a 5-bit immediate"); return const0_rtx; @@ -32359,6 +32359,7 @@ static rtx ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, bool swap_operands) { + rtx (*insn)(rtx, 
rtx, rtx, rtx); enum machine_mode mode = GET_MODE (op0); rtx mask = gen_reg_rtx (mode); @@ -32369,13 +32370,10 @@ ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, op1 = tmp; } - if (mode == DFmode) - emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1, - gen_rtx_fmt_ee (code, mode, op0, op1))); - else - emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1, - gen_rtx_fmt_ee (code, mode, op0, op1))); + insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse; + emit_insn (insn (mask, op0, op1, + gen_rtx_fmt_ee (code, mode, op0, op1))); return mask; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b74136368ae..f896bc9b903 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -698,6 +698,16 @@ ;; Define attribute to indicate unaligned ssemov insns (define_attr "movu" "0,1" (const_string "0")) +;; Used to control the "enabled" attribute on a per-instruction basis. +(define_attr "isa" "base,noavx,avx" + (const_string "base")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX") + (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX") + ] + (const_int 1))) + ;; Describe a user's asm statement. (define_asm_attributes [(set_attr "length" "128") @@ -10708,27 +10718,19 @@ ;; 0xffffffff is NaN, but not in normalized form, so we can't represent ;; it directly. -(define_insn "*avx_setcc" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (match_operator:MODEF 1 "avx_comparison_float_operator" - [(match_operand:MODEF 2 "register_operand" "x") - (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] - "TARGET_AVX" - "vcmp%D1s\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "prefix" "vex") - (set_attr "length_immediate" "1") - (set_attr "mode" "")]) - -(define_insn "*sse_setcc" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (match_operator:MODEF 1 "sse_comparison_operator" - [(match_operand:MODEF 2 "register_operand" "0") - (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] +(define_insn "setcc__sse" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (match_operator:MODEF 3 "sse_comparison_operator" + [(match_operand:MODEF 1 "register_operand" "0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))] "SSE_FLOAT_MODE_P (mode)" - "cmp%D1s\t{%3, %0|%0, %3}" - [(set_attr "type" "ssecmp") + "@ + cmp%D3s\t{%2, %0|%0, %2} + vcmp%D3s\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) ;; Basic conditional jump instructions. @@ -12865,66 +12867,32 @@ ;; Gcc is slightly more smart about handling normal two address instructions ;; so use special patterns for add and mull. 
-(define_insn "*fop__comm_mixed_avx" - [(set (match_operand:MODEF 0 "register_operand" "=f,x") - (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") - (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] - "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 - && COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (eq_attr "alternative" "1") - (if_then_else (match_operand:MODEF 3 "mult_operator" "") - (const_string "ssemul") - (const_string "sseadd")) - (if_then_else (match_operand:MODEF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop")))) - (set_attr "prefix" "orig,maybe_vex") - (set_attr "mode" "")]) - (define_insn "*fop__comm_mixed" - [(set (match_operand:MODEF 0 "register_operand" "=f,x") + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0") - (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,xm")]))] "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (if_then_else (eq_attr "alternative" "1") + (if_then_else (eq_attr "alternative" "1,2") (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "ssemul") (const_string "sseadd")) (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) - (set_attr "mode" "")]) - -(define_insn "*fop__comm_avx" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "%x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] - "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH - && COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:MODEF 3 "mult_operator" "") - (const_string "ssemul") - (const_string "sseadd"))) - (set_attr "prefix" "vex") + (set_attr "isa" "base,noavx,avx") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "")]) (define_insn "*fop__comm_sse" - [(set (match_operand:MODEF 0 "register_operand" "=x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x") (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "%0") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" @@ -12933,6 +12901,8 @@ (if_then_else (match_operand:MODEF 3 "mult_operator" "") (const_string "ssemul") (const_string "sseadd"))) + (set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) (define_insn "*fop__comm_i387" @@ -12950,50 +12920,23 @@ (const_string "fop"))) (set_attr "mode" "")]) -(define_insn "*fop__1_mixed_avx" - [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") - (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x") - 
(match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] - "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 - && !COMMUTATIVE_ARITH_P (operands[3]) - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(and (eq_attr "alternative" "2") - (match_operand:MODEF 3 "mult_operator" "")) - (const_string "ssemul") - (and (eq_attr "alternative" "2") - (match_operand:MODEF 3 "div_operator" "")) - (const_string "ssediv") - (eq_attr "alternative" "2") - (const_string "sseadd") - (match_operand:MODEF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:MODEF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "prefix" "orig,orig,maybe_vex") - (set_attr "mode" "")]) - (define_insn "*fop__1_mixed" - [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0") - (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm,xm")]))] "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 && !COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(and (eq_attr "alternative" "2") + (cond [(and (eq_attr "alternative" "2,3") (match_operand:MODEF 3 "mult_operator" "")) (const_string "ssemul") - (and (eq_attr "alternative" "2") + (and (eq_attr "alternative" "2,3") (match_operand:MODEF 3 "div_operator" "")) (const_string "ssediv") - (eq_attr "alternative" "2") + (eq_attr "alternative" "2,3") (const_string "sseadd") (match_operand:MODEF 3 "mult_operator" "") (const_string "fmul") @@ -13001,6 +12944,8 @@ (const_string "fdiv") ] (const_string "fop"))) + (set_attr "isa" "base,base,noavx,avx") + (set_attr "prefix" "orig,orig,orig,vex") (set_attr "mode" "")]) (define_insn "*rcpsf2_sse" @@ -13014,29 +12959,11 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF")]) -(define_insn "*fop__1_avx" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "register_operand" "x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] - "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH - && !COMMUTATIVE_ARITH_P (operands[3])" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:MODEF 3 "mult_operator" "") - (const_string "ssemul") - (match_operand:MODEF 3 "div_operator" "") - (const_string "ssediv") - ] - (const_string "sseadd"))) - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "*fop__1_sse" - [(set (match_operand:MODEF 0 "register_operand" "=x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x") (match_operator:MODEF 3 "binary_fp_operator" - [(match_operand:MODEF 1 "register_operand" "0") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + [(match_operand:MODEF 1 "register_operand" "0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !COMMUTATIVE_ARITH_P (operands[3])" "* return output_387_binary_op (insn, operands);" @@ -13047,6 +12974,8 @@ (const_string "ssediv") ] (const_string "sseadd"))) + (set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) ;; This pattern is not fully shadowed by 
the pattern above. @@ -16473,25 +16402,18 @@ ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator ;; are undefined in this condition, we're certain this is correct. -(define_insn "*avx_3" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (smaxmin:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "%x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] - "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" - "vs\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "3" - [(set (match_operand:MODEF 0 "register_operand" "=x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x") (smaxmin:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "%0") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + (match_operand:MODEF 1 "nonimmediate_operand" "%0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" - "s\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "@ + s\t{%2, %0|%0, %2} + vs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") + (set_attr "type" "sseadd") (set_attr "mode" "")]) ;; These versions of the min/max patterns implement exactly the operations @@ -16500,50 +16422,34 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*avx_ieee_smin3" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (unspec:MODEF - [(match_operand:MODEF 1 "register_operand" "x") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" - "vmins\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "*ieee_smin3" - [(set (match_operand:MODEF 0 "register_operand" "=x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x") (unspec:MODEF - [(match_operand:MODEF 1 "register_operand" "0") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + [(match_operand:MODEF 1 "register_operand" "0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")] UNSPEC_IEEE_MIN))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" - "mins\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "")]) - -(define_insn "*avx_ieee_smax3" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (unspec:MODEF - [(match_operand:MODEF 1 "register_operand" "0") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" - "vmaxs\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") + "@ + mins\t{%2, %0|%0, %2} + vmins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") + (set_attr "type" "sseadd") (set_attr "mode" "")]) (define_insn "*ieee_smax3" - [(set (match_operand:MODEF 0 "register_operand" "=x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x") (unspec:MODEF - [(match_operand:MODEF 1 "register_operand" "0") - (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + [(match_operand:MODEF 1 "register_operand" "0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")] UNSPEC_IEEE_MAX))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" - "maxs\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "@ + maxs\t{%2, %0|%0, %2} + vmaxs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") + (set_attr "type" "sseadd") (set_attr "mode" "")]) ;; Make two stack 
loads independent: diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 986856b73eb..03ec158e274 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -969,19 +969,11 @@ ;; Return true if OP is a comparison that can be used in the CMPSS/CMPPS insns. ;; The first set are supported directly; the second set can't be done with ;; full IEEE support, i.e. NaNs. -;; -;; ??? It would seem that we have a lot of uses of this predicate that pass -;; it the wrong mode. We got away with this because the old function didn't -;; check the mode at all. Mirror that for now by calling this a special -;; predicate. - -(define_special_predicate "sse_comparison_operator" - (match_code "eq,lt,le,unordered,ne,unge,ungt,ordered")) - -;; Return true if OP is a comparison operator that can be issued by -;; avx predicate generation instructions -(define_predicate "avx_comparison_float_operator" - (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt")) + +(define_predicate "sse_comparison_operator" + (ior (match_code "eq,ne,lt,le,unordered,unge,ungt,ordered") + (and (match_code "ge,gt,uneq,unle,unlt,ltgt") + (match_test "TARGET_AVX")))) (define_predicate "ix86_comparison_int_operator" (match_code "ne,eq,ge,gt,le,lt")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4c22bc5df68..8c9a7b21ee2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -19,6 +19,25 @@ ;; . +;; All vector float modes +(define_mode_iterator VF + [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2") + (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")]) + +;; All SFmode vector float modes +(define_mode_iterator VF1 + [(V4SF "TARGET_SSE") (V8SF "TARGET_AVX")]) + +;; All DFmode vector float modes +(define_mode_iterator VF2 + [(V2DF "TARGET_SSE2") (V4DF "TARGET_AVX")]) + +;; All 128bit vector float modes +(define_mode_iterator VF_128 + [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")]) + + + ;; Instruction suffix for sign and zero extensions. 
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) @@ -54,7 +73,6 @@ (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI]) (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI]) (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI]) -(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) (define_mode_iterator SSEMODEF2P [V4SF V2DF]) (define_mode_iterator AVX256MODEF2P [V8SF V4DF]) @@ -107,7 +125,10 @@ (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) ;; Mapping from float mode to required SSE level -(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) +(define_mode_attr sse + [(SF "sse") (DF "sse2") + (V4SF "sse") (V2DF "sse2") + (V8SF "avx") (V4DF "avx")]) ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) @@ -589,494 +610,290 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_expand "2" - [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "") - (absneg:VEC_FLOAT_MODE - (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))] + [(set (match_operand:VF 0 "register_operand" "") + (absneg:VF + (match_operand:VF 1 "register_operand" "")))] "" "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") -(define_insn_and_split "*avx_absneg2" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x") - (match_operator:AVXMODEF2P 3 "absneg_operator" - [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x,m")])) - (use (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm,x"))] - "AVX_VEC_FLOAT_MODE_P (mode)" +(define_insn_and_split "*absneg2" + [(set (match_operand:VF 0 "register_operand" "=x,x,x,x") + (match_operator:VF 3 "absneg_operator" + [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")])) + (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))] + "" "#" - "&& reload_completed" + "reload_completed" [(const_int 0)] { + enum rtx_code absneg_op; + rtx op1, op2; rtx t; - if (MEM_P (operands[1])) - t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND, - mode, operands[2], operands[1]); + if (TARGET_AVX) + { + if (MEM_P (operands[1])) + op1 = operands[2], op2 = operands[1]; + else + op1 = operands[1], op2 = operands[2]; + } else - t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND, - mode, operands[1], operands[2]); - t = gen_rtx_SET (VOIDmode, operands[0], t); - emit_insn (t); - DONE; -}) - -(define_insn_and_split "*sse_absneg2" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (match_operator:SSEMODEF2P 3 "absneg_operator" - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,xm")])) - (use (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm,0"))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx t; + { + op1 = operands[0]; + if (rtx_equal_p (operands[0], operands[1])) + op2 = operands[2]; + else + op2 = operands[1]; + } - t = operands[rtx_equal_p (operands[0], operands[1]) ? 2 : 1]; - t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND, - mode, operands[0], t); + absneg_op = GET_CODE (operands[3]) == NEG ? 
XOR : AND; + t = gen_rtx_fmt_ee (absneg_op, mode, op1, op2); t = gen_rtx_SET (VOIDmode, operands[0], t); emit_insn (t); DONE; -}) - -(define_expand "3" - [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") - (plusminus:AVX256MODEF2P - (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") - (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))] - "AVX256_VEC_FLOAT_MODE_P (mode)" - "ix86_fixup_binary_operands_no_copy (, mode, operands);") - -(define_insn "*avx_3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (plusminus:AVXMODEF2P - (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] - "AVX_VEC_FLOAT_MODE_P (mode) - && ix86_binary_operator_ok (, mode, operands)" - "v\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) +} + [(set_attr "isa" "noavx,noavx,avx,avx")]) (define_expand "3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (plusminus:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] - "SSE_VEC_FLOAT_MODE_P (mode)" + [(set (match_operand:VF 0 "register_operand" "") + (plusminus:VF + (match_operand:VF 1 "nonimmediate_operand" "") + (match_operand:VF 2 "nonimmediate_operand" "")))] + "" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (plusminus:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] - "SSE_VEC_FLOAT_MODE_P (mode) - && ix86_binary_operator_ok (, mode, operands)" - "\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + [(set (match_operand:VF 0 "register_operand" "=x,x") + (plusminus:VF + (match_operand:VF 1 "nonimmediate_operand" "0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + "ix86_binary_operator_ok (, mode, operands)" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "*avx_vm3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (plusminus:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "AVX128_VEC_FLOAT_MODE_P (mode)" - "v\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "_vm3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (plusminus:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (plusminus:VF_128 + (match_operand:VF_128 1 "register_operand" "0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) (define_expand "mul3" - [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") - (mult:AVX256MODEF2P - (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") - (match_operand:AVX256MODEF2P 2 
"nonimmediate_operand" "")))] - "AVX256_VEC_FLOAT_MODE_P (mode)" - "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") - -(define_insn "*avx_mul3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (mult:AVXMODEF2P - (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] - "AVX_VEC_FLOAT_MODE_P (mode) - && ix86_binary_operator_ok (MULT, mode, operands)" - "vmul\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemul") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_expand "mul3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] - "SSE_VEC_FLOAT_MODE_P (mode)" + [(set (match_operand:VF 0 "register_operand" "") + (mult:VF + (match_operand:VF 1 "nonimmediate_operand" "") + (match_operand:VF 2 "nonimmediate_operand" "")))] + "" "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") (define_insn "*mul3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] - "SSE_VEC_FLOAT_MODE_P (mode) - && ix86_binary_operator_ok (MULT, mode, operands)" - "mul\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") + [(set (match_operand:VF 0 "register_operand" "=x,x") + (mult:VF + (match_operand:VF 1 "nonimmediate_operand" "%0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + "ix86_binary_operator_ok (MULT, mode, operands)" + "@ + mul\t{%2, %0|%0, %2} + vmul\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemul") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "*avx_vmmul3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vmul\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemul") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "_vmmul3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (mult:VF_128 + (match_operand:VF_128 1 "register_operand" "0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "mul\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") + "" + "@ + mul\t{%2, %0|%0, %2} + vmul\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemul") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_expand "divv8sf3" - [(set (match_operand:V8SF 0 "register_operand" "") - (div:V8SF (match_operand:V8SF 1 "register_operand" "") - (match_operand:V8SF 2 "nonimmediate_operand" "")))] - "TARGET_AVX" +(define_expand "div3" + [(set (match_operand:VF2 0 "register_operand" "") + (div:VF2 (match_operand:VF2 1 "register_operand" "") + (match_operand:VF2 2 "nonimmediate_operand" "")))] + "" + "ix86_fixup_binary_operands_no_copy (DIV, mode, operands);") + +(define_expand "div3" + [(set (match_operand:VF1 0 "register_operand" "") + (div:VF1 (match_operand:VF1 1 
"register_operand" "") + (match_operand:VF1 2 "nonimmediate_operand" "")))] + "" { - ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands); + ix86_fixup_binary_operands_no_copy (DIV, mode, operands); if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { - ix86_emit_swdivsf (operands[0], operands[1], - operands[2], V8SFmode); - DONE; - } -}) - -(define_expand "divv4df3" - [(set (match_operand:V4DF 0 "register_operand" "") - (div:V4DF (match_operand:V4DF 1 "register_operand" "") - (match_operand:V4DF 2 "nonimmediate_operand" "")))] - "TARGET_AVX" - "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);") - -(define_insn "avx_div3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (div:AVXMODEF2P - (match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vdiv\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssediv") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_expand "divv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (div:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" -{ - if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p () - && flag_finite_math_only && !flag_trapping_math - && flag_unsafe_math_optimizations) - { - ix86_emit_swdivsf (operands[0], operands[1], - operands[2], V4SFmode); + ix86_emit_swdivsf (operands[0], operands[1], operands[2], mode); DONE; } }) -(define_expand "divv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (div:V2DF (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2") - -(define_insn "*avx_div3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (div:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] - "AVX128_VEC_FLOAT_MODE_P (mode)" - "vdiv\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssediv") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "_div3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (div:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "div\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") + [(set (match_operand:VF 0 "register_operand" "=x,x") + (div:VF + (match_operand:VF 1 "register_operand" "0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + "" + "@ + div\t{%2, %0|%0, %2} + vdiv\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssediv") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "*avx_vmdiv3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (div:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "AVX128_VEC_FLOAT_MODE_P (mode)" - "vdiv\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssediv") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "_vmdiv3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (div:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) + [(set 
(match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (div:VF_128 + (match_operand:VF_128 1 "register_operand" "0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "div\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") + "" + "@ + div\t{%2, %0|%0, %2} + vdiv\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssediv") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "avx_rcpv8sf2" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (unspec:V8SF - [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] - "TARGET_AVX" - "vrcpps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) - -(define_insn "sse_rcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] - "TARGET_SSE" +(define_insn "_rcp2" + [(set (match_operand:VF1 0 "register_operand" "=x") + (unspec:VF1 + [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] + "" "%vrcpps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "V4SF")]) - -(define_insn "*avx_vmrcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_RCP) - (match_operand:V4SF 2 "register_operand" "x") - (const_int 1)))] - "TARGET_AVX" - "vrcpss\t{%1, %2, %0|%0, %2, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) (define_insn "sse_vmrcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] UNSPEC_RCP) - (match_operand:V4SF 2 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "0,x") (const_int 1)))] "TARGET_SSE" - "rcpss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + "@ + rcpss\t{%1, %0|%0, %1} + vrcpss\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") + (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) -(define_expand "sqrtv8sf2" - [(set (match_operand:V8SF 0 "register_operand" "") - (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))] - "TARGET_AVX" -{ - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () - && flag_finite_math_only && !flag_trapping_math - && flag_unsafe_math_optimizations) - { - ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0); - DONE; - } -}) - -(define_insn "avx_sqrtv8sf2" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))] - "TARGET_AVX" - "vsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) +(define_expand "sqrt2" + [(set (match_operand:VF2 0 "register_operand" "") + (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]) -(define_expand "sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))] - "TARGET_SSE" +(define_expand "sqrt2" + [(set (match_operand:VF1 0 "register_operand" "") + (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))] + "" { - if 
(TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p () + if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { - ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0); + ix86_emit_swsqrtsf (operands[0], operands[1], mode, false); DONE; } }) -(define_insn "sse_sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "%vsqrtps\t{%1, %0|%0, %1}" +(define_insn "_sqrt2" + [(set (match_operand:VF 0 "register_operand" "=x") + (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))] + "" + "%vsqrt\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "V4SF")]) - -(define_insn "sqrtv4df2" - [(set (match_operand:V4DF 0 "register_operand" "=x") - (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))] - "TARGET_AVX" - "vsqrtpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "V4DF")]) - -(define_insn "sqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "%vsqrtpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "V2DF")]) - -(define_insn "*avx_vmsqrt2" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (sqrt:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")) - (match_operand:SSEMODEF2P 2 "register_operand" "x") - (const_int 1)))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vsqrt\t{%1, %2, %0|%0, %2, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) (define_insn "_vmsqrt2" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (sqrt:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")) - (match_operand:SSEMODEF2P 2 "register_operand" "0") + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (sqrt:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm")) + (match_operand:VF_128 2 "register_operand" "0,x") (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "sqrt\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + "" + "@ + sqrt\t{%1, %0|%0, %1} + vsqrt\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_expand "rsqrtv8sf2" - [(set (match_operand:V8SF 0 "register_operand" "") - (unspec:V8SF - [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))] - "TARGET_AVX && TARGET_SSE_MATH" -{ - ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1); - DONE; -}) - -(define_insn "avx_rsqrtv8sf2" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (unspec:V8SF - [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] - "TARGET_AVX" - "vrsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) - -(define_expand "rsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))] +(define_expand "rsqrt2" + [(set (match_operand:VF1 0 "register_operand" "") + (unspec:VF1 + [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))] "TARGET_SSE_MATH" { - 
ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1); + ix86_emit_swsqrtsf (operands[0], operands[1], mode, true); DONE; }) -(define_insn "sse_rsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] - "TARGET_SSE" +(define_insn "_rsqrt2" + [(set (match_operand:VF1 0 "register_operand" "=x") + (unspec:VF1 + [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] + "" "%vrsqrtps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "V4SF")]) - -(define_insn "*avx_vmrsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_RSQRT) - (match_operand:V4SF 2 "register_operand" "x") - (const_int 1)))] - "TARGET_AVX" - "vrsqrtss\t{%1, %2, %0|%0, %2, %1}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "SF")]) + (set_attr "mode" "")]) (define_insn "sse_vmrsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] UNSPEC_RSQRT) - (match_operand:V4SF 2 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "0,x") (const_int 1)))] "TARGET_SSE" - "rsqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + "@ + rsqrtss\t{%1, %0|%0, %1} + vrsqrtss\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") + (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX @@ -1084,98 +901,61 @@ ;; applied to NaNs. Hopefully the optimizers won't get too smart on us. 
(define_expand "3" - [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") - (smaxmin:AVX256MODEF2P - (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") - (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))] - "AVX256_VEC_FLOAT_MODE_P (mode)" -{ - if (!flag_finite_math_only) - operands[1] = force_reg (mode, operands[1]); - ix86_fixup_binary_operands_no_copy (, mode, operands); -}) - -(define_expand "3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (smaxmin:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] - "SSE_VEC_FLOAT_MODE_P (mode)" + [(set (match_operand:VF 0 "register_operand" "") + (smaxmin:VF + (match_operand:VF 1 "nonimmediate_operand" "") + (match_operand:VF 2 "nonimmediate_operand" "")))] + "" { if (!flag_finite_math_only) operands[1] = force_reg (mode, operands[1]); ix86_fixup_binary_operands_no_copy (, mode, operands); }) -(define_insn "*avx_3_finite" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (smaxmin:AVXMODEF2P - (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] - "AVX_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only - && ix86_binary_operator_ok (, mode, operands)" - "v\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "*3_finite" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (smaxmin:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] - "SSE_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only + [(set (match_operand:VF 0 "register_operand" "=x,x") + (smaxmin:VF + (match_operand:VF 1 "nonimmediate_operand" "%0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + "flag_finite_math_only && ix86_binary_operator_ok (, mode, operands)" - "\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "*avx_3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (smaxmin:AVXMODEF2P - (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "v\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "*3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (smaxmin:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + [(set (match_operand:VF 0 "register_operand" "=x,x") + (smaxmin:VF + (match_operand:VF 1 "register_operand" "0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + "!flag_finite_math_only" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "*avx_vm3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (smaxmin:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "AVX128_VEC_FLOAT_MODE_P (mode)" - "v\t{%2, 
%1, %0|%0, %1, %2}" - [(set_attr "type" "sse") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "_vm3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (smaxmin:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (smaxmin:VF_128 + (match_operand:VF_128 1 "register_operand" "0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) ;; These versions of the min/max patterns implement exactly the operations @@ -1184,66 +964,36 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*avx_ieee_smin3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MIN))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vmin\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - -(define_insn "*avx_ieee_smax3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")] - UNSPEC_IEEE_MAX))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vmax\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "*ieee_smin3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "register_operand" "0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] UNSPEC_IEEE_MIN))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "min\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "" + "@ + vmin\t{%2, %1, %0|%0, %1, %2} + min\t{%2, %0|%0, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) (define_insn "*ieee_smax3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "register_operand" "0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] UNSPEC_IEEE_MAX))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "max\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "" + "@ + vmax\t{%2, %1, %0|%0, %1, %2} + max\t{%2, %0|%0, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "avx_addsubv8sf3" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (vec_merge:V8SF - (plus:V8SF - (match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")) - (minus:V8SF (match_dup 1) (match_dup 2)) - (const_int 170)))] - "TARGET_AVX" - "vaddsubps\t{%2, %1, %0|%0, %1, %2}" - 
[(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V8SF")]) - (define_insn "avx_addsubv4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_merge:V4DF @@ -1258,62 +1008,56 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_insn "*avx_addsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (plus:V4SF - (match_operand:V4SF 1 "register_operand" "x") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (minus:V4SF (match_dup 1) (match_dup 2)) - (const_int 10)))] +(define_insn "sse3_addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (plus:V2DF + (match_operand:V2DF 1 "register_operand" "0,x") + (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) + (minus:V2DF (match_dup 1) (match_dup 2)) + (const_int 2)))] + "TARGET_SSE3" + "@ + addsubpd\t{%2, %0|%0, %2} + vaddsubpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +(define_insn "avx_addsubv8sf3" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (vec_merge:V8SF + (plus:V8SF + (match_operand:V8SF 1 "register_operand" "x") + (match_operand:V8SF 2 "nonimmediate_operand" "xm")) + (minus:V8SF (match_dup 1) (match_dup 2)) + (const_int 170)))] "TARGET_AVX" "vaddsubps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseadd") (set_attr "prefix" "vex") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V8SF")]) (define_insn "sse3_addsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF (plus:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_operand:V4SF 1 "register_operand" "0,x") + (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) (minus:V4SF (match_dup 1) (match_dup 2)) (const_int 10)))] "TARGET_SSE3" - "addsubps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix_rep" "1") + "@ + addsubps\t{%2, %0|%0, %2} + vaddsubps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "prefix_rep" "1,*") (set_attr "mode" "V4SF")]) -(define_insn "*avx_addsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (plus:V2DF - (match_operand:V2DF 1 "register_operand" "x") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (minus:V2DF (match_dup 1) (match_dup 2)) - (const_int 2)))] - "TARGET_AVX" - "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V2DF")]) - -(define_insn "sse3_addsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (plus:V2DF - (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (minus:V2DF (match_dup 1) (match_dup 2)) - (const_int 2)))] - "TARGET_SSE3" - "addsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "atom_unit" "complex") - (set_attr "mode" "V2DF")]) - (define_insn "avx_hv4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_concat:V4DF @@ -1341,6 +1085,28 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) +(define_insn "sse3_hv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_concat:V2DF + (plusminus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0,x") + (parallel [(const_int 0)])) + (vec_select:DF 
(match_dup 1) (parallel [(const_int 1)]))) + (plusminus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "@ + hpd\t{%2, %0|%0, %2} + vhpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + (define_insn "avx_hv8sf3" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_concat:V8SF @@ -1384,40 +1150,13 @@ (set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "*avx_hv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_concat:V4SF - (vec_concat:V2SF - (plusminus:SF - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) - (plusminus:SF - (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) - (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2SF - (plusminus:SF - (vec_select:SF - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) - (plusminus:SF - (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) - (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] - "TARGET_AVX" - "vhps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V4SF")]) - (define_insn "sse3_hv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_concat:V4SF (vec_concat:V2SF (plusminus:SF (vec_select:SF - (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) (plusminus:SF @@ -1426,55 +1165,44 @@ (vec_concat:V2SF (plusminus:SF (vec_select:SF - (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) (plusminus:SF (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] "TARGET_SSE3" - "hps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") + "@ + hps\t{%2, %0|%0, %2} + vhps\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") (set_attr "atom_unit" "complex") - (set_attr "prefix_rep" "1") + (set_attr "prefix" "orig,vex") + (set_attr "prefix_rep" "1,*") (set_attr "mode" "V4SF")]) -(define_insn "*avx_hv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (plusminus:DF - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) - (plusminus:DF - (vec_select:DF - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] +(define_expand "reduc_splus_v4df" + [(match_operand:V4DF 0 "register_operand" "") + (match_operand:V4DF 1 "register_operand" "")] "TARGET_AVX" - "vhpd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseadd") - (set_attr "prefix" "vex") - (set_attr "mode" "V2DF")]) +{ + rtx tmp = gen_reg_rtx (V4DFmode); + rtx tmp2 = gen_reg_rtx (V4DFmode); + emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1])); + emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, 
tmp, GEN_INT (1))); + emit_insn (gen_addv4df3 (operands[0], tmp, tmp2)); + DONE; +}) -(define_insn "sse3_hv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (plusminus:DF - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) - (plusminus:DF - (vec_select:DF - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] +(define_expand "reduc_splus_v2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "register_operand" "")] "TARGET_SSE3" - "hpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) +{ + emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); + DONE; +}) (define_expand "reduc_splus_v8sf" [(match_operand:V8SF 0 "register_operand" "") @@ -1506,27 +1234,6 @@ DONE; }) -(define_expand "reduc_splus_v4df" - [(match_operand:V4DF 0 "register_operand" "") - (match_operand:V4DF 1 "register_operand" "")] - "TARGET_AVX" -{ - rtx tmp = gen_reg_rtx (V4DFmode); - rtx tmp2 = gen_reg_rtx (V4DFmode); - emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1])); - emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1))); - emit_insn (gen_addv4df3 (operands[0], tmp, tmp2)); - DONE; -}) - -(define_expand "reduc_splus_v2df" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand:V2DF 1 "register_operand" "")] - "TARGET_SSE3" -{ - emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); - DONE; -}) (define_expand "reduc_smax_v4sf" [(match_operand:V4SF 0 "register_operand" "") @@ -1552,11 +1259,11 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "avx_cmp3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm") +(define_insn "avx_cmp3" + [(set (match_operand:VF 0 "register_operand" "=x") + (unspec:VF + [(match_operand:VF 1 "register_operand" "x") + (match_operand:VF 2 "nonimmediate_operand" "xm") (match_operand:SI 3 "const_0_to_31_operand" "n")] UNSPEC_PCMP))] "TARGET_AVX" @@ -1566,12 +1273,12 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn "avx_cmp3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (vec_merge:SSEMODEF2P - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") +(define_insn "avx_vmcmp3" + [(set (match_operand:VF_128 0 "register_operand" "=x") + (vec_merge:VF_128 + (unspec:VF_128 + [(match_operand:VF_128 1 "register_operand" "x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm") (match_operand:SI 3 "const_0_to_31_operand" "n")] UNSPEC_PCMP) (match_dup 1) @@ -1583,58 +1290,37 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) -;; We don't promote 128bit vector compare intrinsics. But vectorizer -;; may generate 256bit vector compare instructions. 
-(define_insn "*avx_maskcmp3" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") - (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator" - [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vcmp%D3\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecmp") - (set_attr "prefix" "vex") - (set_attr "length_immediate" "1") - (set_attr "mode" "")]) - (define_insn "_maskcmp3" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x") - (match_operator:SSEMODEF4 3 "sse_comparison_operator" - [(match_operand:SSEMODEF4 1 "register_operand" "0") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] - "!TARGET_XOP - && (SSE_FLOAT_MODE_P (mode) || SSE_VEC_FLOAT_MODE_P (mode))" - "cmp%D3\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") + [(set (match_operand:VF 0 "register_operand" "=x,x") + (match_operator:VF 3 "sse_comparison_operator" + [(match_operand:VF 1 "register_operand" "0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] + "" + "@ + cmp%D3\t{%2, %0|%0, %2} + vcmp%D3\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_insn "*avx_vmmaskcmp3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (match_operator:SSEMODEF2P 3 "sse_comparison_operator" - [(match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) - (match_dup 1) - (const_int 1)))] - "AVX_VEC_FLOAT_MODE_P (mode)" - "vcmp%D3\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecmp") - (set_attr "prefix" "vex") - (set_attr "mode" "")]) - (define_insn "_vmmaskcmp3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (match_operator:SSEMODEF2P 3 "sse_comparison_operator" - [(match_operand:SSEMODEF2P 1 "register_operand" "0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) + [(set (match_operand:VF_128 0 "register_operand" "=x,x") + (vec_merge:VF_128 + (match_operator:VF_128 3 "sse_comparison_operator" + [(match_operand:VF_128 1 "register_operand" "0,x") + (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")]) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (mode)" - "cmp%D3\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") + "" + "@ + cmp%D3\t{%2, %0|%0, %2} + vcmp%D3\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssecmp") + (set_attr "length_immediate" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) (define_insn "_comi" @@ -1678,15 +1364,14 @@ (set_attr "mode" "")]) (define_expand "vcond" - [(set (match_operand:AVXMODEF2P 0 "register_operand" "") - (if_then_else:AVXMODEF2P + [(set (match_operand:VF 0 "register_operand" "") + (if_then_else:VF (match_operator 3 "" - [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "") - (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")]) - (match_operand:AVXMODEF2P 1 "general_operand" "") - (match_operand:AVXMODEF2P 2 "general_operand" "")))] - "(SSE_VEC_FLOAT_MODE_P (mode) - || AVX_VEC_FLOAT_MODE_P (mode))" + [(match_operand:VF 4 "nonimmediate_operand" "") + (match_operand:VF 5 "nonimmediate_operand" "")]) + (match_operand:VF 1 "general_operand" "") + (match_operand:VF 2 "general_operand" "")))] + "" { bool ok = ix86_expand_fp_vcond (operands); gcc_assert (ok); -- 2.30.2
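Note on the reduc_splus_v4df expander above: it lowers the V4DF sum reduction to a horizontal add, a 128-bit lane swap, and a final vector add. The C sketch below is not part of the patch; it only illustrates the same instruction sequence with AVX intrinsics (the helper name sum_v4df is invented for this example):

#include <immintrin.h>

/* Sum the four doubles of an __m256d with the same sequence the
   expander emits: hadd within lanes, swap the 128-bit lanes, add.  */
static double
sum_v4df (__m256d x)
{
  __m256d t  = _mm256_hadd_pd (x, x);            /* { x0+x1, x0+x1, x2+x3, x2+x3 } */
  __m256d t2 = _mm256_permute2f128_pd (t, t, 1); /* { x2+x3, x2+x3, x0+x1, x0+x1 } */
  __m256d s  = _mm256_add_pd (t, t2);            /* every element holds the total  */
  return _mm_cvtsd_f64 (_mm256_castpd256_pd128 (s));
}

The lane swap is what completes the reduction: vhaddpd only combines elements within each 128-bit lane, so the two partial sums land in different lanes, and vperm2f128 with selector 1 exchanges the lanes before the closing vaddpd.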