From: Jakub Jelinek Date: Thu, 11 Jan 2018 20:49:40 +0000 (+0100) Subject: re PR target/83203 (Inefficient int to avx2 vector conversion) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1ad6e904d4fbad716bc65ac4d0bbdeecf3f552bc;p=gcc.git re PR target/83203 (Inefficient int to avx2 vector conversion) PR target/83203 * config/i386/i386.c (ix86_expand_vector_init_one_nonzero): If one_var is 0, for V{8,16}S[IF] and V[48]D[IF]mode use gen_vec_set_0. * config/i386/sse.md (VI8_AVX_AVX512F, VI4F_256_512): New mode iterators. (ssescalarmodesuffix): Add 512-bit vectors. Use "d" or "q" for integral modes instead of "ss" and "sd". (vec_set<mode>_0): New define_insns for 256-bit and 512-bit vectors with 32-bit and 64-bit elements. (vecdupssescalarmodesuffix): New mode attribute. (vec_dup<mode>): Use it. From-SVN: r256556 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2b939fb10f9..3a37576a7a9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2018-01-11 Jakub Jelinek + + PR target/83203 + * config/i386/i386.c (ix86_expand_vector_init_one_nonzero): If one_var + is 0, for V{8,16}S[IF] and V[48]D[IF]mode use gen_vec_set_0. + * config/i386/sse.md (VI8_AVX_AVX512F, VI4F_256_512): New mode + iterators. + (ssescalarmodesuffix): Add 512-bit vectors. Use "d" or "q" for + integral modes instead of "ss" and "sd". + (vec_set<mode>_0): New define_insns for 256-bit and 512-bit + vectors with 32-bit and 64-bit elements. + (vecdupssescalarmodesuffix): New mode attribute. + (vec_dup<mode>): Use it. + 2018-01-11 H.J. 
Lu PR target/83330 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d6ff096d466..d625670c35c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -41767,6 +41767,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, rtx new_target; rtx x, tmp; bool use_vector_set = false; + rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; switch (mode) { @@ -41791,14 +41792,41 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, break; case E_V32QImode: case E_V16HImode: + use_vector_set = TARGET_AVX; + break; case E_V8SImode: + use_vector_set = TARGET_AVX; + gen_vec_set_0 = gen_vec_setv8si_0; + break; case E_V8SFmode: + use_vector_set = TARGET_AVX; + gen_vec_set_0 = gen_vec_setv8sf_0; + break; case E_V4DFmode: use_vector_set = TARGET_AVX; + gen_vec_set_0 = gen_vec_setv4df_0; break; case E_V4DImode: /* Use ix86_expand_vector_set in 64bit mode only. */ use_vector_set = TARGET_AVX && TARGET_64BIT; + gen_vec_set_0 = gen_vec_setv4di_0; + break; + case E_V16SImode: + use_vector_set = TARGET_AVX512F && one_var == 0; + gen_vec_set_0 = gen_vec_setv16si_0; + break; + case E_V16SFmode: + use_vector_set = TARGET_AVX512F && one_var == 0; + gen_vec_set_0 = gen_vec_setv16sf_0; + break; + case E_V8DFmode: + use_vector_set = TARGET_AVX512F && one_var == 0; + gen_vec_set_0 = gen_vec_setv8df_0; + break; + case E_V8DImode: + /* Use ix86_expand_vector_set in 64bit mode only. 
*/ + use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0; + gen_vec_set_0 = gen_vec_setv8di_0; break; default: break; @@ -41806,6 +41834,12 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, if (use_vector_set) { + if (gen_vec_set_0 && one_var == 0) + { + var = force_reg (GET_MODE_INNER (mode), var); + emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var)); + return true; + } emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); var = force_reg (GET_MODE_INNER (mode), var); ix86_expand_vector_set (mmx_ok, target, var, one_var); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ffc4f2a60b0..24197a8b3c3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -401,6 +401,9 @@ (define_mode_iterator VI8_AVX2_AVX512F [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) +(define_mode_iterator VI8_AVX_AVX512F + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")]) + (define_mode_iterator VI4_128_8_256 [V4SI V4DI]) @@ -622,6 +625,9 @@ (define_mode_iterator VI8F_128 [V2DI V2DF]) (define_mode_iterator VI4F_256 [V8SI V8SF]) (define_mode_iterator VI8F_256 [V4DI V4DF]) +(define_mode_iterator VI4F_256_512 + [V8SI V8SF + (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")]) (define_mode_iterator VI48F_256_512 [V8SI V8SF (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") @@ -838,10 +844,12 @@ ;; SSE scalar suffix for vector modes (define_mode_attr ssescalarmodesuffix [(SF "ss") (DF "sd") + (V16SF "ss") (V8DF "sd") (V8SF "ss") (V4DF "sd") (V4SF "ss") (V2DF "sd") - (V8SI "ss") (V4DI "sd") - (V4SI "d")]) + (V16SI "d") (V8DI "q") + (V8SI "d") (V4DI "q") + (V4SI "d") (V2DI "q")]) ;; Pack/unpack vector modes (define_mode_attr sseunpackmode @@ -7092,6 +7100,26 @@ (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "V4SF")]) +;; All of vinsertps, vmovss, vmovd clear also the higher bits. 
+(define_insn "vec_set_0" + [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,Yi") + (vec_merge:VI4F_256_512 + (vec_duplicate:VI4F_256_512 + (match_operand: 2 "general_operand" "v,m,r")) + (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C") + (const_int 1)))] + "TARGET_AVX" + "@ + vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe} + vmov\t{%x2, %x0|%x0, %2} + vmovd\t{%2, %x0|%x0, %2}" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "0") + (const_string "sselog") + (const_string "ssemov"))) + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "SF,,SI")]) + (define_insn "sse4_1_insertps" [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v") (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm") @@ -9220,6 +9248,20 @@ (const_string "orig"))) (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")]) +;; vmovq clears also the higher bits. +(define_insn "vec_set_0" + [(set (match_operand:VF2_512_256 0 "register_operand" "=v") + (vec_merge:VF2_512_256 + (vec_duplicate:VF2_512_256 + (match_operand: 2 "general_operand" "xm")) + (match_operand:VF2_512_256 1 "const0_operand" "C") + (const_int 1)))] + "TARGET_AVX" + "vmovq\t{%2, %x0|%x0, %2}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "DF")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integer down-conversion operations @@ -13993,6 +14035,22 @@ (const_string "orig"))) (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) +;; vmovq clears also the higher bits. 
+(define_insn "vec_set_0" + [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=Yi,v") + (vec_merge:VI8_AVX_AVX512F + (vec_duplicate:VI8_AVX_AVX512F + (match_operand: 2 "general_operand" "r,vm")) + (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C") + (const_int 1)))] + "TARGET_AVX" + "vmovq\t{%2, %x0|%x0, %2}" + [(set_attr "isa" "x64,*") + (set_attr "type" "ssemov") + (set_attr "prefix_rex" "1,*") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "TI")]) + (define_expand "vec_unpacks_lo_" [(match_operand: 0 "register_operand") (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] @@ -17743,6 +17801,8 @@ ;; Modes handled by AVX vec_dup patterns. (define_mode_iterator AVX_VEC_DUP_MODE [V8SI V8SF V4DI V4DF]) +(define_mode_attr vecdupssescalarmodesuffix + [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")]) ;; Modes handled by AVX2 vec_dup patterns. (define_mode_iterator AVX2_VEC_DUP_MODE [V32QI V16QI V16HI V8HI V8SI V4SI]) @@ -17769,7 +17829,7 @@ "TARGET_AVX" "@ vbroadcast\t{%1, %0|%0, %1} - vbroadcast\t{%1, %0|%0, %1} + vbroadcast\t{%1, %0|%0, %1} vbroadcast\t{%x1, %0|%0, %x1} vbroadcast\t{%x1, %g0|%g0, %x1} #"