From: Jakub Jelinek Date: Thu, 12 Apr 2018 11:17:23 +0000 (+0200) Subject: re PR target/85328 (accessing ymm16 with non-avx512 instruction form) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dcd88195e7c18ad57165f24b7f15e2fa6d239876;p=gcc.git re PR target/85328 (accessing ymm16 with non-avx512 instruction form) PR target/85328 * config/i386/sse.md (<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name> split, <mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name> split, vec_extract_lo_<mode><mask_name> split, vec_extract_lo_v32hi, vec_extract_lo_v64qi): For non-AVX512VL if input is xmm16+ reg and output is a reg, avoid creating invalid lowpart subreg, but instead split into a 512-bit move. Don't split if not AVX512VL, input is xmm16+ reg and output is a mem. (vec_extract_lo_<mode><mask_name>, vec_extract_lo_v32hi, vec_extract_lo_v64qi): Don't require split if not AVX512VL, input is xmm16+ reg and output is a mem. * gcc.target/i386/pr85328.c: New test. From-SVN: r259344 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5d10cbf8716..487d4017020 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2018-04-12 Jakub Jelinek + + PR target/85328 + * config/i386/sse.md + (<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name> split, + <mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name> split, + vec_extract_lo_<mode><mask_name> split, vec_extract_lo_v32hi, + vec_extract_lo_v64qi): For non-AVX512VL if input is xmm16+ reg + and output is a reg, avoid creating invalid lowpart subreg, but + instead split into a 512-bit move. Don't split if not AVX512VL, + input is xmm16+ reg and output is a mem. + (vec_extract_lo_<mode><mask_name>, vec_extract_lo_v32hi, + vec_extract_lo_v64qi): Don't require split if not AVX512VL, input is + xmm16+ reg and output is a mem. 
+ 2018-04-12 Andreas Krebbel * config/s390/s390.c (s390_output_indirect_thunk_function): Check diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8c970e0cc8a..c3345d0cfdd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -7361,9 +7361,21 @@ (vec_select: (match_operand:V8FI 1 "register_operand") (parallel [(const_int 0) (const_int 1)])))] - "TARGET_AVX512DQ && reload_completed" + "TARGET_AVX512DQ + && reload_completed + && (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1]))" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (mode, operands[1]);") +{ + if (!TARGET_AVX512VL + && REG_P (operands[0]) + && EXT_REX_SSE_REG_P (operands[1])) + operands[0] + = lowpart_subreg (mode, operands[0], mode); + else + operands[1] = gen_lowpart (mode, operands[1]); +}) (define_insn "avx512f_vextract32x4_1" [(set (match_operand: 0 "" "=") @@ -7394,9 +7406,21 @@ (match_operand:V16FI 1 "register_operand") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "TARGET_AVX512F && reload_completed" + "TARGET_AVX512F + && reload_completed + && (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1]))" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (mode, operands[1]);") +{ + if (!TARGET_AVX512VL + && REG_P (operands[0]) + && EXT_REX_SSE_REG_P (operands[1])) + operands[0] + = lowpart_subreg (mode, operands[0], mode); + else + operands[1] = gen_lowpart (mode, operands[1]); +}) (define_mode_attr extract_type_2 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")]) @@ -7639,7 +7663,10 @@ && && ( || !(MEM_P (operands[0]) && MEM_P (operands[1])))" { - if () + if ( + || (!TARGET_AVX512VL + && !REG_P (operands[0]) + && EXT_REX_SSE_REG_P (operands[1]))) return "vextract32x8\t{$0x0, %1, %0|%0, %1, 0x0}"; else return "#"; @@ -7654,9 +7681,20 @@ (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] "TARGET_AVX512F && !(MEM_P (operands[0]) 
&& MEM_P (operands[1])) - && reload_completed" + && reload_completed + && (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1]))" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (mode, operands[1]);") +{ + if (!TARGET_AVX512VL + && REG_P (operands[0]) + && EXT_REX_SSE_REG_P (operands[1])) + operands[0] + = lowpart_subreg (mode, operands[0], mode); + else + operands[1] = gen_lowpart (mode, operands[1]); +}) (define_insn "vec_extract_lo_" [(set (match_operand: 0 "" "=v,m") @@ -7828,10 +7866,27 @@ (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "#" - "&& reload_completed" +{ + if (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1])) + return "#"; + else + return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}"; +} + "&& reload_completed + && (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1]))" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (V16HImode, operands[1]);") +{ + if (!TARGET_AVX512VL + && REG_P (operands[0]) + && EXT_REX_SSE_REG_P (operands[1])) + operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode); + else + operands[1] = gen_lowpart (V16HImode, operands[1]); +}) (define_insn "vec_extract_hi_v32hi" [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m") @@ -7913,10 +7968,27 @@ (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "#" - "&& reload_completed" +{ + if (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1])) + return "#"; + else + return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}"; +} + "&& reload_completed + && (TARGET_AVX512VL + || REG_P (operands[0]) + || !EXT_REX_SSE_REG_P (operands[1]))" [(set (match_dup 0) (match_dup 1))] - "operands[1] = gen_lowpart (V32QImode, operands[1]);") +{ + if (!TARGET_AVX512VL + && REG_P (operands[0]) + 
&& EXT_REX_SSE_REG_P (operands[1])) + operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode); + else + operands[1] = gen_lowpart (V32QImode, operands[1]); +}) (define_insn "vec_extract_hi_v64qi" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d6cdfd4bc28..03099159db0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-04-12 Jakub Jelinek + + PR target/85328 + * gcc.target/i386/pr85328.c: New test. + 2018-04-12 Andreas Krebbel * gcc.target/s390/nobp-no-dwarf2-cfi.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/pr85328.c b/gcc/testsuite/gcc.target/i386/pr85328.c new file mode 100644 index 00000000000..987ea82fdeb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr85328.c @@ -0,0 +1,18 @@ +/* PR target/85328 */ +/* { dg-do assemble { target avx512f } } */ +/* { dg-options "-O3 -fno-caller-saves -mavx512f" } */ + +typedef char U __attribute__((vector_size (64))); +typedef int V __attribute__((vector_size (64))); +U a, b; + +extern void bar (void); + +V +foo (V f) +{ + b <<= (U){(V){}[63]} & 7; + bar (); + a = (U)f & 7; + return (V)b; +}