From: Jakub Jelinek Date: Sun, 1 Apr 2018 06:05:01 +0000 (+0200) Subject: re PR middle-end/85090 (wrong code with -O2 -fno-tree-dominator-opts -mavx512f -fira... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7be6ee78d62f0fe1ea00fb6c436b535b391f6d92;p=gcc.git re PR middle-end/85090 (wrong code with -O2 -fno-tree-dominator-opts -mavx512f -fira-algorithm=priority) PR middle-end/85090 * config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F. (V_128_256): New mode iterator. (*avx512dq_vextract<shuffletype>64x2_1 splitter): New define_split. (*avx512f_vextract<shuffletype>32x4_1 splitter): Likewise. (xop_pcmov_<mode><avxsizesuffix>): Use V_128_256 mode iterator instead of V. * config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F. * gcc.target/i386/avx512f-pr85090-1.c: New test. * gcc.target/i386/avx512f-pr85090-2.c: New test. * gcc.target/i386/avx512f-pr85090-3.c: New test. * gcc.target/i386/avx512bw-pr85090-2.c: New test. * gcc.target/i386/avx512bw-pr85090-3.c: New test. From-SVN: r258994 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 56a63a044b9..ad75aa20ec2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2018-04-01 Jakub Jelinek + + PR middle-end/85090 + * config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F. + (V_128_256): New mode iterator. + (*avx512dq_vextract<shuffletype>64x2_1 splitter): New define_split. + (*avx512f_vextract<shuffletype>32x4_1 splitter): Likewise. + (xop_pcmov_<mode><avxsizesuffix>): Use V_128_256 mode iterator instead + of V. + * config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and + V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F. + 2018-03-31 Segher Boessenkool PR target/83315 @@ -1123,7 +1135,7 @@ PR target/84807 * config/i386/i386.opt: Replace Enforcment with Enforcement. -2018-03-10 Alexandre Oliva +2018-03-10 Alexandre Oliva PR debug/84620 * dwarf2out.h (dw_val_class): Add dw_val_class_symview. 
@@ -1171,7 +1183,7 @@ (builtin_access::generic_overlap): Be prepared to handle non-array base objects. -2018-03-09 Alexandre Oliva +2018-03-09 Alexandre Oliva PR rtl-optimization/84682 * lra-constraints.c (process_address_1): Check is_address flag @@ -1302,7 +1314,7 @@ * doc/gcov.texi: Document usage of profile files. * gcov-io.h: Document changes in the format. -2018-03-08 Alexandre Oliva +2018-03-08 Alexandre Oliva PR debug/84404 PR debug/84408 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2b2896f7ac6..337545f127c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -44085,21 +44085,69 @@ half: break; case E_V32HImode: - if (TARGET_AVX512F && TARGET_AVX512BW) + if (TARGET_AVX512BW) { mmode = SImode; gen_blendm = gen_avx512bw_blendmv32hi; } + else if (TARGET_AVX512F) + { + half_mode = E_V8HImode; + n = 8; + goto quarter; + } break; case E_V64QImode: - if (TARGET_AVX512F && TARGET_AVX512BW) + if (TARGET_AVX512BW) { mmode = DImode; gen_blendm = gen_avx512bw_blendmv64qi; } + else if (TARGET_AVX512F) + { + half_mode = E_V16QImode; + n = 16; + goto quarter; + } break; +quarter: + /* Compute offset. */ + i = elt / n; + elt %= n; + + gcc_assert (i <= 3); + + { + /* Extract the quarter. */ + tmp = gen_reg_rtx (V4SImode); + rtx tmp2 = gen_lowpart (V16SImode, target); + rtx mask = gen_reg_rtx (QImode); + + emit_move_insn (mask, constm1_rtx); + emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i), + tmp, mask)); + + tmp2 = gen_reg_rtx (half_mode); + emit_move_insn (tmp2, gen_lowpart (half_mode, tmp)); + tmp = tmp2; + + /* Put val in tmp at elt. */ + ix86_expand_vector_set (false, tmp, val, elt); + + /* Put it back. 
*/ + tmp2 = gen_reg_rtx (V16SImode); + rtx tmp3 = gen_lowpart (V16SImode, target); + mask = gen_reg_rtx (HImode); + emit_move_insn (mask, constm1_rtx); + tmp = gen_lowpart (V4SImode, tmp); + emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i), + tmp3, mask)); + emit_move_insn (target, gen_lowpart (mode, tmp2)); + } + return; + default: break; } diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 795d08a87a7..34c007f8f43 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -229,8 +229,8 @@ ;; All vector modes (define_mode_iterator V - [(V32QI "TARGET_AVX") V16QI - (V16HI "TARGET_AVX") V8HI + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF @@ -244,6 +244,10 @@ (define_mode_iterator V_256 [V32QI V16HI V8SI V4DI V8SF V4DF]) +;; All 128bit and 256bit vector modes +(define_mode_iterator V_128_256 + [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF]) + ;; All 512bit vector modes (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF]) @@ -7351,6 +7355,15 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_split + [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand") + (vec_select:<ssequartermode> + (match_operand:V8FI 1 "register_operand") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_AVX512DQ && reload_completed" + [(set (match_dup 0) (match_dup 1))] + "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);") + (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>" [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") (vec_select:<ssequartermode> @@ -7374,6 +7387,16 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_split + [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand") + (vec_select:<ssequartermode> + (match_operand:V16FI 1 "register_operand") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_AVX512F && reload_completed" + [(set 
(match_dup 0) (match_dup 1))] + "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);") + (define_mode_attr extract_type_2 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")]) @@ -16478,11 +16501,11 @@ ;; XOP parallel XMM conditional moves (define_insn "xop_pcmov_<mode><avxsizesuffix>" - [(set (match_operand:V 0 "register_operand" "=x,x") - (if_then_else:V - (match_operand:V 3 "nonimmediate_operand" "x,m") - (match_operand:V 1 "register_operand" "x,x") - (match_operand:V 2 "nonimmediate_operand" "xm,x")))] + [(set (match_operand:V_128_256 0 "register_operand" "=x,x") + (if_then_else:V_128_256 + (match_operand:V_128_256 3 "nonimmediate_operand" "x,m") + (match_operand:V_128_256 1 "register_operand" "x,x") + (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))] "TARGET_XOP" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d3044eac963..7fddf8c271a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,4 +1,13 @@ -2018-03-31 Alexandre Oliva +2018-04-01 Jakub Jelinek + + PR middle-end/85090 + * gcc.target/i386/avx512f-pr85090-1.c: New test. + * gcc.target/i386/avx512f-pr85090-2.c: New test. + * gcc.target/i386/avx512f-pr85090-3.c: New test. + * gcc.target/i386/avx512bw-pr85090-2.c: New test. + * gcc.target/i386/avx512bw-pr85090-3.c: New test. + +2018-03-31 Alexandre Oliva PR c++/85027 * g++.dg/pr85027.C: New. @@ -101,7 +110,7 @@ PR sanitizer/85081 * g++.dg/asan/pr85081.C: New test. -2018-03-28 Alexandre Oliva +2018-03-28 Alexandre Oliva PR c++/84789 * g++.dg/template/pr84789.C: Adjust for testing with @@ -376,7 +385,7 @@ PR sanitizer/85029 * g++.dg/ubsan/pr85029.C: New test. -2018-03-23 Alexandre Oliva +2018-03-23 Alexandre Oliva PR c++/71251 * g++.dg/cpp0x/pr71251.C: New. @@ -440,7 +449,7 @@ * gcc.dg/builtin-tgmath-3.c: New test. -2018-03-21 Alexandre Oliva +2018-03-21 Alexandre Oliva PR c++/71965 * g++.dg/concepts/pr71965.C: New. 
@@ -1072,7 +1081,7 @@ * gcc.dg/Wrestrict-10.c: New test. * gcc.dg/Wrestrict-11.c: New test. -2018-03-09 Alexandre Oliva +2018-03-09 Alexandre Oliva PR rtl-optimization/84682 * gcc.dg/torture/pr84682-1.c: New. @@ -1144,7 +1153,7 @@ * gcc.dg/torture/pr84746.c: New testcase. -2018-03-08 Alexandre Oliva +2018-03-08 Alexandre Oliva PR debug/84404 PR debug/84408 * gcc.dg/graphite/pr84404.c: New. @@ -1236,7 +1245,7 @@ PR tree-optimization/84687 * gcc.dg/pr84687.c: New test. -2018-03-06 Alexandre Oliva +2018-03-06 Alexandre Oliva PR c++/84231 * g++.dg/pr84231.C: New. diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-2.c new file mode 100644 index 00000000000..b93ae4b071f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-2.c @@ -0,0 +1,35 @@ +/* PR middle-end/85090 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mtune=intel -masm=att" } */ + +typedef short V __attribute__((vector_size (64))); + +V +f1 (V x, int y) +{ + x[0] = y; + return x; +} + +V +f2 (V x, int y) +{ + x[7] = y; + return x; +} + +V +f3 (V x, int y) +{ + x[11] = y; + return x; +} + +V +f4 (V x, int y) +{ + x[29] = y; + return x; +} + +/* { dg-final { scan-assembler-times "vpbroadcastw\t" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-3.c new file mode 100644 index 00000000000..9e32a31edd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-3.c @@ -0,0 +1,35 @@ +/* PR middle-end/85090 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mtune=intel -masm=att" } */ + +typedef signed char V __attribute__((vector_size (64))); + +V +f1 (V x, int y) +{ + x[0] = y; + return x; +} + +V +f2 (V x, int y) +{ + x[15] = y; + return x; +} + +V +f3 (V x, int y) +{ + x[22] = y; + return x; +} + +V +f4 (V x, int y) +{ + x[59] = y; + return x; +} + +/* { dg-final { scan-assembler-times "vpbroadcastb\t" 4 } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/avx512f-pr85090-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-1.c new file mode 100644 index 00000000000..f5c0feec319 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-1.c @@ -0,0 +1,35 @@ +/* PR middle-end/85090 */ +/* { dg-do run { target int128 } } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -fno-tree-dominator-opts -mavx512f -fira-algorithm=priority" } */ + +#include "avx512f-check.h" + +typedef unsigned short U __attribute__ ((vector_size (64))); +typedef unsigned int V __attribute__ ((vector_size (64))); +typedef unsigned __int128 W __attribute__ ((vector_size (64))); + +V h; +W d, e, g; +U f; + +static __attribute__((noipa)) U +foo (U i) +{ + f >>= ((U)d > f) & 1; + i[0] <<= 1; + e = (7 & -d) << (7 & -(g & 7)); + return i; +} + +void +avx512f_test (void) +{ + U x; + for (unsigned i = 0; i < 32; i++) + x[i] = i; + x = foo (x); + for (unsigned i = 0; i < 32; i++) + if (x[i] != i) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr85090-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-2.c new file mode 100644 index 00000000000..968d2f55c21 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-2.c @@ -0,0 +1,37 @@ +/* PR middle-end/85090 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mno-avx512bw -mtune=intel -masm=att" } */ + +typedef short V __attribute__((vector_size (64))); + +V +f1 (V x, int y) +{ + x[0] = y; + return x; +} + +V +f2 (V x, int y) +{ + x[7] = y; + return x; +} + +V +f3 (V x, int y) +{ + x[11] = y; + return x; +} + +V +f4 (V x, int y) +{ + x[29] = y; + return x; +} + +/* { dg-final { scan-assembler-times "vpinsrw\t" 4 } } */ +/* { dg-final { scan-assembler-times "vextracti32x4\t" 2 } } */ +/* { dg-final { scan-assembler-times "vinserti32x4\t" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr85090-3.c b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-3.c new file mode 100644 index 00000000000..ffe515474d2 
--- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-3.c @@ -0,0 +1,37 @@ +/* PR middle-end/85090 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mno-avx512bw -mtune=intel -masm=att" } */ + +typedef signed char V __attribute__((vector_size (64))); + +V +f1 (V x, int y) +{ + x[0] = y; + return x; +} + +V +f2 (V x, int y) +{ + x[15] = y; + return x; +} + +V +f3 (V x, int y) +{ + x[22] = y; + return x; +} + +V +f4 (V x, int y) +{ + x[59] = y; + return x; +} + +/* { dg-final { scan-assembler-times "vpinsrb\t" 4 } } */ +/* { dg-final { scan-assembler-times "vextracti32x4\t" 2 } } */ +/* { dg-final { scan-assembler-times "vinserti32x4\t" 4 } } */