From 3b48574c6bcbfc12699eae9b586ab034b62885b0 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sun, 22 May 2016 12:26:52 +0200 Subject: [PATCH] sse.md (i128vldq): New mode iterator. * config/i386/sse.md (i128vldq): New mode iterator. (avx2_vbroadcasti128_, avx_vbroadcastf128_): Add avx512dq and avx512vl alternatives. * gcc.target/i386/avx512dq-vbroadcast-2.c: New test. * gcc.target/i386/avx512vl-vbroadcast-2.c: New test. From-SVN: r236567 --- gcc/ChangeLog | 4 ++ gcc/config/i386/sse.md | 37 +++++++++----- gcc/testsuite/ChangeLog | 3 ++ .../gcc.target/i386/avx512dq-vbroadcast-2.c | 49 +++++++++++++++++++ .../gcc.target/i386/avx512vl-vbroadcast-2.c | 47 ++++++++++++++++++ 5 files changed, 129 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vbroadcast-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 048861f5b18..80224c92f1a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,9 @@ 2016-05-22 Jakub Jelinek + * config/i386/sse.md (i128vldq): New mode iterator. + (avx2_vbroadcasti128_, avx_vbroadcastf128_): Add + avx512dq and avx512vl alternatives. + * config/i386/sse.md (avx2_vec_dupv4df): Use v instead of x constraint, use maybe_evex prefix instead of vex. (vec_dupv4sf): Use v constraint instead of x for output diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c6f9d78f84c..e971f1f49d6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -778,6 +778,12 @@ (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128") (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")]) +;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ +;; i32x4, f32x4, i64x2 or f64x2 suffixes. +(define_mode_attr i128vldq + [(V8SF "f32x4") (V4DF "f64x2") + (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")]) + ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF]) @@ -17038,15 +17044,19 @@ (set_attr "mode" "TI,TI,DF,V4SF")]) (define_insn "avx2_vbroadcasti128_" - [(set (match_operand:VI_256 0 "register_operand" "=x") + [(set (match_operand:VI_256 0 "register_operand" "=x,v,v") (vec_concat:VI_256 - (match_operand: 1 "memory_operand" "m") + (match_operand: 1 "memory_operand" "m,m,m") (match_dup 1)))] "TARGET_AVX2" - "vbroadcasti128\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") + "@ + vbroadcasti128\t{%1, %0|%0, %1} + vbroadcast\t{%1, %0|%0, %1} + vbroadcast32x4\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,avx512dq,avx512vl") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex,evex") (set_attr "mode" "OI")]) ;; Modes handled by AVX vec_dup patterns. @@ -17123,19 +17133,24 @@ "operands[2] = gen_lowpart (mode, operands[0]);") (define_insn "avx_vbroadcastf128_" - [(set (match_operand:V_256 0 "register_operand" "=x,x,x") + [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v") (vec_concat:V_256 - (match_operand: 1 "nonimmediate_operand" "m,0,?x") + (match_operand: 1 "nonimmediate_operand" "m,0,?x,m,0,m,0") (match_dup 1)))] "TARGET_AVX" "@ vbroadcast\t{%1, %0|%0, %1} vinsert\t{$1, %1, %0, %0|%0, %0, %1, 1} - vperm2\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" - [(set_attr "type" "ssemov,sselog1,sselog1") + vperm2\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0} + vbroadcast\t{%1, %0|%0, %1} + vinsert\t{$1, %1, %0, %0|%0, %0, %1, 1} + vbroadcast32x4\t{%1, %0|%0, %1} + vinsert32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}" + [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl") + (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1") (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "0,1,1") - (set_attr "prefix" "vex") + (set_attr "length_immediate" "0,1,1,0,1,0,1") + (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex") (set_attr "mode" "")]) ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 05f4e3a7dd6..84d1f805a21 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2016-05-22 Jakub Jelinek + * gcc.target/i386/avx512dq-vbroadcast-2.c: New test. + * gcc.target/i386/avx512vl-vbroadcast-2.c: New test. + * gcc.target/i386/avx512vl-vbroadcast-1.c: New test. 2016-05-22 Kugan Vivekanandarajah diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcast-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcast-2.c new file mode 100644 index 00000000000..645765696e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcast-2.c @@ -0,0 +1,49 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512dq" } */ + +#include + +void +f1 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i c; + a = x; + asm volatile ("" : "+v" (a)); + c = _mm256_broadcastsi128_si256 (a); + register __m256i b __asm ("xmm16"); + b = c; + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vinserti64x2\[^\n\r]*(xmm16\[^\n\r]*ymm16\[^\n\r]*ymm16|ymm16\[^\n\r]*ymm16\[^\n\r]*xmm16)" } } */ + +void +f2 (__m128i *x) +{ + register __m256i a __asm ("xmm16"); + a = _mm256_broadcastsi128_si256 (*x); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vbroadcasti64x2\[^\n\r]*ymm16" } } */ + +void +f3 (__m128 *x) +{ + register __m256 a __asm ("xmm16"); + a = _mm256_broadcast_ps (x); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vbroadcastf32x4\[^\n\r]*ymm16" } } */ + +void +f4 (__m128d *x) +{ + register __m256d a __asm ("xmm16"); + a = _mm256_broadcast_pd (x); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vbroadcastf64x2\[^\n\r]*ymm16" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-2.c new file mode 100644 index 00000000000..22f4129f4f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-2.c @@ -0,0 +1,47 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */ + +#include + +void +f1 (__m128i x) +{ + register __m128i a __asm ("xmm16"); + register __m256i c; + a = x; + asm volatile ("" : "+v" (a)); + c = _mm256_broadcastsi128_si256 (a); + register __m256i b __asm ("xmm16"); + b = c; + asm volatile ("" : "+v" (b)); +} + +/* { dg-final { scan-assembler "vinserti32x4\[^\n\r]*(xmm16\[^\n\r]*ymm16\[^\n\r]*ymm16|ymm16\[^\n\r]*ymm16\[^\n\r]*xmm16)" } } */ + +void +f2 (__m128i *x) +{ + register __m256i a __asm ("xmm16"); + a = _mm256_broadcastsi128_si256 (*x); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vbroadcasti32x4\[^\n\r]*ymm16" } } */ + +void +f3 (__m128 *x) +{ + register __m256 a __asm ("xmm16"); + a = _mm256_broadcast_ps (x); + asm volatile ("" : "+v" (a)); +} + +void +f4 (__m128d *x) +{ + register __m256d a __asm ("xmm16"); + a = _mm256_broadcast_pd (x); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler-times "vbroadcastf32x4\[^\n\r]*ymm16" 2 } } */ -- 2.30.2