set_feature (FEATURE_AVX512BF16);
if (eax & bit_HRESET)
set_feature (FEATURE_HRESET);
+ if (eax & bit_AVXVNNI)
+ set_feature (FEATURE_AVXVNNI);
}
}
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512VNNI_SET \
(OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \
(OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512BITALG_SET \
#define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
#define OPTION_MASK_ISA_AVX2_UNSET \
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
+#define OPTION_MASK_ISA2_AVX2_UNSET \
+ (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
#define OPTION_MASK_ISA_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
#define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW
#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
+#define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
#define OPTION_MASK_ISA2_AVX512BF16_UNSET OPTION_MASK_ISA2_AVX512BF16
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
(OPTION_MASK_ISA2_AVX512F_UNSET)
-#define OPTION_MASK_ISA2_AVX2_UNSET OPTION_MASK_ISA2_AVX512F_UNSET
#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
}
return true;
+ case OPT_mavxvnni:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNI_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXVNNI_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXVNNI_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXVNNI_UNSET;
+ }
+ return true;
+
case OPT_msgx:
if (value)
{
FEATURE_KL,
FEATURE_AESKLE,
FEATURE_WIDEKL,
+ FEATURE_AVXVNNI,
CPU_FEATURE_MAX
};
ISA_NAMES_TABLE_ENTRY("kl", FEATURE_KL, P_NONE, "-mkl")
ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL)
ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl")
+ ISA_NAMES_TABLE_ENTRY("avxvnni", FEATURE_AVXVNNI, P_NONE, "-mavxvnni")
ISA_NAMES_TABLE_END
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
- hresetintrin.h keylockerintrin.h"
+ hresetintrin.h keylockerintrin.h avxvnniintrin.h"
;;
x86_64-*-*)
cpu_type=i386
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
- hresetintrin.h keylockerintrin.h"
+ hresetintrin.h keylockerintrin.h avxvnniintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
#define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_dpbusd_epi32 (__m256i __A, __m256i __B, __m256i __C)
-{
- return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si)__A, (__v8si) __B,
- (__v8si) __C);
-}
+#define _mm256_dpbusd_epi32(A, B, C) \
+ ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), \
+ (__v8si) (B), \
+ (__v8si) (C)))
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dpbusd_epi32 (__m128i __A, __m128i __B, __m128i __C)
-{
- return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si)__A, (__v4si) __B,
- (__v4si) __C);
-}
+#define _mm_dpbusd_epi32(A, B, C) \
+ ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), \
+ (__v4si) (B), \
+ (__v4si) (C)))
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_dpbusds_epi32 (__m256i __A, __m256i __B, __m256i __C)
-{
- return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si)__A, (__v8si) __B,
- (__v8si) __C);
-}
+#define _mm256_dpbusds_epi32(A, B, C) \
+ ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), \
+ (__v8si) (B), \
+ (__v8si) (C)))
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dpbusds_epi32 (__m128i __A, __m128i __B, __m128i __C)
-{
- return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si)__A, (__v4si) __B,
- (__v4si) __C);
-}
+#define _mm_dpbusds_epi32(A, B, C) \
+ ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), \
+ (__v4si) (B), \
+ (__v4si) (C)))
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_dpwssd_epi32 (__m256i __A, __m256i __B, __m256i __C)
-{
- return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si)__A, (__v8si) __B,
- (__v8si) __C);
-}
+#define _mm256_dpwssd_epi32(A, B, C) \
+ ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), \
+ (__v8si) (B), \
+ (__v8si) (C)))
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dpwssd_epi32 (__m128i __A, __m128i __B, __m128i __C)
-{
- return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si)__A, (__v4si) __B,
- (__v4si) __C);
-}
+#define _mm_dpwssd_epi32(A, B, C) \
+ ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), \
+ (__v4si) (B), \
+ (__v4si) (C)))
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_dpwssds_epi32 (__m256i __A, __m256i __B, __m256i __C)
-{
- return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si)__A, (__v8si) __B,
- (__v8si) __C);
-}
+#define _mm256_dpwssds_epi32(A, B, C) \
+ ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), \
+ (__v8si) (B), \
+ (__v8si) (C)))
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dpwssds_epi32 (__m128i __A, __m128i __B, __m128i __C)
-{
- return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si)__A, (__v4si) __B,
- (__v4si) __C);
-}
+#define _mm_dpwssds_epi32(A, B, C) \
+ ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), \
+ (__v4si) (B), \
+ (__v4si) (C)))
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
--- /dev/null
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVXVNNIINTRIN_H_INCLUDED
+#define _AVXVNNIINTRIN_H_INCLUDED
+
+#if !defined(__AVXVNNI__)
+#pragma GCC push_options
+#pragma GCC target("avxvnni")
+#define __DISABLE_AVXVNNIVL__
+#endif /* __AVXVNNIVL__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
+{
+ return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
+{
+ return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbusds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpwssd_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
+{
+ return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpwssd_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpwssds_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
+{
+ return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpwssds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C);
+}
+
+#ifdef __DISABLE_AVXVNNIVL__
+#undef __DISABLE_AVXVNNIVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVXVNNIVL__ */
+#endif /* _AVXVNNIINTRIN_H_INCLUDED */
#define _CPUID_H_INCLUDED
/* %eax */
+#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
#define bit_HRESET (1 << 22)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v16qi_mask, "__builtin_ia32_vgf2p8mulb_v16qi_mask", IX86_BUILTIN_VGF2P8MULB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
-/* VNNI */
+/* AVX512_VNNI */
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0)
&& (mask == 0 || (mask & ix86_isa_flags) != 0))
|| ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
+ /* "Unified" builtin used by either AVXVNNI intrinsics or AVX512VNNIVL
+ non-mask intrinsics should be defined whenever avxvnni
+ or avx512vnni && avx512vl exist. */
+ || (mask2 == OPTION_MASK_ISA2_AVXVNNI)
|| (lang_hooks.builtin_function
== lang_hooks.builtin_function_ext_scope))
{
def_or_undef (parse_in, "__KL__");
if (isa_flag2 & OPTION_MASK_ISA2_WIDEKL)
def_or_undef (parse_in, "__WIDEKL__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI)
+ def_or_undef (parse_in, "__AVXVNNI__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
+ (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or
+ OPTION_MASK_ISA2_AVXVNNI
where for each such pair it is sufficient if either of the ISAs is
enabled, plus if it is ored with other options also those others.
OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */
&& (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
+ if ((((bisa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
+ == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
+ || (bisa2 & OPTION_MASK_ISA2_AVXVNNI) != 0)
+ && (((isa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
+ == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL))
+ || (isa2 & OPTION_MASK_ISA2_AVXVNNI) != 0))
+ {
+ isa |= OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL;
+ isa2 |= OPTION_MASK_ISA2_AVXVNNI;
+ }
+
if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE
/* __builtin_ia32_maskmovq requires MMX registers. */
&& fcode != IX86_BUILTIN_MASKMOVQ)
{ "-muintr", OPTION_MASK_ISA2_UINTR },
{ "-mhreset", OPTION_MASK_ISA2_HRESET },
{ "-mkl", OPTION_MASK_ISA2_KL },
- { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }
+ { "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
+ { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8),
IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16),
IX86_ATTR_ISA ("hreset", OPT_mhreset),
+ IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
&& !(opts->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA2_AMX_BF16))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16;
+ if (((processor_alias_table[i].flags & PTA_AVXVNNI) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA2_AVXVNNI))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNI;
if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0)
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI;
#define TARGET_KL_P(x) TARGET_ISA2_KL_P(x)
#define TARGET_WIDEKL TARGET_ISA2_WIDEKL
#define TARGET_WIDEKL_P(x) TARGET_ISA2_WIDEKL_P(x)
+#define TARGET_AVXVNNI TARGET_ISA2_AVXVNNI
+#define TARGET_AVXVNNI_P(x) TARGET_ISA2_AVXVNNI_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
const wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23);
const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
+const wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26);
const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
- | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR;
+ | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI;
const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
- | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL;
+ | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
| PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1;
const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
avx512bw,noavx512bw,avx512dq,noavx512dq,
- avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
+ avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw,
+ avxvnni,avx512vnnivl"
(const_string "base"))
;; Define instruction set of MMX instructions
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
(eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
(eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+ (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
+ (eq_attr "isa" "avx512vnnivl") (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
(eq_attr "mmx_isa" "native")
(symbol_ref "!TARGET_MMX_WITH_SSE")
mwidekl
Target Report Mask(ISA2_WIDEKL) Var(ix86_isa_flags2) Save
Support WIDEKL built-in functions and code generation.
+
+mavxvnni
+Target Report Mask(ISA2_AVXVNNI) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
+AVXVNNI built-in functions and code generation.
#include <avxintrin.h>
+#include <avxvnniintrin.h>
+
#include <avx2intrin.h>
#include <avx512fintrin.h>
[(set_attr ("prefix") ("evex"))
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vpdpbusd_<mode>"
- [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
- (unspec:VI4_AVX512VL
- [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
- (match_operand:VI4_AVX512VL 2 "register_operand" "v")
- (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
+(define_insn "vpdpbusd_v16si"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (unspec:V16SI
+ [(match_operand:V16SI 1 "register_operand" "0")
+ (match_operand:V16SI 2 "register_operand" "v")
+ (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDUBSWACCD))]
"TARGET_AVX512VNNI"
- "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }"
- [(set_attr ("prefix") ("evex"))])
+ "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("evex"))])
+
+(define_insn "vpdpbusd_<mode>"
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX2
+ [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
+ (match_operand:VI4_AVX2 2 "register_operand" "x,v")
+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
+ UNSPEC_VPMADDUBSWACCD))]
+ "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
+ "@
+ %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
+ vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("vex,evex"))
+ (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
(define_insn "vpdpbusd_<mode>_mask"
[(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
"vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
[(set_attr ("prefix") ("evex"))])
+(define_insn "vpdpbusds_v16si"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (unspec:V16SI
+ [(match_operand:V16SI 1 "register_operand" "0")
+ (match_operand:V16SI 2 "register_operand" "v")
+ (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPMADDUBSWACCSSD))]
+ "TARGET_AVX512VNNI"
+ "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("evex"))])
(define_insn "vpdpbusds_<mode>"
- [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
- (unspec:VI4_AVX512VL
- [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
- (match_operand:VI4_AVX512VL 2 "register_operand" "v")
- (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX2
+ [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
+ (match_operand:VI4_AVX2 2 "register_operand" "x,v")
+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDUBSWACCSSD))]
- "TARGET_AVX512VNNI"
- "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }"
- [(set_attr ("prefix") ("evex"))])
+ "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
+ "@
+ %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
+ vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("vex,evex"))
+ (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
(define_insn "vpdpbusds_<mode>_mask"
[(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
"vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
[(set_attr ("prefix") ("evex"))])
+(define_insn "vpdpwssd_v16si"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (unspec:V16SI
+ [(match_operand:V16SI 1 "register_operand" "0")
+ (match_operand:V16SI 2 "register_operand" "v")
+ (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPMADDWDACCD))]
+ "TARGET_AVX512VNNI"
+ "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("evex"))])
(define_insn "vpdpwssd_<mode>"
- [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
- (unspec:VI4_AVX512VL
- [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
- (match_operand:VI4_AVX512VL 2 "register_operand" "v")
- (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX2
+ [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
+ (match_operand:VI4_AVX2 2 "register_operand" "x,v")
+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDWDACCD))]
- "TARGET_AVX512VNNI"
- "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }"
- [(set_attr ("prefix") ("evex"))])
+ "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
+ "@
+ %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
+ vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("vex,evex"))
+ (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
(define_insn "vpdpwssd_<mode>_mask"
[(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
"vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
[(set_attr ("prefix") ("evex"))])
+(define_insn "vpdpwssds_v16si"
+ [(set (match_operand:V16SI 0 "register_operand" "=v")
+ (unspec:V16SI
+ [(match_operand:V16SI 1 "register_operand" "0")
+ (match_operand:V16SI 2 "register_operand" "v")
+ (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPMADDWDACCSSD))]
+ "TARGET_AVX512VNNI"
+ "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("evex"))])
(define_insn "vpdpwssds_<mode>"
- [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
- (unspec:VI4_AVX512VL
- [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
- (match_operand:VI4_AVX512VL 2 "register_operand" "v")
- (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX2
+ [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
+ (match_operand:VI4_AVX2 2 "register_operand" "x,v")
+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDWDACCSSD))]
- "TARGET_AVX512VNNI"
- "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }"
- [(set_attr ("prefix") ("evex"))])
+ "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
+ "@
+ %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
+ vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr ("prefix") ("vex,evex"))
+ (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
(define_insn "vpdpwssds_<mode>_mask"
[(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
@cindex @code{target("widekl")} function attribute, x86
Enable/disable the generation of the WIDEKL instructions.
+@item avxvnni
+@itemx no-avxvnni
+@cindex @code{target("avxvnni")} function attribute, x86
+Enable/disable the generation of the AVXVNNI instructions.
+
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
--mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset@gol
+-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
@itemx -mavx512vnni
@opindex mavx512vnni
@need 200
+@itemx -mavxvnni
+@opindex mavxvnni
+@need 200
@itemx -mavx5124vnniw
@opindex mavx5124vnniw
@need 200
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
-UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL or CLDEMOTE extended
-instruction sets. Each has a corresponding @option{-mno-} option to disable
-use of these instructions.
+UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI or CLDEMOTE
+extended instruction sets. Each has a corresponding @option{-mno-} option to
+disable use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@item avx2_runtime
Target supports the execution of @code{avx2} instructions.
+@item avxvnni
+Target supports the execution of @code{avxvnni} instructions.
+
@item avx512f
Target supports compiling @code{avx512f} instructions.
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavxvnni -O2" } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+void extern
+avxvnni_test (void)
+{
+ x = _mm256_dpbusd_epi32 (x, y, z);
+ x_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ x = _mm256_dpbusds_epi32 (x, y, z);
+ x_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ x = _mm256_dpwssd_epi32 (x, y, z);
+ x_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ x = _mm256_dpwssds_epi32 (x, y, z);
+ x_ = _mm_dpwssds_epi32 (x_, y_, z_);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+__attribute__((target("avxvnni")))
+void
+avxvnni_test (void)
+{
+ x = _mm256_dpbusd_epi32 (x, y, z);
+ x_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ x = _mm256_dpbusds_epi32 (x, y, z);
+ x_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ x = _mm256_dpwssd_epi32 (x, y, z);
+ x_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ x = _mm256_dpwssds_epi32 (x, y, z);
+ x_ = _mm_dpwssds_epi32 (x_, y_, z_);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+__attribute__ ((__gnu_inline__, __always_inline__, target("avxvnni")))
+inline int
+foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */
+{
+ return 0;
+}
+
+__attribute__ ((target("avx512vnni,avx512vl")))
+int
+bar (void)
+{
+ return foo (); /* { dg-message "called from here" } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+__attribute__ ((__gnu_inline__, __always_inline__, target("avx512vnni,avx512vl")))
+inline int
+foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */
+{
+ return 0;
+}
+
+__attribute__ ((target("avxvnni")))
+int
+bar (void)
+{
+ return foo (); /* { dg-message "called from here" } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavxvnni -mavx512vnni -mavx512vl" } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+void
+avxvnni_test (void)
+{
+ x = _mm256_dpbusd_epi32 (x, y, z);
+ x_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ x = _mm256_dpbusds_epi32 (x, y, z);
+ x_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ x = _mm256_dpwssd_epi32 (x, y, z);
+ x_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ x = _mm256_dpwssds_epi32 (x, y, z);
+ x_ = _mm_dpwssds_epi32 (x_, y_, z_);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavxvnni -O2" } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+void extern
+avxvnni_test (void)
+{
+ x = _mm256_dpbusd_avx_epi32 (x, y, z);
+ x_ = _mm_dpbusd_avx_epi32 (x_, y_, z_);
+ x = _mm256_dpbusds_avx_epi32 (x, y, z);
+ x_ = _mm_dpbusds_avx_epi32 (x_, y_, z_);
+ x = _mm256_dpwssd_avx_epi32 (x, y, z);
+ x_ = _mm_dpwssd_avx_epi32 (x_, y_, z_);
+ x = _mm256_dpwssds_avx_epi32 (x, y, z);
+ x_ = _mm_dpwssds_avx_epi32 (x_, y_, z_);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnni" } */
+/* { dg-require-effective-target avxvnni } */
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, unsigned char *s1, char *s2, int size)
+{
+ short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i]));
+ }
+ for (int i = 0; i < size / 4; i++) {
+ long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3];
+ r[i] = test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_b src2_256;
+ union256i_ub src1_256;
+ int res_ref_256[8];
+
+ if (!__builtin_cpu_supports ("avxvnni"))
+ return;
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3*i + sign;
+ src2_256.a[i] = sign*10*i*i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbusd_avx_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_b src2_128;
+ union128i_ub src1_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3*i*i + sign;
+ src2_128.a[i] = sign*10*i*i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbusd_avx_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnni " } */
+/* { dg-require-effective-target avxvnni } */
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, unsigned char *s1, char *s2, int size)
+{
+ short tempres[32];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i]));
+ }
+ for (int i = 0; i < size / 4; i++) {
+ long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3];
+ r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_b src2_256;
+ union256i_ub src1_256;
+ int res_ref_256[8];
+
+ if (!__builtin_cpu_supports ("avxvnni"))
+ return;
+
+ for (i = 0; i < 32; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_256.a[i] = 10 + 3*i*i + sign;
+ src2_256.a[i] = sign*10*i*i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
+ res_256.x = _mm256_dpbusds_avx_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_b src2_128;
+ union128i_ub src1_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 16; i++)
+ {
+ int sign = i % 2 ? 1 : -1;
+ src1_128.a[i] = 10 + 3*i*i + sign;
+ src2_128.a[i] = sign*10*i*i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
+ res_128.x = _mm_dpbusds_avx_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnni" } */
+/* { dg-require-effective-target avxvnni } */
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, short *s1, short *s2, int size)
+{
+ short tempres[16];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = ((int)(s1[i]) * (int)(s2[i]));
+ }
+ for (int i = 0; i < size / 2; i++) {
+ long long test = (long long)dst[i] + tempres[i*2] + tempres[i*2 + 1];
+ r[i] = test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_w src1_256, src2_256;
+ int res_ref_256[8];
+
+ if (!__builtin_cpu_supports ("avxvnni"))
+ return;
+
+ for (i = 0; i < 16; i++)
+ {
+ src1_256.a[i] = 1 + i;
+ src2_256.a[i] = 2 + 2*i + i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 16);
+ res_256.x = _mm256_dpwssd_avx_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_w src1_128, src2_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 8; i++)
+ {
+ src1_128.a[i] = 1 + i;
+ src2_128.a[i] = 2 + 2*i + i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 8);
+ res_128.x = _mm_dpwssd_avx_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxvnni" } */
+/* { dg-require-effective-target avxvnni } */
+
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+static void
+CALC (int *r, int *dst, short *s1, short *s2, int size)
+{
+ short tempres[16];
+ for (int i = 0; i < size; i++) {
+ tempres[i] = ((int)(s1[i]) * (int)(s2[i]));
+ }
+ for (int i = 0; i < size / 2; i++) {
+ long long test = (long long)dst[i] + tempres[i*2] + tempres[i*2 + 1];
+ r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ union256i_d res_256;
+ union256i_w src1_256, src2_256;
+ int res_ref_256[8];
+
+ if (!__builtin_cpu_supports ("avxvnni"))
+ return;
+
+ for (i = 0; i < 16; i++)
+ {
+ src1_256.a[i] = 1 + i;
+ src2_256.a[i] = 2 + 2*i + i * i;
+ }
+
+ for (i = 0; i < 8; i++)
+ res_256.a[i] = 0x7fffffff;
+
+ CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 16);
+ res_256.x = _mm256_dpwssds_avx_epi32 (res_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_d (res_256, res_ref_256))
+ abort ();
+
+ union128i_d res_128;
+ union128i_w src1_128, src2_128;
+ int res_ref_128[4];
+
+ for (i = 0; i < 8; i++)
+ {
+ src1_128.a[i] = 1 + i;
+ src2_128.a[i] = 2 + 2*i + i * i;
+ }
+
+ for (i = 0; i < 4; i++)
+ res_128.a[i] = 0x7fffffff;
+
+ CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 8);
+ res_128.x = _mm_dpwssds_avx_epi32 (res_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_d (res_128, res_ref_128))
+ abort ();
+}
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-options "-mavx512vl -mavx512vnni -mavx512bw -O2" } */
-/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-
-
-#include <immintrin.h>
-
-volatile __m256i x,y,z;
-volatile __m128i x_,y_,z_;
-volatile __mmask32 m;
-
-void extern
-avx512f_test (void)
-{
- x = _mm256_dpbusd_epi32 (x, y, z);
- x = _mm256_mask_dpbusd_epi32 (x, m, y, z);
- x = _mm256_maskz_dpbusd_epi32 (m, x, y, z);
-
- x_ = _mm_dpbusd_epi32 (x_, y_, z_);
- x_ = _mm_mask_dpbusd_epi32 (x_, m, y_, z_);
- x_ = _mm_maskz_dpbusd_epi32 (m, x_, y_, z_);
-
- x = _mm256_dpbusds_epi32 (x, y, z);
- x = _mm256_mask_dpbusds_epi32 (x, m, y, z);
- x = _mm256_maskz_dpbusds_epi32 (m, x, y, z);
-
- x_ = _mm_dpbusds_epi32 (x_, y_, z_);
- x_ = _mm_mask_dpbusds_epi32 (x_, m, y_, z_);
- x_ = _mm_maskz_dpbusds_epi32 (m, x_, y_, z_);
-
- x = _mm256_dpwssd_epi32 (x, y, z);
- x = _mm256_mask_dpwssd_epi32 (x, m, y, z);
- x = _mm256_maskz_dpwssd_epi32 (m, x, y, z);
-
- x_ = _mm_dpwssd_epi32 (x_, y_, z_);
- x_ = _mm_mask_dpwssd_epi32 (x_, m, y_, z_);
- x_ = _mm_maskz_dpwssd_epi32 (m, x_, y_, z_);
-
- x = _mm256_dpwssds_epi32 (x, y, z);
- x = _mm256_mask_dpwssds_epi32 (x, m, y, z);
- x = _mm256_maskz_dpwssds_epi32 (m, x, y, z);
-
- x_ = _mm_dpwssds_epi32 (x_, y_, z_);
- x_ = _mm_mask_dpwssds_epi32 (x_, m, y_, z_);
- x_ = _mm_maskz_dpwssds_epi32 (m, x_, y_, z_);
-}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512vnni -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+volatile __mmask32 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm256_dpbusd_epi32 (x, y, z);
+ x = _mm256_mask_dpbusd_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpbusd_epi32 (m, x, y, z);
+
+ x_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpbusd_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpbusd_epi32 (m, x_, y_, z_);
+
+ x = _mm256_dpbusds_epi32 (x, y, z);
+ x = _mm256_mask_dpbusds_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpbusds_epi32 (m, x, y, z);
+
+ x_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpbusds_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpbusds_epi32 (m, x_, y_, z_);
+
+ x = _mm256_dpwssd_epi32 (x, y, z);
+ x = _mm256_mask_dpwssd_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpwssd_epi32 (m, x, y, z);
+
+ x_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpwssd_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpwssd_epi32 (m, x_, y_, z_);
+
+ x = _mm256_dpwssds_epi32 (x, y, z);
+ x = _mm256_mask_dpwssds_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpwssds_epi32 (m, x, y, z);
+
+ x_ = _mm_dpwssds_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpwssds_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpwssds_epi32 (m, x_, y_, z_);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512vnni -mavx512bw -mavxvnni -O2" } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+volatile __mmask32 m;
+
+void extern
+avx512f_test (void)
+{
+ x = _mm256_dpbusd_epi32 (x, y, z);
+ x = _mm256_mask_dpbusd_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpbusd_epi32 (m, x, y, z);
+
+ x_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpbusd_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpbusd_epi32 (m, x_, y_, z_);
+
+ x = _mm256_dpbusds_epi32 (x, y, z);
+ x = _mm256_mask_dpbusds_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpbusds_epi32 (m, x, y, z);
+
+ x_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpbusds_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpbusds_epi32 (m, x_, y_, z_);
+
+ x = _mm256_dpwssd_epi32 (x, y, z);
+ x = _mm256_mask_dpwssd_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpwssd_epi32 (m, x, y, z);
+
+ x_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpwssd_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpwssd_epi32 (m, x_, y_, z_);
+
+ x = _mm256_dpwssds_epi32 (x, y, z);
+ x = _mm256_mask_dpwssds_epi32 (x, m, y, z);
+ x = _mm256_maskz_dpwssds_epi32 (m, x, y, z);
+
+ x_ = _mm_dpwssds_epi32 (x_, y_, z_);
+ x_ = _mm_mask_dpwssds_epi32 (x_, m, y_, z_);
+ x_ = _mm_maskz_dpwssds_epi32 (m, x_, y_, z_);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+__attribute__((target("avx512vnni,avx512vl")))
+void
+avxvnni_test (void)
+{
+ x = _mm256_dpbusd_epi32 (x, y, z);
+ x_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ x = _mm256_dpbusds_epi32 (x, y, z);
+ x_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ x = _mm256_dpwssd_epi32 (x, y, z);
+ x_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ x = _mm256_dpwssds_epi32 (x, y, z);
+ x_ = _mm_dpwssds_epi32 (x_, y_, z_);
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavxvnni -mavx512vnni -mavx512vl" } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+void
+avxvnni_test (void)
+{
+ register __m256i a __asm ("xmm16");
+ register __m128i a_ __asm ("xmm26");
+ a = _mm256_dpbusd_epi32 (x, y, z);
+ asm volatile ("" : "+v" (a));
+ x = a;
+ a_ = _mm_dpbusd_epi32 (x_, y_, z_);
+ asm volatile ("" : "+v" (a_));
+ x_ = a_;
+ a = _mm256_dpbusds_epi32 (x, y, z);
+ asm volatile ("" : "+v" (a));
+ x = a;
+ a_ = _mm_dpbusds_epi32 (x_, y_, z_);
+ asm volatile ("" : "+v" (a_));
+ x_ = a_;
+ a = _mm256_dpwssd_epi32 (x, y, z);
+ asm volatile ("" : "+v" (a));
+ x = a;
+ a_ = _mm_dpwssd_epi32 (x_, y_, z_);
+ asm volatile ("" : "+v" (a_));
+ x_ = a_;
+ a = _mm256_dpwssds_epi32 (x, y, z);
+ asm volatile ("" : "+v" (a));
+ x = a;
+ a_ = _mm_dpwssds_epi32 (x_, y_, z_);
+ asm volatile ("" : "+v" (a_));
+ x_ = a_;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O0 -mno-avxvnni -mavx512vnni -mavx512vl" } */
+typedef int v8si __attribute__ ((vector_size (32)));
+v8si
+foo (v8si a, v8si b, v8si c)
+{
+ return __builtin_ia32_vpdpbusd_v8si (a, b, c);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavxvnni -mno-avx512vnni" } */
+typedef int v8si __attribute__ ((vector_size (32)));
+v8si
+foo (v8si a, v8si b, v8si c)
+{
+ return __builtin_ia32_vpdpbusd_v8si (a, b, c);
+}
extern void test_hreset (void) __attribute__((__target__("hreset")));
extern void test_keylocker (void) __attribute__((__target__("kl")));
extern void test_widekl (void) __attribute__((__target__("widekl")));
+extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
extern void test_no_hreset (void) __attribute__((__target__("no-hreset")));
extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
+extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni")
#include <x86intrin.h>
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512vnni -mavx512vl -mno-popcnt" } */
+
+inline int __attribute__ ((__gnu_inline__, __always_inline__, target("popcnt")))
+foo () /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */
+{
+ return 0;
+}
+
+int bar()
+{
+ return foo (); /* { dg-message "called from here" } */
+}
} "-O0 -mavx2" ]
}
+# Return 1 if avxvnni instructions can be compiled.
+proc check_effective_target_avxvnni { } {
+ return [check_no_compiler_messages avxvnni object {
+ typedef int __v8si __attribute__ ((__vector_size__ (32)));
+ __v8si
+ _mm256_dpbusd_epi32 (__v8si __A, __v8si __B, __v8si __C)
+ {
+ return __builtin_ia32_vpdpbusd_v8si (__A, __B, __C);
+ }
+ } "-mavxvnni" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {