[(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
(define_mode_iterator VI12_AVX2
+ [(V32QI "TARGET_AVX2") V16QI
+ (V16HI "TARGET_AVX2") V8HI])
+
+(define_mode_iterator VI12_AVX2_AVX512BW
[(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2 0 "register_operand")
- (sat_plusminus:VI12_AVX2
- (match_operand:VI12_AVX2 1 "vector_operand")
- (match_operand:VI12_AVX2 2 "vector_operand")))]
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
+ (sat_plusminus:VI12_AVX2_AVX512BW
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
- (sat_plusminus:VI12_AVX2
- (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
- (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ (sat_plusminus:VI12_AVX2_AVX512BW
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
(set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
+;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
+(define_split
+ [(set (match_operand:VI12_AVX2 0 "register_operand")
+ (eq:VI12_AVX2
+ (us_minus:VI12_AVX2
+ (match_operand:VI12_AVX2 1 "vector_operand")
+ (match_operand:VI12_AVX2 2 "vector_operand"))
+ (match_operand:VI12_AVX2 3 "const0_operand")))]
+ "TARGET_SSE2
+ && (<MODE>mode != V8HImode || TARGET_SSE4_1)
+ && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
+ [(set (match_dup 4)
+ (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
+ "operands[4] = gen_reg_rtx (<MODE>mode);")
+
(define_expand "mulv8qi3"
[(set (match_operand:V8QI 0 "register_operand")
(mult:V8QI (match_operand:V8QI 1 "register_operand")
})
(define_expand "uavg<mode>3_ceil"
- [(set (match_operand:VI12_AVX2 0 "register_operand")
- (truncate:VI12_AVX2
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
+ (truncate:VI12_AVX2_AVX512BW
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 1 "vector_operand"))
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 2 "vector_operand")))
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
(match_dup 3))
(const_int 1))))]
"TARGET_SSE2"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2 0 "register_operand")
- (truncate:VI12_AVX2
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
+ (truncate:VI12_AVX2_AVX512BW
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 1 "vector_operand"))
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 2 "vector_operand")))
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
(match_dup <mask_expand_op3>))
(const_int 1))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
})
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
- (truncate:VI12_AVX2
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ (truncate:VI12_AVX2_AVX512BW
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
(match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
(const_int 1))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
--- /dev/null
+/* PR target/96906 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*xmm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*xmm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*ymm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*ymm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpcmpeqb\[^\n\r]*xmm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpcmpeqw\[^\n\r]*xmm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpcmpeqb\[^\n\r]*ymm" 2 } } */
+/* { dg-final { scan-assembler-times "\tvpcmpeqw\[^\n\r]*ymm" 2 } } */
+/* { dg-final { scan-assembler-not "\tvpsubus\[bw]" } } */
+
+#include <x86intrin.h>
+
+__m128i
+f1 (__m128i x, __m128i y)
+{
+ return _mm_cmpeq_epi16 (_mm_subs_epu16 (x, y), _mm_setzero_si128 ());
+}
+
+__m128i
+f2 (__m128i x, __m128i y)
+{
+ return _mm_cmpeq_epi16 (_mm_min_epu16 (x, y), x);
+}
+
+__m128i
+f3 (__m128i x, __m128i y)
+{
+ return _mm_cmpeq_epi8 (_mm_subs_epu8 (x, y), _mm_setzero_si128 ());
+}
+
+__m128i
+f4 (__m128i x, __m128i y)
+{
+ return _mm_cmpeq_epi8 (_mm_min_epu8 (x, y), x);
+}
+
+__m256i
+f5 (__m256i x, __m256i y)
+{
+ return _mm256_cmpeq_epi16 (_mm256_subs_epu16 (x, y), _mm256_setzero_si256 ());
+}
+
+__m256i
+f6 (__m256i x, __m256i y)
+{
+ return _mm256_cmpeq_epi16 (_mm256_min_epu16 (x, y), x);
+}
+
+__m256i
+f7 (__m256i x, __m256i y)
+{
+ return _mm256_cmpeq_epi8 (_mm256_subs_epu8 (x, y), _mm256_setzero_si256 ());
+}
+
+__m256i
+f8 (__m256i x, __m256i y)
+{
+ return _mm256_cmpeq_epi8 (_mm256_min_epu8 (x, y), x);
+}