+2020-01-31 Uroš Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (*movoi_internal_avx): Do not check for
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL. Remove MODE_V8SF handling.
+ (*movti_internal): Do not check for
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
+ (*movtf_internal): Move check for TARGET_SSE2 and size optimization
+ just after check for TARGET_AVX.
+ (*movdf_internal): Ditto.
+ * config/i386/mmx.md (*mov<mode>_internal): Do not check for
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
+ * config/i386/sse.md (mov<mode>_internal): Only check
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL with V2DFmode. Move check
+ for TARGET_SSE2 and size optimization just after check for TARGET_AVX.
+ (<sse>_andnot<mode>3<mask_name>): Move check for
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL after check for TARGET_AVX.
+ (<code><mode>3<mask_name>): Ditto.
+ (*andnot<mode>3): Ditto.
+ (*andnottf3): Ditto.
+ (*<code><mode>3): Ditto.
+ (*<code>tf3): Ditto.
+ (*andnot<VI:mode>3): Remove
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling.
+ (<mask_codefor><code><VI48_AVX_AVX512F:mode>3<mask_name>): Ditto.
+ (*<code><VI12_AVX_AVX512F:mode>3): Ditto.
+ (sse4_1_blendv<ssemodesuffix>): Ditto.
+ * config/i386/x86-tune.def (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL):
+ Explain that tune applies to 128bit instructions only.
+
2020-01-31 Kwok Cheung Yeung <kcy@codesourcery.com>
* config/gcn/mkoffload.c (process_asm): Add sgpr_count and vgpr_count
if (misaligned_operand (operands[0], OImode)
|| misaligned_operand (operands[1], OImode))
{
- if (get_attr_mode (insn) == MODE_V8SF)
- return "vmovups\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
+ if (get_attr_mode (insn) == MODE_XI)
return "vmovdqu32\t{%1, %0|%0, %1}";
else
return "vmovdqu\t{%1, %0|%0, %1}";
}
else
{
- if (get_attr_mode (insn) == MODE_V8SF)
- return "vmovaps\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
+ if (get_attr_mode (insn) == MODE_XI)
return "vmovdqa32\t{%1, %0|%0, %1}";
else
return "vmovdqa\t{%1, %0|%0, %1}";
(and (eq_attr "alternative" "1")
(match_test "TARGET_AVX512VL"))
(const_string "XI")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
- (const_string "V8SF")
]
(const_string "OI")))])
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
- (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
- (and (eq_attr "alternative" "5")
- (match_test "TARGET_SSE_TYPELESS_STORES"))))
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
- (match_test "optimize_function_for_size_p (cfun)")
+ (and (eq_attr "alternative" "5")
+ (match_test "TARGET_SSE_TYPELESS_STORES"))
(const_string "V4SF")
]
(const_string "TI")))
(cond [(ior (match_operand 0 "ext_sse_reg_operand")
(match_operand 1 "ext_sse_reg_operand"))
(const_string "TI")
- (ior (not (match_test "TARGET_SSE2"))
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
- (match_test "optimize_function_for_size_p (cfun)")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(cond [(ior (match_operand 0 "ext_sse_reg_operand")
(match_operand 1 "ext_sse_reg_operand"))
(const_string "XI")
- (ior (not (match_test "TARGET_SSE2"))
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
- (match_test "optimize_function_for_size_p (cfun)")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(const_string "DI")
(match_test "TARGET_AVX")
(const_string "TI")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "V4SF")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(const_string "V4SF")
- (ior (not (match_test "TARGET_SSE2"))
- (match_test "optimize_function_for_size_p (cfun)"))
- (const_string "V4SF")
]
(const_string "TI")))])
/* xorps is one byte shorter for non-AVX targets. */
(eq_attr "alternative" "12,16")
- (cond [(not (match_test "TARGET_SSE2"))
- (const_string "V4SF")
- (and (match_test "TARGET_AVX512F")
- (not (match_test "TARGET_PREFER_AVX256")))
+ (cond [(and (match_test "TARGET_AVX512F")
+ (not (match_test "TARGET_PREFER_AVX256")))
(const_string "XI")
(match_test "TARGET_AVX")
(const_string "V2DF")
- (match_test "optimize_function_for_size_p (cfun)")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(match_test "TARGET_SSE_LOAD0_BY_PXOR")
(const_string "TI")
(ior (match_operand 0 "ext_sse_reg_operand")
(match_operand 1 "ext_sse_reg_operand")))
(const_string "V8DF")
+ (match_test "TARGET_AVX")
+ (const_string "DF")
(ior (not (match_test "TARGET_SSE2"))
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "V4SF")
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(const_string "V2DF")
- (match_test "TARGET_AVX")
- (const_string "DF")
- (match_test "optimize_function_for_size_p (cfun)")
- (const_string "V4SF")
]
(const_string "DF"))
(eq_attr "alternative" "11")
(const_string "DI")
(eq_attr "alternative" "5")
- (cond [(not (match_test "TARGET_SSE2"))
- (const_string "V4SF")
- (and (match_test "TARGET_AVX512F")
- (not (match_test "TARGET_PREFER_AVX256")))
+ (cond [(and (match_test "TARGET_AVX512F")
+ (not (match_test "TARGET_PREFER_AVX256")))
(const_string "V16SF")
(match_test "TARGET_AVX")
(const_string "V4SF")
- (match_test "optimize_function_for_size_p (cfun)")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
- (match_test "TARGET_SSE_LOAD0_BY_PXOR")
+ (match_test "TARGET_SSE_LOAD0_BY_PXOR")
(const_string "TI")
]
(const_string "V4SF"))
(match_test "<MODE>mode == V2SFmode")
(const_string "V4SF")
(ior (not (match_test "TARGET_SSE2"))
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "V4SF")
- (match_test "TARGET_AVX")
- (const_string "TI")
- (match_test "optimize_function_for_size_p (cfun)")
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(const_string "<sseinsnmode>")
(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
- (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
- (and (eq_attr "alternative" "3")
- (match_test "TARGET_SSE_TYPELESS_STORES")))
- (const_string "<ssePSmode>")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
+ (and (match_test "<MODE>mode == V2DFmode")
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
+ (const_string "V4SF")
+ (and (eq_attr "alternative" "3")
+ (match_test "TARGET_SSE_TYPELESS_STORES"))
+ (const_string "V4SF")
(and (eq_attr "alternative" "0")
(match_test "TARGET_SSE_LOAD0_BY_PXOR"))
(const_string "TI")
(const_string "<sseintvecmode2>")
(eq_attr "alternative" "3")
(const_string "<sseintvecmode2>")
- (and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "<ssePSmode>")
(match_test "TARGET_AVX")
(const_string "<MODE>")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
- ]
- (const_string "<MODE>")))])
-
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "<MODE>")))])
(define_insn "<sse>_andnot<mode>3<mask_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(const_string "<sseintvecmode2>")
(eq_attr "alternative" "3")
(const_string "<sseintvecmode2>")
- (and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "<ssePSmode>")
(match_test "TARGET_AVX")
(const_string "<MODE>")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
- ]
- (const_string "<MODE>")))])
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "<MODE>")))])
(define_insn "*<code><mode>3<mask_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(if_then_else (match_test "TARGET_AVX512DQ")
(const_string "<avx512fvecmode>")
(const_string "XI"))
- (and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "<ssevecmode>")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
- ]
- (const_string "<ssevecmode>")))])
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "<ssevecmode>")))])
(define_insn "*andnottf3"
[(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
(const_string "TI")
(eq_attr "alternative" "3")
(const_string "XI")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
- (const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
- ]
- (const_string "TI")))])
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "TI")))])
(define_insn "*<code><mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
(if_then_else (match_test "TARGET_AVX512DQ")
(const_string "<avx512fvecmode>")
(const_string "XI"))
- (and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "<ssevecmode>")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
- ]
- (const_string "<ssevecmode>")))])
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "<ssevecmode>")))])
(define_expand "<code>tf3"
[(set (match_operand:TF 0 "register_operand")
(const_string "TI")
(eq_attr "alternative" "3")
(const_string "QI")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
- (const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
- ]
- (const_string "TI")))])
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "TI")))])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
(const_string "*")))
(set_attr "prefix" "orig,vex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "<ssePSmode>")
- (match_test "TARGET_AVX2")
+ (cond [(match_test "TARGET_AVX2")
(const_string "<sseinsnmode>")
(match_test "TARGET_AVX")
(if_then_else
(const_string "*")))
(set_attr "prefix" "<mask_prefix3>,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "<ssePSmode>")
- (match_test "TARGET_AVX2")
+ (cond [(match_test "TARGET_AVX2")
(const_string "<sseinsnmode>")
(match_test "TARGET_AVX")
(if_then_else
(const_string "*")))
(set_attr "prefix" "orig,vex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
- (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
- (const_string "<ssePSmode>")
- (match_test "TARGET_AVX2")
+ (cond [(match_test "TARGET_AVX2")
(const_string "<sseinsnmode>")
(match_test "TARGET_AVX")
(if_then_else
(set_attr "prefix" "orig,orig,vex")
(set_attr "btver2_decode" "vector,vector,vector")
(set (attr "mode")
- (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
- (const_string "V4SF")
- (match_test "TARGET_AVX")
+ (cond [(match_test "TARGET_AVX")
(const_string "<ssevecmode>")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
- ]
- (const_string "<ssevecmode>")))])
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "V4SF")
+ ]
+ (const_string "<ssevecmode>")))])
(define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
[(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
| m_TREMONT | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC)
-/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
- of a sequence loading registers by parts. */
+/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
+ instead of a sequence loading registers by parts. */
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS
| m_TREMONT | m_BDVER | m_ZNVER | m_GENERIC)
-/* Use packed single precision instructions where posisble. I.e. movups instead
- of movupd. */
+/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
+ precision 128bit instructions instead of double where possible. */
DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal",
m_BDVER | m_ZNVER)