}
})
+;; Extend an SFmode operand to DFmode. Alternatives 0 and 1 are output
+;; through output_387_reg_move; alternative 2 uses the cvtss2sd template.
+(define_insn "*extendsfdf2"
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "SF,XF,DF")
+ ;; When TARGET_SSE2 && TARGET_SSE_MATH holds, alternatives 0 and 1 are
+ ;; enabled only under TARGET_MIX_SSE_I387 and alternative 2 is always
+ ;; enabled; otherwise only alternatives 0 and 1 are enabled.
+ (set (attr "enabled")
+ (if_then_else
+ (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "TARGET_MIX_SSE_I387")
+ (symbol_ref "true"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "true")
+ (symbol_ref "false"))))])
+
/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
cvtss2sd:
unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
-(define_insn "*extendsfdf2"
- [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+;; Break partial reg stall for cvtss2sd. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+;; The split is not applied when operand 1 is the same register as
+;; operand 0, nor when operand 0 is an extended REX SSE register and
+;; TARGET_AVX512VL is not set.
+
+(define_split
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
- (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
- "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ (match_operand:SF 1 "nonimmediate_operand")))]
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (!EXT_REX_SSE_REG_P (operands[0])
+ || TARGET_AVX512VL)"
+ [(set (match_dup 0)
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float_extend:DF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
{
- switch (which_alternative)
- {
- case 0:
- case 1:
- return output_387_reg_move (insn, operands);
-
- case 2:
- return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
-
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "fmov,fmov,ssecvt")
- (set_attr "prefix" "orig,orig,maybe_vex")
- (set_attr "mode" "SF,XF,DF")
- (set (attr "enabled")
- (if_then_else
- (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
- (if_then_else
- (eq_attr "alternative" "0,1")
- (symbol_ref "TARGET_MIX_SSE_I387")
- (symbol_ref "true"))
- (if_then_else
- (eq_attr "alternative" "0,1")
- (symbol_ref "true")
- (symbol_ref "false"))))])
+ /* View the destination as a V2DF register and emit a move of zero
+ to it before the replacement pattern that merges in the result. */
+ operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
+ emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
+})
(define_expand "extend<mode>xf2"
[(set (match_operand:XF 0 "nonimmediate_operand")
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
+;; Break partial reg stall for cvtsd2ss. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+;; The split is not applied when operand 1 is the same register as
+;; operand 0, nor when operand 0 is an extended REX SSE register and
+;; TARGET_AVX512VL is not set.
+
+(define_split
+ [(set (match_operand:SF 0 "sse_reg_operand")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand")))]
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (!EXT_REX_SSE_REG_P (operands[0])
+ || TARGET_AVX512VL)"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:SF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ /* View the destination as a V4SF register and emit a move of zero
+ to it before the replacement pattern that merges in the result. */
+ operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
+ emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
+})
+
;; Conversion from XFmode to {SF,DF}mode
(define_insn "truncxf<mode>2"
DONE;
})
-;; Avoid partial SSE register dependency stalls. This splitter should split
-;; late in the pass sequence (after register rename pass), so allocated
-;; registers won't change anymore
-
-(define_split
- [(set (match_operand:MODEF 0 "sse_reg_operand")
- (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
- "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
- && optimize_function_for_speed_p (cfun)
- && (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
- [(set (match_dup 0)
- (vec_merge:<MODEF:ssevecmode>
- (vec_duplicate:<MODEF:ssevecmode>
- (float:MODEF
- (match_dup 1)))
- (match_dup 0)
- (const_int 1)))]
-{
- const machine_mode vmode = <MODEF:ssevecmode>mode;
-
- operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
- emit_move_insn (operands[0], CONST0_RTX (vmode));
-})
-
-;; Break partial reg stall for cvtsd2ss. This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
- [(set (match_operand:SF 0 "sse_reg_operand")
- (float_truncate:SF
- (match_operand:DF 1 "nonimmediate_operand")))]
- "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
- && optimize_function_for_speed_p (cfun)
- && (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))
- && (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
- [(set (match_dup 0)
- (vec_merge:V4SF
- (vec_duplicate:V4SF
- (float_truncate:SF
- (match_dup 1)))
- (match_dup 0)
- (const_int 1)))]
-{
- operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
- emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
-})
-
-;; Break partial reg stall for cvtss2sd. This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
- [(set (match_operand:DF 0 "sse_reg_operand")
- (float_extend:DF
- (match_operand:SF 1 "nonimmediate_operand")))]
- "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
- && optimize_function_for_speed_p (cfun)
- && (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))
- && (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
- [(set (match_dup 0)
- (vec_merge:V2DF
- (vec_duplicate:V2DF
- (float_extend:DF
- (match_dup 1)))
- (match_dup 0)
- (const_int 1)))]
-{
- operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
- emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
-})
-
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers. */
(set_attr "unit" "i387")
(set_attr "fp_int_src" "true")])
+;; Avoid partial SSE register dependency stalls. This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore.
+;; The split is not applied when operand 0 is an extended REX SSE
+;; register and TARGET_AVX512VL is not set.
+
+(define_split
+ [(set (match_operand:MODEF 0 "sse_reg_operand")
+ (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && (!EXT_REX_SSE_REG_P (operands[0])
+ || TARGET_AVX512VL)"
+ [(set (match_dup 0)
+ (vec_merge:<MODEF:ssevecmode>
+ (vec_duplicate:<MODEF:ssevecmode>
+ (float:MODEF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ /* Vector mode given by the ssevecmode attribute of the MODEF mode. */
+ const machine_mode vmode = <MODEF:ssevecmode>mode;
+
+ /* View the destination in that vector mode and emit a move of zero
+ to it before the replacement pattern that merges in the result. */
+ operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
+ emit_move_insn (operands[0], CONST0_RTX (vmode));
+})
+
(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
[(set (match_operand:MODEF 0 "register_operand")
(unsigned_float:MODEF