From b329888265f674982705102d1bc6a3f976189f80 Mon Sep 17 00:00:00 2001
From: Jan Hubicka
Date: Thu, 26 Apr 2001 20:10:44 +0200
Subject: [PATCH] i386.md (abs/neg splitter): Fix calculation of sign bit for TFmodes

        * i386.md (abs/neg splitter): Fix calculation of sign bit for TFmodes
        (pushqi2_rex64, pushhi2_rex64): Add.

From-SVN: r41599
---
 gcc/ChangeLog           |   5 ++
 gcc/config/i386/i386.md | 184 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 182 insertions(+), 7 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 57205d11bc6..05ebf9fb53d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+Thu Apr 26 19:20:28 CEST 2001  Jan Hubicka
+
+        * i386.md (abs/neg splitter): Fix calculation of sign bit for TFmodes
+        (pushqi2_rex64, pushhi2_rex64): Add.
+
 2001-04-26  Andrew Haley
 
         * except.c (expand_eh_region_end_cleanup): Force pending stack
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3991e87ba81..2233c1464af 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1826,6 +1826,15 @@
   [(set_attr "type" "push")
    (set_attr "mode" "HI")])
 
+;; For the 64-bit ABI a push is always rounded up to 8 bytes, hence push{q}.
+(define_insn "*pushhi2_rex64"
+  [(set (match_operand:HI 0 "push_operand" "=X")
+        (match_operand:HI 1 "nonmemory_no_elim_operand" "ri"))]
+  "TARGET_64BIT"
+  "push{q}\\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
 (define_insn "*movhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=*a,r,r,*a,r,m")
         (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))]
@@ -1985,6 +1994,15 @@
   [(set_attr "type" "push")
    (set_attr "mode" "HI")])
 
+;; For the 64-bit ABI a push is always rounded up to 8 bytes, hence push{q}.
+(define_insn "*pushqi2_rex64"
+  [(set (match_operand:QI 0 "push_operand" "=X")
+        (match_operand:QI 1 "nonmemory_no_elim_operand" "ri"))]
+  "TARGET_64BIT"
+  "push{q}\\t%q1"
+  [(set_attr "type" "push")
+   (set_attr "mode" "DI")])
+
 ;; Situation is quite tricky about when to choose full sized (SImode) move
 ;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
 ;; partial register dependency machines (such as AMD Athlon), where QImode
@@ -9308,7 +9326,82 @@
                    (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
               (clobber (reg:CC 17))])]
   "TARGET_80387"
-  "ix86_expand_unary_operator (NEG, SFmode, operands); DONE;")
+  "if (TARGET_SSE)
+     {
+       /* A memory operand negated in place does not use SSE.  */
+       if (memory_operand (operands[0], VOIDmode)
+           && rtx_equal_p (operands[0], operands[1]))
+         emit_insn (gen_negsf2_memory (operands[0], operands[1]));
+       else
+        {
+          /* Negate by XORing with -0.0 (only the sign bit set); the
+             constant must be an rtx (hence GEN_INT) held in a register.  */
+          rtx reg = gen_reg_rtx (SFmode);
+          emit_move_insn (reg,
+                          gen_lowpart (SFmode,
+                                       GEN_INT (trunc_int_for_mode (0x80000000,
+                                                                    SImode))));
+          emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
+        }
+       DONE;
+     }
+   ix86_expand_unary_operator (NEG, SFmode, operands); DONE;")
+
+(define_insn "negsf2_memory"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+        (neg:SF (match_operand:SF 1 "memory_operand" "0")))
+   (clobber (reg:CC 17))]
+  "ix86_unary_operator_ok (NEG, SFmode, operands)"
+  "#")
+
+(define_insn "negsf2_ifs"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,r#xf")
+        (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
+   (use (match_operand:SF 2 "nonmemory_operand" "x#x,0#x,*X#x,*X#x"))
+   (clobber (reg:CC 17))]
+  "TARGET_SSE"
+  "#")
+
+(define_split
+  [(set (match_operand:SF 0 "memory_operand" "")
+        (neg:SF (match_operand:SF 1 "memory_operand" "")))
+   (use (match_operand:SF 2 "" ""))
+   (clobber (reg:CC 17))]
+  ""
+  [(parallel [(set (match_dup 0)
+                   (neg:SF (match_dup 1)))
+              (clobber (reg:CC 17))])])
+
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+        (neg:SF (match_operand:SF 1 "register_operand" "")))
+   (use (match_operand:SF 2 "" ""))
+   (clobber (reg:CC 17))]
+  "reload_completed && !SSE_REG_P (operands[0])"
+  [(parallel [(set (match_dup 0)
+                   (neg:SF (match_dup 1)))
+              (clobber (reg:CC 17))])])
+
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+        (neg:SF (match_operand:SF 1 "register_operand" "")))
+   (use (match_operand:SF 2 "register_operand" ""))
+   (clobber (reg:CC 17))]
+  "reload_completed && SSE_REG_P (operands[0])"
+  [(set (subreg:TI (match_dup 0) 0)
+        (xor:TI (subreg:TI (match_dup 1) 0)
+                (subreg:TI (match_dup 2) 0)))]
+  "
+{
+  if (operands_match_p (operands[0], operands[2]))
+    {
+      rtx tmp;
+      tmp = operands[1];
+      operands[1] = operands[2];
+      operands[2] = tmp;
+    }
+}")
+
 
 ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
 ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
@@ -9317,7 +9410,8 @@
   [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
         (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,0")))
    (clobber (reg:CC 17))]
-  "TARGET_80387 && ix86_unary_operator_ok (NEG, SFmode, operands)"
+  "TARGET_80387 && !TARGET_SSE
+   && ix86_unary_operator_ok (NEG, SFmode, operands)"
   "#")
 
 (define_split
@@ -9350,8 +9444,8 @@
 {
   int size = GET_MODE_SIZE (GET_MODE (operands[1]));
 
-  /* XFmode's size is 12, but only 10 bytes are used.  */
-  if (size == 12)
+  /* XFmode's size is 12, TFmode 16, but only 10 bytes are used.  */
+  if (size >= 12)
     size = 10;
   operands[0] = gen_rtx_MEM (QImode, XEXP (operands[0], 0));
   operands[0] = adj_offsettable_operand (operands[0], size - 1);
@@ -9363,7 +9457,83 @@
                    (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
               (clobber (reg:CC 17))])]
   "TARGET_80387"
-  "ix86_expand_unary_operator (NEG, DFmode, operands); DONE;")
+  "if (TARGET_SSE)
+     {
+       /* A memory operand negated in place does not use SSE.  */
+       if (memory_operand (operands[0], VOIDmode)
+           && rtx_equal_p (operands[0], operands[1]))
+         emit_insn (gen_negdf2_memory (operands[0], operands[1]));
+       else
+        {
+          /* Negate by XORing with -0.0, whose only set bit is the sign
+             bit (bit 63); the mask must be held in a register.  */
+          rtx reg = gen_reg_rtx (DFmode);
+#if HOST_BITS_PER_WIDE_INT >= 64
+          rtx imm = GEN_INT ((HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) 1 << 63));
+#else
+          rtx imm = immed_double_const (0, 0x80000000, DImode);
+#endif
+          emit_move_insn (reg, gen_lowpart (DFmode, imm));
+          emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
+        }
+       DONE;
+     }
+   ix86_expand_unary_operator (NEG, DFmode, operands); DONE;")
+
+(define_insn "negdf2_memory"
+  [(set (match_operand:DF 0 "memory_operand" "=m")
+        (neg:DF (match_operand:DF 1 "memory_operand" "0")))
+   (clobber (reg:CC 17))]
+  "ix86_unary_operator_ok (NEG, DFmode, operands)"
+  "#")
+
+(define_insn "negdf2_ifs"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,r#xf")
+        (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,x#fr,0,0")))
+   (use (match_operand:DF 2 "nonmemory_operand" "x#x,0#x,*X#x,*X#x"))
+   (clobber (reg:CC 17))]
+  "TARGET_SSE"
+  "#")
+
+(define_split
+  [(set (match_operand:DF 0 "memory_operand" "")
+        (neg:DF (match_operand:DF 1 "memory_operand" "")))
+   (use (match_operand:DF 2 "" ""))
+   (clobber (reg:CC 17))]
+  ""
+  [(parallel [(set (match_dup 0)
+                   (neg:DF (match_dup 1)))
+              (clobber (reg:CC 17))])])
+
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+        (neg:DF (match_operand:DF 1 "register_operand" "")))
+   (use (match_operand:DF 2 "" ""))
+   (clobber (reg:CC 17))]
+  "reload_completed && !SSE_REG_P (operands[0])"
+  [(parallel [(set (match_dup 0)
+                   (neg:DF (match_dup 1)))
+              (clobber (reg:CC 17))])])
+
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+        (neg:DF (match_operand:DF 1 "register_operand" "")))
+   (use (match_operand:DF 2 "register_operand" ""))
+   (clobber (reg:CC 17))]
+  "reload_completed && SSE_REG_P (operands[0])"
+  [(set (subreg:TI (match_dup 0) 0)
+        (xor:TI (subreg:TI (match_dup 1) 0)
+                (subreg:TI (match_dup 2) 0)))]
+  "
+{
+  if (operands_match_p (operands[0], operands[2]))
+    {
+      rtx tmp;
+      tmp = operands[1];
+      operands[1] = operands[2];
+      operands[2] = tmp;
+    }
+}")
 
 ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
 ;; because of secondary memory needed to reload from class FLOAT_INT_REGS
@@ -9666,8 +9836,8 @@
 {
   int size = GET_MODE_SIZE (GET_MODE (operands[1]));
 
-  /* XFmode's size is 12, but only 10 bytes are used.  */
-  if (size == 12)
+  /* XFmode's size is 12, TFmode 16, but only 10 bytes are used.  */
+  if (size >= 12)
     size = 10;
   operands[0] = gen_rtx_MEM (QImode, XEXP (operands[0], 0));
   operands[0] = adj_offsettable_operand (operands[0], size - 1);
-- 
2.30.2