From bfd6bc60f5465c1090b91dd620555f90bdfb396a Mon Sep 17 00:00:00 2001
From: John Carr
Date: Fri, 30 Jan 1998 23:34:15 +0000
Subject: [PATCH] sparc.c (sparc_override_options): Make v8plus and ultrasparc set MASK_V8PLUS.

Fri Jan 30 22:30:39 1998  John Carr

	* sparc.c (sparc_override_options): Make v8plus and ultrasparc set
	MASK_V8PLUS.
	(output_function_epilogue): Omit epilogue if nothing drops through.
	(output_move_double): Suppress int ldd usage on ultrasparc and v9.
	(registers_ok_for_ldd_peep): Likewise.
	(print_operand): Suppress b,a on ultrasparc.  Let Y accept a constant.
	(ultrasparc_adjust_cost): New function.
	(sparc_issue_rate): New function.
	* sparc.h (MASK_VIS, TARGET_VIS): New.
	(MASK_V8PLUS, TARGET_V8PLUS): New.
	(TARGET_HARD_MUL32, TARGET_HARD_MUL): New.
	(TARGET_SWITCHES): Add vis and v8plus.
	(REG_CLASS_FROM_LETTER): Accept d and b for VIS.
	(REGISTER_MOVE_COST): FP<->INT move cost 12 for ultrasparc.
	(RTX_COSTS): Use TARGET_HARD_MUL.
	(ADJUST_COST): Call ultrasparc_adjust_cost.
	(ISSUE_RATE): New.
	* sparc.md (attr type): Add sload, fpmove, fpcmove.  Adjust users
	of load & fp appropriately.
	(supersparc function units): Adjust for Haifa.
	(ultrasparc function units): Likewise.
	(get_pc_via_rdpc): All v9, not just arch64.
	(movdi_v8plus, movdi_v8plus+1): New.
	(adddi3_sp32+1): New.
	(subdi3_sp32+1): New.
	(movsi_insn, movsf_const_insn, movdf_const_insn): Know VIS.
	(addsi3, subsi3, anddi3_sp32, andsi3, and_not_di_sp32): Likewise.
	(and_not_si, iordi3_sp32, iorsi3, or_not_di_sp32, or_not_si): Likewise.
	(xorsi3_sp32, xorsi3, xor_not_di_sp32, xor_not_si): Likewise.
	(one_cmpldi2_sp32, one_cmplsi2): Likewise.
	(ldd peepholes): Suppress for v9.
	(return_adddi): Kill redundant test.  Arg1 may be arith_operand.
	(return_subsi): Remove.

From-SVN: r17560
---
 gcc/ChangeLog             |  37 +++
 gcc/config/sparc/sparc.c  | 202 ++++++++++++-
 gcc/config/sparc/sparc.h  |  81 +++--
 gcc/config/sparc/sparc.md | 622 +++++++++++++++++++++++++-------------
 4 files changed, 703 insertions(+), 239 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6c099950a9f..f87310c94d2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,40 @@
+Fri Jan 30 22:30:39 1998  John Carr
+
+	* sparc.c (sparc_override_options): Make v8plus and ultrasparc set
+	MASK_V8PLUS.
+	(output_function_epilogue): Omit epilogue if nothing drops through.
+	(output_move_double): Suppress int ldd usage on ultrasparc and v9.
+	(registers_ok_for_ldd_peep): Likewise.
+	(print_operand): Suppress b,a on ultrasparc.  Let Y accept a constant.
+	(ultrasparc_adjust_cost): New function.
+	(sparc_issue_rate): New function.
+	* sparc.h (MASK_VIS, TARGET_VIS): New.
+	(MASK_V8PLUS, TARGET_V8PLUS): New.
+	(TARGET_HARD_MUL32, TARGET_HARD_MUL): New.
+	(TARGET_SWITCHES): Add vis and v8plus.
+	(REG_CLASS_FROM_LETTER): Accept d and b for VIS.
+	(REGISTER_MOVE_COST): FP<->INT move cost 12 for ultrasparc.
+	(RTX_COSTS): Use TARGET_HARD_MUL.
+	(ADJUST_COST): Call ultrasparc_adjust_cost.
+	(ISSUE_RATE): New.
+	* sparc.md (attr type): Add sload, fpmove, fpcmove.  Adjust users
+	of load & fp appropriately.
+	(supersparc function units): Adjust for Haifa.
+	(ultrasparc function units): Likewise.
+	(get_pc_via_rdpc): All v9, not just arch64.
+	(movdi_v8plus, movdi_v8plus+1): New.
+	(adddi3_sp32+1): New.
+	(subdi3_sp32+1): New.
+	(movsi_insn, movsf_const_insn, movdf_const_insn): Know VIS.
+	(addsi3, subsi3, anddi3_sp32, andsi3, and_not_di_sp32): Likewise.
+	(and_not_si, iordi3_sp32, iorsi3, or_not_di_sp32, or_not_si): Likewise.
+	(xorsi3_sp32, xorsi3, xor_not_di_sp32, xor_not_si): Likewise.
+	(one_cmpldi2_sp32, one_cmplsi2): Likewise.
+	(ldd peepholes): Suppress for v9.
+	(return_adddi): Kill redundant test.  Arg1 may be arith_operand.
+	(return_subsi): Remove.
+
+
 Fri Jan 30 18:30:03 1998  John F Carr
 
 	* mips.c (save_restore_insns): Set RTX_UNCHANGING_P in register
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 384609f43cf..4d4d84ba773 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -209,10 +209,10 @@ sparc_override_options ()
     /* TEMIC sparclet */
     { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
     /* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's.  */
-    { "v8plus", PROCESSOR_V8PLUS, MASK_ISA, MASK_V9 },
+    { "v8plus", PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS },
     { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
     /* TI ultrasparc */
-    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
+    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS },
     { 0 }
   };
   struct cpu_table *cpu;
@@ -379,6 +379,7 @@ v9_regcmp_p (code)
   return (code == EQ || code == NE || code == GE || code == LT
 	  || code == LE || code == GT);
 }
+
 
 /* Operand constraints.  */
 
@@ -1257,7 +1258,7 @@ eligible_for_epilogue_delay (trial, slot)
 
   src = SET_SRC (pat);
 
-  /* This matches "*return_[qhs]".  */
+  /* This matches "*return_[qhs]i".  */
   if (arith_operand (src, GET_MODE (src)))
     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
 
@@ -2009,13 +2010,26 @@ output_move_double (operands)
 
   /* In v9, ldd can be used for word aligned addresses, so technically
      some of this logic is unneeded.  We still avoid ldd if the address
-     is obviously unaligned though.  */
+     is obviously unaligned though.
 
-  if (mem_aligned_8 (mem)
+     Integer ldd/std are deprecated in V9 and are slow on UltraSPARC.
+     Use them only if the access is volatile or not offsettable.  */
+
+  if ((mem_aligned_8 (mem)
+       && (REGNO (reg) >= 32
+	   || MEM_VOLATILE_P (mem)
+	   || ! ((optype0 == OFFSOP || optype1 == OFFSOP)
+		 && (sparc_cpu == PROCESSOR_ULTRASPARC
+		     || sparc_cpu == PROCESSOR_V9))))
       /* If this is a floating point register higher than %f31, then we
 	 *must* use an aligned load, since `ld' will not accept the
 	 register number.  */
-      || (TARGET_V9 && REGNO (reg) >= 64))
+      || (TARGET_V9 && REGNO (reg) >= 64)
+      /* Even if two instructions would otherwise be better than ldd/std,
+	 if this insn was put in a delay slot because reorg thought it
+	 was only one machine instruction, make sure it is only one
+	 instruction.  */
+      || dbr_sequence_length () != 0)
     {
       if (FP_REG_P (reg) || ! TARGET_ARCH64)
 	return (mem == op1 ? "ldd %1,%0" : "std %1,%0");
@@ -3504,6 +3518,16 @@ output_function_epilogue (file, size, leaf_function)
     }
 #endif
 
+  else if (current_function_epilogue_delay_list == 0)
+    {
+      /* If code does not drop into the epilogue, do nothing.  */
+      rtx insn = get_last_insn ();
+      if (GET_CODE (insn) == NOTE)
+	insn = prev_nonnote_insn (insn);
+      if (insn && GET_CODE (insn) == BARRIER)
+	return;
+    }
+
   /* Restore any call saved registers.  */
   if (num_gfregs)
     {
@@ -4631,8 +4655,7 @@ order_regs_for_local_alloc ()
 
 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
    This makes them candidates for using ldd and std insns.
 
-   Note reg1 and reg2 *must* be hard registers.  To be sure we will
-   abort if we are passed pseudo registers.  */
+   Note reg1 and reg2 *must* be hard registers. 
*/ int registers_ok_for_ldd_peep (reg1, reg2) @@ -4645,6 +4668,10 @@ registers_ok_for_ldd_peep (reg1, reg2) if (REGNO (reg1) % 2 != 0) return 0; + /* Integer ldd is deprecated in SPARC V9 */ + if (TARGET_V9 && REGNO (reg1) < 32) + return 0; + return (REGNO (reg1) == REGNO (reg2) - 1); } @@ -4762,13 +4789,17 @@ print_operand (file, x, code) are optimizing. This is always used with '(' below. */ /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch; this is a dbx bug. So, we only do this when optimizing. */ - if (dbr_sequence_length () == 0 && optimize) + /* On UltraSPARC, a branch in a delay slot causes a pipeline flush. + Always emit a nop in case the next instruction is a branch. */ + if (dbr_sequence_length () == 0 + && (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS)) fputs (",a", file); return; case '(': /* Output a 'nop' if there's nothing for the delay slot and we are not optimizing. This is always used with '*' above. */ - if (dbr_sequence_length () == 0 && ! optimize) + if (dbr_sequence_length () == 0 + && ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS)) fputs ("\n\tnop", file); return; case '_': @@ -4783,7 +4814,9 @@ print_operand (file, x, code) return; case 'Y': /* Adjust the operand to take into account a RESTORE operation. */ - if (GET_CODE (x) != REG) + if (GET_CODE (x) == CONST_INT) + break; + else if (GET_CODE (x) != REG) output_operand_lossage ("Invalid %%Y operand"); else if (REGNO (x) < 8) fputs (reg_names[REGNO (x)], file); @@ -6022,3 +6055,150 @@ supersparc_adjust_cost (insn, link, dep_insn, cost) return cost; } + +int +ultrasparc_adjust_cost (insn, link, dep_insn, cost) + rtx insn; + rtx link; + rtx dep_insn; + int cost; +{ + enum attr_type insn_type, dep_type; + rtx pat = PATTERN(insn); + rtx dep_pat = PATTERN (dep_insn); + + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + insn_type = get_attr_type (insn); + dep_type = get_attr_type (dep_insn); + +#define SLOW_FP(dep_type) \ +(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD) + switch (REG_NOTE_KIND (link)) + { + case 0: + /* Data dependency; DEP_INSN writes a register that INSN reads some + cycles later. */ + + switch (insn_type) + { + /* UltraSPARC can dual issue a store and an instruction setting + the value stored, except for divide and square root. */ + case TYPE_FPSTORE: + if (! SLOW_FP (dep_type)) + return 0; + break; + + case TYPE_STORE: + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + return cost; + + /* The dependency between the two instructions is on the data + that is being stored. Assume that the address of the store + is not also dependent. */ + if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) + return 0; + return cost; + + case TYPE_LOAD: + case TYPE_SLOAD: + case TYPE_FPLOAD: + /* A load does not return data until at least 11 cycles after + a store to the same location. 3 cycles are accounted for + in the load latency; add the other 8 here. */ + if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE) + { + /* If the addresses are not equal this may be a false + dependency because pointer aliasing could not be + determined. Add only 2 cycles in that case. 2 is + an arbitrary compromise between 8, which would cause + the scheduler to generate worse code elsewhere to + compensate for a dependency which might not really + exist, and 0. */ + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET + || GET_CODE (SET_DEST (pat)) != MEM + || GET_CODE (SET_SRC (dep_pat)) != MEM + || ! 
rtx_equal_p (XEXP (SET_DEST (pat), 0), + XEXP (SET_SRC (dep_pat), 0))) + return cost + 2; + + return cost + 8; + } + break; + + case TYPE_BRANCH: + /* Compare to branch latency is 0. There is no benefit from + separating compare and branch. */ + if (dep_type == TYPE_COMPARE) + return 0; + /* Floating point compare to branch latency is less than + compare to conditional move. */ + if (dep_type == TYPE_FPCMP) + return cost - 1; + break; + + case TYPE_FPCMOVE: + /* FMOVR class instructions can not issue in the same cycle + or the cycle after an instruction which writes any + integer register. Model this as cost 2 for dependent + instructions. */ + if (GET_CODE (PATTERN (insn)) == SET + && (GET_MODE (SET_DEST (PATTERN (insn))) == SFmode + || GET_MODE (SET_DEST (PATTERN (insn))) == DFmode) + && cost < 2) + return 2; + /* Otherwise check as for integer conditional moves. */ + + case TYPE_CMOVE: + /* Conditional moves involving integer registers wait until + 3 cycles after loads return data. The interlock applies + to all loads, not just dependent loads, but that is hard + to model. */ + if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD) + return cost + 3; + break; + } + break; + + case REG_DEP_ANTI: + /* Divide and square root lock destination registers for full latency. */ + if (! SLOW_FP (dep_type)) + return 0; + break; + } + + /* Other costs not accounted for: + - Multiply should be modeled as having no latency because there is + nothing the scheduler can do about it. + - Single precision floating point loads lock the other half of + the even/odd register pair. + - Several hazards associated with ldd/std are ignored because these + instructions are rarely generated for V9. + - A shift following an integer instruction which does not set the + condition codes can not issue in the same cycle. + - The floating point pipeline can not have both a single and double + precision operation active at the same time. Format conversions + and graphics instructions are given honorary double precision status. + - call and jmpl are always the first instruction in a group. */ + + return cost; +} + +int +sparc_issue_rate () +{ + switch (sparc_cpu) + { + default: + return 1; + case PROCESSOR_V8PLUS: + case PROCESSOR_V9: + /* Assume these generic V9 types are capable of at least dual-issue. */ + return 2; + case PROCESSOR_SUPERSPARC: + return 3; + case PROCESSOR_ULTRASPARC: + return 4; + } +} diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 2cc8b95a3d5..94636294c58 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -449,6 +449,27 @@ extern int target_flags; #define MASK_FPU_SET 0x400000 #define TARGET_FPU_SET (target_flags & MASK_FPU_SET) +/* Use the UltraSPARC Visual Instruction Set extensions. */ +#define MASK_VIS 0x1000000 +#define TARGET_VIS (target_flags & MASK_VIS) + +/* Compile for Solaris V8+. 64 bit instructions are available but the + high 32 bits of all registers except the globals and current outs may + be cleared at any time. */ +#define MASK_V8PLUS 0x2000000 +#define TARGET_V8PLUS (target_flags & MASK_V8PLUS) + +/* See sparc.md */ +#define TARGET_HARD_MUL32 \ + ((TARGET_V8 || TARGET_SPARCLITE \ + || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS) \ + && ! TARGET_V8PLUS) + +#define TARGET_HARD_MUL \ + (TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET \ + || TARGET_DEPRECATED_V8_INSNS || TARGET_V8PLUS) + + /* Macro to define tables used to set the flags. 
This is a list in braces of pairs in braces, each pair being { "NAME", VALUE } @@ -474,12 +495,14 @@ extern int target_flags; {"no-app-regs", -MASK_APP_REGS}, \ {"hard-quad-float", MASK_HARD_QUAD}, \ {"soft-quad-float", -MASK_HARD_QUAD}, \ + {"vis", MASK_VIS}, \ /* ??? These are deprecated, coerced to -mcpu=. Delete in 2.9. */ \ {"cypress", 0}, \ {"sparclite", 0}, \ {"f930", 0}, \ {"f934", 0}, \ {"v8", 0}, \ + {"v8plus", 0}, \ {"supersparc", 0}, \ /* End of deprecated options. */ \ /* -mptrNN exists for *experimental* purposes. */ \ @@ -1242,17 +1265,20 @@ extern char leaf_reg_remap[]; /* Get reg_class from a letter such as appears in the machine description. In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the - .md file for v8 and v9. */ - -#define REG_CLASS_FROM_LETTER(C) \ -(TARGET_V9 \ - ? ((C) == 'f' ? FP_REGS \ - : (C) == 'e' ? EXTRA_FP_REGS \ - : (C) == 'c' ? FPCC_REGS \ - : NO_REGS) \ - : ((C) == 'f' ? FP_REGS \ - : (C) == 'e' ? FP_REGS \ - : (C) == 'c' ? FPCC_REGS \ + .md file for v8 and v9. + Use 'd' and 'b' for single precision VIS operations if TARGET_VIS. */ + +#define REG_CLASS_FROM_LETTER(C) \ +(TARGET_V9 \ + ? ((C) == 'f' ? FP_REGS \ + : (C) == 'e' ? EXTRA_FP_REGS \ + : (C) == 'c' ? FPCC_REGS \ + : ((C) == 'd' && TARGET_VIS) ? FP_REGS \ + : ((C) == 'b' && TARGET_VIS) ? FP_REGS \ + : NO_REGS) \ + : ((C) == 'f' ? FP_REGS \ + : (C) == 'e' ? FP_REGS \ + : (C) == 'c' ? FPCC_REGS \ : NO_REGS)) /* The letters I, J, K, L and M in a register constraint string @@ -2683,11 +2709,13 @@ extern struct rtx_def *legitimize_pic_address (); #define ADDRESS_COST(RTX) 1 /* Compute extra cost of moving data between one register class - and another. - ??? v9: We ignore FPCC_REGS on the assumption they'll never be seen. */ -#define REGISTER_MOVE_COST(CLASS1, CLASS2) \ - (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \ - || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2))) ? 6 : 2) + and another. */ +#define REGISTER_MOVE_COST(CLASS1, CLASS2) \ + (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \ + || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2)) \ + || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \ + ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) \ + : 2) /* Provide the costs of a rtl expression. This is in the body of a switch on CODE. The purpose for the cost of MULT is to encourage @@ -2698,8 +2726,7 @@ extern struct rtx_def *legitimize_pic_address (); #define RTX_COSTS(X,CODE,OUTER_CODE) \ case MULT: \ - return (TARGET_V8 || TARGET_SPARCLITE) \ - ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \ + return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \ case DIV: \ case UDIV: \ case MOD: \ @@ -2711,16 +2738,24 @@ extern struct rtx_def *legitimize_pic_address (); case FIX: \ return 19; +#define ISSUE_RATE sparc_issue_rate() + /* Adjust the cost of dependencies. */ -#define ADJUST_COST(INSN,LINK,DEP,COST) \ - if (sparc_cpu == PROCESSOR_SUPERSPARC) \ - (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST) +#define ADJUST_COST(INSN,LINK,DEP,COST) \ +do { \ + if (sparc_cpu == PROCESSOR_SUPERSPARC) \ + (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \ + else if (sparc_cpu == PROCESSOR_ULTRASPARC) \ + (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST); \ +} while (0) /* Conditional branches with empty delay slots have a length of two. */ -#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ +#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ +do { \ if (GET_CODE (INSN) == CALL_INSN \ || (GET_CODE (INSN) == JUMP_INSN && ! 
simplejump_p (insn))) \ - LENGTH += 1; + LENGTH += 1; \ +} while (0) /* Control the assembler format that we output. */ diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 6efa3320852..0cd564baa34 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -67,7 +67,7 @@ ;; type "call_no_delay_slot" is a call followed by an unimp instruction. (define_attr "type" - "move,unary,binary,compare,load,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,address,imul,fpload,fpstore,fp,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc" + "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc" (const_string "binary")) ;; Set true if insn uses call-clobbered intermediate register. @@ -79,7 +79,7 @@ ;; Length (in # of insns). (define_attr "length" "" - (cond [(eq_attr "type" "load,fpload") + (cond [(eq_attr "type" "load,sload,fpload") (if_then_else (match_operand 1 "symbolic_memory_operand" "") (const_int 2) (const_int 1)) @@ -182,8 +182,11 @@ ;; ---- cypress CY7C602 scheduling: ;; Memory with load-delay of 1 (i.e., 2 cycle load). + (define_function_unit "memory" 1 0 - (and (eq_attr "type" "load,fpload") (eq_attr "cpu" "cypress")) 2 2) + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "load,sload,fpload")) + 2 2) ;; SPARC has two floating-point units: the FP ALU, ;; and the FP MUL/DIV/SQRT unit. @@ -205,34 +208,57 @@ ;; More insns cause the chip to stall. (define_function_unit "fp_alu" 1 0 - (and (eq_attr "type" "fp") (eq_attr "cpu" "cypress")) 5 5) + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fp,fpmove")) + 5 5) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpmul") (eq_attr "cpu" "cypress")) 7 7) + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fpmul")) + 7 7) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpdivs,fpdivd") (eq_attr "cpu" "cypress")) 37 37) + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fpdivs,fpdivd")) + 37 37) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpsqrt") (eq_attr "cpu" "cypress")) 63 63) + (and (eq_attr "cpu" "cypress") + (eq_attr "type" "fpsqrt")) + 63 63) ;; ----- The TMS390Z55 scheduling -;; The Supersparc can issue 1 - 3 insns per cycle; here we assume -;; three insns/cycle, and hence multiply all costs by three. -;; Combinations up to two integer, one ld/st, one fp. +;; The Supersparc can issue 1 - 3 insns per cycle: up to two integer, +;; one ld/st, one fp. ;; Memory delivers its result in one cycle to IU, zero cycles to FP + (define_function_unit "memory" 1 0 - (and (eq_attr "type" "load") (eq_attr "cpu" "supersparc")) 3 3) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "load,sload")) + 1 1) + (define_function_unit "memory" 1 0 - (and (eq_attr "type" "fpload") (eq_attr "cpu" "supersparc")) 1 3) -;; at least one in three instructions can be a mem opt. + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpload")) + 0 1) + (define_function_unit "memory" 1 0 - (and (eq_attr "type" "store,fpstore") (eq_attr "cpu" "supersparc")) 1 3) -;; at least one in three instructions can be a shift op. 
+ (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "store,fpstore")) + 1 1) + (define_function_unit "shift" 1 0 - (and (eq_attr "type" "shift") (eq_attr "cpu" "supersparc")) 1 3) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "shift")) + 1 1) ;; There are only two write ports to the integer register file ;; A store also uses a write port + (define_function_unit "iwport" 2 0 - (and (eq_attr "type" "load,store,shift,ialu") (eq_attr "cpu" "supersparc")) 1 3) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "load,sload,store,shift,ialu")) + 1 1) ;; Timings; throughput/latency ;; FADD 1/3 add/sub, format conv, compar, abs, neg @@ -244,50 +270,104 @@ ;; IMUL 4/4 (define_function_unit "fp_alu" 1 0 - (and (eq_attr "type" "fp,fpcmp") (eq_attr "cpu" "supersparc")) 9 3) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fp,fpmove,fpcmp")) + 3 1) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpmul") (eq_attr "cpu" "supersparc")) 9 3) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpmul")) + 3 1) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpdivs") (eq_attr "cpu" "supersparc")) 18 12) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpdivs")) + 6 4) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpdivd") (eq_attr "cpu" "supersparc")) 27 21) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpdivd")) + 9 7) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "fpsqrt") (eq_attr "cpu" "supersparc")) 36 30) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "fpsqrt")) + 12 10) + (define_function_unit "fp_mds" 1 0 - (and (eq_attr "type" "imul") (eq_attr "cpu" "supersparc")) 12 12) + (and (eq_attr "cpu" "supersparc") + (eq_attr "type" "imul")) + 4 4) ;; ----- sparclet tsc701 scheduling ;; The tsc701 issues 1 insn per cycle. ;; Results may be written back out of order. ;; Loads take 2 extra cycles to complete and 4 can be buffered at a time. + (define_function_unit "tsc701_load" 4 1 - (and (eq_attr "type" "load") (eq_attr "cpu" "tsc701")) 3 1) + (and (eq_attr "cpu" "tsc701") + (eq_attr "type" "load,sload")) + 3 1) + ;; Stores take 2(?) extra cycles to complete. ;; It is desirable to not have any memory operation in the following 2 cycles. ;; (??? or 2 memory ops in the case of std). + (define_function_unit "tsc701_store" 1 0 - (and (eq_attr "type" "store") (eq_attr "cpu" "tsc701")) 3 3 - [(eq_attr "type" "load,store")]) + (and (eq_attr "cpu" "tsc701") + (eq_attr "type" "store")) + 3 3 + [(eq_attr "type" "load,sload,store")]) + ;; The multiply unit has a latency of 5. (define_function_unit "tsc701_mul" 1 0 - (and (eq_attr "type" "imul") (eq_attr "cpu" "tsc701")) 5 5) + (and (eq_attr "cpu" "tsc701") + (eq_attr "type" "imul")) + 5 5) ;; ----- The UltraSPARC-1 scheduling -;; The Ultrasparc can issue 1 - 4 insns per cycle; here we assume -;; four insns/cycle, and hence multiply all costs by four. +;; UltraSPARC has two integer units. Shift instructions can only execute +;; on IE0. Condition code setting instructions, call, and jmpl (including +;; the ret and retl pseudo-instructions) can only execute on IE1. +;; Branch on register uses IE1, but branch on condition code does not. +;; Conditional moves take 2 cycles. No other instruction can issue in the +;; same cycle as a conditional move. +;; Multiply and divide take many cycles during which no other instructions +;; can issue. +;; Memory delivers its result in two cycles (except for signed loads, +;; which take one cycle more). 
One memory instruction can be issued per +;; cycle. -;; Memory delivers its result in three cycles to IU, three cycles to FP (define_function_unit "memory" 1 0 - (and (eq_attr "type" "load,fpload") (eq_attr "cpu" "ultrasparc")) 12 4) + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "load,fpload")) + 2 1) + (define_function_unit "memory" 1 0 - (and (eq_attr "type" "store,fpstore") (eq_attr "cpu" "ultrasparc")) 4 4) -(define_function_unit "ieu" 1 0 - (and (eq_attr "type" "ialu") (eq_attr "cpu" "ultrasparc")) 1 2) -(define_function_unit "ieu" 1 0 - (and (eq_attr "type" "shift") (eq_attr "cpu" "ultrasparc")) 1 4) + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "sload")) + 3 1) + +(define_function_unit "memory" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "store,fpstore")) + 1 1) + (define_function_unit "ieu" 1 0 - (and (eq_attr "type" "cmove") (eq_attr "cpu" "ultrasparc")) 8 4) + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "ialu,shift,compare,cmove,call")) + 1 1) + +(define_function_unit "ieu_shift" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "shift")) + 1 1) + +(define_function_unit "ieu_shift" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "cmove")) + 2 1) ;; Timings; throughput/latency ;; ?? FADD 1/3 add/sub, format conv, compar, abs, neg @@ -297,18 +377,50 @@ ;; ?? FSQRTs 1/12 ;; ?? FSQRTd 1/22 -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fp") (eq_attr "cpu" "ultrasparc")) 12 2) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpcmp") (eq_attr "cpu" "ultrasparc")) 8 2) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpmul") (eq_attr "cpu" "ultrasparc")) 12 2) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpdivs") (eq_attr "cpu" "ultrasparc")) 48 2) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpdivd") (eq_attr "cpu" "ultrasparc")) 88 2) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpsqrt") (eq_attr "cpu" "ultrasparc")) 48 2) +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpmove")) + 1 1) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmove")) + 2 1) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fp")) + 4 1) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmp")) + 2 1) + +(define_function_unit "fmul" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpmul")) + 4 1) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpcmove")) + 2 1) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpdivs")) + 12 12) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpdivd")) + 22 22) + +(define_function_unit "fadd" 1 0 + (and (eq_attr "cpu" "ultrasparc") + (eq_attr "type" "fpsqrt")) + 12 12) ;; Compare instructions. ;; This controls RTL generation and register allocation. @@ -1542,7 +1654,7 @@ (define_insn "get_pc_via_rdpc" [(set (match_operand:DI 0 "register_operand" "=r") (pc))] - "TARGET_PTR64" + "TARGET_V9" "rd %%pc,%0" [(set_attr "type" "move")]) @@ -1972,8 +2084,8 @@ ;; in an fp register, or an fp number is an integer register. 
(define_insn "*movsi_insn" - [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,f,r,r,f,Q,Q") - (match_operand:SI 1 "move_operand" "rI,!f,K,Q,!Q,rJ,!f"))] + [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,f,r,r,f,Q,Q,d") + (match_operand:SI 1 "move_operand" "rI,!f,K,Q,!Q,rJ,!f,J"))] "! TARGET_LIVE_G0 && (register_operand (operands[0], SImode) || register_operand (operands[1], SImode) @@ -1985,8 +2097,9 @@ ld %1,%0 ld %1,%0 st %r1,%0 - st %1,%0" - [(set_attr "type" "move,fp,move,load,fpload,store,fpstore") + st %1,%0 + fzeros %0" + [(set_attr "type" "move,fp,move,load,fpload,store,fpstore,fpmove") (set_attr "length" "1")]) (define_insn "*movsi_insn_liveg0" @@ -2005,7 +2118,7 @@ ld %1,%0 st %1,%0 st %1,%0" - [(set_attr "type" "move,move,move,fp,move,load,fpload,store,fpstore") + [(set_attr "type" "move,move,move,fpmove,move,load,fpload,store,fpstore") (set_attr "length" "1,1,2,1,1,1,1,1,1")]) (define_insn "*store_si" @@ -2028,6 +2141,47 @@ DONE; }") +;; V8+ movdi is like regular 32 bit except that a 64 bit zero can be stored +;; to aligned memory with a single instruction and the ldd/std instructions +;; are not used. +(define_insn "*movdi_v8plus" + [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,f,f,Q,b") + (match_operand:DI 1 "general_operand" "r,J,r,Q,i,?f,?Q,?f,?J"))] + "TARGET_V8PLUS + && (register_operand (operands[0], DImode) + || register_operand (operands[1], DImode) + || operands[1] == const0_rtx)" + "* +{ + if (which_alternative == 0) + return \"stx %%g0,%0\"; + if (which_alternative == 7) + return \"fzero %0\"; + if (FP_REG_P (operands[0]) || FP_REG_P (operands[1])) + return output_fp_move_double (operands); + return output_move_double (operands); +}" + [(set_attr "type" "move,store,store,load,multi,fp,fpload,fpstore,fpmove") + (set_attr "length" "2,1,3,3,3,2,3,3,1")]) + +;; ??? The Haifa scheduler does not split instructions after reload if +;; it also ran before reload. + +(define_split + [(set (match_operand:DI 0 "memory_operand" "=m") + (match_operand:DI 1 "register_operand" "r"))] + "TARGET_V8PLUS && !TARGET_ARCH64 && reload_completed + && REGNO (operands[1]) < 32 && ! MEM_VOLATILE_P (operands[0]) + && offsettable_memref_p (operands[0])" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "operands[3] = gen_highpart (SImode, operands[1]); + operands[5] = gen_lowpart (SImode, operands[1]); + operands[4] = adj_offsettable_operand (operands[0], 4); + PUT_MODE (operands[4], SImode); + operands[2] = copy_rtx (operands[0]); + PUT_MODE (operands[2], SImode);") + (define_insn "*movdi_sp32_insn" [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q") (match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))] @@ -2155,8 +2309,8 @@ ;; to be reloaded by putting the constant into memory. ;; It must come before the more general movsf pattern. 
(define_insn "*movsf_const_insn" - [(set (match_operand:SF 0 "general_operand" "=?r,f,m") - (match_operand:SF 1 "" "?F,m,G"))] + [(set (match_operand:SF 0 "general_operand" "=?r,f,m,d") + (match_operand:SF 1 "" "?F,m,G,G"))] "TARGET_FPU && GET_CODE (operands[1]) == CONST_DOUBLE && (GET_CODE (operands[0]) == REG @@ -2171,10 +2325,12 @@ return \"ld %1,%0\"; case 2: return \"st %%g0,%0\"; + case 3: + return \"fzeros %0\"; } }" - [(set_attr "type" "load,fpload,store") - (set_attr "length" "2,1,1")]) + [(set_attr "type" "load,fpload,store,fpmove") + (set_attr "length" "2,1,1,1")]) (define_expand "movsf" [(set (match_operand:SF 0 "general_operand" "") @@ -2199,7 +2355,7 @@ ld %1,%0 st %1,%0 st %1,%0" - [(set_attr "type" "fp,move,fpload,load,fpstore,store")]) + [(set_attr "type" "fpmove,move,fpload,load,fpstore,store")]) ;; Exactly the same as above, except that all `f' cases are deleted. ;; This is necessary to prevent reload from ever trying to use a `f' reg @@ -2232,8 +2388,8 @@ ;; It must come before the more general movdf pattern. (define_insn "*movdf_const_insn" - [(set (match_operand:DF 0 "general_operand" "=?r,e,o") - (match_operand:DF 1 "" "?F,m,G"))] + [(set (match_operand:DF 0 "general_operand" "=?r,e,o,d") + (match_operand:DF 1 "" "?F,m,G,G"))] "TARGET_FPU && GET_CODE (operands[1]) == CONST_DOUBLE && (GET_CODE (operands[0]) == REG @@ -2247,7 +2403,7 @@ case 1: return output_fp_move_double (operands); case 2: - if (TARGET_ARCH64) + if (TARGET_ARCH64 || (TARGET_V9 && mem_aligned_8 (operands[0]))) { return \"stx %%g0,%0\"; } @@ -2256,10 +2412,12 @@ operands[1] = adj_offsettable_operand (operands[0], 4); return \"st %%g0,%0\;st %%g0,%1\"; } + case 3: + return \"fzero %0\"; } }" - [(set_attr "type" "load,fpload,store") - (set_attr "length" "3,3,3")]) + [(set_attr "type" "load,fpload,store,fpmove") + (set_attr "length" "3,3,3,1")]) (define_expand "movdf" [(set (match_operand:DF 0 "general_operand" "") @@ -2368,7 +2526,7 @@ case 1: return output_fp_move_quad (operands); case 2: - if (TARGET_ARCH64) + if (TARGET_ARCH64 || (TARGET_V9 && mem_aligned_8 (operands[0]))) { operands[1] = adj_offsettable_operand (operands[0], 8); return \"stx %%g0,%0\;stx %%g0,%1\"; @@ -2730,7 +2888,7 @@ "@ fmovs%C1 %x2,%3,%0 fmovs%c1 %x2,%4,%0" - [(set_attr "type" "cmove")]) + [(set_attr "type" "fpcmove")]) (define_insn "*movdf_cc_sp64" [(set (match_operand:DF 0 "register_operand" "=e,e") @@ -2743,7 +2901,7 @@ "@ fmovd%C1 %x2,%3,%0 fmovd%c1 %x2,%4,%0" - [(set_attr "type" "cmove")]) + [(set_attr "type" "fpcmove")]) (define_insn "*movtf_cc_sp64" [(set (match_operand:TF 0 "register_operand" "=e,e") @@ -2756,7 +2914,7 @@ "@ fmovq%C1 %x2,%3,%0 fmovq%c1 %x2,%4,%0" - [(set_attr "type" "cmove")]) + [(set_attr "type" "fpcmove")]) (define_insn "*movqi_cc_reg_sp64" [(set (match_operand:QI 0 "register_operand" "=r,r") @@ -2822,7 +2980,7 @@ "@ fmovrs%D1 %2,%3,%0 fmovrs%d1 %2,%4,%0" - [(set_attr "type" "cmove")]) + [(set_attr "type" "fpcmove")]) (define_insn "*movdf_cc_reg_sp64" [(set (match_operand:DF 0 "register_operand" "=e,e") @@ -2835,7 +2993,7 @@ "@ fmovrd%D1 %2,%3,%0 fmovrd%d1 %2,%4,%0" - [(set_attr "type" "cmove")]) + [(set_attr "type" "fpcmove")]) (define_insn "*movtf_cc_reg_sp64" [(set (match_operand:TF 0 "register_operand" "=e,e") @@ -2848,7 +3006,7 @@ "@ fmovrq%D1 %2,%3,%0 fmovrq%d1 %2,%4,%0" - [(set_attr "type" "cmove")]) + [(set_attr "type" "fpcmove")]) ;;- zero extension instructions @@ -3056,7 +3214,7 @@ (sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))] "" "ldsh %1,%0" - [(set_attr "type" "load")]) + 
[(set_attr "type" "sload")]) (define_expand "extendqihi2" [(set (match_operand:HI 0 "register_operand" "") @@ -3093,7 +3251,7 @@ (sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))] "" "ldsb %1,%0" - [(set_attr "type" "load")]) + [(set_attr "type" "sload")]) (define_expand "extendqisi2" [(set (match_operand:SI 0 "register_operand" "") @@ -3123,7 +3281,7 @@ (sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))] "" "ldsb %1,%0" - [(set_attr "type" "load")]) + [(set_attr "type" "sload")]) (define_expand "extendqidi2" [(set (match_operand:DI 0 "register_operand" "") @@ -3153,7 +3311,7 @@ (sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))] "TARGET_ARCH64" "ldsb %1,%0" - [(set_attr "type" "load")]) + [(set_attr "type" "sload")]) (define_expand "extendhidi2" [(set (match_operand:DI 0 "register_operand" "") @@ -3198,7 +3356,7 @@ "@ sra %1,0,%0 ldsw %1,%0" - [(set_attr "type" "unary,load") + [(set_attr "type" "unary,sload") (set_attr "length" "1")]) ;; Special pattern for optimizing bit-field compares. This is needed @@ -3619,6 +3777,15 @@ }" [(set_attr "length" "2")]) +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r"))) + (clobber (reg:SI 100))] + "! TARGET_ARCH64" + "addcc %L2,%1,%L0\;addx %H2,0,%H0" + [(set_attr "type" "multi")]) + (define_insn "*adddi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") (plus:DI (match_operand:DI 1 "arith_double_operand" "%r") @@ -3627,12 +3794,14 @@ "add %1,%2,%0") (define_insn "addsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (match_operand:SI 1 "arith_operand" "%r") - (match_operand:SI 2 "arith_operand" "rI")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (plus:SI (match_operand:SI 1 "arith_operand" "%r,d") + (match_operand:SI 2 "arith_operand" "rI,d")))] "" - "add %1,%2,%0" - [(set_attr "type" "ialu")]) + "@ + add %1,%2,%0 + fpadd32s %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_insn "*cmp_cc_plus" [(set (reg:CC_NOOV 100) @@ -3721,6 +3890,15 @@ }" [(set_attr "length" "2")]) +(define_insn "" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "register_operand" "r") + (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (reg:SI 100))] + "! 
TARGET_ARCH64" + "subcc %L1,%2,%L0\;addx %H1,0,%H0" + [(set_attr "type" "multi")]) + (define_insn "*subdi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (match_operand:DI 1 "register_operand" "r") @@ -3729,12 +3907,14 @@ "sub %1,%2,%0") (define_insn "subsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (minus:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "arith_operand" "rI")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (minus:SI (match_operand:SI 1 "register_operand" "r,d") + (match_operand:SI 2 "arith_operand" "rI,d")))] "" - "sub %1,%2,%0" - [(set_attr "type" "ialu")]) + "@ + sub %1,%2,%0 + fpsub32s %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_insn "*cmp_minus_cc" [(set (reg:CC_NOOV 100) @@ -3784,7 +3964,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (mult:SI (match_operand:SI 1 "arith_operand" "%r") (match_operand:SI 2 "arith_operand" "rI")))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "smul %1,%2,%0" [(set_attr "type" "imul")]) @@ -3812,7 +3992,7 @@ [(set (match_operand:DI 0 "register_operand" "") (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "")) (sign_extend:DI (match_operand:SI 2 "arith_operand" ""))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" " { if (CONSTANT_P (operands[2])) @@ -3826,7 +4006,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "* { return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\"; @@ -3841,7 +4021,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) (match_operand:SI 2 "small_int" "I")))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "* { return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\"; @@ -3856,7 +4036,7 @@ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "")) (sign_extend:DI (match_operand:SI 2 "arith_operand" ""))) (const_int 32))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" " { if (CONSTANT_P (operands[2])) @@ -3872,7 +4052,7 @@ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) (const_int 32))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "smul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -3890,7 +4070,7 @@ [(set (match_operand:DI 0 "register_operand" "") (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" ""))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" " { if (CONSTANT_P (operands[2])) @@ -3904,7 +4084,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "* { return TARGET_SPARCLET ? 
\"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\"; @@ -3919,7 +4099,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (match_operand:SI 2 "uns_small_int" "")))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "* { return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\"; @@ -3934,7 +4114,7 @@ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "")) (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" ""))) (const_int 32))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" " { if (CONSTANT_P (operands[2])) @@ -3950,7 +4130,7 @@ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))) (const_int 32))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "umul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -3960,7 +4140,7 @@ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) (match_operand:SI 2 "uns_small_int" "")) (const_int 32))))] - "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS" + "TARGET_HARD_MUL" "umul %1,%2,%%g0\;rd %%y,%0" [(set_attr "length" "2")]) @@ -4102,14 +4282,17 @@ "") (define_insn "*anddi3_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (and:DI (match_operand:DI 1 "arith_double_operand" "%r") - (match_operand:DI 2 "arith_double_operand" "rHI")))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (and:DI (match_operand:DI 1 "arith_double_operand" "%r,b") + (match_operand:DI 2 "arith_double_operand" "rHI,b")))] "! TARGET_ARCH64" "* { rtx op2 = operands[2]; + if (which_alternative == 1) + return \"fand %1,%2,%0\"; + if (GET_CODE (op2) == CONST_INT || GET_CODE (op2) == CONST_DOUBLE) { @@ -4125,7 +4308,7 @@ } return \"and %1,%2,%0\;and %R1,%R2,%R0\"; }" - [(set_attr "length" "2")]) + [(set_attr "length" "2,1")]) (define_insn "*anddi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4135,12 +4318,14 @@ "and %1,%2,%0") (define_insn "andsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (and:SI (match_operand:SI 1 "arith_operand" "%r") - (match_operand:SI 2 "arith_operand" "rI")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (and:SI (match_operand:SI 1 "arith_operand" "%r,d") + (match_operand:SI 2 "arith_operand" "rI,d")))] "" - "and %1,%2,%0" - [(set_attr "type" "ialu")]) + "@ + and %1,%2,%0 + fands %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -4158,12 +4343,14 @@ }") (define_insn "*and_not_di_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (and:DI (not:DI (match_operand:DI 1 "register_operand" "r")) - (match_operand:DI 2 "register_operand" "r")))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (and:DI (not:DI (match_operand:DI 1 "register_operand" "r,b")) + (match_operand:DI 2 "register_operand" "r,b")))] "! 
TARGET_ARCH64" - "andn %2,%1,%0\;andn %R2,%R1,%R0" - [(set_attr "length" "2")]) + "@ + andn %2,%1,%0\;andn %R2,%R1,%R0 + fandnot1 %1,%2,%0" + [(set_attr "length" "2,1")]) (define_insn "*and_not_di_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4173,12 +4360,14 @@ "andn %2,%1,%0") (define_insn "*and_not_si" - [(set (match_operand:SI 0 "register_operand" "=r") - (and:SI (not:SI (match_operand:SI 1 "register_operand" "r")) - (match_operand:SI 2 "register_operand" "r")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (and:SI (not:SI (match_operand:SI 1 "register_operand" "r,d")) + (match_operand:SI 2 "register_operand" "r,d")))] "" - "andn %2,%1,%0" - [(set_attr "type" "ialu")]) + "@ + andn %2,%1,%0 + fandnot1s %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_expand "iordi3" [(set (match_operand:DI 0 "register_operand" "") @@ -4188,14 +4377,17 @@ "") (define_insn "*iordi3_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (ior:DI (match_operand:DI 1 "arith_double_operand" "%r") - (match_operand:DI 2 "arith_double_operand" "rHI")))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (ior:DI (match_operand:DI 1 "arith_double_operand" "%r,b") + (match_operand:DI 2 "arith_double_operand" "rHI,b")))] "! TARGET_ARCH64" "* { rtx op2 = operands[2]; + if (which_alternative == 1) + return \"for %1,%2,%0\"; + if (GET_CODE (op2) == CONST_INT || GET_CODE (op2) == CONST_DOUBLE) { @@ -4211,7 +4403,7 @@ } return \"or %1,%2,%0\;or %R1,%R2,%R0\"; }" - [(set_attr "length" "2")]) + [(set_attr "length" "2,1")]) (define_insn "*iordi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4221,12 +4413,14 @@ "or %1,%2,%0") (define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (ior:SI (match_operand:SI 1 "arith_operand" "%r") - (match_operand:SI 2 "arith_operand" "rI")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (ior:SI (match_operand:SI 1 "arith_operand" "%r,d") + (match_operand:SI 2 "arith_operand" "rI,d")))] "" - "or %1,%2,%0" - [(set_attr "type" "ialu")]) + "@ + or %1,%2,%0 + fors %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -4244,12 +4438,14 @@ }") (define_insn "*or_not_di_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (ior:DI (not:DI (match_operand:DI 1 "register_operand" "r")) - (match_operand:DI 2 "register_operand" "r")))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (ior:DI (not:DI (match_operand:DI 1 "register_operand" "r,b")) + (match_operand:DI 2 "register_operand" "r,b")))] "! 
TARGET_ARCH64" - "orn %2,%1,%0\;orn %R2,%R1,%R0" - [(set_attr "length" "2")]) + "@ + orn %2,%1,%0\;orn %R2,%R1,%R0 + fornot1 %1,%2,%0" + [(set_attr "length" "2,1")]) (define_insn "*or_not_di_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4259,12 +4455,14 @@ "orn %2,%1,%0") (define_insn "*or_not_si" - [(set (match_operand:SI 0 "register_operand" "=r") - (ior:SI (not:SI (match_operand:SI 1 "register_operand" "r")) - (match_operand:SI 2 "register_operand" "r")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (ior:SI (not:SI (match_operand:SI 1 "register_operand" "r,d")) + (match_operand:SI 2 "register_operand" "r,d")))] "" - "orn %2,%1,%0" - [(set_attr "type" "ialu")]) + "@ + orn %2,%1,%0 + fornot1s %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_expand "xordi3" [(set (match_operand:DI 0 "register_operand" "") @@ -4274,14 +4472,17 @@ "") (define_insn "*xorsi3_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (xor:DI (match_operand:DI 1 "arith_double_operand" "%r") - (match_operand:DI 2 "arith_double_operand" "rHI")))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (xor:DI (match_operand:DI 1 "arith_double_operand" "%r,b") + (match_operand:DI 2 "arith_double_operand" "rHI,b")))] "! TARGET_ARCH64" "* { rtx op2 = operands[2]; + if (which_alternative == 1) + return \"fxor %1,%2,%0\"; + if (GET_CODE (op2) == CONST_INT || GET_CODE (op2) == CONST_DOUBLE) { @@ -4297,7 +4498,7 @@ } return \"xor %1,%2,%0\;xor %R1,%R2,%R0\"; }" - [(set_attr "length" "2")]) + [(set_attr "length" "2,1")]) (define_insn "*xordi3_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4307,12 +4508,14 @@ "xor %r1,%2,%0") (define_insn "xorsi3" - [(set (match_operand:SI 0 "register_operand" "=r") - (xor:SI (match_operand:SI 1 "arith_operand" "%rJ") - (match_operand:SI 2 "arith_operand" "rI")))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (xor:SI (match_operand:SI 1 "arith_operand" "%rJ,d") + (match_operand:SI 2 "arith_operand" "rI,d")))] "" - "xor %r1,%2,%0" - [(set_attr "type" "ialu")]) + "@ + xor %r1,%2,%0 + fxors %1,%2,%0" + [(set_attr "type" "ialu,fp")]) (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -4347,27 +4550,33 @@ ;; xnor patterns. Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b). ;; Combine now canonicalizes to the rightmost expression. (define_insn "*xor_not_di_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (not:DI (xor:DI (match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "register_operand" "r"))))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (not:DI (xor:DI (match_operand:DI 1 "register_operand" "r,b") + (match_operand:DI 2 "register_operand" "r,b"))))] "! 
TARGET_ARCH64" - "xnor %1,%2,%0\;xnor %R1,%R2,%R0" - [(set_attr "length" "2")]) + "@ + xnor %1,%2,%0\;xnor %R1,%R2,%R0 + fxnor %1,%2,%0" + [(set_attr "length" "2,1") + (set_attr "type" "ialu,fp")]) (define_insn "*xor_not_di_sp64" [(set (match_operand:DI 0 "register_operand" "=r") (not:DI (xor:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") (match_operand:DI 2 "arith_double_operand" "rHI"))))] "TARGET_ARCH64" - "xnor %r1,%2,%0") + "xnor %r1,%2,%0" + [(set_attr "type" "ialu")]) (define_insn "*xor_not_si" - [(set (match_operand:SI 0 "register_operand" "=r") - (not:SI (xor:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") - (match_operand:SI 2 "arith_operand" "rI"))))] + [(set (match_operand:SI 0 "register_operand" "=r,d") + (not:SI (xor:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,d") + (match_operand:SI 2 "arith_operand" "rI,d"))))] "" - "xnor %r1,%2,%0" - [(set_attr "type" "ialu")]) + "@ + xnor %r1,%2,%0 + fxnors %1,%2,%0" + [(set_attr "type" "ialu,fp")]) ;; These correspond to the above in the case where we also (or only) ;; want to set the condition code. @@ -4608,12 +4817,14 @@ "") (define_insn "*one_cmpldi2_sp32" - [(set (match_operand:DI 0 "register_operand" "=r") - (not:DI (match_operand:DI 1 "register_operand" "r")))] + [(set (match_operand:DI 0 "register_operand" "=r,b") + (not:DI (match_operand:DI 1 "register_operand" "r,b")))] "! TARGET_ARCH64" - "xnor %1,0,%0\;xnor %R1,0,%R0" - [(set_attr "type" "unary") - (set_attr "length" "2")]) + "@ + xnor %1,0,%0\;xnor %R1,0,%R0 + fnot1 %1,%0" + [(set_attr "type" "unary,fp") + (set_attr "length" "2,1")]) (define_insn "*one_cmpldi2_sp64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -4623,21 +4834,24 @@ [(set_attr "type" "unary")]) (define_insn "one_cmplsi2" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (not:SI (match_operand:SI 1 "arith_operand" "r,I")))] + [(set (match_operand:SI 0 "register_operand" "=r,r,d") + (not:SI (match_operand:SI 1 "arith_operand" "r,I,d")))] "" "* { if (which_alternative == 0) return \"xnor %1,0,%0\"; + if (which_alternative == 1) + return \"fnot1s %1,%0\"; if (TARGET_LIVE_G0) output_asm_insn (\"and %%g0,0,%%g0\", operands); return \"xnor %%g0,%1,%0\"; }" - [(set_attr "type" "unary") + [(set_attr "type" "unary,unary,fp") (set_attr_alternative "length" [(const_int 1) - (if_then_else (eq_attr "live_g0" "yes") (const_int 2) (const_int 1))])]) + (if_then_else (eq_attr "live_g0" "yes") (const_int 2) (const_int 1)) + (const_int 1)])]) (define_insn "*cmp_cc_not" [(set (reg:CC 100) @@ -4804,7 +5018,7 @@ return TARGET_V9 ? \"fnegd %1,%0\;fmovd %S1,%S0\" : \"fnegs %1,%0\;fmovs %R1,%R0\;fmovs %S1,%S0\;fmovs %T1,%T0\"; }" - [(set_attr "type" "fp") + [(set_attr "type" "fpmove") (set_attr_alternative "length" [(const_int 1) (if_then_else (eq_attr "isa" "v9") (const_int 2) (const_int 4))])]) @@ -4822,7 +5036,7 @@ else return \"fnegs %1,%0\;fmovs %R1,%R0\"; }" - [(set_attr "type" "fp") + [(set_attr "type" "fpmove") (set_attr_alternative "length" [(const_int 1) (if_then_else (eq_attr "isa" "v9") (const_int 1) (const_int 2))])]) @@ -4832,7 +5046,7 @@ (neg:SF (match_operand:SF 1 "register_operand" "f")))] "TARGET_FPU" "fnegs %1,%0" - [(set_attr "type" "fp")]) + [(set_attr "type" "fpmove")]) (define_insn "abstf2" [(set (match_operand:TF 0 "register_operand" "=e,e") @@ -4848,7 +5062,7 @@ return TARGET_V9 ? 
\"fabsd %1,%0\;fmovd %S1,%S0\" : \"fabss %1,%0\;fmovs %R1,%R0\;fmovs %S1,%S0\;fmovs %T1,%T0\"; }" - [(set_attr "type" "fp") + [(set_attr "type" "fpmove") (set_attr_alternative "length" [(const_int 1) (if_then_else (eq_attr "isa" "v9") (const_int 2) (const_int 4))])]) @@ -4866,7 +5080,7 @@ else return \"fabss %1,%0\;fmovs %R1,%R0\"; }" - [(set_attr "type" "fp") + [(set_attr "type" "fpmove") (set_attr_alternative "length" [(const_int 1) (if_then_else (eq_attr "isa" "v9") (const_int 1) (const_int 2))])]) @@ -4876,7 +5090,7 @@ (abs:SF (match_operand:SF 1 "register_operand" "f")))] "TARGET_FPU" "fabss %1,%0" - [(set_attr "type" "fp")]) + [(set_attr "type" "fpmove")]) (define_insn "sqrttf2" [(set (match_operand:TF 0 "register_operand" "=e") @@ -5791,13 +6005,32 @@ ;; The conditions in which we do this are very restricted and are ;; explained in the code for {registers,memory}_ok_for_ldd functions. +(define_peephole + [(set (match_operand:SI 0 "memory_operand" "") + (const_int 0)) + (set (match_operand:SI 1 "memory_operand" "") + (const_int 0))] + "TARGET_V9 + && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[1]) + && addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[1], 0))" + "stx %%g0,%0") + +(define_peephole + [(set (match_operand:SI 0 "memory_operand" "") + (const_int 0)) + (set (match_operand:SI 1 "memory_operand" "") + (const_int 0))] + "TARGET_V9 + && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[1]) + && addrs_ok_for_ldd_peep (XEXP (operands[1], 0), XEXP (operands[0], 0))" + "stx %%g0,%1") + (define_peephole [(set (match_operand:SI 0 "register_operand" "=rf") (match_operand:SI 1 "memory_operand" "")) (set (match_operand:SI 2 "register_operand" "=rf") (match_operand:SI 3 "memory_operand" ""))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[0], operands[2]) + "registers_ok_for_ldd_peep (operands[0], operands[2]) && ! MEM_VOLATILE_P (operands[1]) && ! MEM_VOLATILE_P (operands[3]) && addrs_ok_for_ldd_peep (XEXP (operands[1], 0), XEXP (operands[3], 0))" "ldd %1,%0") @@ -5807,8 +6040,7 @@ (match_operand:SI 1 "register_operand" "rf")) (set (match_operand:SI 2 "memory_operand" "") (match_operand:SI 3 "register_operand" "rf"))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[1], operands[3]) + "registers_ok_for_ldd_peep (operands[1], operands[3]) && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[2]) && addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[2], 0))" "std %1,%0") @@ -5818,8 +6050,7 @@ (match_operand:SF 1 "memory_operand" "")) (set (match_operand:SF 2 "register_operand" "=fr") (match_operand:SF 3 "memory_operand" ""))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[0], operands[2]) + "registers_ok_for_ldd_peep (operands[0], operands[2]) && ! MEM_VOLATILE_P (operands[1]) && ! MEM_VOLATILE_P (operands[3]) && addrs_ok_for_ldd_peep (XEXP (operands[1], 0), XEXP (operands[3], 0))" "ldd %1,%0") @@ -5829,8 +6060,7 @@ (match_operand:SF 1 "register_operand" "fr")) (set (match_operand:SF 2 "memory_operand" "") (match_operand:SF 3 "register_operand" "fr"))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[1], operands[3]) + "registers_ok_for_ldd_peep (operands[1], operands[3]) && ! MEM_VOLATILE_P (operands[0]) && ! MEM_VOLATILE_P (operands[2]) && addrs_ok_for_ldd_peep (XEXP (operands[0], 0), XEXP (operands[2], 0))" "std %1,%0") @@ -5840,8 +6070,7 @@ (match_operand:SI 1 "memory_operand" "")) (set (match_operand:SI 2 "register_operand" "=rf") (match_operand:SI 3 "memory_operand" ""))] - "! 
TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[2], operands[0]) + "registers_ok_for_ldd_peep (operands[2], operands[0]) && ! MEM_VOLATILE_P (operands[3]) && ! MEM_VOLATILE_P (operands[1]) && addrs_ok_for_ldd_peep (XEXP (operands[3], 0), XEXP (operands[1], 0))" "ldd %3,%2") @@ -5851,8 +6080,7 @@ (match_operand:SI 1 "register_operand" "rf")) (set (match_operand:SI 2 "memory_operand" "") (match_operand:SI 3 "register_operand" "rf"))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[3], operands[1]) + "registers_ok_for_ldd_peep (operands[3], operands[1]) && ! MEM_VOLATILE_P (operands[2]) && ! MEM_VOLATILE_P (operands[0]) && addrs_ok_for_ldd_peep (XEXP (operands[2], 0), XEXP (operands[0], 0))" "std %3,%2") @@ -5862,8 +6090,7 @@ (match_operand:SF 1 "memory_operand" "")) (set (match_operand:SF 2 "register_operand" "=fr") (match_operand:SF 3 "memory_operand" ""))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[2], operands[0]) + "registers_ok_for_ldd_peep (operands[2], operands[0]) && ! MEM_VOLATILE_P (operands[3]) && ! MEM_VOLATILE_P (operands[1]) && addrs_ok_for_ldd_peep (XEXP (operands[3], 0), XEXP (operands[1], 0))" "ldd %3,%2") @@ -5873,8 +6100,7 @@ (match_operand:SF 1 "register_operand" "fr")) (set (match_operand:SF 2 "memory_operand" "") (match_operand:SF 3 "register_operand" "fr"))] - "! TARGET_ARCH64 - && registers_ok_for_ldd_peep (operands[3], operands[1]) + "registers_ok_for_ldd_peep (operands[3], operands[1]) && ! MEM_VOLATILE_P (operands[2]) && ! MEM_VOLATILE_P (operands[0]) && addrs_ok_for_ldd_peep (XEXP (operands[2], 0), XEXP (operands[0], 0))" "std %3,%2") @@ -6072,24 +6298,13 @@ (define_insn "*return_adddi" [(set (match_operand:DI 0 "restore_operand" "") - (plus:DI (match_operand:DI 1 "arith_double_operand" "%r") + (plus:DI (match_operand:DI 1 "arith_operand" "%r") (match_operand:DI 2 "arith_double_operand" "rHI"))) (return)] - "TARGET_ARCH64 && ! TARGET_EPILOGUE - && (register_operand (operands[1], DImode) - || register_operand (operands[2], DImode))" + "TARGET_ARCH64 && ! TARGET_EPILOGUE" "ret\;restore %r1,%2,%Y0" [(set_attr "type" "multi")]) -(define_insn "*return_subsi" - [(set (match_operand:SI 0 "restore_operand" "") - (minus:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "small_int" "I"))) - (return)] - "! TARGET_EPILOGUE && INTVAL (operands[2]) != -4096" - "ret\;restore %1,%n2,%Y0" - [(set_attr "type" "multi")]) - ;; The following pattern is only generated by delayed-branch scheduling, ;; when the insn winds up in the epilogue. (define_insn "*return_sf" @@ -6117,10 +6332,7 @@ (clobber (reg:SI 15))]) (set (pc) (label_ref (match_operand 2 "" "")))] "short_branch (INSN_UID (insn), INSN_UID (operands[2]))" - "* -{ - return \"call %a0,%1\;add %%o7,(%l2-.-4),%%o7\"; -}") + "call %a0,%1\;add %%o7,(%l2-.-4),%%o7") (define_peephole [(parallel [(set (match_operand 0 "" "") -- 2.30.2
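
A note for readers tracing the sparc.h hunks above: ADJUST_COST and ISSUE_RATE
are the two target macros the Haifa scheduler consults.  The sketch below is
illustrative only and is not part of the patch; the example_* function names
are hypothetical, while the two macros, rtx, and sparc_issue_rate come from
the sources above.  It follows the K&R style of the 1998 tree.

/* Hypothetical sketch, not part of this patch: the contract of the
   two scheduler hooks added to sparc.h.  ADJUST_COST may rewrite the
   latency computed for the dependence LINK between DEP_INSN and INSN;
   ISSUE_RATE bounds how many insns the scheduler groups per cycle.  */

static int
example_dep_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
#ifdef ADJUST_COST
  /* On SPARC this expands to the do-while above, dispatching to
     supersparc_adjust_cost or ultrasparc_adjust_cost by sparc_cpu.  */
  ADJUST_COST (insn, link, dep_insn, cost);
#endif
  return cost;
}

static int
example_issue_rate ()
{
#ifdef ISSUE_RATE
  return ISSUE_RATE;	/* sparc_issue_rate (): 4 for -mcpu=ultrasparc.  */
#else
  return 1;
#endif
}

Under this contract the UltraSPARC model above can, for instance, return a
cost of 0 for a store whose data dependence is on the instruction computing
the stored value, allowing the pair to dual-issue.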