diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 6b0d56ed3ea..9bb31e76ed9 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1,7 +1,5 @@
 /* RTL simplification functions for GNU compiler.
-   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-   2011 Free Software Foundation, Inc.
+   Copyright (C) 1987-2013 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -35,7 +33,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "function.h"
 #include "expr.h"
 #include "diagnostic-core.h"
-#include "output.h"
 #include "ggc.h"
 #include "target.h"
 
@@ -67,7 +64,7 @@ static rtx simplify_binary_operation_1 (enum rtx_code, enum machine_mode,
 static rtx
 neg_const_int (enum machine_mode mode, const_rtx i)
 {
-  return gen_int_mode (- INTVAL (i), mode);
+  return gen_int_mode (-(unsigned HOST_WIDE_INT) INTVAL (i), mode);
 }
 
 /* Test whether expression, X, is an immediate constant that represents
@@ -89,8 +86,8 @@ mode_signbit_p (enum machine_mode mode, const_rtx x)
   if (width <= HOST_BITS_PER_WIDE_INT
       && CONST_INT_P (x))
     val = INTVAL (x);
-  else if (width <= 2 * HOST_BITS_PER_WIDE_INT
-	   && GET_CODE (x) == CONST_DOUBLE
+  else if (width <= HOST_BITS_PER_DOUBLE_INT
+	   && CONST_DOUBLE_AS_INT_P (x)
 	   && CONST_DOUBLE_LOW (x) == 0)
     {
       val = CONST_DOUBLE_HIGH (x);
@@ -201,7 +198,7 @@ avoid_constant_pool_reference (rtx x)
       /* Handle float extensions of constant pool references.  */
       tmp = XEXP (x, 0);
       c = avoid_constant_pool_reference (tmp);
-      if (c != tmp && GET_CODE (c) == CONST_DOUBLE)
+      if (c != tmp && CONST_DOUBLE_AS_FLOAT_P (c))
	{
	  REAL_VALUE_TYPE d;
 
@@ -245,7 +242,8 @@ avoid_constant_pool_reference (rtx x)
   /* If we're accessing the constant in a different mode than it was
      originally stored, attempt to fix that up via subreg simplifications.
      If that fails we have no choice but to return the original memory.  */
-  if (offset != 0 || cmode != GET_MODE (x))
+  if ((offset != 0 || cmode != GET_MODE (x))
+      && offset >= 0 && offset < GET_MODE_SIZE (cmode))
     {
       rtx tem = simplify_subreg (GET_MODE (x), c, cmode, offset);
       if (tem && CONSTANT_P (tem))
@@ -565,6 +563,220 @@ simplify_replace_rtx (rtx x, const_rtx old_rtx, rtx new_rtx)
   return simplify_replace_fn_rtx (x, old_rtx, 0, new_rtx);
 }
 
+/* Try to simplify a MODE truncation of OP, which has OP_MODE.
+   Only handle cases where the truncated value is inherently an rvalue.
+
+   RTL provides two ways of truncating a value:
+
+   1. a lowpart subreg.  This form is only a truncation when both
+      the outer and inner modes (here MODE and OP_MODE respectively)
+      are scalar integers, and only then when the subreg is used as
+      an rvalue.
+
+      It is only valid to form such truncating subregs if the
+      truncation requires no action by the target.  The onus for
+      proving this is on the creator of the subreg -- e.g. the
+      caller to simplify_subreg or simplify_gen_subreg -- and typically
+      involves either TRULY_NOOP_TRUNCATION_MODES_P or truncated_to_mode.
+
+   2. a TRUNCATE.  This form handles both scalar and compound integers.
+
+   The first form is preferred where valid.
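+   For example, an SImode rvalue truncation of (reg:DI X) can be
+   written as the lowpart (subreg:SI (reg:DI X) 0) on a little-endian
+   target when the target allows it, or as (truncate:SI (reg:DI X))
+   in the general case.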
+   However, the TRUNCATE
+   handling in simplify_unary_operation turns the second form into the
+   first form when TRULY_NOOP_TRUNCATION_MODES_P or truncated_to_mode allow,
+   so it is generally safe to form rvalue truncations using:
+
+      simplify_gen_unary (TRUNCATE, ...)
+
+   and leave simplify_unary_operation to work out which representation
+   should be used.
+
+   Because of the proof requirements on (1), simplify_truncation must
+   also use simplify_gen_unary (TRUNCATE, ...) to truncate parts of OP,
+   regardless of whether the outer truncation came from a SUBREG or a
+   TRUNCATE.  For example, if the caller has proven that an SImode
+   truncation of:
+
+      (and:DI X Y)
+
+   is a no-op and can be represented as a subreg, it does not follow
+   that SImode truncations of X and Y are also no-ops.  On a target
+   like 64-bit MIPS that requires SImode values to be stored in
+   sign-extended form, an SImode truncation of:
+
+      (and:DI (reg:DI X) (const_int 63))
+
+   is trivially a no-op because only the lower 6 bits can be set.
+   However, X is still an arbitrary 64-bit number and so we cannot
+   assume that truncating it too is a no-op.  */
+
+static rtx
+simplify_truncation (enum machine_mode mode, rtx op,
+		     enum machine_mode op_mode)
+{
+  unsigned int precision = GET_MODE_UNIT_PRECISION (mode);
+  unsigned int op_precision = GET_MODE_UNIT_PRECISION (op_mode);
+  gcc_assert (precision <= op_precision);
+
+  /* Optimize truncations of zero and sign extended values.  */
+  if (GET_CODE (op) == ZERO_EXTEND
+      || GET_CODE (op) == SIGN_EXTEND)
+    {
+      /* There are three possibilities.  If MODE is the same as the
+	 origmode, we can omit both the extension and the truncation.
+	 If MODE is not larger than the origmode, we can apply the
+	 truncation without the extension.  Finally, if MODE is larger
+	 than the origmode, we can just extend to the appropriate
+	 mode.  */
+      enum machine_mode origmode = GET_MODE (XEXP (op, 0));
+      if (mode == origmode)
+	return XEXP (op, 0);
+      else if (precision <= GET_MODE_UNIT_PRECISION (origmode))
+	return simplify_gen_unary (TRUNCATE, mode,
+				   XEXP (op, 0), origmode);
+      else
+	return simplify_gen_unary (GET_CODE (op), mode,
+				   XEXP (op, 0), origmode);
+    }
+
+  /* Simplify (truncate:SI (op:DI (x:DI) (y:DI)))
+     to (op:SI (truncate:SI (x:DI)) (truncate:SI (y:DI))).  */
+  if (GET_CODE (op) == PLUS
+      || GET_CODE (op) == MINUS
+      || GET_CODE (op) == MULT)
+    {
+      rtx op0 = simplify_gen_unary (TRUNCATE, mode, XEXP (op, 0), op_mode);
+      if (op0)
+	{
+	  rtx op1 = simplify_gen_unary (TRUNCATE, mode, XEXP (op, 1), op_mode);
+	  if (op1)
+	    return simplify_gen_binary (GET_CODE (op), mode, op0, op1);
+	}
+    }
+
+  /* Simplify (truncate:QI (lshiftrt:SI (sign_extend:SI (x:QI)) C)) into
+     (ashiftrt:QI (x:QI) C), where C is a suitable small constant and
+     the outer truncation is effectively a truncation back to the
+     original mode.  */
+  if ((GET_CODE (op) == LSHIFTRT
+       || GET_CODE (op) == ASHIFTRT)
+      /* Ensure that OP_MODE is at least twice as wide as MODE
+	 to avoid the possibility that an outer LSHIFTRT shifts by more
+	 than the sign extension's sign_bit_copies and introduces zeros
+	 into the high bits of the result.  */
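+      /* E.g. with MODE == QImode and OP_MODE == HImode, any shift count
+	 below 8 moves only sign-bit copies into the low eight bits,
+	 whereas with a narrower OP_MODE (say 9 bits) an LSHIFTRT by 7
+	 would shift zeros into them.  */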
+      && 2 * precision <= op_precision
+      && CONST_INT_P (XEXP (op, 1))
+      && GET_CODE (XEXP (op, 0)) == SIGN_EXTEND
+      && GET_MODE (XEXP (XEXP (op, 0), 0)) == mode
+      && UINTVAL (XEXP (op, 1)) < precision)
+    return simplify_gen_binary (ASHIFTRT, mode,
+				XEXP (XEXP (op, 0), 0), XEXP (op, 1));
+
+  /* Likewise (truncate:QI (lshiftrt:SI (zero_extend:SI (x:QI)) C)) into
+     (lshiftrt:QI (x:QI) C), where C is a suitable small constant and
+     the outer truncation is effectively a truncation back to the
+     original mode.  */
+  if ((GET_CODE (op) == LSHIFTRT
+       || GET_CODE (op) == ASHIFTRT)
+      && CONST_INT_P (XEXP (op, 1))
+      && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
+      && GET_MODE (XEXP (XEXP (op, 0), 0)) == mode
+      && UINTVAL (XEXP (op, 1)) < precision)
+    return simplify_gen_binary (LSHIFTRT, mode,
+				XEXP (XEXP (op, 0), 0), XEXP (op, 1));
+
+  /* Likewise (truncate:QI (ashift:SI (zero_extend:SI (x:QI)) C)) into
+     (ashift:QI (x:QI) C), where C is a suitable small constant and
+     the outer truncation is effectively a truncation back to the
+     original mode.  */
+  if (GET_CODE (op) == ASHIFT
+      && CONST_INT_P (XEXP (op, 1))
+      && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
+	  || GET_CODE (XEXP (op, 0)) == SIGN_EXTEND)
+      && GET_MODE (XEXP (XEXP (op, 0), 0)) == mode
+      && UINTVAL (XEXP (op, 1)) < precision)
+    return simplify_gen_binary (ASHIFT, mode,
+				XEXP (XEXP (op, 0), 0), XEXP (op, 1));
+
+  /* Recognize a word extraction from a multi-word subreg.  */
+  if ((GET_CODE (op) == LSHIFTRT
+       || GET_CODE (op) == ASHIFTRT)
+      && SCALAR_INT_MODE_P (mode)
+      && SCALAR_INT_MODE_P (op_mode)
+      && precision >= BITS_PER_WORD
+      && 2 * precision <= op_precision
+      && CONST_INT_P (XEXP (op, 1))
+      && (INTVAL (XEXP (op, 1)) & (precision - 1)) == 0
+      && UINTVAL (XEXP (op, 1)) < op_precision)
+    {
+      int byte = subreg_lowpart_offset (mode, op_mode);
+      int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
+      return simplify_gen_subreg (mode, XEXP (op, 0), op_mode,
+				  (WORDS_BIG_ENDIAN
+				   ? byte - shifted_bytes
+				   : byte + shifted_bytes));
+    }
+
+  /* If we have a TRUNCATE of a right shift of MEM, make a new MEM
+     and try replacing the TRUNCATE and shift with it.  Don't do this
+     if the MEM has a mode-dependent address.  */
+  if ((GET_CODE (op) == LSHIFTRT
+       || GET_CODE (op) == ASHIFTRT)
+      && SCALAR_INT_MODE_P (op_mode)
+      && MEM_P (XEXP (op, 0))
+      && CONST_INT_P (XEXP (op, 1))
+      && (INTVAL (XEXP (op, 1)) % GET_MODE_BITSIZE (mode)) == 0
+      && INTVAL (XEXP (op, 1)) > 0
+      && INTVAL (XEXP (op, 1)) < GET_MODE_BITSIZE (op_mode)
+      && ! mode_dependent_address_p (XEXP (XEXP (op, 0), 0),
+				     MEM_ADDR_SPACE (XEXP (op, 0)))
+      && ! MEM_VOLATILE_P (XEXP (op, 0))
+      && (GET_MODE_SIZE (mode) >= UNITS_PER_WORD
+	  || WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN))
+    {
+      int byte = subreg_lowpart_offset (mode, op_mode);
+      int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
+      return adjust_address_nv (XEXP (op, 0), mode,
+				(WORDS_BIG_ENDIAN
+				 ? byte - shifted_bytes
+				 : byte + shifted_bytes));
+    }
+
+  /* (truncate:SI (OP:DI ({sign,zero}_extend:DI foo:SI))) is
+     (OP:SI foo:SI) if OP is NEG or ABS.  */
+  if ((GET_CODE (op) == ABS
+       || GET_CODE (op) == NEG)
+      && (GET_CODE (XEXP (op, 0)) == SIGN_EXTEND
+	  || GET_CODE (XEXP (op, 0)) == ZERO_EXTEND)
+      && GET_MODE (XEXP (XEXP (op, 0), 0)) == mode)
+    return simplify_gen_unary (GET_CODE (op), mode,
+			       XEXP (XEXP (op, 0), 0), mode);
+
+  /* (truncate:A (subreg:B (truncate:C X) 0)) is
+     (truncate:A X).
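+     E.g. (truncate:QI (subreg:HI (truncate:SI (reg:DI X)) 0)) is
+     (truncate:QI (reg:DI X)).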
*/ + if (GET_CODE (op) == SUBREG + && SCALAR_INT_MODE_P (mode) + && SCALAR_INT_MODE_P (op_mode) + && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op))) + && GET_CODE (SUBREG_REG (op)) == TRUNCATE + && subreg_lowpart_p (op)) + { + rtx inner = XEXP (SUBREG_REG (op), 0); + if (GET_MODE_PRECISION (mode) + <= GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))) + return simplify_gen_unary (TRUNCATE, mode, inner, GET_MODE (inner)); + else + /* If subreg above is paradoxical and C is narrower + than A, return (subreg:A (truncate:C X) 0). */ + return simplify_gen_subreg (mode, SUBREG_REG (op), + GET_MODE (SUBREG_REG (op)), 0); + } + + /* (truncate:A (truncate:B X)) is (truncate:A X). */ + if (GET_CODE (op) == TRUNCATE) + return simplify_gen_unary (TRUNCATE, mode, XEXP (op, 0), + GET_MODE (XEXP (op, 0))); + + return NULL_RTX; +} + /* Try to simplify a unary operation CODE whose output mode is to be MODE with input operand OP whose mode was originally OP_MODE. Return zero if no simplification can be made. */ @@ -646,7 +858,6 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) /* (not (ashiftrt foo C)) where C is the number of bits in FOO minus 1 is (ge foo (const_int 0)) if STORE_FLAG_VALUE is -1, so we can perform the above simplification. */ - if (STORE_FLAG_VALUE == -1 && GET_CODE (op) == ASHIFTRT && GET_CODE (XEXP (op, 1)) @@ -669,14 +880,15 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) simplify_gen_unary (NOT, inner_mode, const1_rtx, inner_mode), XEXP (SUBREG_REG (op), 1)); - return rtl_hooks.gen_lowpart_no_emit (mode, x); + temp = rtl_hooks.gen_lowpart_no_emit (mode, x); + if (temp) + return temp; } /* Apply De Morgan's laws to reduce number of patterns for machines with negating logical insns (and-not, nand, etc.). If result has only one NOT, put it first, since that is how the patterns are coded. */ - if (GET_CODE (op) == IOR || GET_CODE (op) == AND) { rtx in1 = XEXP (op, 0), in2 = XEXP (op, 1); @@ -699,6 +911,13 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) return gen_rtx_fmt_ee (GET_CODE (op) == IOR ? AND : IOR, mode, in1, in2); } + + /* (not (bswap x)) -> (bswap (not x)). */ + if (GET_CODE (op) == BSWAP) + { + rtx x = simplify_gen_unary (NOT, mode, XEXP (op, 0), mode); + return simplify_gen_unary (BSWAP, mode, x, mode); + } break; case NEG: @@ -730,8 +949,8 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) && !HONOR_SIGN_DEPENDENT_ROUNDING (mode)) { /* (neg (plus A C)) is simplified to (minus -C A). */ - if (CONST_INT_P (XEXP (op, 1)) - || GET_CODE (XEXP (op, 1)) == CONST_DOUBLE) + if (CONST_SCALAR_INT_P (XEXP (op, 1)) + || CONST_DOUBLE_AS_FLOAT_P (XEXP (op, 1))) { temp = simplify_unary_operation (NEG, mode, XEXP (op, 1), mode); if (temp) @@ -816,51 +1035,43 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) break; case TRUNCATE: - /* We can't handle truncation to a partial integer mode here - because we don't know the real bitsize of the partial - integer mode. */ - if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) - break; - - /* (truncate:SI ({sign,zero}_extend:DI foo:SI)) == foo:SI. */ - if ((GET_CODE (op) == SIGN_EXTEND - || GET_CODE (op) == ZERO_EXTEND) - && GET_MODE (XEXP (op, 0)) == mode) - return XEXP (op, 0); + /* Don't optimize (lshiftrt (mult ...)) as it would interfere + with the umulXi3_highpart patterns. 
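+	 For example, on a 32-bit target umulsi3_highpart is matched as
+	 (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI x)
+					    (zero_extend:DI y))
+				   (const_int 32)))
+	 as a whole, so pre-simplifying the shift would hide the pattern.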
+	 */
+      if (GET_CODE (op) == LSHIFTRT
+	  && GET_CODE (XEXP (op, 0)) == MULT)
+	break;
 
-      /* (truncate:SI (OP:DI ({sign,zero}_extend:DI foo:SI))) is
-	 (OP:SI foo:SI) if OP is NEG or ABS.  */
-      if ((GET_CODE (op) == ABS
-	   || GET_CODE (op) == NEG)
-	  && (GET_CODE (XEXP (op, 0)) == SIGN_EXTEND
-	      || GET_CODE (XEXP (op, 0)) == ZERO_EXTEND)
-	  && GET_MODE (XEXP (XEXP (op, 0), 0)) == mode)
-	return simplify_gen_unary (GET_CODE (op), mode,
-				   XEXP (XEXP (op, 0), 0), mode);
+      if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+	{
+	  if (TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op)))
+	    {
+	      temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+	      if (temp)
+		return temp;
+	    }
+	  /* We can't handle truncation to a partial integer mode here
+	     because we don't know the real bitsize of the partial
+	     integer mode.  */
+	  break;
+	}
 
-      /* (truncate:A (subreg:B (truncate:C X) 0)) is
-	 (truncate:A X).  */
-      if (GET_CODE (op) == SUBREG
-	  && GET_CODE (SUBREG_REG (op)) == TRUNCATE
-	  && subreg_lowpart_p (op))
-	return simplify_gen_unary (TRUNCATE, mode, XEXP (SUBREG_REG (op), 0),
-				   GET_MODE (XEXP (SUBREG_REG (op), 0)));
+      if (GET_MODE (op) != VOIDmode)
+	{
+	  temp = simplify_truncation (mode, op, GET_MODE (op));
+	  if (temp)
+	    return temp;
+	}
 
       /* If we know that the value is already truncated, we can
-	 replace the TRUNCATE with a SUBREG.  Note that this is also
-	 valid if TRULY_NOOP_TRUNCATION is false for the corresponding
-	 modes we just have to apply a different definition for
-	 truncation.  But don't do this for an (LSHIFTRT (MULT ...))
-	 since this will cause problems with the umulXi3_highpart
-	 patterns.  */
-      if ((TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op))
-	   ? (num_sign_bit_copies (op, GET_MODE (op))
-	      > (unsigned int) (GET_MODE_PRECISION (GET_MODE (op))
-				- GET_MODE_PRECISION (mode)))
-	   : truncated_to_mode (mode, op))
-	  && ! (GET_CODE (op) == LSHIFTRT
-		&& GET_CODE (XEXP (op, 0)) == MULT))
-	return rtl_hooks.gen_lowpart_no_emit (mode, op);
+	 replace the TRUNCATE with a SUBREG.  */
+      if (GET_MODE_NUNITS (mode) == 1
+	  && (TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op))
+	      || truncated_to_mode (mode, op)))
+	{
+	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+	  if (temp)
+	    return temp;
+	}
 
       /* A truncate of a comparison can be replaced with a subreg if
 	 STORE_FLAG_VALUE permits.  This is like the previous test,
@@ -869,7 +1080,24 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
       if (HWI_COMPUTABLE_MODE_P (mode)
 	  && COMPARISON_P (op)
 	  && (STORE_FLAG_VALUE & ~GET_MODE_MASK (mode)) == 0)
-	return rtl_hooks.gen_lowpart_no_emit (mode, op);
+	{
+	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+	  if (temp)
+	    return temp;
+	}
+
+      /* A truncate of a memory is just loading the low part of the memory
+	 if we are not changing the meaning of the address. */
+      if (GET_CODE (op) == MEM
+	  && !VECTOR_MODE_P (mode)
+	  && !MEM_VOLATILE_P (op)
+	  && !mode_dependent_address_p (XEXP (op, 0), MEM_ADDR_SPACE (op)))
+	{
+	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+	  if (temp)
+	    return temp;
+	}
+
       break;
 
     case FLOAT_TRUNCATE:
@@ -1101,7 +1329,11 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	  && SUBREG_PROMOTED_VAR_P (op)
 	  && ! SUBREG_PROMOTED_UNSIGNED_P (op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
-	return rtl_hooks.gen_lowpart_no_emit (mode, op);
+	{
+	  temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+	  if (temp)
+	    return temp;
+	}
 
       /* (sign_extend:M (sign_extend:N <X>)) is (sign_extend:M <X>).
	 (sign_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>).
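+	 (The inner zero_extend leaves the sign bit of the N-mode value
+	 clear, so the wider extension copies zeros either way.)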
*/ @@ -1133,9 +1365,10 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) { rtx inner = rtl_hooks.gen_lowpart_no_emit (tmode, XEXP (XEXP (op, 0), 0)); - return simplify_gen_unary (GET_CODE (op) == ASHIFTRT - ? SIGN_EXTEND : ZERO_EXTEND, - mode, inner, tmode); + if (inner) + return simplify_gen_unary (GET_CODE (op) == ASHIFTRT + ? SIGN_EXTEND : ZERO_EXTEND, + mode, inner, tmode); } } @@ -1163,7 +1396,11 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) && SUBREG_PROMOTED_VAR_P (op) && SUBREG_PROMOTED_UNSIGNED_P (op) > 0 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))) - return rtl_hooks.gen_lowpart_no_emit (mode, op); + { + temp = rtl_hooks.gen_lowpart_no_emit (mode, op); + if (temp) + return temp; + } /* Extending a widening multiplication should be canonicalized to a wider widening multiplication. */ @@ -1228,7 +1465,8 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) { rtx inner = rtl_hooks.gen_lowpart_no_emit (tmode, XEXP (XEXP (op, 0), 0)); - return simplify_gen_unary (ZERO_EXTEND, mode, inner, tmode); + if (inner) + return simplify_gen_unary (ZERO_EXTEND, mode, inner, tmode); } } @@ -1276,7 +1514,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (GET_MODE (op))); } - if (CONST_INT_P (op) || GET_CODE (op) == CONST_DOUBLE + if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op) || GET_CODE (op) == CONST_VECTOR) { int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); @@ -1329,8 +1567,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, check the wrong mode (input vs. output) for a conversion operation, such as FIX. At some point, this should be simplified. */ - if (code == FLOAT && GET_MODE (op) == VOIDmode - && (GET_CODE (op) == CONST_DOUBLE || CONST_INT_P (op))) + if (code == FLOAT && CONST_SCALAR_INT_P (op)) { HOST_WIDE_INT hv, lv; REAL_VALUE_TYPE d; @@ -1344,9 +1581,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, d = real_value_truncate (mode, d); return CONST_DOUBLE_FROM_REAL_VALUE (d, mode); } - else if (code == UNSIGNED_FLOAT && GET_MODE (op) == VOIDmode - && (GET_CODE (op) == CONST_DOUBLE - || CONST_INT_P (op))) + else if (code == UNSIGNED_FLOAT && CONST_SCALAR_INT_P (op)) { HOST_WIDE_INT hv, lv; REAL_VALUE_TYPE d; @@ -1357,7 +1592,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, lv = CONST_DOUBLE_LOW (op), hv = CONST_DOUBLE_HIGH (op); if (op_mode == VOIDmode - || GET_MODE_PRECISION (op_mode) > 2 * HOST_BITS_PER_WIDE_INT) + || GET_MODE_PRECISION (op_mode) > HOST_BITS_PER_DOUBLE_INT) /* We should never get a negative number. */ gcc_assert (hv >= 0); else if (GET_MODE_PRECISION (op_mode) <= HOST_BITS_PER_WIDE_INT) @@ -1517,114 +1752,120 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, /* We can do some operations on integer CONST_DOUBLEs. Also allow for a DImode operation on a CONST_INT. 
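+     (Both are folded via the double_int type, which represents a value
+     of up to HOST_BITS_PER_DOUBLE_INT bits as a low/high pair of host
+     wide ints.)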
*/ - else if (GET_MODE (op) == VOIDmode - && width <= HOST_BITS_PER_WIDE_INT * 2 - && (GET_CODE (op) == CONST_DOUBLE - || CONST_INT_P (op))) + else if (width <= HOST_BITS_PER_DOUBLE_INT + && (CONST_DOUBLE_AS_INT_P (op) || CONST_INT_P (op))) { - unsigned HOST_WIDE_INT l1, lv; - HOST_WIDE_INT h1, hv; + double_int first, value; - if (GET_CODE (op) == CONST_DOUBLE) - l1 = CONST_DOUBLE_LOW (op), h1 = CONST_DOUBLE_HIGH (op); + if (CONST_DOUBLE_AS_INT_P (op)) + first = double_int::from_pair (CONST_DOUBLE_HIGH (op), + CONST_DOUBLE_LOW (op)); else - l1 = INTVAL (op), h1 = HWI_SIGN_EXTEND (l1); + first = double_int::from_shwi (INTVAL (op)); switch (code) { case NOT: - lv = ~ l1; - hv = ~ h1; + value = ~first; break; case NEG: - neg_double (l1, h1, &lv, &hv); + value = -first; break; case ABS: - if (h1 < 0) - neg_double (l1, h1, &lv, &hv); + if (first.is_negative ()) + value = -first; else - lv = l1, hv = h1; + value = first; break; case FFS: - hv = 0; - if (l1 != 0) - lv = ffs_hwi (l1); - else if (h1 != 0) - lv = HOST_BITS_PER_WIDE_INT + ffs_hwi (h1); + value.high = 0; + if (first.low != 0) + value.low = ffs_hwi (first.low); + else if (first.high != 0) + value.low = HOST_BITS_PER_WIDE_INT + ffs_hwi (first.high); else - lv = 0; + value.low = 0; break; case CLZ: - hv = 0; - if (h1 != 0) - lv = GET_MODE_PRECISION (mode) - floor_log2 (h1) - 1 - - HOST_BITS_PER_WIDE_INT; - else if (l1 != 0) - lv = GET_MODE_PRECISION (mode) - floor_log2 (l1) - 1; - else if (! CLZ_DEFINED_VALUE_AT_ZERO (mode, lv)) - lv = GET_MODE_PRECISION (mode); + value.high = 0; + if (first.high != 0) + value.low = GET_MODE_PRECISION (mode) - floor_log2 (first.high) - 1 + - HOST_BITS_PER_WIDE_INT; + else if (first.low != 0) + value.low = GET_MODE_PRECISION (mode) - floor_log2 (first.low) - 1; + else if (! CLZ_DEFINED_VALUE_AT_ZERO (mode, value.low)) + value.low = GET_MODE_PRECISION (mode); break; case CTZ: - hv = 0; - if (l1 != 0) - lv = ctz_hwi (l1); - else if (h1 != 0) - lv = HOST_BITS_PER_WIDE_INT + ctz_hwi (h1); - else if (! CTZ_DEFINED_VALUE_AT_ZERO (mode, lv)) - lv = GET_MODE_PRECISION (mode); + value.high = 0; + if (first.low != 0) + value.low = ctz_hwi (first.low); + else if (first.high != 0) + value.low = HOST_BITS_PER_WIDE_INT + ctz_hwi (first.high); + else if (! 
CTZ_DEFINED_VALUE_AT_ZERO (mode, value.low)) + value.low = GET_MODE_PRECISION (mode); break; case POPCOUNT: - hv = 0; - lv = 0; - while (l1) - lv++, l1 &= l1 - 1; - while (h1) - lv++, h1 &= h1 - 1; + value = double_int_zero; + while (first.low) + { + value.low++; + first.low &= first.low - 1; + } + while (first.high) + { + value.low++; + first.high &= first.high - 1; + } break; case PARITY: - hv = 0; - lv = 0; - while (l1) - lv++, l1 &= l1 - 1; - while (h1) - lv++, h1 &= h1 - 1; - lv &= 1; + value = double_int_zero; + while (first.low) + { + value.low++; + first.low &= first.low - 1; + } + while (first.high) + { + value.low++; + first.high &= first.high - 1; + } + value.low &= 1; break; case BSWAP: { unsigned int s; - hv = 0; - lv = 0; + value = double_int_zero; for (s = 0; s < width; s += 8) { unsigned int d = width - s - 8; unsigned HOST_WIDE_INT byte; if (s < HOST_BITS_PER_WIDE_INT) - byte = (l1 >> s) & 0xff; + byte = (first.low >> s) & 0xff; else - byte = (h1 >> (s - HOST_BITS_PER_WIDE_INT)) & 0xff; + byte = (first.high >> (s - HOST_BITS_PER_WIDE_INT)) & 0xff; if (d < HOST_BITS_PER_WIDE_INT) - lv |= byte << d; + value.low |= byte << d; else - hv |= byte << (d - HOST_BITS_PER_WIDE_INT); + value.high |= byte << (d - HOST_BITS_PER_WIDE_INT); } } break; case TRUNCATE: /* This is just a change-of-mode, so do nothing. */ - lv = l1, hv = h1; + value = first; break; case ZERO_EXTEND: @@ -1633,8 +1874,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, if (op_width > HOST_BITS_PER_WIDE_INT) return 0; - hv = 0; - lv = l1 & GET_MODE_MASK (op_mode); + value = double_int::from_uhwi (first.low & GET_MODE_MASK (op_mode)); break; case SIGN_EXTEND: @@ -1643,11 +1883,11 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, return 0; else { - lv = l1 & GET_MODE_MASK (op_mode); - if (val_signbit_known_set_p (op_mode, lv)) - lv |= ~GET_MODE_MASK (op_mode); + value.low = first.low & GET_MODE_MASK (op_mode); + if (val_signbit_known_set_p (op_mode, value.low)) + value.low |= ~GET_MODE_MASK (op_mode); - hv = HWI_SIGN_EXTEND (lv); + value.high = HWI_SIGN_EXTEND (value.low); } break; @@ -1658,10 +1898,10 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, return 0; } - return immed_double_const (lv, hv, mode); + return immed_double_int_const (value, mode); } - else if (GET_CODE (op) == CONST_DOUBLE + else if (CONST_DOUBLE_AS_FLOAT_P (op) && SCALAR_FLOAT_MODE_P (mode) && SCALAR_FLOAT_MODE_P (GET_MODE (op))) { @@ -1711,10 +1951,10 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, return CONST_DOUBLE_FROM_REAL_VALUE (d, mode); } - else if (GET_CODE (op) == CONST_DOUBLE + else if (CONST_DOUBLE_AS_FLOAT_P (op) && SCALAR_FLOAT_MODE_P (GET_MODE (op)) && GET_MODE_CLASS (mode) == MODE_INT - && width <= 2 * HOST_BITS_PER_WIDE_INT && width > 0) + && width <= HOST_BITS_PER_DOUBLE_INT && width > 0) { /* Although the overflow semantics of RTL's FIX and UNSIGNED_FIX operators are intentionally left unspecified (to ease implementation @@ -1779,7 +2019,7 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode, return const0_rtx; /* Test against the unsigned upper bound. 
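+	 (That is, the largest value representable in WIDTH bits,
+	 2**WIDTH - 1; both halves are all ones when WIDTH is
+	 HOST_BITS_PER_DOUBLE_INT.)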
	 */
-      if (width == 2 * HOST_BITS_PER_WIDE_INT)
+      if (width == HOST_BITS_PER_DOUBLE_INT)
 	{
 	  th = -1;
 	  tl = -1;
@@ -1815,6 +2055,35 @@ simplify_const_unary_operation (enum rtx_code code, enum machine_mode mode,
   return NULL_RTX;
 }
 
+/* Subroutine of simplify_binary_operation to simplify a binary operation
+   CODE that can commute with byte swapping, with result mode MODE and
+   operating on OP0 and OP1.  CODE is currently one of AND, IOR or XOR.
+   Return zero if no simplification or canonicalization is possible.  */
+
+static rtx
+simplify_byte_swapping_operation (enum rtx_code code, enum machine_mode mode,
+				  rtx op0, rtx op1)
+{
+  rtx tem;
+
+  /* (op (bswap x) C1) -> (bswap (op x C2)) with C2 swapped.  */
+  if (GET_CODE (op0) == BSWAP && CONST_SCALAR_INT_P (op1))
+    {
+      tem = simplify_gen_binary (code, mode, XEXP (op0, 0),
+				 simplify_gen_unary (BSWAP, mode, op1, mode));
+      return simplify_gen_unary (BSWAP, mode, tem, mode);
+    }
+
+  /* (op (bswap x) (bswap y)) -> (bswap (op x y)).  */
+  if (GET_CODE (op0) == BSWAP && GET_CODE (op1) == BSWAP)
+    {
+      tem = simplify_gen_binary (code, mode, XEXP (op0, 0), XEXP (op1, 0));
+      return simplify_gen_unary (BSWAP, mode, tem, mode);
+    }
+
+  return NULL_RTX;
+}
+
 /* Subroutine of simplify_binary_operation to simplify a commutative,
    associative binary operation CODE with result mode MODE, operating
    on OP0 and OP1.  CODE is currently one of PLUS, MULT, AND, IOR, XOR,
@@ -1983,7 +2252,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
       else if (GET_CODE (lhs) == MULT
	       && CONST_INT_P (XEXP (lhs, 1)))
 	{
-	  coeff0 = shwi_to_double_int (INTVAL (XEXP (lhs, 1)));
+	  coeff0 = double_int::from_shwi (INTVAL (XEXP (lhs, 1)));
 	  lhs = XEXP (lhs, 0);
 	}
       else if (GET_CODE (lhs) == ASHIFT
	       && CONST_INT_P (XEXP (lhs, 1))
	       && INTVAL (XEXP (lhs, 1)) >= 0
	       && INTVAL (XEXP (lhs, 1)) < HOST_BITS_PER_WIDE_INT)
 	{
-	  coeff0 = double_int_setbit (double_int_zero,
-				      INTVAL (XEXP (lhs, 1)));
+	  coeff0 = double_int_zero.set_bit (INTVAL (XEXP (lhs, 1)));
 	  lhs = XEXP (lhs, 0);
 	}
 
@@ -2004,7 +2272,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
       else if (GET_CODE (rhs) == MULT
	       && CONST_INT_P (XEXP (rhs, 1)))
 	{
-	  coeff1 = shwi_to_double_int (INTVAL (XEXP (rhs, 1)));
+	  coeff1 = double_int::from_shwi (INTVAL (XEXP (rhs, 1)));
 	  rhs = XEXP (rhs, 0);
 	}
       else if (GET_CODE (rhs) == ASHIFT
	       && CONST_INT_P (XEXP (rhs, 1))
	       && INTVAL (XEXP (rhs, 1)) >= 0
	       && INTVAL (XEXP (rhs, 1)) < HOST_BITS_PER_WIDE_INT)
 	{
-	  coeff1 = double_int_setbit (double_int_zero,
-				      INTVAL (XEXP (rhs, 1)));
+	  coeff1 = double_int_zero.set_bit (INTVAL (XEXP (rhs, 1)));
 	  rhs = XEXP (rhs, 0);
 	}
 
@@ -2024,7 +2291,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	  double_int val;
 	  bool speed = optimize_function_for_speed_p (cfun);
 
-	  val = double_int_add (coeff0, coeff1);
+	  val = coeff0 + coeff1;
 	  coeff = immed_double_int_const (val, mode);
 
 	  tem = simplify_gen_binary (MULT, mode, lhs, coeff);
@@ -2034,11 +2301,9 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	}
 
       /* (plus (xor X C1) C2) is (xor X (C1^C2)) if C2 is signbit.
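+	 (Adding the sign bit can only toggle it, since the carry out of
+	 the top bit is discarded, so PLUS and XOR agree there.)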
*/ - if ((CONST_INT_P (op1) - || GET_CODE (op1) == CONST_DOUBLE) + if (CONST_SCALAR_INT_P (op1) && GET_CODE (op0) == XOR - && (CONST_INT_P (XEXP (op0, 1)) - || GET_CODE (XEXP (op0, 1)) == CONST_DOUBLE) + && CONST_SCALAR_INT_P (XEXP (op0, 1)) && mode_signbit_p (mode, op1)) return simplify_gen_binary (XOR, mode, XEXP (op0, 0), simplify_gen_binary (XOR, mode, op1, @@ -2163,7 +2428,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, else if (GET_CODE (lhs) == MULT && CONST_INT_P (XEXP (lhs, 1))) { - coeff0 = shwi_to_double_int (INTVAL (XEXP (lhs, 1))); + coeff0 = double_int::from_shwi (INTVAL (XEXP (lhs, 1))); lhs = XEXP (lhs, 0); } else if (GET_CODE (lhs) == ASHIFT @@ -2171,8 +2436,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, && INTVAL (XEXP (lhs, 1)) >= 0 && INTVAL (XEXP (lhs, 1)) < HOST_BITS_PER_WIDE_INT) { - coeff0 = double_int_setbit (double_int_zero, - INTVAL (XEXP (lhs, 1))); + coeff0 = double_int_zero.set_bit (INTVAL (XEXP (lhs, 1))); lhs = XEXP (lhs, 0); } @@ -2184,7 +2448,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, else if (GET_CODE (rhs) == MULT && CONST_INT_P (XEXP (rhs, 1))) { - negcoeff1 = shwi_to_double_int (-INTVAL (XEXP (rhs, 1))); + negcoeff1 = double_int::from_shwi (-INTVAL (XEXP (rhs, 1))); rhs = XEXP (rhs, 0); } else if (GET_CODE (rhs) == ASHIFT @@ -2192,9 +2456,8 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, && INTVAL (XEXP (rhs, 1)) >= 0 && INTVAL (XEXP (rhs, 1)) < HOST_BITS_PER_WIDE_INT) { - negcoeff1 = double_int_setbit (double_int_zero, - INTVAL (XEXP (rhs, 1))); - negcoeff1 = double_int_neg (negcoeff1); + negcoeff1 = double_int_zero.set_bit (INTVAL (XEXP (rhs, 1))); + negcoeff1 = -negcoeff1; rhs = XEXP (rhs, 0); } @@ -2205,7 +2468,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, double_int val; bool speed = optimize_function_for_speed_p (cfun); - val = double_int_add (coeff0, negcoeff1); + val = coeff0 + negcoeff1; coeff = immed_double_int_const (val, mode); tem = simplify_gen_binary (MULT, mode, lhs, coeff); @@ -2220,8 +2483,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, /* (-x - c) may be simplified as (-c - x). */ if (GET_CODE (op0) == NEG - && (CONST_INT_P (op1) - || GET_CODE (op1) == CONST_DOUBLE)) + && (CONST_SCALAR_INT_P (op1) || CONST_DOUBLE_AS_FLOAT_P (op1))) { tem = simplify_unary_operation (NEG, mode, op1, mode); if (tem) @@ -2235,7 +2497,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, neg_const_int (mode, op1)); /* (x - (x & y)) -> (x & ~y) */ - if (GET_CODE (op1) == AND) + if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND) { if (rtx_equal_p (op0, XEXP (op1, 0))) { @@ -2371,19 +2633,17 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, return simplify_gen_binary (ASHIFT, mode, op0, GEN_INT (val)); /* Likewise for multipliers wider than a word. 
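+	 (That is, a CONST_DOUBLE integer with a zero low word and a
+	 power-of-two high word; the shift count is then the high word's
+	 log2 plus HOST_BITS_PER_WIDE_INT.)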
*/ - if (GET_CODE (trueop1) == CONST_DOUBLE - && (GET_MODE (trueop1) == VOIDmode - || GET_MODE_CLASS (GET_MODE (trueop1)) == MODE_INT) + if (CONST_DOUBLE_AS_INT_P (trueop1) && GET_MODE (op0) == mode && CONST_DOUBLE_LOW (trueop1) == 0 && (val = exact_log2 (CONST_DOUBLE_HIGH (trueop1))) >= 0 - && (val < 2 * HOST_BITS_PER_WIDE_INT - 1 - || GET_MODE_BITSIZE (mode) <= 2 * HOST_BITS_PER_WIDE_INT)) + && (val < HOST_BITS_PER_DOUBLE_INT - 1 + || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_DOUBLE_INT)) return simplify_gen_binary (ASHIFT, mode, op0, GEN_INT (val + HOST_BITS_PER_WIDE_INT)); /* x*2 is x+x and x*(-1) is -x */ - if (GET_CODE (trueop1) == CONST_DOUBLE + if (CONST_DOUBLE_AS_FLOAT_P (trueop1) && SCALAR_FLOAT_MODE_P (GET_MODE (trueop1)) && !DECIMAL_FLOAT_MODE_P (GET_MODE (trueop1)) && GET_MODE (op0) == mode) @@ -2429,7 +2689,9 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, case IOR: if (trueop1 == CONST0_RTX (mode)) return op0; - if (INTEGRAL_MODE_P (mode) && trueop1 == CONSTM1_RTX (mode)) + if (INTEGRAL_MODE_P (mode) + && trueop1 == CONSTM1_RTX (mode) + && !side_effects_p (op0)) return op1; if (rtx_equal_p (trueop0, trueop1) && ! side_effects_p (op0)) return op0; @@ -2443,7 +2705,8 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, /* (ior A C) is C if all bits of A that might be nonzero are on in C. */ if (CONST_INT_P (op1) && HWI_COMPUTABLE_MODE_P (mode) - && (nonzero_bits (op0, mode) & ~UINTVAL (op1)) == 0) + && (nonzero_bits (op0, mode) & ~UINTVAL (op1)) == 0 + && !side_effects_p (op0)) return op1; /* Canonicalize (X & C1) | C2. */ @@ -2562,6 +2825,10 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, XEXP (op0, 1)); } + tem = simplify_byte_swapping_operation (code, mode, op0, op1); + if (tem) + return tem; + tem = simplify_associative_operation (code, mode, op0, op1); if (tem) return tem; @@ -2578,16 +2845,13 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, return CONST0_RTX (mode); /* Canonicalize XOR of the most significant bit to PLUS. */ - if ((CONST_INT_P (op1) - || GET_CODE (op1) == CONST_DOUBLE) + if (CONST_SCALAR_INT_P (op1) && mode_signbit_p (mode, op1)) return simplify_gen_binary (PLUS, mode, op0, op1); /* (xor (plus X C1) C2) is (xor X (C1^C2)) if C1 is signbit. */ - if ((CONST_INT_P (op1) - || GET_CODE (op1) == CONST_DOUBLE) + if (CONST_SCALAR_INT_P (op1) && GET_CODE (op0) == PLUS - && (CONST_INT_P (XEXP (op0, 1)) - || GET_CODE (XEXP (op0, 1)) == CONST_DOUBLE) + && CONST_SCALAR_INT_P (XEXP (op0, 1)) && mode_signbit_p (mode, XEXP (op0, 1))) return simplify_gen_binary (XOR, mode, XEXP (op0, 0), simplify_gen_binary (XOR, mode, op1, @@ -2708,6 +2972,10 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, && (reversed = reversed_comparison (op0, mode))) return reversed; + tem = simplify_byte_swapping_operation (code, mode, op0, op1); + if (tem) + return tem; + tem = simplify_associative_operation (code, mode, op0, op1); if (tem) return tem; @@ -2890,6 +3158,10 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, && op1 == XEXP (XEXP (op0, 0), 0)) return simplify_gen_binary (AND, mode, op1, XEXP (op0, 1)); + tem = simplify_byte_swapping_operation (code, mode, op0, op1); + if (tem) + return tem; + tem = simplify_associative_operation (code, mode, op0, op1); if (tem) return tem; @@ -2905,7 +3177,11 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, } /* x/1 is x. 
	 */
       if (trueop1 == CONST1_RTX (mode))
-	return rtl_hooks.gen_lowpart_no_emit (mode, op0);
+	{
+	  tem = rtl_hooks.gen_lowpart_no_emit (mode, op0);
+	  if (tem)
+	    return tem;
+	}
       /* Convert divide by power of two into shift.  */
       if (CONST_INT_P (trueop1)
 	  && (val = exact_log2 (UINTVAL (trueop1))) > 0)
@@ -2930,7 +3206,7 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	  && !HONOR_SNANS (mode))
 	return op0;
 
-      if (GET_CODE (trueop1) == CONST_DOUBLE
+      if (CONST_DOUBLE_AS_FLOAT_P (trueop1)
 	  && trueop1 != CONST0_RTX (mode))
 	{
 	  REAL_VALUE_TYPE d;
@@ -2964,12 +3240,17 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	}
       /* x/1 is x.  */
       if (trueop1 == CONST1_RTX (mode))
-	return rtl_hooks.gen_lowpart_no_emit (mode, op0);
+	{
+	  tem = rtl_hooks.gen_lowpart_no_emit (mode, op0);
+	  if (tem)
+	    return tem;
+	}
       /* x/-1 is -x.  */
       if (trueop1 == constm1_rtx)
 	{
 	  rtx x = rtl_hooks.gen_lowpart_no_emit (mode, op0);
-	  return simplify_gen_unary (NEG, mode, x, mode);
+	  if (x)
+	    return simplify_gen_unary (NEG, mode, x, mode);
 	}
     }
      break;
 
@@ -3015,6 +3296,18 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 
     case ROTATERT:
     case ROTATE:
+      /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
+	 prefer left rotation; if op1 is from bitsize / 2 + 1 to
+	 bitsize - 1, use the other rotate direction with an amount of
+	 1 .. bitsize / 2 - 1 instead.  */
+      if (CONST_INT_P (trueop1)
+	  && IN_RANGE (INTVAL (trueop1),
+		       GET_MODE_BITSIZE (mode) / 2 + (code == ROTATE),
+		       GET_MODE_BITSIZE (mode) - 1))
+	return simplify_gen_binary (code == ROTATE ? ROTATERT : ROTATE,
+				    mode, op0, GEN_INT (GET_MODE_BITSIZE (mode)
+							- INTVAL (trueop1)));
+
+      /* FALLTHRU */
     case ASHIFTRT:
       if (trueop1 == CONST0_RTX (mode))
 	return op0;
@@ -3242,6 +3535,61 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 
 	      return gen_rtx_CONST_VECTOR (mode, v);
 	    }
+
+	  /* Recognize the identity permutation.  */
+	  if (GET_MODE (trueop0) == mode)
+	    {
+	      bool maybe_ident = true;
+	      for (int i = 0; i < XVECLEN (trueop1, 0); i++)
+		{
+		  rtx j = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (j) || INTVAL (j) != i)
+		    {
+		      maybe_ident = false;
+		      break;
+		    }
+		}
+	      if (maybe_ident)
+		return trueop0;
+	    }
+
+	  /* If we build {a,b} then permute it, build the result directly.
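+	     E.g. (vec_select:V2SI (vec_concat:V4SI (vec_concat:V2SI a b)
+						    (vec_concat:V2SI c d))
+				   (parallel [(const_int 2) (const_int 1)]))
+	     becomes (vec_concat:V2SI c b).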
*/ + if (XVECLEN (trueop1, 0) == 2 + && CONST_INT_P (XVECEXP (trueop1, 0, 0)) + && CONST_INT_P (XVECEXP (trueop1, 0, 1)) + && GET_CODE (trueop0) == VEC_CONCAT + && GET_CODE (XEXP (trueop0, 0)) == VEC_CONCAT + && GET_MODE (XEXP (trueop0, 0)) == mode + && GET_CODE (XEXP (trueop0, 1)) == VEC_CONCAT + && GET_MODE (XEXP (trueop0, 1)) == mode) + { + unsigned int i0 = INTVAL (XVECEXP (trueop1, 0, 0)); + unsigned int i1 = INTVAL (XVECEXP (trueop1, 0, 1)); + rtx subop0, subop1; + + gcc_assert (i0 < 4 && i1 < 4); + subop0 = XEXP (XEXP (trueop0, i0 / 2), i0 % 2); + subop1 = XEXP (XEXP (trueop0, i1 / 2), i1 % 2); + + return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1); + } + + if (XVECLEN (trueop1, 0) == 2 + && CONST_INT_P (XVECEXP (trueop1, 0, 0)) + && CONST_INT_P (XVECEXP (trueop1, 0, 1)) + && GET_CODE (trueop0) == VEC_CONCAT + && GET_MODE (trueop0) == mode) + { + unsigned int i0 = INTVAL (XVECEXP (trueop1, 0, 0)); + unsigned int i1 = INTVAL (XVECEXP (trueop1, 0, 1)); + rtx subop0, subop1; + + gcc_assert (i0 < 2 && i1 < 2); + subop0 = XEXP (trueop0, i0); + subop1 = XEXP (trueop0, i1); + + return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1); + } } if (XVECLEN (trueop1, 0) == 1 @@ -3270,6 +3618,31 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, return vec; } + /* If we select elements in a vec_merge that all come from the same + operand, select from that operand directly. */ + if (GET_CODE (op0) == VEC_MERGE) + { + rtx trueop02 = avoid_constant_pool_reference (XEXP (op0, 2)); + if (CONST_INT_P (trueop02)) + { + unsigned HOST_WIDE_INT sel = UINTVAL (trueop02); + bool all_operand0 = true; + bool all_operand1 = true; + for (int i = 0; i < XVECLEN (trueop1, 0); i++) + { + rtx j = XVECEXP (trueop1, 0, i); + if (sel & (1 << UINTVAL (j))) + all_operand1 = false; + else + all_operand0 = false; + } + if (all_operand0 && !side_effects_p (XEXP (op0, 1))) + return simplify_gen_binary (VEC_SELECT, mode, XEXP (op0, 0), op1); + if (all_operand1 && !side_effects_p (XEXP (op0, 0))) + return simplify_gen_binary (VEC_SELECT, mode, XEXP (op0, 1), op1); + } + } + return 0; case VEC_CONCAT: { @@ -3297,11 +3670,11 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, gcc_assert (GET_MODE_INNER (mode) == op1_mode); if ((GET_CODE (trueop0) == CONST_VECTOR - || CONST_INT_P (trueop0) - || GET_CODE (trueop0) == CONST_DOUBLE) + || CONST_SCALAR_INT_P (trueop0) + || CONST_DOUBLE_AS_FLOAT_P (trueop0)) && (GET_CODE (trueop1) == CONST_VECTOR - || CONST_INT_P (trueop1) - || GET_CODE (trueop1) == CONST_DOUBLE)) + || CONST_SCALAR_INT_P (trueop1) + || CONST_DOUBLE_AS_FLOAT_P (trueop1))) { int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); @@ -3332,6 +3705,27 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode, return gen_rtx_CONST_VECTOR (mode, v); } + + /* Try to merge two VEC_SELECTs from the same vector into a single one. + Restrict the transformation to avoid generating a VEC_SELECT with a + mode unrelated to its operand. 
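+	 E.g. with x a V2DF,
+	 (vec_concat:V2DF (vec_select:DF x (parallel [(const_int 0)]))
+			  (vec_select:DF x (parallel [(const_int 1)])))
+	 becomes (vec_select:V2DF x (parallel [(const_int 0)
+					       (const_int 1)])).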
*/ + if (GET_CODE (trueop0) == VEC_SELECT + && GET_CODE (trueop1) == VEC_SELECT + && rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop1, 0)) + && GET_MODE (XEXP (trueop0, 0)) == mode) + { + rtx par0 = XEXP (trueop0, 1); + rtx par1 = XEXP (trueop1, 1); + int len0 = XVECLEN (par0, 0); + int len1 = XVECLEN (par1, 0); + rtvec vec = rtvec_alloc (len0 + len1); + for (int i = 0; i < len0; i++) + RTVEC_ELT (vec, i) = XVECEXP (par0, 0, i); + for (int i = 0; i < len1; i++) + RTVEC_ELT (vec, len0 + i) = XVECEXP (par1, 0, i); + return simplify_gen_binary (VEC_SELECT, mode, XEXP (trueop0, 0), + gen_rtx_PARALLEL (VOIDmode, vec)); + } } return 0; @@ -3380,11 +3774,11 @@ simplify_const_binary_operation (enum rtx_code code, enum machine_mode mode, if (VECTOR_MODE_P (mode) && code == VEC_CONCAT - && (CONST_INT_P (op0) - || GET_CODE (op0) == CONST_DOUBLE - || GET_CODE (op0) == CONST_FIXED) - && (CONST_INT_P (op1) - || GET_CODE (op1) == CONST_DOUBLE + && (CONST_SCALAR_INT_P (op0) + || GET_CODE (op0) == CONST_FIXED + || CONST_DOUBLE_AS_FLOAT_P (op0)) + && (CONST_SCALAR_INT_P (op1) + || CONST_DOUBLE_AS_FLOAT_P (op1) || GET_CODE (op1) == CONST_FIXED)) { unsigned n_elts = GET_MODE_NUNITS (mode); @@ -3419,8 +3813,8 @@ simplify_const_binary_operation (enum rtx_code code, enum machine_mode mode, } if (SCALAR_FLOAT_MODE_P (mode) - && GET_CODE (op0) == CONST_DOUBLE - && GET_CODE (op1) == CONST_DOUBLE + && CONST_DOUBLE_AS_FLOAT_P (op0) + && CONST_DOUBLE_AS_FLOAT_P (op1) && mode == GET_MODE (op0) && mode == GET_MODE (op1)) { if (code == AND @@ -3542,10 +3936,11 @@ simplify_const_binary_operation (enum rtx_code code, enum machine_mode mode, /* We can fold some multi-word operations. */ if (GET_MODE_CLASS (mode) == MODE_INT && width == HOST_BITS_PER_DOUBLE_INT - && (CONST_DOUBLE_P (op0) || CONST_INT_P (op0)) - && (CONST_DOUBLE_P (op1) || CONST_INT_P (op1))) + && (CONST_DOUBLE_AS_INT_P (op0) || CONST_INT_P (op0)) + && (CONST_DOUBLE_AS_INT_P (op1) || CONST_INT_P (op1))) { double_int o0, o1, res, tmp; + bool overflow; o0 = rtx_to_double_int (op0); o1 = rtx_to_double_int (op1); @@ -3554,76 +3949,72 @@ simplify_const_binary_operation (enum rtx_code code, enum machine_mode mode, { case MINUS: /* A - B == A + (-B). */ - o1 = double_int_neg (o1); + o1 = -o1; /* Fall through.... 
*/ case PLUS: - res = double_int_add (o0, o1); + res = o0 + o1; break; case MULT: - res = double_int_mul (o0, o1); + res = o0 * o1; break; case DIV: - if (div_and_round_double (TRUNC_DIV_EXPR, 0, - o0.low, o0.high, o1.low, o1.high, - &res.low, &res.high, - &tmp.low, &tmp.high)) + res = o0.divmod_with_overflow (o1, false, TRUNC_DIV_EXPR, + &tmp, &overflow); + if (overflow) return 0; break; case MOD: - if (div_and_round_double (TRUNC_DIV_EXPR, 0, - o0.low, o0.high, o1.low, o1.high, - &tmp.low, &tmp.high, - &res.low, &res.high)) + tmp = o0.divmod_with_overflow (o1, false, TRUNC_DIV_EXPR, + &res, &overflow); + if (overflow) return 0; break; case UDIV: - if (div_and_round_double (TRUNC_DIV_EXPR, 1, - o0.low, o0.high, o1.low, o1.high, - &res.low, &res.high, - &tmp.low, &tmp.high)) + res = o0.divmod_with_overflow (o1, true, TRUNC_DIV_EXPR, + &tmp, &overflow); + if (overflow) return 0; break; case UMOD: - if (div_and_round_double (TRUNC_DIV_EXPR, 1, - o0.low, o0.high, o1.low, o1.high, - &tmp.low, &tmp.high, - &res.low, &res.high)) + tmp = o0.divmod_with_overflow (o1, true, TRUNC_DIV_EXPR, + &res, &overflow); + if (overflow) return 0; break; case AND: - res = double_int_and (o0, o1); + res = o0 & o1; break; case IOR: - res = double_int_ior (o0, o1); + res = o0 | o1; break; case XOR: - res = double_int_xor (o0, o1); + res = o0 ^ o1; break; case SMIN: - res = double_int_smin (o0, o1); + res = o0.smin (o1); break; case SMAX: - res = double_int_smax (o0, o1); + res = o0.smax (o1); break; case UMIN: - res = double_int_umin (o0, o1); + res = o0.umin (o1); break; case UMAX: - res = double_int_umax (o0, o1); + res = o0.umax (o1); break; case LSHIFTRT: case ASHIFTRT: @@ -3633,24 +4024,26 @@ simplify_const_binary_operation (enum rtx_code code, enum machine_mode mode, unsigned HOST_WIDE_INT cnt; if (SHIFT_COUNT_TRUNCATED) - o1 = double_int_zext (o1, GET_MODE_PRECISION (mode)); + { + o1.high = 0; + o1.low &= GET_MODE_PRECISION (mode) - 1; + } - if (!double_int_fits_in_uhwi_p (o1) - || double_int_to_uhwi (o1) >= GET_MODE_PRECISION (mode)) + if (!o1.fits_uhwi () + || o1.to_uhwi () >= GET_MODE_PRECISION (mode)) return 0; - cnt = double_int_to_uhwi (o1); + cnt = o1.to_uhwi (); + unsigned short prec = GET_MODE_PRECISION (mode); if (code == LSHIFTRT || code == ASHIFTRT) - res = double_int_rshift (o0, cnt, GET_MODE_PRECISION (mode), - code == ASHIFTRT); + res = o0.rshift (cnt, prec, code == ASHIFTRT); else if (code == ASHIFT) - res = double_int_lshift (o0, cnt, GET_MODE_PRECISION (mode), - true); + res = o0.alshift (cnt, prec); else if (code == ROTATE) - res = double_int_lrotate (o0, cnt, GET_MODE_PRECISION (mode)); + res = o0.lrotate (cnt, prec); else /* code == ROTATERT */ - res = double_int_rrotate (o0, cnt, GET_MODE_PRECISION (mode)); + res = o0.rrotate (cnt, prec); } break; @@ -4283,7 +4676,9 @@ simplify_relational_operation_1 (enum rtx_code code, enum machine_mode mode, && GET_CODE (op0) == PLUS && CONST_INT_P (XEXP (op0, 1)) && (rtx_equal_p (op1, XEXP (op0, 0)) - || rtx_equal_p (op1, XEXP (op0, 1)))) + || rtx_equal_p (op1, XEXP (op0, 1))) + /* (LTU/GEU (PLUS a 0) 0) is not the same as (GEU/LTU a 0). */ + && XEXP (op0, 1) != const0_rtx) { rtx new_cmp = simplify_gen_unary (NEG, cmp_mode, XEXP (op0, 1), cmp_mode); @@ -4409,14 +4804,27 @@ simplify_relational_operation_1 (enum rtx_code code, enum machine_mode mode, /* (eq/ne (xor x C1) C2) simplifies to (eq/ne x (C1^C2)). 
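+	 (XOR with C1 is an involution, so XORing both sides of the
+	 comparison with C1 preserves it.)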
*/ if ((code == EQ || code == NE) && op0code == XOR - && (CONST_INT_P (op1) - || GET_CODE (op1) == CONST_DOUBLE) - && (CONST_INT_P (XEXP (op0, 1)) - || GET_CODE (XEXP (op0, 1)) == CONST_DOUBLE)) + && CONST_SCALAR_INT_P (op1) + && CONST_SCALAR_INT_P (XEXP (op0, 1))) return simplify_gen_relational (code, mode, cmp_mode, XEXP (op0, 0), simplify_gen_binary (XOR, cmp_mode, XEXP (op0, 1), op1)); + /* (eq/ne (bswap x) C1) simplifies to (eq/ne x C2) with C2 swapped. */ + if ((code == EQ || code == NE) + && GET_CODE (op0) == BSWAP + && CONST_SCALAR_INT_P (op1)) + return simplify_gen_relational (code, mode, cmp_mode, XEXP (op0, 0), + simplify_gen_unary (BSWAP, cmp_mode, + op1, cmp_mode)); + + /* (eq/ne (bswap x) (bswap y)) simplifies to (eq/ne x y). */ + if ((code == EQ || code == NE) + && GET_CODE (op0) == BSWAP + && GET_CODE (op1) == BSWAP) + return simplify_gen_relational (code, mode, cmp_mode, + XEXP (op0, 0), XEXP (op1, 0)); + if (op0code == POPCOUNT && op1 == const0_rtx) switch (code) { @@ -4589,8 +4997,8 @@ simplify_const_relational_operation (enum rtx_code code, /* If the operands are floating-point constants, see if we can fold the result. */ - if (GET_CODE (trueop0) == CONST_DOUBLE - && GET_CODE (trueop1) == CONST_DOUBLE + if (CONST_DOUBLE_AS_FLOAT_P (trueop0) + && CONST_DOUBLE_AS_FLOAT_P (trueop1) && SCALAR_FLOAT_MODE_P (GET_MODE (trueop0))) { REAL_VALUE_TYPE d0, d1; @@ -4629,17 +5037,15 @@ simplify_const_relational_operation (enum rtx_code code, /* Otherwise, see if the operands are both integers. */ if ((GET_MODE_CLASS (mode) == MODE_INT || mode == VOIDmode) - && (GET_CODE (trueop0) == CONST_DOUBLE - || CONST_INT_P (trueop0)) - && (GET_CODE (trueop1) == CONST_DOUBLE - || CONST_INT_P (trueop1))) + && (CONST_DOUBLE_AS_INT_P (trueop0) || CONST_INT_P (trueop0)) + && (CONST_DOUBLE_AS_INT_P (trueop1) || CONST_INT_P (trueop1))) { int width = GET_MODE_PRECISION (mode); HOST_WIDE_INT l0s, h0s, l1s, h1s; unsigned HOST_WIDE_INT l0u, h0u, l1u, h1u; /* Get the two words comprising each integer constant. */ - if (GET_CODE (trueop0) == CONST_DOUBLE) + if (CONST_DOUBLE_AS_INT_P (trueop0)) { l0u = l0s = CONST_DOUBLE_LOW (trueop0); h0u = h0s = CONST_DOUBLE_HIGH (trueop0); @@ -4650,7 +5056,7 @@ simplify_const_relational_operation (enum rtx_code code, h0u = h0s = HWI_SIGN_EXTEND (l0s); } - if (GET_CODE (trueop1) == CONST_DOUBLE) + if (CONST_DOUBLE_AS_INT_P (trueop1)) { l1u = l1s = CONST_DOUBLE_LOW (trueop1); h1u = h1s = CONST_DOUBLE_HIGH (trueop1); @@ -4916,7 +5322,7 @@ simplify_ternary_operation (enum rtx_code code, enum machine_mode mode, { unsigned int width = GET_MODE_PRECISION (mode); bool any_change = false; - rtx tem; + rtx tem, trueop2; /* VOIDmode means "infinite" precision. 
*/ if (width == 0) @@ -5062,33 +5468,74 @@ simplify_ternary_operation (enum rtx_code code, enum machine_mode mode, gcc_assert (GET_MODE (op0) == mode); gcc_assert (GET_MODE (op1) == mode); gcc_assert (VECTOR_MODE_P (mode)); - op2 = avoid_constant_pool_reference (op2); - if (CONST_INT_P (op2)) + trueop2 = avoid_constant_pool_reference (op2); + if (CONST_INT_P (trueop2)) { - int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); + int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); - int mask = (1 << n_elts) - 1; + unsigned HOST_WIDE_INT sel = UINTVAL (trueop2); + unsigned HOST_WIDE_INT mask; + if (n_elts == HOST_BITS_PER_WIDE_INT) + mask = -1; + else + mask = ((unsigned HOST_WIDE_INT) 1 << n_elts) - 1; - if (!(INTVAL (op2) & mask)) + if (!(sel & mask) && !side_effects_p (op0)) return op1; - if ((INTVAL (op2) & mask) == mask) + if ((sel & mask) == mask && !side_effects_p (op1)) return op0; - op0 = avoid_constant_pool_reference (op0); - op1 = avoid_constant_pool_reference (op1); - if (GET_CODE (op0) == CONST_VECTOR - && GET_CODE (op1) == CONST_VECTOR) + rtx trueop0 = avoid_constant_pool_reference (op0); + rtx trueop1 = avoid_constant_pool_reference (op1); + if (GET_CODE (trueop0) == CONST_VECTOR + && GET_CODE (trueop1) == CONST_VECTOR) { rtvec v = rtvec_alloc (n_elts); unsigned int i; for (i = 0; i < n_elts; i++) - RTVEC_ELT (v, i) = (INTVAL (op2) & (1 << i) - ? CONST_VECTOR_ELT (op0, i) - : CONST_VECTOR_ELT (op1, i)); + RTVEC_ELT (v, i) = ((sel & ((unsigned HOST_WIDE_INT) 1 << i)) + ? CONST_VECTOR_ELT (trueop0, i) + : CONST_VECTOR_ELT (trueop1, i)); return gen_rtx_CONST_VECTOR (mode, v); } + + /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n) + if no element from a appears in the result. 
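+	     (No element of a survives when the outer and inner masks
+	     never select the same lane, i.e. (n & m) == 0 over the
+	     vector's lanes.)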
*/ + if (GET_CODE (op0) == VEC_MERGE) + { + tem = avoid_constant_pool_reference (XEXP (op0, 2)); + if (CONST_INT_P (tem)) + { + unsigned HOST_WIDE_INT sel0 = UINTVAL (tem); + if (!(sel & sel0 & mask) && !side_effects_p (XEXP (op0, 0))) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 1), op1, op2); + if (!(sel & ~sel0 & mask) && !side_effects_p (XEXP (op0, 1))) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 0), op1, op2); + } + } + if (GET_CODE (op1) == VEC_MERGE) + { + tem = avoid_constant_pool_reference (XEXP (op1, 2)); + if (CONST_INT_P (tem)) + { + unsigned HOST_WIDE_INT sel1 = UINTVAL (tem); + if (!(~sel & sel1 & mask) && !side_effects_p (XEXP (op1, 0))) + return simplify_gen_ternary (code, mode, mode, + op0, XEXP (op1, 1), op2); + if (!(~sel & ~sel1 & mask) && !side_effects_p (XEXP (op1, 1))) + return simplify_gen_ternary (code, mode, mode, + op0, XEXP (op1, 0), op2); + } + } } + + if (rtx_equal_p (op0, op1) + && !side_effects_p (op2) && !side_effects_p (op1)) + return op0; + break; default: @@ -5196,7 +5643,7 @@ simplify_immed_subreg (enum machine_mode outermode, rtx op, for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit) *vp++ = CONST_DOUBLE_LOW (el) >> i; - while (i < HOST_BITS_PER_WIDE_INT * 2 && i < elem_bitsize) + while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize) { *vp++ = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT); @@ -5251,7 +5698,7 @@ simplify_immed_subreg (enum machine_mode outermode, rtx op, { for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit) *vp++ = CONST_FIXED_VALUE_LOW (el) >> i; - for (; i < 2 * HOST_BITS_PER_WIDE_INT && i < elem_bitsize; + for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize; i += value_bit) *vp++ = CONST_FIXED_VALUE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT); @@ -5344,7 +5791,7 @@ simplify_immed_subreg (enum machine_mode outermode, rtx op, know why. */ if (elem_bitsize <= HOST_BITS_PER_WIDE_INT) elems[elem] = gen_int_mode (lo, outer_submode); - else if (elem_bitsize <= 2 * HOST_BITS_PER_WIDE_INT) + else if (elem_bitsize <= HOST_BITS_PER_DOUBLE_INT) elems[elem] = immed_double_const (lo, hi, outer_submode); else return NULL_RTX; @@ -5425,14 +5872,17 @@ simplify_subreg (enum machine_mode outermode, rtx op, gcc_assert (GET_MODE (op) == innermode || GET_MODE (op) == VOIDmode); - gcc_assert ((byte % GET_MODE_SIZE (outermode)) == 0); - gcc_assert (byte < GET_MODE_SIZE (innermode)); + if ((byte % GET_MODE_SIZE (outermode)) != 0) + return NULL_RTX; + + if (byte >= GET_MODE_SIZE (innermode)) + return NULL_RTX; if (outermode == innermode && !byte) return op; - if (CONST_INT_P (op) - || GET_CODE (op) == CONST_DOUBLE + if (CONST_SCALAR_INT_P (op) + || CONST_DOUBLE_AS_FLOAT_P (op) || GET_CODE (op) == CONST_FIXED || GET_CODE (op) == CONST_VECTOR) return simplify_immed_subreg (outermode, op, innermode, byte); @@ -5525,14 +5975,6 @@ simplify_subreg (enum machine_mode outermode, rtx op, return NULL_RTX; } - /* Merge implicit and explicit truncations. */ - - if (GET_CODE (op) == TRUNCATE - && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (innermode) - && subreg_lowpart_offset (outermode, innermode) == byte) - return simplify_gen_unary (TRUNCATE, outermode, XEXP (op, 0), - GET_MODE (XEXP (op, 0))); - /* SUBREG of a hard register => just change the register number and/or mode. If the hard register is not valid in that mode, suppress this simplification. If the hard register is the stack, @@ -5580,7 +6022,7 @@ simplify_subreg (enum machine_mode outermode, rtx op, or if we would be widening it. */ if (MEM_P (op) - && ! 
mode_dependent_address_p (XEXP (op, 0)) + && ! mode_dependent_address_p (XEXP (op, 0), MEM_ADDR_SPACE (op)) /* Allow splitting of volatile memory references in case we don't have instruction to move the whole thing. */ && (! MEM_VOLATILE_P (op) @@ -5618,137 +6060,23 @@ simplify_subreg (enum machine_mode outermode, rtx op, return NULL_RTX; } - /* Optimize SUBREG truncations of zero and sign extended values. */ - if ((GET_CODE (op) == ZERO_EXTEND - || GET_CODE (op) == SIGN_EXTEND) - && SCALAR_INT_MODE_P (innermode) - && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)) + /* A SUBREG resulting from a zero extension may fold to zero if + it extracts higher bits that the ZERO_EXTEND's source bits. */ + if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) { unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); - - /* If we're requesting the lowpart of a zero or sign extension, - there are three possibilities. If the outermode is the same - as the origmode, we can omit both the extension and the subreg. - If the outermode is not larger than the origmode, we can apply - the truncation without the extension. Finally, if the outermode - is larger than the origmode, but both are integer modes, we - can just extend to the appropriate mode. */ - if (bitpos == 0) - { - enum machine_mode origmode = GET_MODE (XEXP (op, 0)); - if (outermode == origmode) - return XEXP (op, 0); - if (GET_MODE_PRECISION (outermode) <= GET_MODE_PRECISION (origmode)) - return simplify_gen_subreg (outermode, XEXP (op, 0), origmode, - subreg_lowpart_offset (outermode, - origmode)); - if (SCALAR_INT_MODE_P (outermode)) - return simplify_gen_unary (GET_CODE (op), outermode, - XEXP (op, 0), origmode); - } - - /* A SUBREG resulting from a zero extension may fold to zero if - it extracts higher bits that the ZERO_EXTEND's source bits. */ - if (GET_CODE (op) == ZERO_EXTEND - && bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))) + if (bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))) return CONST0_RTX (outermode); } - /* Simplify (subreg:QI (lshiftrt:SI (sign_extend:SI (x:QI)) C), 0) into - to (ashiftrt:QI (x:QI) C), where C is a suitable small constant and - the outer subreg is effectively a truncation to the original mode. */ - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) - && SCALAR_INT_MODE_P (outermode) - && SCALAR_INT_MODE_P (innermode) - /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE - to avoid the possibility that an outer LSHIFTRT shifts by more - than the sign extension's sign_bit_copies and introduces zeros - into the high bits of the result. */ - && (2 * GET_MODE_PRECISION (outermode)) <= GET_MODE_PRECISION (innermode) - && CONST_INT_P (XEXP (op, 1)) - && GET_CODE (XEXP (op, 0)) == SIGN_EXTEND - && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode - && INTVAL (XEXP (op, 1)) < GET_MODE_PRECISION (outermode) - && subreg_lsb_1 (outermode, innermode, byte) == 0) - return simplify_gen_binary (ASHIFTRT, outermode, - XEXP (XEXP (op, 0), 0), XEXP (op, 1)); - - /* Likewise (subreg:QI (lshiftrt:SI (zero_extend:SI (x:QI)) C), 0) into - to (lshiftrt:QI (x:QI) C), where C is a suitable small constant and - the outer subreg is effectively a truncation to the original mode. 
*/ - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) - && SCALAR_INT_MODE_P (outermode) - && SCALAR_INT_MODE_P (innermode) - && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode) - && CONST_INT_P (XEXP (op, 1)) - && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND - && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode - && INTVAL (XEXP (op, 1)) < GET_MODE_PRECISION (outermode) - && subreg_lsb_1 (outermode, innermode, byte) == 0) - return simplify_gen_binary (LSHIFTRT, outermode, - XEXP (XEXP (op, 0), 0), XEXP (op, 1)); - - /* Likewise (subreg:QI (ashift:SI (zero_extend:SI (x:QI)) C), 0) into - to (ashift:QI (x:QI) C), where C is a suitable small constant and - the outer subreg is effectively a truncation to the original mode. */ - if (GET_CODE (op) == ASHIFT - && SCALAR_INT_MODE_P (outermode) + if (SCALAR_INT_MODE_P (outermode) && SCALAR_INT_MODE_P (innermode) && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode) - && CONST_INT_P (XEXP (op, 1)) - && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND - || GET_CODE (XEXP (op, 0)) == SIGN_EXTEND) - && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode - && INTVAL (XEXP (op, 1)) < GET_MODE_PRECISION (outermode) - && subreg_lsb_1 (outermode, innermode, byte) == 0) - return simplify_gen_binary (ASHIFT, outermode, - XEXP (XEXP (op, 0), 0), XEXP (op, 1)); - - /* Recognize a word extraction from a multi-word subreg. */ - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) - && SCALAR_INT_MODE_P (innermode) - && GET_MODE_PRECISION (outermode) >= BITS_PER_WORD - && GET_MODE_PRECISION (innermode) >= (2 * GET_MODE_PRECISION (outermode)) - && CONST_INT_P (XEXP (op, 1)) - && (INTVAL (XEXP (op, 1)) & (GET_MODE_PRECISION (outermode) - 1)) == 0 - && INTVAL (XEXP (op, 1)) >= 0 - && INTVAL (XEXP (op, 1)) < GET_MODE_PRECISION (innermode) && byte == subreg_lowpart_offset (outermode, innermode)) { - int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT; - return simplify_gen_subreg (outermode, XEXP (op, 0), innermode, - (WORDS_BIG_ENDIAN - ? byte - shifted_bytes - : byte + shifted_bytes)); - } - - /* If we have a lowpart SUBREG of a right shift of MEM, make a new MEM - and try replacing the SUBREG and shift with it. Don't do this if - the MEM has a mode-dependent address or if we would be widening it. */ - - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) - && SCALAR_INT_MODE_P (innermode) - && MEM_P (XEXP (op, 0)) - && CONST_INT_P (XEXP (op, 1)) - && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) - && (INTVAL (XEXP (op, 1)) % GET_MODE_BITSIZE (outermode)) == 0 - && INTVAL (XEXP (op, 1)) > 0 - && INTVAL (XEXP (op, 1)) < GET_MODE_BITSIZE (innermode) - && ! mode_dependent_address_p (XEXP (XEXP (op, 0), 0)) - && ! MEM_VOLATILE_P (XEXP (op, 0)) - && byte == subreg_lowpart_offset (outermode, innermode) - && (GET_MODE_SIZE (outermode) >= UNITS_PER_WORD - || WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN)) - { - int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT; - return adjust_address_nv (XEXP (op, 0), outermode, - (WORDS_BIG_ENDIAN - ? byte - shifted_bytes - : byte + shifted_bytes)); + rtx tem = simplify_truncation (outermode, op, innermode); + if (tem) + return tem; } return NULL_RTX;