From: Richard Henderson <rth@redhat.com>
Date: Mon, 23 Nov 2015 15:55:58 +0000 (-0800)
Subject: Add uaddv4_optab and usubv4_optab
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cde9d596263ef8fe62bda9c684e3203aac3d359f;p=gcc.git

Add uaddv4_optab and usubv4_optab

	PR target/67089
    	* optabs.def (uaddv4_optab, usubv4_optab): New.
    	* internal-fn.c (expand_addsub_overflow): Use uaddv4_optab
    	and usubv4_optab in the u +- u -> u case.
    	* doc/md.texi (Standard Names): Document addv{m}4, subv{m}4,
    	mulv{m}4, uaddv{m}4, usubv{m}4, umulv{m}4.

    	* config/i386/i386.md (uaddv<SWI>4, usubv<SWI>4): New.

From-SVN: r230767
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 61cbac92610..cd57e0c27a7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2015-11-23  Richard Henderson  <rth@redhat.com>
+
+	* optabs.def (uaddv4_optab, usubv4_optab): New.
+	* internal-fn.c (expand_addsub_overflow): Use them.
+	* doc/md.texi (Standard Names): Add uaddv<m>4, usubv<m>4.
+
+	* config/i386/i386.c (ix86_cc_mode): Extend add overflow check
+	to reversed operands.
+	* config/i386/i386.md (uaddv<SWI>4, usubv<SWI>4): New.
+	(*add<SWI>3_cconly_overflow_1): Rename *add<SWI>3_cconly_overflow.
+	(*add<SWI>3_cc_overflow_1): Rename *add<SWI>3_cc_overflow.
+	(*addsi3_zext_cc_overflow_1): Rename *add3_zext_cc_overflow.
+	(*add<SWI>3_cconly_overflow_2): New.
+	(*add<SWI>3_cc_overflow_2): New.
+	(*addsi3_zext_cc_overflow_2): New.
+
 2015-11-23  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/68465
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 83749d59f3e..cc425442796 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21137,7 +21137,8 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
     case LTU:			/* CF=1 */
       /* Detect overflow checks.  They need just the carry flag.  */
       if (GET_CODE (op0) == PLUS
-	  && rtx_equal_p (op1, XEXP (op0, 0)))
+	  && (rtx_equal_p (op1, XEXP (op0, 0))
+	      || rtx_equal_p (op1, XEXP (op0, 1))))
 	return CCCmode;
       else
 	return CCmode;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c5e22aa719..a57d165f21f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6156,6 +6156,22 @@
 		  (const_string "4")]
 	      (const_string "<MODE_SIZE>")))])
 
+(define_expand "uaddv<mode>4"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (plus:SWI
+		       (match_operand:SWI 1 "nonimmediate_operand")
+		       (match_operand:SWI 2 "<general_operand>"))
+		     (match_dup 1)))
+	      (set (match_operand:SWI 0 "register_operand")
+		   (plus:SWI (match_dup 1) (match_dup 2)))])
+   (set (pc) (if_then_else
+	       (ltu (reg:CCC FLAGS_REG) (const_int 0))
+	       (label_ref (match_operand 3))
+	       (pc)))]
+  ""
+  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
+
 ;; The lea patterns for modes less than 32 bits need to be matched by
 ;; several insns converted to real lea by splitters.
 
@@ -6461,6 +6477,20 @@
 		  (const_string "4")]
 	      (const_string "<MODE_SIZE>")))])
 
+(define_expand "usubv<mode>4"
+  [(parallel [(set (reg:CC FLAGS_REG)
+		   (compare:CC
+		     (match_operand:SWI 1 "nonimmediate_operand")
+		     (match_operand:SWI 2 "<general_operand>")))
+	      (set (match_operand:SWI 0 "register_operand")
+		   (minus:SWI (match_dup 1) (match_dup 2)))])
+   (set (pc) (if_then_else
+	       (ltu (reg:CC FLAGS_REG) (const_int 0))
+	       (label_ref (match_operand 3))
+	       (pc)))]
+  ""
+  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+
 (define_insn "*sub<mode>_3"
   [(set (reg FLAGS_REG)
 	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
@@ -6611,7 +6641,7 @@
       (clobber (match_scratch:QI 2))])]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
 
-(define_insn "*add<mode>3_cconly_overflow"
+(define_insn "*add<mode>3_cconly_overflow_1"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
 	  (plus:SWI
@@ -6624,7 +6654,20 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*add<mode>3_cc_overflow"
+(define_insn "*add<mode>3_cconly_overflow_2"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (plus:SWI
+	    (match_operand:SWI 1 "nonimmediate_operand" "%0")
+	    (match_operand:SWI 2 "<general_operand>" "<g>"))
+	  (match_dup 2)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>3_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
 	    (plus:SWI
@@ -6638,7 +6681,21 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*addsi3_zext_cc_overflow"
+(define_insn "*add<mode>3_cc_overflow_2"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	    (plus:SWI
+		(match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+		(match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+	    (match_dup 2)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+	(plus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*addsi3_zext_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
 	(compare:CCC
 	  (plus:SI
@@ -6652,6 +6709,20 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
+(define_insn "*addsi3_zext_cc_overflow_2"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (plus:SI
+	    (match_operand:SI 1 "nonimmediate_operand" "%0")
+	    (match_operand:SI 2 "x86_64_general_operand" "rme"))
+	  (match_dup 2)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "add{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "<plusminus_insn>xf3"
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 79f3cf1d0f3..de1b58a985e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4913,12 +4913,17 @@ signed integer addition with overflow checking.
 @item @samp{subv@var{m}4}, @samp{mulv@var{m}4}
 Similar, for other signed arithmetic operations.
 
-@cindex @code{umulv@var{m}4} instruction pattern
-@item @samp{umulv@var{m}4}
-Like @code{mulv@var{m}4} but for unsigned multiplication.  That is to
-say, the operation is the same as signed multiplication but the jump
+@cindex @code{uaddv@var{m}4} instruction pattern
+@item @samp{uaddv@var{m}4}
+Like @code{addv@var{m}4} but for unsigned addition.  That is to
+say, the operation is the same as signed addition but the jump
 is taken only on unsigned overflow.
 
+@cindex @code{usubv@var{m}4} instruction pattern
+@cindex @code{umulv@var{m}4} instruction pattern
+@item @samp{usubv@var{m}4}, @samp{umulv@var{m}4}
+Similar, for other unsigned arithmetic operations.
+
 @cindex @code{addptr@var{m}3} instruction pattern
 @item @samp{addptr@var{m}3}
 Like @code{add@var{m}3} but is guaranteed to only be used for address
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index bc77bdc3b55..b15657f442b 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -546,6 +546,33 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
   /* u1 +- u2 -> ur  */
   if (uns0_p && uns1_p && unsr_p)
     {
+      insn_code icode = optab_handler (code == PLUS_EXPR ? uaddv4_optab
+                                       : usubv4_optab, mode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  struct expand_operand ops[4];
+	  rtx_insn *last = get_last_insn ();
+
+	  res = gen_reg_rtx (mode);
+	  create_output_operand (&ops[0], res, mode);
+	  create_input_operand (&ops[1], op0, mode);
+	  create_input_operand (&ops[2], op1, mode);
+	  create_fixed_operand (&ops[3], do_error);
+	  if (maybe_expand_insn (icode, 4, ops))
+	    {
+	      last = get_last_insn ();
+	      if (profile_status_for_fn (cfun) != PROFILE_ABSENT
+		  && JUMP_P (last)
+		  && any_condjump_p (last)
+		  && !find_reg_note (last, REG_BR_PROB, 0))
+		add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
+	      emit_jump (done_label);
+	      goto do_error_label;
+	    }
+
+	  delete_insns_since (last);
+	}
+
       /* Compute the operation.  On RTL level, the addition is always
 	 unsigned.  */
       res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
@@ -737,92 +764,88 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
   gcc_assert (!uns0_p && !uns1_p && !unsr_p);
 
   /* s1 +- s2 -> sr  */
- do_signed: ;
-  enum insn_code icode;
-  icode = optab_handler (code == PLUS_EXPR ? addv4_optab : subv4_optab, mode);
-  if (icode != CODE_FOR_nothing)
-    {
-      struct expand_operand ops[4];
-      rtx_insn *last = get_last_insn ();
-
-      res = gen_reg_rtx (mode);
-      create_output_operand (&ops[0], res, mode);
-      create_input_operand (&ops[1], op0, mode);
-      create_input_operand (&ops[2], op1, mode);
-      create_fixed_operand (&ops[3], do_error);
-      if (maybe_expand_insn (icode, 4, ops))
-	{
-	  last = get_last_insn ();
-	  if (profile_status_for_fn (cfun) != PROFILE_ABSENT
-	      && JUMP_P (last)
-	      && any_condjump_p (last)
-	      && !find_reg_note (last, REG_BR_PROB, 0))
-	    add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
-	  emit_jump (done_label);
-        }
-      else
-	{
-	  delete_insns_since (last);
-	  icode = CODE_FOR_nothing;
-	}
-    }
-
-  if (icode == CODE_FOR_nothing)
-    {
-      rtx_code_label *sub_check = gen_label_rtx ();
-      int pos_neg = 3;
-
-      /* Compute the operation.  On RTL level, the addition is always
-	 unsigned.  */
-      res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
-			  op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
-
-      /* If we can prove one of the arguments (for MINUS_EXPR only
-	 the second operand, as subtraction is not commutative) is always
-	 non-negative or always negative, we can do just one comparison
-	 and conditional jump instead of 2 at runtime, 3 present in the
-	 emitted code.  If one of the arguments is CONST_INT, all we
-	 need is to make sure it is op1, then the first
-	 do_compare_rtx_and_jump will be just folded.  Otherwise try
-	 to use range info if available.  */
-      if (code == PLUS_EXPR && CONST_INT_P (op0))
-	std::swap (op0, op1);
-      else if (CONST_INT_P (op1))
-	;
-      else if (code == PLUS_EXPR && TREE_CODE (arg0) == SSA_NAME)
-	{
-	  pos_neg = get_range_pos_neg (arg0);
-	  if (pos_neg != 3)
-	    std::swap (op0, op1);
-	}
-      if (pos_neg == 3 && !CONST_INT_P (op1) && TREE_CODE (arg1) == SSA_NAME)
-	pos_neg = get_range_pos_neg (arg1);
-
-      /* If the op1 is negative, we have to use a different check.  */
-      if (pos_neg == 3)
-	do_compare_rtx_and_jump (op1, const0_rtx, LT, false, mode, NULL_RTX,
-				 NULL, sub_check, PROB_EVEN);
-
-      /* Compare the result of the operation with one of the operands.  */
-      if (pos_neg & 1)
-	do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? GE : LE,
-				 false, mode, NULL_RTX, NULL, done_label,
-				 PROB_VERY_LIKELY);
-
-      /* If we get here, we have to print the error.  */
-      if (pos_neg == 3)
-	{
-	  emit_jump (do_error);
+ do_signed:
+  {
+    insn_code icode = optab_handler (code == PLUS_EXPR ? addv4_optab
+				     : subv4_optab, mode);
+    if (icode != CODE_FOR_nothing)
+      {
+	struct expand_operand ops[4];
+	rtx_insn *last = get_last_insn ();
+
+	res = gen_reg_rtx (mode);
+	create_output_operand (&ops[0], res, mode);
+	create_input_operand (&ops[1], op0, mode);
+	create_input_operand (&ops[2], op1, mode);
+	create_fixed_operand (&ops[3], do_error);
+	if (maybe_expand_insn (icode, 4, ops))
+	  {
+	    last = get_last_insn ();
+	    if (profile_status_for_fn (cfun) != PROFILE_ABSENT
+		&& JUMP_P (last)
+		&& any_condjump_p (last)
+		&& !find_reg_note (last, REG_BR_PROB, 0))
+	      add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
+	    emit_jump (done_label);
+	    goto do_error_label;
+	  }
+
+	delete_insns_since (last);
+      }
+
+    rtx_code_label *sub_check = gen_label_rtx ();
+    int pos_neg = 3;
+
+    /* Compute the operation.  On RTL level, the addition is always
+       unsigned.  */
+    res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
+			op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
+
+    /* If we can prove one of the arguments (for MINUS_EXPR only
+       the second operand, as subtraction is not commutative) is always
+       non-negative or always negative, we can do just one comparison
+       and conditional jump instead of 2 at runtime, 3 present in the
+       emitted code.  If one of the arguments is CONST_INT, all we
+       need is to make sure it is op1, then the first
+       do_compare_rtx_and_jump will be just folded.  Otherwise try
+       to use range info if available.  */
+    if (code == PLUS_EXPR && CONST_INT_P (op0))
+      std::swap (op0, op1);
+    else if (CONST_INT_P (op1))
+      ;
+    else if (code == PLUS_EXPR && TREE_CODE (arg0) == SSA_NAME)
+      {
+        pos_neg = get_range_pos_neg (arg0);
+        if (pos_neg != 3)
+	  std::swap (op0, op1);
+      }
+    if (pos_neg == 3 && !CONST_INT_P (op1) && TREE_CODE (arg1) == SSA_NAME)
+      pos_neg = get_range_pos_neg (arg1);
+
+    /* If the op1 is negative, we have to use a different check.  */
+    if (pos_neg == 3)
+      do_compare_rtx_and_jump (op1, const0_rtx, LT, false, mode, NULL_RTX,
+			       NULL, sub_check, PROB_EVEN);
+
+    /* Compare the result of the operation with one of the operands.  */
+    if (pos_neg & 1)
+      do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? GE : LE,
+			       false, mode, NULL_RTX, NULL, done_label,
+			       PROB_VERY_LIKELY);
 
-	  emit_label (sub_check);
-	}
+    /* If we get here, we have to print the error.  */
+    if (pos_neg == 3)
+      {
+	emit_jump (do_error);
+	emit_label (sub_check);
+      }
 
-      /* We have k = a + b for b < 0 here.  k <= a must hold.  */
-      if (pos_neg & 2)
-	do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? LE : GE,
-				 false, mode, NULL_RTX, NULL, done_label,
-				 PROB_VERY_LIKELY);
-    }
+    /* We have k = a + b for b < 0 here.  k <= a must hold.  */
+    if (pos_neg & 2)
+      do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? LE : GE,
+			       false, mode, NULL_RTX, NULL, done_label,
+			       PROB_VERY_LIKELY);
+  }
 
  do_error_label:
   emit_label (do_error);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 0ca2333eeff..c141a3ca6ff 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -197,6 +197,8 @@ OPTAB_D (ctrap_optab, "ctrap$a4")
 OPTAB_D (addv4_optab, "addv$I$a4")
 OPTAB_D (subv4_optab, "subv$I$a4")
 OPTAB_D (mulv4_optab, "mulv$I$a4")
+OPTAB_D (uaddv4_optab, "uaddv$I$a4")
+OPTAB_D (usubv4_optab, "usubv$I$a4")
 OPTAB_D (umulv4_optab, "umulv$I$a4")
 OPTAB_D (negv3_optab, "negv$I$a3")
 OPTAB_D (addptr3_optab, "addptr$a3")