(strlensi): New pattern.
author Stan Cox <coxs@gnu.org>
Fri, 22 Dec 1995 20:24:34 +0000 (20:24 +0000)
committer Stan Cox <coxs@gnu.org>
Fri, 22 Dec 1995 20:24:34 +0000 (20:24 +0000)
From-SVN: r10831

gcc/config/i386/i386.md

index 70f6953205aaa4555c2ba5d4ee1683dc11f6b38d..43633159add7c64cc236f2ccf3c678d7a648fb02 100644 (file)
                         (const_int 1)
                         (match_operand:SI 2 "general_operand" "r"))
        (match_operand:SI 3 "const_int_operand" "n"))]
-  "TARGET_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
+  "TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
   "*
 {
   CC_STATUS_INIT;
        (xor:SI (ashift:SI (const_int 1)
                           (match_operand:SI 1 "general_operand" "r"))
                (match_operand:SI 2 "general_operand" "0")))]
-  "TARGET_BIT_TEST && GET_CODE (operands[1]) != CONST_INT"
+  "TARGET_USE_BIT_TEST && GET_CODE (operands[1]) != CONST_INT"
   "*
 {
   CC_STATUS_INIT;
        (xor:SI (match_operand:SI 1 "general_operand" "0")
                (ashift:SI (const_int 1)
                           (match_operand:SI 2 "general_operand" "r"))))]
-  "TARGET_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
+  "TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
   "*
 {
   CC_STATUS_INIT;
 (define_expand "strlensi"
   [(parallel [(set (match_dup 4)
                   (unspec:SI [(mem:BLK (match_operand:BLK 1 "general_operand" ""))
-                              (match_operand:QI 2 "register_operand" "")
+                              (match_operand:QI 2 "immediate_operand" "")
                               (match_operand:SI 3 "immediate_operand" "")] 0))
              (clobber (match_dup 1))])
    (set (match_dup 5)
        (not:SI (match_dup 4)))
    (set (match_operand:SI 0 "register_operand" "")
-       (minus:SI (match_dup 5)
-                (const_int 1)))]
+       (plus:SI (match_dup 5)
+                (const_int -1)))]
   ""
   "
 {
+  if (TARGET_UNROLL_STRLEN && operands[2] == const0_rtx && optimize > 1)
+    {
+      rtx address;
+      rtx scratch;
+
+       /* well it seems that some optimizer does not combine a call like
+            foo(strlen(bar), strlen(bar));
+          when the move and the subtraction is done here.  It does calculate
+          the length just once when these instructions are done inside of
+          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
+          often used and I use one fewer register for the lifetime of
+          output_strlen_unroll() this is better.  */
+      scratch = gen_reg_rtx (SImode);
+      address = force_reg (SImode, XEXP (operands[1], 0));
+
+       /* move address to scratch-register
+          this is done here because the i586 can do the following and
+          in the same cycle with the following move.  */
+      if (GET_CODE (operands[3]) != CONST_INT || INTVAL (operands[3]) < 4)
+         emit_insn (gen_movsi (scratch, address));
+
+      emit_insn (gen_movsi (operands[0], address));
+
+      if(TARGET_USE_Q_REG)
+       emit_insn (gen_strlensi_unroll5 (operands[0],
+                                       operands[3],
+                                       scratch,
+                                       operands[0]));
+      else
+       emit_insn (gen_strlensi_unroll4 (operands[0],
+                                       operands[3],
+                                       scratch,
+                                       operands[0]));
+
+        /* gen_strlensi_unroll[45] returns the address of the zero
+           at the end of the string, like memchr(), so compute the
+           length by subtracting the startaddress.  */
+      emit_insn (gen_subsi3 (operands[0], operands[0], address));
+      DONE;
+    }
+
   operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
   operands[4] = gen_reg_rtx (SImode);
   operands[5] = gen_reg_rtx (SImode);
 (define_insn ""
   [(set (match_operand:SI 0 "register_operand" "=&c")
        (unspec:SI [(mem:BLK (match_operand:SI 1 "address_operand" "D"))
-                   (match_operand:QI 2 "register_operand" "a")
+                   (match_operand:QI 2 "immediate_operand" "a")
                    (match_operand:SI 3 "immediate_operand" "i")] 0))
    (clobber (match_dup 1))]
   ""
   output_asm_insn (AS2 (mov%L0,%1,%0), xops);
   return \"repnz\;scas%B2\";
 }")
+
+;; The only difference between the following two patterns is the register
+;; preference: on a Pentium, using a q-register saves one clock cycle per
+;; 4 characters.
+
+;; strlensi_unroll4: named insn emitted by the "strlensi" expander on its
+;; unrolled path when TARGET_USE_Q_REG is false.  The assembly text is
+;; produced entirely by output_strlen_unroll ().
+;;
+;; Operands, as the expander passes them:
+;;   0 - SImode register result.  Per the expander's own comment this is the
+;;       address of the terminating zero, not the length; the expander
+;;       subtracts the start address afterwards.
+;;   1 - immediate; the expander forwards strlensi's operand 3 here
+;;       (presumably the known alignment -- TODO confirm).
+;;   2 - SImode scratch register, clobbered by this insn.
+;;   3 - SImode register holding the string address; the "0" constraint ties
+;;       it to the same register as operand 0.
+;;
+;; Two alternatives: the first asks for operand 2 in a "q" register, the
+;; second accepts any general register but is disparaged with "!".
+;;
+;; NOTE(review): operand 2 is marked "=" although it sits among the unspec
+;; inputs and the expander may preload it with the address -- confirm that
+;; "=&" (rather than "+") is intended.
+;; NOTE(review): the expander picks this pattern when !TARGET_USE_Q_REG, but
+;; the insn condition tests TARGET_USE_ANY_REG -- confirm the two agree.
+(define_insn "strlensi_unroll4"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+                  (unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0,0"))
+                              (match_operand:SI 1 "immediate_operand" "i,i")
+                              (match_operand:SI 2 "register_operand" "=&q,&!r")] 0))
+   (clobber (match_dup 2))]
+  "(TARGET_USE_ANY_REG && optimize > 1)"
+  "* return output_strlen_unroll (operands);")
+
+;; strlensi_unroll5: named insn emitted by the "strlensi" expander on its
+;; unrolled path when TARGET_USE_Q_REG is true.  The assembly text is
+;; produced entirely by output_strlen_unroll ().
+;;
+;; Operands, as the expander passes them:
+;;   0 - SImode register result.  Per the expander's own comment this is the
+;;       address of the terminating zero, not the length; the expander
+;;       subtracts the start address afterwards.
+;;   1 - immediate; the expander forwards strlensi's operand 3 here
+;;       (presumably the known alignment -- TODO confirm).
+;;   2 - SImode scratch register, clobbered by this insn; the single
+;;       alternative requires a "q" register.
+;;   3 - SImode register holding the string address; the "0" constraint ties
+;;       it to the same register as operand 0.
+;;
+;; NOTE(review): operand 2 is marked "=" although it sits among the unspec
+;; inputs and the expander may preload it with the address -- confirm that
+;; "=&" (rather than "+") is intended.
+(define_insn "strlensi_unroll5"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+                  (unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0"))
+                              (match_operand:SI 1 "immediate_operand" "i")
+                              (match_operand:SI 2 "register_operand" "=&q")] 0))
+   (clobber (match_dup 2))]
+  "(TARGET_USE_Q_REG && optimize > 1)"
+  "* return output_strlen_unroll (operands);")