arm.c (arm_emit_coreregs_64bit_shift): Fix comment.
authorAndrew Stubbs <ams@codesourcery.com>
Tue, 13 Nov 2012 13:31:50 +0000 (13:31 +0000)
committerUlrich Weigand <uweigand@gcc.gnu.org>
Tue, 13 Nov 2012 13:31:50 +0000 (13:31 +0000)
2012-11-13  Andrew Stubbs  <ams@codesourcery.com>
            Ulrich Weigand  <ulrich.weigand@linaro.org>

* config/arm/arm.c (arm_emit_coreregs_64bit_shift): Fix comment.
* config/arm/arm.md (opt, opt_enabled): New attributes.
(enabled): Use opt_enabled.
(ashldi3, ashrdi3, lshrdi3): Add TARGET_NEON case.
(ashldi3): Allow general operands for TARGET_NEON case.
* config/arm/iterators.md (rshifts): New code iterator.
(shift, shifttype): New code attributes.
* config/arm/neon.md (UNSPEC_LOAD_COUNT): New unspec type.
(neon_load_count, ashldi3_neon_noclobber, ashldi3_neon,
signed_shift_di3_neon, unsigned_shift_di3_neon,
ashrdi3_neon_imm_noclobber, lshrdi3_neon_imm_noclobber,
<shift>di3_neon): New patterns.

Co-Authored-By: Ulrich Weigand <ulrich.weigand@linaro.org>
From-SVN: r193476

gcc/ChangeLog
gcc/config/arm/arm.c
gcc/config/arm/arm.md
gcc/config/arm/iterators.md
gcc/config/arm/neon.md

index bebebe0bdb6d6d438f45bedb5f8ca4d2697fd75c..863190fe1d1f80bf36db4e8d546cf578d09274ed 100644 (file)
@@ -1,3 +1,19 @@
+2012-11-13  Andrew Stubbs  <ams@codesourcery.com>
+           Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+       * config/arm/arm.c (arm_emit_coreregs_64bit_shift): Fix comment.
+       * config/arm/arm.md (opt, opt_enabled): New attributes.
+       (enabled): Use opt_enabled.
+       (ashldi3, ashrdi3, lshrdi3): Add TARGET_NEON case.
+       (ashldi3): Allow general operands for TARGET_NEON case.
+       * config/arm/iterators.md (rshifts): New code iterator.
+       (shift, shifttype): New code attributes.
+       * config/arm/neon.md (UNSPEC_LOAD_COUNT): New unspec type.
+       (neon_load_count, ashldi3_neon_noclobber, ashldi3_neon,
+       signed_shift_di3_neon, unsigned_shift_di3_neon,
+       ashrdi3_neon_imm_noclobber, lshrdi3_neon_imm_noclobber,
+       <shift>di3_neon): New patterns.
+
 2012-11-13  Jakub Jelinek  <jakub@redhat.com>
 
        * Makefile.in (asan.o): Depend on $(TM_P_H).
index f802507d638451d05094f7ba95995a147bc238ba..3bec7c57421ced32836bd8952824c88bc8688c43 100644 (file)
@@ -26635,8 +26635,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
    Input requirements:
     - It is safe for the input and output to be the same register, but
       early-clobber rules apply for the shift amount and scratch registers.
-    - Shift by register requires both scratch registers.  Shift by a constant
-      less than 32 in Thumb2 mode requires SCRATCH1 only.  In all other cases
+    - Shift by register requires both scratch registers.  In all other cases
       the scratch registers may be NULL.
     - Ashiftrt by a register also clobbers the CC register.  */
 void
index e169af910dd17e0b7c2e192eb0e8d1359e4c0e8b..7e92b69ad861fe90ed409494d451854f30888462 100644 (file)
 
        (const_string "no")))
 
+(define_attr "opt" "any,speed,size"
+  (const_string "any"))
+
+(define_attr "opt_enabled" "no,yes"
+  (cond [(eq_attr "opt" "any")
+         (const_string "yes")
+
+        (and (eq_attr "opt" "speed")
+             (match_test "optimize_function_for_speed_p (cfun)"))
+        (const_string "yes")
+
+        (and (eq_attr "opt" "size")
+             (match_test "optimize_function_for_size_p (cfun)"))
+        (const_string "yes")]
+       (const_string "no")))
+
 ; Allows an insn to disable certain alternatives for reasons other than
 ; arch support.
 (define_attr "insn_enabled" "no,yes"
 
 ; Enable all alternatives that are both arch_enabled and insn_enabled.
  (define_attr "enabled" "no,yes"
-   (if_then_else (eq_attr "insn_enabled" "yes")
-               (if_then_else (eq_attr "arch_enabled" "yes")
-                             (const_string "yes")
-                             (const_string "no"))
-                (const_string "no")))
+   (cond [(eq_attr "insn_enabled" "no")
+         (const_string "no")
+
+         (eq_attr "arch_enabled" "no")
+         (const_string "no")
+
+         (eq_attr "opt_enabled" "no")
+         (const_string "no")]
+        (const_string "yes")))
 
 ; POOL_RANGE is how far away from a constant pool entry that this insn
 ; can be placed.  If the distance is zero, then this insn will never
 (define_expand "ashldi3"
   [(set (match_operand:DI            0 "s_register_operand" "")
         (ashift:DI (match_operand:DI 1 "s_register_operand" "")
-                   (match_operand:SI 2 "reg_or_int_operand" "")))]
+                   (match_operand:SI 2 "general_operand" "")))]
   "TARGET_32BIT"
   "
+  if (TARGET_NEON)
+    {
+      /* Delay the decision whether to use NEON or core-regs until
+        register allocation.  */
+      emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else
+    {
+      /* Only the NEON case can handle in-memory shift counts.  */
+      if (!reg_or_int_operand (operands[2], SImode))
+        operands[2] = force_reg (SImode, operands[2]);
+    }
+
   if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
     ; /* No special preparation statements; expand pattern as above.  */
   else
                      (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
+  if (TARGET_NEON)
+    {
+      /* Delay the decision whether to use NEON or core-regs until
+        register allocation.  */
+      emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+
   if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
     ; /* No special preparation statements; expand pattern as above.  */
   else
                      (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
+  if (TARGET_NEON)
+    {
+      /* Delay the decision whether to use NEON or core-regs until
+        register allocation.  */
+      emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+
   if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
     ; /* No special preparation statements; expand pattern as above.  */
   else
index def8d9f9692648cb50aef502919875aaa4cebedf..0a34c82974a4b3f65bbd40032c5d33cc862be095 100644 (file)
 ;; A list of widening operators
 (define_code_iterator SE [sign_extend zero_extend])
 
+;; Right shifts
+(define_code_iterator rshifts [ashiftrt lshiftrt])
+
 ;;----------------------------------------------------------------------------
 ;; Mode attributes
 ;;----------------------------------------------------------------------------
 
 ;; Assembler mnemonics for signedness of widening operations.
 (define_code_attr US [(sign_extend "s") (zero_extend "u")])
+
+;; Right shifts
+(define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
+(define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
+
index 92e03b097604fd56130ac88e767ceeb89bf3e1ef..21035808c0e5964d29b7c903b5721210223153e0 100644 (file)
@@ -23,6 +23,7 @@
 (define_c_enum "unspec" [
   UNSPEC_ASHIFT_SIGNED
   UNSPEC_ASHIFT_UNSIGNED
+  UNSPEC_LOAD_COUNT
   UNSPEC_VABD
   UNSPEC_VABDL
   UNSPEC_VADD
   DONE;
 })
 
+;; 64-bit shifts
+
+;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
+;; leaving the upper half uninitalized.  This is OK since the shift
+;; instruction only looks at the low 8 bits anyway.  To avoid confusing
+;; data flow analysis however, we pretend the full register is set
+;; using an unspec.
+(define_insn "neon_load_count"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
+        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
+                   UNSPEC_LOAD_COUNT))]
+  "TARGET_NEON"
+  "@
+   vld1.32\t{%P0[0]}, %A1
+   vmov.32\t%P0[0], %1"
+  [(set_attr "neon_type" "neon_vld1_vld2_lane,neon_mcr")]
+)
+
+(define_insn "ashldi3_neon_noclobber"
+  [(set (match_operand:DI 0 "s_register_operand"           "=w,w")
+       (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
+                  (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
+  "TARGET_NEON && reload_completed
+   && (!CONST_INT_P (operands[2])
+       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
+  "@
+   vshl.u64\t%P0, %P1, %2
+   vshl.u64\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd")]
+)
+
+(define_insn_and_split "ashldi3_neon"
+  [(set (match_operand:DI 0 "s_register_operand"           "= w, w,?&r,?r, ?w,w")
+       (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
+                  (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,rUm,i")))
+   (clobber (match_scratch:SI 3                                    "= X, X,?&r, X,  X,X"))
+   (clobber (match_scratch:SI 4                                    "= X, X,?&r, X,  X,X"))
+   (clobber (match_scratch:DI 5                                    "=&w, X,  X, X, &w,X"))
+   (clobber (reg:CC_C CC_REGNUM))]
+  "TARGET_NEON"
+  "#"
+  "TARGET_NEON && reload_completed"
+  [(const_int 0)]
+  "
+  {
+    if (IS_VFP_REGNUM (REGNO (operands[0])))
+      {
+        if (CONST_INT_P (operands[2]))
+         {
+           if (INTVAL (operands[2]) < 1)
+             {
+               emit_insn (gen_movdi (operands[0], operands[1]));
+               DONE;
+             }
+           else if (INTVAL (operands[2]) > 63)
+             operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
+         }
+       else
+         {
+           emit_insn (gen_neon_load_count (operands[5], operands[2]));
+           operands[2] = operands[5];
+         }
+
+       /* Ditch the unnecessary clobbers.  */
+       emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
+                                              operands[2]));
+      }
+    else
+      {
+       if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+         /* This clobbers CC.  */
+         emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+       else
+         arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
+                                        operands[2], operands[3], operands[4]);
+      }
+    DONE;
+  }"
+  [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+   (set_attr "opt" "*,*,speed,speed,*,*")]
+)
+
+; The shift amount needs to be negated for right-shifts
+(define_insn "signed_shift_di3_neon"
+  [(set (match_operand:DI 0 "s_register_operand"            "=w")
+       (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+                   (match_operand:DI 2 "s_register_operand" " w")]
+                  UNSPEC_ASHIFT_SIGNED))]
+  "TARGET_NEON && reload_completed"
+  "vshl.s64\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+; The shift amount needs to be negated for right-shifts
+(define_insn "unsigned_shift_di3_neon"
+  [(set (match_operand:DI 0 "s_register_operand"            "=w")
+       (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+                   (match_operand:DI 2 "s_register_operand" " w")]
+                  UNSPEC_ASHIFT_UNSIGNED))]
+  "TARGET_NEON && reload_completed"
+  "vshl.u64\t%P0, %P1, %P2"
+  [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn "ashrdi3_neon_imm_noclobber"
+  [(set (match_operand:DI 0 "s_register_operand"             "=w")
+       (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
+                    (match_operand:DI 2 "const_int_operand"  " i")))]
+  "TARGET_NEON && reload_completed
+   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
+  "vshr.s64\t%P0, %P1, %2"
+  [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn "lshrdi3_neon_imm_noclobber"
+  [(set (match_operand:DI 0 "s_register_operand"             "=w")
+       (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
+                    (match_operand:DI 2 "const_int_operand"  " i")))]
+  "TARGET_NEON && reload_completed
+   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
+  "vshr.u64\t%P0, %P1, %2"
+  [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+;; ashrdi3_neon
+;; lshrdi3_neon
+(define_insn_and_split "<shift>di3_neon"
+  [(set (match_operand:DI 0 "s_register_operand"            "= w, w,?&r,?r,?w,?w")
+       (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
+                   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, i")))
+   (clobber (match_scratch:SI 3                                     "=2r, X, &r, X,2r, X"))
+   (clobber (match_scratch:SI 4                                     "= X, X, &r, X, X, X"))
+   (clobber (match_scratch:DI 5                                     "=&w, X,  X, X,&w, X"))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_NEON"
+  "#"
+  "TARGET_NEON && reload_completed"
+  [(const_int 0)]
+  "
+  {
+    if (IS_VFP_REGNUM (REGNO (operands[0])))
+      {
+       if (CONST_INT_P (operands[2]))
+         {
+           if (INTVAL (operands[2]) < 1)
+             {
+               emit_insn (gen_movdi (operands[0], operands[1]));
+               DONE;
+             }
+           else if (INTVAL (operands[2]) > 64)
+             operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
+
+           /* Ditch the unnecessary clobbers.  */
+           emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
+                                                         operands[1],
+                                                         operands[2]));
+         }
+       else 
+         {
+           /* We must use a negative left-shift.  */
+           emit_insn (gen_negsi2 (operands[3], operands[2]));
+           emit_insn (gen_neon_load_count (operands[5], operands[3]));
+           emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
+                                                      operands[5]));
+         }
+      }
+    else
+      {
+       if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+         /* This clobbers CC.  */
+         emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
+       else
+         /* This clobbers CC (ASHIFTRT by register only).  */
+         arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
+                                        operands[2], operands[3], operands[4]);
+      }
+
+    DONE;
+  }"
+  [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+   (set_attr "opt" "*,*,speed,speed,*,*")]
+)
+
 ;; Widening operations
 
 (define_insn "widen_ssum<mode>3"