[PATCH 1/2] [AARCH64,NEON] Add patterns + builtins for vld[234](q?)_lane_* intrinsics
author Charles Baylis <charles.baylis@linaro.org>
Fri, 24 Oct 2014 14:58:51 +0000 (14:58 +0000)
committer Charles Baylis <cbaylis@gcc.gnu.org>
Fri, 24 Oct 2014 14:58:51 +0000 (14:58 +0000)
2014-10-24  Charles Baylis  <charles.baylis@linaro.org>
* config/aarch64/aarch64-builtins.c
(aarch64_types_loadstruct_lane_qualifiers): Define.
* config/aarch64/aarch64-simd-builtins.def (ld2_lane, ld3_lane,
ld4_lane): New builtins.
* config/aarch64/aarch64-simd.md (aarch64_vec_load_lanesoi_lane<mode>):
New pattern.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_ld2_lane<mode>): New expand.
(aarch64_ld3_lane<mode>): Likewise.
(aarch64_ld4_lane<mode>): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_LANE, UNSPEC_LD3_LANE, UNSPEC_LD4_LANE.

From-SVN: r216671

gcc/ChangeLog
gcc/config/aarch64/aarch64-builtins.c
gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64.md

index 8863eb6853106490b854163851b38cffe395759e..698944c527d010c44e5dc44a1207988c7376a6e6 100644 (file)
@@ -1,3 +1,20 @@
+2014-10-24  Charles Baylis  <charles.baylis@linaro.org>
+       * config/aarch64/aarch64-builtins.c
+       (aarch64_types_loadstruct_lane_qualifiers): Define.
+       * config/aarch64/aarch64-simd-builtins.def (ld2_lane, ld3_lane,
+       ld4_lane): New builtins.
+       * config/aarch64/aarch64-simd.md (aarch64_vec_load_lanesoi_lane<mode>):
+       New pattern.
+       (aarch64_vec_load_lanesci_lane<mode>): Likewise.
+       (aarch64_vec_load_lanesxi_lane<mode>): Likewise.
+       (aarch64_ld2_lane<mode>): New expand.
+       (aarch64_ld3_lane<mode>): Likewise.
+       (aarch64_ld4_lane<mode>): Likewise.
+       * config/aarch64/aarch64.md (define_c_enum "unspec"): Add
+       UNSPEC_LD2_LANE, UNSPEC_LD3_LANE, UNSPEC_LD4_LANE.
+
+Change-Id: I4c36d18072215133573e07483cfe12165201c339
+
 2014-10-24  Georg-Johann Lay  <avr@gjlay.de>
 
        * avr-protos.h (avr_out_sign_extend): New.
index 3dba1b29875d7721cc82724e963f7f9bffee5fe8..368d3a76bc3f9f26afa882cec531c3875621e853 100644 (file)
@@ -201,6 +201,11 @@ aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_const_pointer_map_mode };
 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
+static enum aarch64_type_qualifiers /* For the ld[234]_lane builtins.  */
+aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_const_pointer_map_mode,
+      qualifier_none, qualifier_none }; /* load1 layout + 2 plain entries.  */
+#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
 
 static enum aarch64_type_qualifiers
 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
index ace5ebe0b7b181735ef508495eed9f92fcf12b66..5d0e7d8a714e73dd9f8258435891052f47702e66 100644 (file)
   BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0)
   BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0)
   BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0)
+  /* Implemented by aarch64_ld<VSTRUCT:nregs>_lane<VQ:mode>.  */
+  BUILTIN_VQ (LOADSTRUCT_LANE, ld2_lane, 0)
+  BUILTIN_VQ (LOADSTRUCT_LANE, ld3_lane, 0)
+  BUILTIN_VQ (LOADSTRUCT_LANE, ld4_lane, 0)
   /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>.  */
   BUILTIN_VDC (STORESTRUCT, st2, 0)
   BUILTIN_VDC (STORESTRUCT, st3, 0)
index da576a57154e7f6a31a269ec36236941b1a6aec4..b260bc62a2f259aa5e82fa457fe644144de54274 100644 (file)
   [(set_attr "type" "neon_load2_all_lanes<q>")]
 )
 
+;; LD2 to a single lane: emits "ld2 {%S0.<Vetype> - %T0.<Vetype>}[%3], %1",
+;; reading a <V_TWO_ELEM> value from memory operand 1 into lane %3 of the
+;; register list held in OI operand 0.  Operand 2 is tied to operand 0
+;; (constraint "0"), presumably so that lanes which are not loaded keep the
+;; contents of the incoming value.  The unspec:VQ dummy appears to exist only
+;; to bring the VQ mode iterator into the pattern.
+;; NOTE(review): the lane index %3 is printed unmodified -- confirm that
+;; big-endian lane numbering is handled by the callers.
+(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+       (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+                   (match_operand:OI 2 "register_operand" "0")
+                   (match_operand:SI 3 "immediate_operand" "i")
+                   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
+                  UNSPEC_LD2_LANE))]
+  "TARGET_SIMD"
+  "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
+  [(set_attr "type" "neon_load2_one_lane")]
+)
+
 (define_insn "vec_store_lanesoi<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:OI [(match_operand:OI 1 "register_operand" "w")
   [(set_attr "type" "neon_load3_all_lanes<q>")]
 )
 
+;; LD3 to a single lane: emits "ld3 {%S0.<Vetype> - %U0.<Vetype>}[%3], %1",
+;; reading a <V_THREE_ELEM> value from memory operand 1 into lane %3 of the
+;; register list held in CI operand 0.  Operand 2 is tied to operand 0
+;; (constraint "0"), presumably so that lanes which are not loaded keep the
+;; contents of the incoming value.  The unspec:VQ dummy appears to exist only
+;; to bring the VQ mode iterator into the pattern.
+;; NOTE(review): the lane index %3 is printed unmodified -- confirm that
+;; big-endian lane numbering is handled by the callers.
+(define_insn "aarch64_vec_load_lanesci_lane<mode>"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+       (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+                   (match_operand:CI 2 "register_operand" "0")
+                   (match_operand:SI 3 "immediate_operand" "i")
+                   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                  UNSPEC_LD3_LANE))]
+  "TARGET_SIMD"
+  "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
+  [(set_attr "type" "neon_load3_one_lane")]
+)
+
 (define_insn "vec_store_lanesci<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:CI [(match_operand:CI 1 "register_operand" "w")
   [(set_attr "type" "neon_load4_all_lanes<q>")]
 )
 
+;; LD4 to a single lane: emits "ld4 {%S0.<Vetype> - %V0.<Vetype>}[%3], %1",
+;; reading a <V_FOUR_ELEM> value from memory operand 1 into lane %3 of the
+;; register list held in XI operand 0.  Operand 2 is tied to operand 0
+;; (constraint "0"), presumably so that lanes which are not loaded keep the
+;; contents of the incoming value.  The unspec:VQ dummy appears to exist only
+;; to bring the VQ mode iterator into the pattern.
+;; NOTE(review): the lane index %3 is printed unmodified -- confirm that
+;; big-endian lane numbering is handled by the callers.
+(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+       (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+                   (match_operand:XI 2 "register_operand" "0")
+                   (match_operand:SI 3 "immediate_operand" "i")
+                   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                  UNSPEC_LD4_LANE))]
+  "TARGET_SIMD"
+  "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
+  [(set_attr "type" "neon_load4_one_lane")]
+)
+
 (define_insn "vec_store_lanesxi<mode>"
   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:XI [(match_operand:XI 1 "register_operand" "w")
   DONE;
 })
 
+;; Expander behind the ld2_lane builtin.  Operand 0 is the OI result,
+;; operand 1 the address in a DI register, operand 2 the incoming OI value
+;; and operand 3 the immediate lane index.  It wraps the address in a MEM
+;; and emits aarch64_vec_load_lanesoi_lane<mode>.
+(define_expand "aarch64_ld2_lane<mode>"
+  [(match_operand:OI 0 "register_operand" "=w")
+       (match_operand:DI 1 "register_operand" "w")
+       (match_operand:OI 2 "register_operand" "0")
+       (match_operand:SI 3 "immediate_operand" "i")
+       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  /* The insn takes a memory operand of mode <V_TWO_ELEM>, so build a MEM
+     of that mode around the address register.  */
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  /* Check the lane index against the range 0 .. number of units in
+     <VCONQ>mode (presumably diagnosing an out-of-range index; see
+     aarch64_simd_lane_bounds for the exact bound semantics).  */
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
+                                                     mem,
+                                                     operands[2],
+                                                     operands[3]));
+  DONE;
+})
+
+;; Expander behind the ld3_lane builtin.  Operand 0 is the CI result,
+;; operand 1 the address in a DI register, operand 2 the incoming CI value
+;; and operand 3 the immediate lane index.  It wraps the address in a MEM
+;; and emits aarch64_vec_load_lanesci_lane<mode>.
+(define_expand "aarch64_ld3_lane<mode>"
+  [(match_operand:CI 0 "register_operand" "=w")
+       (match_operand:DI 1 "register_operand" "w")
+       (match_operand:CI 2 "register_operand" "0")
+       (match_operand:SI 3 "immediate_operand" "i")
+       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  /* The insn takes a memory operand of mode <V_THREE_ELEM>, so build a MEM
+     of that mode around the address register.  */
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  /* Check the lane index against the range 0 .. number of units in
+     <VCONQ>mode (presumably diagnosing an out-of-range index; see
+     aarch64_simd_lane_bounds for the exact bound semantics).  */
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
+                                                     mem,
+                                                     operands[2],
+                                                     operands[3]));
+  DONE;
+})
+
+;; Expander behind the ld4_lane builtin.  Operand 0 is the XI result,
+;; operand 1 the address in a DI register, operand 2 the incoming XI value
+;; and operand 3 the immediate lane index.  It wraps the address in a MEM
+;; and emits aarch64_vec_load_lanesxi_lane<mode>.
+(define_expand "aarch64_ld4_lane<mode>"
+  [(match_operand:XI 0 "register_operand" "=w")
+       (match_operand:DI 1 "register_operand" "w")
+       (match_operand:XI 2 "register_operand" "0")
+       (match_operand:SI 3 "immediate_operand" "i")
+       (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  /* The insn takes a memory operand of mode <V_FOUR_ELEM>, so build a MEM
+     of that mode around the address register.  */
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  /* Check the lane index against the range 0 .. number of units in
+     <VCONQ>mode (presumably diagnosing an out-of-range index; see
+     aarch64_simd_lane_bounds for the exact bound semantics).  */
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
+                                                     mem,
+                                                     operands[2],
+                                                     operands[3]));
+  DONE;
+})
+
+
+
 ;; Expanders for builtins to extract vector registers from large
 ;; opaque integer modes.
 
index cda69791cdb53463e6ae286de22b7f81cc5b49c3..341c26f42d867f4de484c87d5f1aed0dd0fbf6e4 100644 (file)
@@ -95,6 +95,9 @@
     UNSPEC_LD3_DUP
     UNSPEC_LD4
     UNSPEC_LD4_DUP
+    UNSPEC_LD2_LANE
+    UNSPEC_LD3_LANE
+    UNSPEC_LD4_LANE
     UNSPEC_MB
     UNSPEC_NOP
     UNSPEC_PRLG_STK