From 246cc0600251b3c8e2e237a55270816cdd5a8ad2 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Fri, 6 Nov 2015 21:34:12 +0100 Subject: [PATCH] [AArch64] Fix vqtb[lx][234] on big-endian 2015-11-06 Christophe Lyon gcc/ * config/aarch64/aarch64-simd-builtins.def: Update builtins tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*. * config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to... (aarch64_tbl3) ... this, which supports v16qi too. (aarch64_tbx4v8qi): Rename to... aarch64_tbx4): ... this. (aarch64_qtbl3): New pattern. (aarch64_qtbx3): New pattern. (aarch64_qtbl4): New pattern. (aarch64_qtbx4): New pattern. * config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8) (vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8) (vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8) (vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8) (vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8) (vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8) (vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8) (vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin functions. gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test. From-SVN: r229886 --- gcc/ChangeLog | 22 + gcc/config/aarch64/aarch64-simd-builtins.def | 22 +- gcc/config/aarch64/aarch64-simd.md | 66 +- gcc/config/aarch64/arm_neon.h | 822 +++++++++---------- gcc/testsuite/ChangeLog | 6 +- 5 files changed, 486 insertions(+), 452 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6756e356cf9..3fe6742545b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2015-11-06 Christophe Lyon + + * config/aarch64/aarch64-simd-builtins.def: Update builtins + tables: add tbl3v16qi, qtbl[34]*, tbx4v16qi, qtbx[34]*. + * config/aarch64/aarch64-simd.md (aarch64_tbl3v8qi): Rename to... + (aarch64_tbl3) ... this, which supports v16qi too. + (aarch64_tbx4v8qi): Rename to... + aarch64_tbx4): ... this. + (aarch64_qtbl3): New pattern. + (aarch64_qtbx3): New pattern. + (aarch64_qtbl4): New pattern. + (aarch64_qtbx4): New pattern. + * config/aarch64/arm_neon.h (vqtbl2_s8, vqtbl2_u8, vqtbl2_p8) + (vqtbl2q_s8, vqtbl2q_u8, vqtbl2q_p8, vqtbl3_s8, vqtbl3_u8) + (vqtbl3_p8, vqtbl3q_s8, vqtbl3q_u8, vqtbl3q_p8, vqtbl4_s8) + (vqtbl4_u8, vqtbl4_p8, vqtbl4q_s8, vqtbl4q_u8, vqtbl4q_p8) + (vqtbx2_s8, vqtbx2_u8, vqtbx2_p8, vqtbx2q_s8, vqtbx2q_u8) + (vqtbx2q_p8, vqtbx3_s8, vqtbx3_u8, vqtbx3_p8, vqtbx3q_s8) + (vqtbx3q_u8, vqtbx3q_p8, vqtbx4_s8, vqtbx4_u8, vqtbx4_p8) + (vqtbx4q_s8, vqtbx4q_u8, vqtbx4q_p8): Rewrite using builtin + functions. + 2015-11-06 Mike Stump PR debug/66728 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 04dac6f84db..aad66b134ef 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -408,8 +408,26 @@ VAR1 (BINOPP, crypto_pmull, 0, di) VAR1 (BINOPP, crypto_pmull, 0, v2di) - /* Implemented by aarch64_tbl3v8qi. */ + /* Implemented by aarch64_tbl3. */ VAR1 (BINOP, tbl3, 0, v8qi) + VAR1 (BINOP, tbl3, 0, v16qi) - /* Implemented by aarch64_tbx4v8qi. */ + /* Implemented by aarch64_qtbl3. */ + VAR1 (BINOP, qtbl3, 0, v8qi) + VAR1 (BINOP, qtbl3, 0, v16qi) + + /* Implemented by aarch64_qtbl4. */ + VAR1 (BINOP, qtbl4, 0, v8qi) + VAR1 (BINOP, qtbl4, 0, v16qi) + + /* Implemented by aarch64_tbx4. */ VAR1 (TERNOP, tbx4, 0, v8qi) + VAR1 (TERNOP, tbx4, 0, v16qi) + + /* Implemented by aarch64_qtbx3. */ + VAR1 (TERNOP, qtbx3, 0, v8qi) + VAR1 (TERNOP, qtbx3, 0, v16qi) + + /* Implemented by aarch64_qtbx4. */ + VAR1 (TERNOP, qtbx4, 0, v8qi) + VAR1 (TERNOP, qtbx4, 0, v16qi) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 55974e61118..0fb81342128 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4816,24 +4816,70 @@ [(set_attr "type" "neon_tbl2_q")] ) -(define_insn "aarch64_tbl3v8qi" - [(set (match_operand:V8QI 0 "register_operand" "=w") - (unspec:V8QI [(match_operand:OI 1 "register_operand" "w") - (match_operand:V8QI 2 "register_operand" "w")] +(define_insn "aarch64_tbl3" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:OI 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] UNSPEC_TBL))] "TARGET_SIMD" - "tbl\\t%S0.8b, {%S1.16b - %T1.16b}, %S2.8b" + "tbl\\t%S0., {%S1.16b - %T1.16b}, %S2." [(set_attr "type" "neon_tbl3")] ) -(define_insn "aarch64_tbx4v8qi" - [(set (match_operand:V8QI 0 "register_operand" "=w") - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") +(define_insn "aarch64_tbx4" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "0") (match_operand:OI 2 "register_operand" "w") - (match_operand:V8QI 3 "register_operand" "w")] + (match_operand:VB 3 "register_operand" "w")] + UNSPEC_TBX))] + "TARGET_SIMD" + "tbx\\t%S0., {%S2.16b - %T2.16b}, %S3." + [(set_attr "type" "neon_tbl4")] +) + +;; Three source registers. + +(define_insn "aarch64_qtbl3" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:CI 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%S0., {%S1.16b - %U1.16b}, %S2." + [(set_attr "type" "neon_tbl3")] +) + +(define_insn "aarch64_qtbx3" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "0") + (match_operand:CI 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w")] + UNSPEC_TBX))] + "TARGET_SIMD" + "tbx\\t%S0., {%S2.16b - %U2.16b}, %S3." + [(set_attr "type" "neon_tbl3")] +) + +;; Four source registers. + +(define_insn "aarch64_qtbl4" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:XI 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%S0., {%S1.16b - %V1.16b}, %S2." + [(set_attr "type" "neon_tbl4")] +) + +(define_insn "aarch64_qtbx4" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "0") + (match_operand:XI 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w")] UNSPEC_TBX))] "TARGET_SIMD" - "tbx\\t%S0.8b, {%S2.16b - %T2.16b}, %S3.8b" + "tbx\\t%S0., {%S2.16b - %V2.16b}, %S3." [(set_attr "type" "neon_tbl4")] ) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 265c2667908..7f69043d8a7 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10622,224 +10622,6 @@ vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) -{ - int8x8_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) -{ - uint8x8_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) -{ - poly8x8_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) -{ - int8x16_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) -{ - uint8x16_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) -{ - poly8x16_t result; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) -{ - int8x8_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) -{ - uint8x8_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) -{ - poly8x8_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) -{ - int8x16_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) -{ - uint8x16_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) -{ - poly8x16_t result; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) -{ - int8x8_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) -{ - uint8x8_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) -{ - poly8x8_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) -{ - int8x16_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) -{ - uint8x16_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) -{ - poly8x16_t result; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"=w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) { @@ -10906,227 +10688,6 @@ vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) -{ - int8x8_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) -{ - uint8x8_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) -{ - poly8x8_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) -{ - int8x16_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) -{ - uint8x16_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) -{ - poly8x16_t result = r; - __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" - "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17"); - return result; -} - - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) -{ - int8x8_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) -{ - uint8x8_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) -{ - poly8x8_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) -{ - int8x16_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) -{ - uint8x16_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) -{ - poly8x16_t result = r; - __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18"); - return result; -} - - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) -{ - int8x8_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) -{ - uint8x8_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) -{ - poly8x8_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) -{ - int8x16_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) -{ - uint8x16_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) -{ - poly8x16_t result = r; - __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" - "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" - :"+w"(result) - :"Q"(tab),"w"(idx) - :"memory", "v16", "v17", "v18", "v19"); - return result; -} - /* V7 legacy table intrinsics. */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -20714,6 +20275,389 @@ vqsubd_u64 (uint64_t __a, uint64_t __b) return __builtin_aarch64_uqsubdi_uuu (__a, __b); } +/* vqtbl2 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); + return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); +} + +/* vqtbl3 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); +} + +/* vqtbl4 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); +} + + +/* vqtbx2 */ +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); + return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, + (int8x8_t)idx); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, + (int8x8_t)idx); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); + return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, + (int8x16_t)idx); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); + return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, + (int8x16_t)idx); +} + +/* vqtbx3 */ +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); + return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, + (int8x8_t)idx); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, + (int8x8_t)idx); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); + return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, + (int8x16_t)idx); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); + return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, + (int8x16_t)idx); +} + +/* vqtbx4 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); + return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, + (int8x8_t)idx); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, + (int8x8_t)idx); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); + return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, + (int8x16_t)idx); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); + return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, + (int8x16_t)idx); +} + /* vrbit */ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9a0d1aa22f8..564b45a9d83 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-11-06 Christophe Lyon + + * gcc.target/aarch64/advsimd-intrinsics/vqtbX.c: New test. + 2015-11-06 David Malcolm * gcc.dg/plugin/diagnostic-test-show-locus-bw.c: New file. @@ -17,7 +21,7 @@ * gfortran.dg/goacc/combined_loop.f90: XFAIL. -2015-11-07 Jan Hubicka +2015-11-06 Jan Hubicka PR ipa/68057 PR ipa/68220 -- 2.30.2