+2019-11-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/arm/aout.h (REGISTER_NAMES): Add apsrge.
+ * config/arm/arm.md (APSRGE_REGNUM): Define.
+ (arm_<simd32_op>): New define_insn.
+ (arm_sel): Likewise.
+ * config/arm/arm.h (FIXED_REGISTERS): Add entry for apsrge.
+ (CALL_USED_REGISTERS): Likewise.
+ (REG_ALLOC_ORDER): Likewise.
+ (FIRST_PSEUDO_REGISTER): Update value.
+ (ARM_GE_BITS_READ): Define.
+ * config/arm/arm.c (arm_conditional_register_usage): Clear
+ APSRGE_REGNUM from operand_reg_set.
+ (arm_ge_bits_access): Define.
+ * config/arm/arm-builtins.c (arm_check_builtin_call): Handle
+ ARM_BUILTIN_sel.
+ * config/arm/arm-protos.h (arm_ge_bits_access): Declare prototype.
+ * config/arm/arm-fixed.md (add<mode>3): Convert to define_expand.
+ FAIL if ARM_GE_BITS_READ.
+ (*arm_add<mode>3): New define_insn.
+ (sub<mode>3): Convert to define_expand. FAIL if ARM_GE_BITS_READ.
+ (*arm_sub<mode>3): New define_insn.
+ * config/arm/arm_acle.h (__sel, __sadd8, __ssub8, __uadd8, __usub8,
+ __sadd16, __sasx, __ssax, __ssub16, __uadd16, __uasx, __usax,
+ __usub16): Define.
+ * config/arm/arm_acle_builtins.def: Define builtins for the above.
+ * config/arm/iterators.md (SIMD32_GE): New int_iterator.
+ (simd32_op): Handle the above.
+ * config/arm/unspecs.md (UNSPEC_GE_SET): Define.
+ (UNSPEC_SEL, UNSPEC_SADD8, UNSPEC_SSUB8, UNSPEC_UADD8, UNSPEC_USUB8,
+ UNSPEC_SADD16, UNSPEC_SASX, UNSPEC_SSAX, UNSPEC_SSUB16, UNSPEC_UADD16,
+ UNSPEC_UASX, UNSPEC_USAX, UNSPEC_USUB16): Define.
+
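As a usage sketch (illustrative only; the wrapper name is hypothetical, the
types and intrinsics are the ones this patch adds to arm_acle.h):

  #include <arm_acle.h>

  /* __sadd8 adds each pair of bytes and records per lane in APSR.GE
     whether the result was non-negative; __sel then picks each byte
     of x or y according to those flags.  */
  uint8x4_t
  add_then_select (int8x4_t a, int8x4_t b, uint8x4_t x, uint8x4_t y)
  {
    int8x4_t sum = __sadd8 (a, b);
    (void) sum;   /* only the APSR.GE side effect is consumed here */
    return __sel (x, y);
  }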
2019-11-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/arm.md (arm_smlabb_setq): New define_insn.
"wr8", "wr9", "wr10", "wr11", \
"wr12", "wr13", "wr14", "wr15", \
"wcgr0", "wcgr1", "wcgr2", "wcgr3", \
- "cc", "vfpcc", "sfp", "afp", "apsrq" \
+ "cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge" \
}
#endif
= tree_cons (get_identifier ("acle qbit"), NULL_TREE,
DECL_ATTRIBUTES (cfun->decl));
}
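+  /* A call to __sel reads the GE bits, so record that on the
+     containing function; ARM_GE_BITS_READ then steers the expanders
+     away from GE-clobbering SIMD32 instructions.  */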
+ if (fcode == ARM_BUILTIN_sel)
+ {
+ if (cfun && cfun->decl)
+ DECL_ATTRIBUTES (cfun->decl)
+ = tree_cons (get_identifier ("acle gebits"), NULL_TREE,
+ DECL_ATTRIBUTES (cfun->decl));
+ }
return true;
}
(set_attr "predicable_short_it" "yes,no")
(set_attr "type" "alu_sreg")])
-(define_insn "add<mode>3"
+(define_expand "add<mode>3"
+ [(set (match_operand:ADDSUB 0 "s_register_operand")
+ (plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand")
+ (match_operand:ADDSUB 2 "s_register_operand")))]
+ "TARGET_INT_SIMD"
+ {
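+    /* sadd8/sadd16 overwrite the GE bits; don't emit them in a
+       function that reads the GE bits via __sel.  */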
+ if (ARM_GE_BITS_READ)
+ FAIL;
+ }
+)
+
+(define_insn "*arm_add<mode>3"
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
(plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r")
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
- "TARGET_INT_SIMD"
+ "TARGET_INT_SIMD && !ARM_GE_BITS_READ"
"sadd<qaddsub_suf>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
(set_attr "type" "alu_dsp_reg")])
(set_attr "predicable_short_it" "yes,no")
(set_attr "type" "alu_sreg")])
-(define_insn "sub<mode>3"
+(define_expand "sub<mode>3"
+ [(set (match_operand:ADDSUB 0 "s_register_operand")
+ (minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand")
+ (match_operand:ADDSUB 2 "s_register_operand")))]
+ "TARGET_INT_SIMD"
+ {
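+    /* ssub8/ssub16 overwrite the GE bits; don't emit them in a
+       function that reads the GE bits via __sel.  */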
+ if (ARM_GE_BITS_READ)
+ FAIL;
+ }
+)
+
+(define_insn "*arm_sub<mode>3"
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
(minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r")
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
- "TARGET_INT_SIMD"
+ "TARGET_INT_SIMD && !ARM_GE_BITS_READ"
"ssub<qaddsub_suf>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
(set_attr "type" "alu_dsp_reg")])
extern void arm_emit_speculation_barrier_function (void);
extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
extern bool arm_q_bit_access (void);
+extern bool arm_ge_bits_access (void);
#ifdef RTX_CODE
extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
}
- /* The Q bit is only accessed via special ACLE patterns. */
+ /* The Q and GE bits are only accessed via special ACLE patterns. */
CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
+ CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
return true;
}
+/* Have we recorded an explicit access to the GE bits of PSTATE?  */
+bool
+arm_ge_bits_access (void)
+{
+ if (cfun && cfun->decl)
+ return lookup_attribute ("acle gebits",
+ DECL_ATTRIBUTES (cfun->decl));
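+  /* No function context: conservatively assume the GE bits are read.  */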
+ return true;
+}
+
#if CHECKING_P
namespace selftest {
fp exactly at all times.
apsrq Nor this, it is used to track operations on the Q bit
of APSR by ACLE saturating intrinsics.
+ apsrge Nor this, it is used to track operations on the GE bits
+ of APSR by ACLE SIMD32 intrinsics.
*: See TARGET_CONDITIONAL_REGISTER_USAGE */
1,1,1,1,1,1,1,1, \
1,1,1,1, \
/* Specials. */ \
- 1,1,1,1,1 \
+ 1,1,1,1,1,1 \
}
/* 1 for registers not available across function calls.
1,1,1,1,1,1,1,1, \
1,1,1,1, \
/* Specials. */ \
- 1,1,1,1,1 \
+ 1,1,1,1,1,1 \
}
#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \
&& (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1))
-/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP + 1 APSRQ. */
+/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP
+ + 1 APSRQ + 1 APSRGE. */
/* Intel Wireless MMX Technology registers add 16 + 4 more. */
/* VFP (VFP3) adds 32 (64) + 1 VFPCC. */
-#define FIRST_PSEUDO_REGISTER 105
+#define FIRST_PSEUDO_REGISTER 106
#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO)
/* Registers not for general use. */ \
CC_REGNUM, VFPCC_REGNUM, \
FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, \
- SP_REGNUM, PC_REGNUM, APSRQ_REGNUM \
+ SP_REGNUM, PC_REGNUM, APSRQ_REGNUM, APSRGE_REGNUM \
}
/* Use different register alloc ordering for Thumb. */
#endif
#define ARM_Q_BIT_READ (arm_q_bit_access ())
+#define ARM_GE_BITS_READ (arm_ge_bits_access ())
/* As in the machine_function, a global set of call-via labels, for code
that is in text_section. */
(CC_REGNUM 100) ; Condition code pseudo register
(VFPCC_REGNUM 101) ; VFP Condition code pseudo register
(APSRQ_REGNUM 104) ; Q bit pseudo register
+ (APSRGE_REGNUM 105) ; GE bits pseudo register
]
)
;; 3rd operand to select_dominance_cc_mode
[(set (match_operand:SI 0 "s_register_operand" "=r")
(unspec:SI
[(match_operand:SI 1 "s_register_operand" "r")
- (match_operand:SI 2 "s_register_operand" "r")
- (match_operand:SI 3 "s_register_operand" "r")] UNSPEC_USADA8))]
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "s_register_operand" "r")] UNSPEC_USADA8))]
"TARGET_INT_SIMD"
"usada8%?\\t%0, %1, %2, %3"
[(set_attr "predicable" "yes")
[(set_attr "predicable" "yes")
(set_attr "type" "smlald")])
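+;; SIMD32 parallel add/subtract operations that also set the GE bits
+;; in APSR.  The GE update is modelled as an unspec set of
+;; APSRGE_REGNUM so that arm_sel below has a true dependence on the
+;; setter and cannot be scheduled past it.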
+(define_insn "arm_<simd32_op>"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")] SIMD32_GE))
+ (set (reg:CC APSRGE_REGNUM)
+ (unspec:CC [(reg:CC APSRGE_REGNUM)] UNSPEC_GE_SET))]
+ "TARGET_INT_SIMD"
+ "<simd32_op>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "alu_sreg")])
+
+(define_insn "arm_sel"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")
+ (reg:CC APSRGE_REGNUM)] UNSPEC_SEL))]
+ "TARGET_INT_SIMD"
+ "sel%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "alu_sreg")])
+
(define_expand "extendsfdf2"
[(set (match_operand:DF 0 "s_register_operand")
(float_extend:DF (match_operand:SF 1 "s_register_operand")))]
return __builtin_arm_smlsldx (__a, __b, __c);
}
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sel (uint8x4_t __a, uint8x4_t __b)
+{
+ return __builtin_arm_sel (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sadd8 (int8x4_t __a, int8x4_t __b)
+{
+ return __builtin_arm_sadd8 (__a, __b);
+}
+
+__extension__ extern __inline int8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ssub8 (int8x4_t __a, int8x4_t __b)
+{
+ return __builtin_arm_ssub8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uadd8 (uint8x4_t __a, uint8x4_t __b)
+{
+ return __builtin_arm_uadd8 (__a, __b);
+}
+
+__extension__ extern __inline uint8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usub8 (uint8x4_t __a, uint8x4_t __b)
+{
+ return __builtin_arm_usub8 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sadd16 (int16x2_t __a, int16x2_t __b)
+{
+ return __builtin_arm_sadd16 (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sasx (int16x2_t __a, int16x2_t __b)
+{
+ return __builtin_arm_sasx (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ssax (int16x2_t __a, int16x2_t __b)
+{
+ return __builtin_arm_ssax (__a, __b);
+}
+
+__extension__ extern __inline int16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ssub16 (int16x2_t __a, int16x2_t __b)
+{
+ return __builtin_arm_ssub16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uadd16 (uint16x2_t __a, uint16x2_t __b)
+{
+ return __builtin_arm_uadd16 (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__uasx (uint16x2_t __a, uint16x2_t __b)
+{
+ return __builtin_arm_uasx (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usax (uint16x2_t __a, uint16x2_t __b)
+{
+ return __builtin_arm_usax (__a, __b);
+}
+
+__extension__ extern __inline uint16x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__usub16 (uint16x2_t __a, uint16x2_t __b)
+{
+ return __builtin_arm_usub16 (__a, __b);
+}
+
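A hedged example of the setter/consumer pairing these definitions enable (my
reading of the SSUB8 GE semantics, not part of the patch): SSUB8 sets GE[i]
when byte i of a - b is non-negative, so a following __sel yields a per-byte
signed maximum.

  #include <arm_acle.h>

  int8x4_t
  byte_max (int8x4_t a, int8x4_t b)
  {
    __ssub8 (a, b);   /* GE[i] = (a[i] >= b[i]), signed per byte */
    return (int8x4_t) __sel ((uint8x4_t) a, (uint8x4_t) b);
  }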
#endif
#ifdef __ARM_FEATURE_SAT
VAR1 (TERNOP, smlatt, si)
VAR1 (TERNOP, smlawb, si)
VAR1 (TERNOP, smlawt, si)
+
+VAR1 (BINOP, sadd8, si)
+VAR1 (BINOP, ssub8, si)
+VAR1 (BINOP, sadd16, si)
+VAR1 (BINOP, sasx, si)
+VAR1 (BINOP, ssax, si)
+VAR1 (BINOP, ssub16, si)
+
+VAR1 (UBINOP, uadd8, si)
+VAR1 (UBINOP, usub8, si)
+VAR1 (UBINOP, uadd16, si)
+VAR1 (UBINOP, uasx, si)
+VAR1 (UBINOP, usax, si)
+VAR1 (UBINOP, usub16, si)
+
+VAR1 (UBINOP, sel, si)
(define_int_iterator SMLAWBT [UNSPEC_SMLAWB UNSPEC_SMLAWT])
+(define_int_iterator SIMD32_GE [UNSPEC_SADD8 UNSPEC_SSUB8 UNSPEC_UADD8
+ UNSPEC_USUB8 UNSPEC_SADD16 UNSPEC_SASX
+ UNSPEC_SSAX UNSPEC_SSUB16 UNSPEC_UADD16
+ UNSPEC_UASX UNSPEC_USAX UNSPEC_USUB16])
+
(define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH])
(define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE])
(UNSPEC_SXTAB16 "sxtab16") (UNSPEC_UXTAB16 "uxtab16")
(UNSPEC_USAD8 "usad8") (UNSPEC_SMLALD "smlald")
(UNSPEC_SMLALDX "smlaldx") (UNSPEC_SMLSLD "smlsld")
- (UNSPEC_SMLSLDX "smlsldx")])
+ (UNSPEC_SMLSLDX "smlsldx")(UNSPEC_SADD8 "sadd8")
+ (UNSPEC_UADD8 "uadd8") (UNSPEC_SSUB8 "ssub8")
+ (UNSPEC_USUB8 "usub8") (UNSPEC_SADD16 "sadd16")
+ (UNSPEC_SASX "sasx") (UNSPEC_SSAX "ssax")
+ (UNSPEC_SSUB16 "ssub16") (UNSPEC_UADD16 "uadd16")
+ (UNSPEC_UASX "uasx") (UNSPEC_USAX "usax")
+ (UNSPEC_USUB16 "usub16")])
;; Both kinds of return insn.
(define_code_iterator RETURNS [return simple_return])
UNSPEC_UNALIGNED_STORE ; Same for str/strh.
UNSPEC_PIC_UNIFIED ; Create a common pic addressing form.
UNSPEC_Q_SET ; Represent setting the Q bit.
+ UNSPEC_GE_SET ; Represent setting the GE bits.
UNSPEC_APSR_READ ; Represent reading the APSR.
UNSPEC_LL ; Represent an unpaired load-register-exclusive.
UNSPEC_SMLSLDX ; Represent the SMLSLDX operation.
UNSPEC_SMLAWB ; Represent the SMLAWB operation.
UNSPEC_SMLAWT ; Represent the SMLAWT operation.
+ UNSPEC_SEL ; Represent the SEL operation.
+ UNSPEC_SADD8 ; Represent the SADD8 operation.
+ UNSPEC_SSUB8 ; Represent the SSUB8 operation.
+ UNSPEC_UADD8 ; Represent the UADD8 operation.
+ UNSPEC_USUB8 ; Represent the USUB8 operation.
+ UNSPEC_SADD16 ; Represent the SADD16 operation.
+ UNSPEC_SASX ; Represent the SASX operation.
+ UNSPEC_SSAX ; Represent the SSAX operation.
+ UNSPEC_SSUB16 ; Represent the SSUB16 operation.
+ UNSPEC_UADD16 ; Represent the UADD16 operation.
+ UNSPEC_UASX ; Represent the UASX operation.
+ UNSPEC_USAX ; Represent the USAX operation.
+ UNSPEC_USUB16 ; Represent the USUB16 operation.
])
+2019-11-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * gcc.target/arm/acle/simd32.c: Update test.
+ * gcc.target/arm/acle/simd32_sel.c: New test.
+
2019-11-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/arm/acle/dsp_arith.c: Update test.
}
/* { dg-final { scan-assembler-times "\tsmlsldx\t...?, ...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_sadd8 (int8x4_t a, int8x4_t b)
+{
+ return __sadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsadd8\t...?, ...?, ...?" 1 } } */
+
+int8x4_t
+test_ssub8 (int8x4_t a, int8x4_t b)
+{
+ return __ssub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tssub8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_uadd8 (uint8x4_t a, uint8x4_t b)
+{
+ return __uadd8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuadd8\t...?, ...?, ...?" 1 } } */
+
+uint8x4_t
+test_usub8 (uint8x4_t a, uint8x4_t b)
+{
+ return __usub8 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusub8\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_sadd16 (int16x2_t a, int16x2_t b)
+{
+ return __sadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsadd8\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_sasx (int16x2_t a, int16x2_t b)
+{
+ return __sasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsasx\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_ssax (int16x2_t a, int16x2_t b)
+{
+ return __ssax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tssax\t...?, ...?, ...?" 1 } } */
+
+int16x2_t
+test_ssub16 (int16x2_t a, int16x2_t b)
+{
+ return __ssub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tssub16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uadd16 (uint16x2_t a, uint16x2_t b)
+{
+ return __uadd16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuadd16\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_uasx (uint16x2_t a, uint16x2_t b)
+{
+ return __uasx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tuasx\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_usax (uint16x2_t a, uint16x2_t b)
+{
+ return __usax (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusax\t...?, ...?, ...?" 1 } } */
+
+uint16x2_t
+test_usub16 (uint16x2_t a, uint16x2_t b)
+{
+ return __usub16 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tusub16\t...?, ...?, ...?" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_simd32_ok } */
+/* { dg-add-options arm_simd32 } */
+
+#include <arm_acle.h>
+
+int8x4_t
+test_sel (int8x4_t a, int8x4_t b, uint8x4_t c, uint8x4_t d)
+{
+ int8x4_t res1 = __sadd8 (a, b);
+ return __sel (c, d);
+}
+
+/* { dg-final { scan-assembler-times "sadd8\t...?, ...?, ...?" 1 } } */
+/* { dg-final { scan-assembler-times "sel\t...?, ...?, ...?" 1 } } */