From: Dimitar Dimitrov Date: Sun, 1 Nov 2020 13:09:27 +0000 (+0200) Subject: pru: Add builtins for HALT and LMBD X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5ace1776b88d4b0fc371414d0b3983015e22fead;p=gcc.git pru: Add builtins for HALT and LMBD Add builtins for HALT and LMBD, per Texas Instruments document SPRUHV7C. Use the new LMBD pattern to define an expand for clz. Binutils [1] and sim [2] support for LMBD instruction are merged now. [1] https://sourceware.org/pipermail/binutils/2020-October/113901.html [2] https://sourceware.org/pipermail/gdb-patches/2020-November/173141.html gcc/ChangeLog: * config/pru/alu-zext.md: Add lmbd patterns for zero_extend variants. * config/pru/pru.c (enum pru_builtin): Add HALT and LMBD. (pru_init_builtins): Ditto. (pru_builtin_decl): Ditto. (pru_expand_builtin): Ditto. * config/pru/pru.h (CLZ_DEFINED_VALUE_AT_ZERO): Define PRU value for CLZ with zero value parameter. * config/pru/pru.md: Add halt, lmbd and clz patterns. * doc/extend.texi: Document PRU builtins. gcc/testsuite/ChangeLog: * gcc.target/pru/halt.c: New test. * gcc.target/pru/lmbd.c: New test. --- diff --git a/gcc/config/pru/alu-zext.md b/gcc/config/pru/alu-zext.md index 65916c70d65..35a6dbdda79 100644 --- a/gcc/config/pru/alu-zext.md +++ b/gcc/config/pru/alu-zext.md @@ -37,6 +37,10 @@ (define_subst_attr "alu3_zext_op2" "alu3_zext_op2_subst" "_z2" "_noz2") (define_subst_attr "alu3_zext" "alu3_zext_subst" "_z" "_noz") +(define_subst_attr "lmbd_zext_op1" "lmbd_zext_op1_subst" "_z1" "_noz1") +(define_subst_attr "lmbd_zext_op2" "lmbd_zext_op2_subst" "_z2" "_noz2") +(define_subst_attr "lmbd_zext" "lmbd_zext_subst" "_z" "_noz") + (define_subst_attr "bitalu_zext" "bitalu_zext_subst" "_z" "_noz") (define_code_iterator ALUOP3 [plus minus and ior xor umin umax ashift lshiftrt]) @@ -72,6 +76,19 @@ [(set_attr "type" "alu")]) +;; Left Most Bit Detect instruction. 
+(define_insn "pru_lmbd_impl_" + [(set (match_operand:EQD 0 "register_operand" "=r") + (unspec:EQD + [(zero_extend:EQD + (match_operand:EQS0 1 "register_operand" "r")) + (zero_extend:EQD + (match_operand:EQS1 2 "reg_or_ubyte_operand" "r"))] + UNSPEC_LMBD))] + "" + "lmbd\t%0, %1, %2" + [(set_attr "type" "alu")]) + (define_insn "neg_impl_" [(set (match_operand:EQD 0 "register_operand" "=r") (neg:EQD @@ -179,3 +196,37 @@ [(set (match_dup 0) (ALUOP3:EQD (zero_extend:EQD (match_dup 1)) (match_dup 2)))]) + + +(define_subst "lmbd_zext_subst" + [(set (match_operand:EQD 0) + (unspec:EQD [(zero_extend:EQD (match_operand:EQD 1)) + (zero_extend:EQD (match_operand:EQD 2))] + UNSPEC_LMBD))] + "" + [(set (match_dup 0) + (unspec:EQD [(match_dup 1) + (match_dup 2)] + UNSPEC_LMBD))]) + +(define_subst "lmbd_zext_op1_subst" + [(set (match_operand:EQD 0) + (unspec:EQD [(zero_extend:EQD (match_operand:EQD 1)) + (zero_extend:EQD (match_operand:EQS1 2))] + UNSPEC_LMBD))] + "" + [(set (match_dup 0) + (unspec:EQD [(match_dup 1) + (zero_extend:EQD (match_dup 2))] + UNSPEC_LMBD))]) + +(define_subst "lmbd_zext_op2_subst" + [(set (match_operand:EQD 0) + (unspec:EQD [(zero_extend:EQD (match_operand:EQD 1)) + (zero_extend:EQD (match_operand:EQD 2))] + UNSPEC_LMBD))] + "" + [(set (match_dup 0) + (unspec:EQD [(zero_extend:EQD (match_dup 1)) + (match_dup 2)] + UNSPEC_LMBD))]) diff --git a/gcc/config/pru/pru.c b/gcc/config/pru/pru.c index 39104e5f9cd..65ad6878a12 100644 --- a/gcc/config/pru/pru.c +++ b/gcc/config/pru/pru.c @@ -2705,6 +2705,8 @@ pru_reorg (void) enum pru_builtin { PRU_BUILTIN_DELAY_CYCLES, + PRU_BUILTIN_HALT, + PRU_BUILTIN_LMBD, PRU_BUILTIN_max }; @@ -2719,11 +2721,31 @@ pru_init_builtins (void) = build_function_type_list (void_type_node, long_long_integer_type_node, NULL); + tree uint_ftype_uint_uint + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL); + + tree void_ftype_void + = build_function_type_list (void_type_node, + 
void_type_node, + NULL); pru_builtins[PRU_BUILTIN_DELAY_CYCLES] = add_builtin_function ("__delay_cycles", void_ftype_longlong, PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL, NULL_TREE); + + pru_builtins[PRU_BUILTIN_HALT] + = add_builtin_function ("__halt", void_ftype_void, + PRU_BUILTIN_HALT, BUILT_IN_MD, NULL, + NULL_TREE); + + pru_builtins[PRU_BUILTIN_LMBD] + = add_builtin_function ("__lmbd", uint_ftype_uint_uint, + PRU_BUILTIN_LMBD, BUILT_IN_MD, NULL, + NULL_TREE); } /* Implement TARGET_BUILTIN_DECL. */ @@ -2734,6 +2756,8 @@ pru_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) switch (code) { case PRU_BUILTIN_DELAY_CYCLES: + case PRU_BUILTIN_HALT: + case PRU_BUILTIN_LMBD: return pru_builtins[code]; default: return error_mark_node; @@ -2806,19 +2830,45 @@ pru_expand_delay_cycles (rtx arg) IGNORE is nonzero if the value is to be ignored. */ static rtx -pru_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, +pru_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - machine_mode mode ATTRIBUTE_UNUSED, + machine_mode mode, int ignore ATTRIBUTE_UNUSED) { tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); - rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0)); - if (fcode == PRU_BUILTIN_DELAY_CYCLES) - return pru_expand_delay_cycles (arg1); + switch (fcode) + { + case PRU_BUILTIN_DELAY_CYCLES: + { + rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0)); + return pru_expand_delay_cycles (arg1); + } + break; + case PRU_BUILTIN_HALT: + { + emit_insn (gen_pru_halt ()); + return NULL_RTX; + } + break; + case PRU_BUILTIN_LMBD: + { + rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0)); + rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1)); + + if (target == NULL_RTX || GET_MODE (target) != mode) + { + target = gen_reg_rtx (mode); + } - internal_error ("bad builtin code"); + emit_insn (gen_pru_lmbd (mode, target, arg1, arg2)); + return target; + } + break; + default: + internal_error ("bad builtin 
code"); + } return NULL_RTX; } diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h index 314e877a5f9..7f217fe2045 100644 --- a/gcc/config/pru/pru.h +++ b/gcc/config/pru/pru.h @@ -562,6 +562,9 @@ do { \ #define CASE_VECTOR_MODE Pmode +/* See definition of clz pattern for rationale of value -1. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2) + /* Jumps are cheap on PRU. */ #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index 2f1bc21f025..125444cf47b 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -51,6 +51,10 @@ ;; Enumeration of UNSPECs. +(define_c_enum "unspec" [ + UNSPEC_LMBD +]) + (define_c_enum "unspecv" [ UNSPECV_DELAY_CYCLES_START UNSPECV_DELAY_CYCLES_END @@ -61,6 +65,8 @@ UNSPECV_LOOP_BEGIN UNSPECV_LOOP_END + UNSPECV_HALT + UNSPECV_BLOCKAGE ]) @@ -1020,3 +1026,37 @@ "" "nop\\t# Loop end guard" [(set_attr "type" "alu")]) + +;; HALT instruction. +(define_insn "pru_halt" + [(unspec_volatile [(const_int 0)] UNSPECV_HALT)] + "" + "halt" + [(set_attr "type" "control")]) + +;; Count Leading Zeros implemented using LMBD. +;; LMBD returns 32 if bit value is not present, and we subtract it from 31 to get CLZ. +;; Hence we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO. +(define_expand "clz2" + [(set (match_operand:QISI 0 "register_operand") + (clz:QISI (match_operand:QISI 1 "register_operand")))] + "" +{ + rtx dst = operands[0]; + rtx src = operands[1]; + rtx tmpval = gen_reg_rtx (mode); + + emit_insn (gen_pru_lmbd (mode, tmpval, src, const1_rtx)); + emit_insn (gen_sub3_insn (dst, GEN_INT (31), tmpval)); + DONE; +}) + +;; Left Most Bit Detect operation, which maps to a single instruction.
+(define_expand "@pru_lmbd" + [(set (match_operand:QISI 0 "register_operand") + (unspec:QISI + [(match_operand:QISI 1 "register_operand") + (match_operand:QISI 2 "reg_or_ubyte_operand")] + UNSPEC_LMBD))] + "" + "") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index c084dd15367..cdf9108cd5b 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -14154,6 +14154,7 @@ instructions, but allow the compiler to schedule those calls. * PowerPC Hardware Transactional Memory Built-in Functions:: * PowerPC Atomic Memory Operation Functions:: * PowerPC Matrix-Multiply Assist Built-in Functions:: +* PRU Built-in Functions:: * RISC-V Built-in Functions:: * RX Built-in Functions:: * S/390 System z Built-in Functions:: @@ -21934,6 +21935,33 @@ vec_t __builtin_vsx_xvcvspbf16 (vec_t); vec_t __builtin_vsx_xvcvbf16spn (vec_t); @end smallexample +@node PRU Built-in Functions +@subsection PRU Built-in Functions + +GCC provides a couple of special built-in functions to aid in utilizing +special PRU instructions. + +The built-in functions supported are: + +@table @code +@item __delay_cycles (long long @var{cycles}) +This inserts an instruction sequence that takes exactly @var{cycles} +cycles (between 0 and 0xffffffff) to complete. The inserted sequence +may use jumps, loops, or no-ops, and does not interfere with any other +instructions. Note that @var{cycles} must be a compile-time constant +integer - that is, you must pass a number, not a variable that may be +optimized to a constant later. The number of cycles delayed by this +builtin is exact. + +@item __halt (void) +This inserts a HALT instruction to stop processor execution. + +@item unsigned int __lmbd (unsigned int @var{wordval}, unsigned int @var{bitval}) +This inserts an LMBD instruction to calculate the position of the left-most bit with value +@var{bitval} in value @var{wordval}. Only the least significant bit +of @var{bitval} is taken into account.
+@end table + @node RISC-V Built-in Functions @subsection RISC-V Built-in Functions diff --git a/gcc/testsuite/gcc.target/pru/halt.c b/gcc/testsuite/gcc.target/pru/halt.c new file mode 100644 index 00000000000..8aed576fe6c --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/halt.c @@ -0,0 +1,9 @@ +/* Test HALT builtin. */ + +void +test_halt (void) +{ + /* { dg-final { scan-assembler "halt" } } */ + __halt(); +} + diff --git a/gcc/testsuite/gcc.target/pru/lmbd.c b/gcc/testsuite/gcc.target/pru/lmbd.c new file mode 100644 index 00000000000..bfe4beb0c4d --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/lmbd.c @@ -0,0 +1,14 @@ +/* Test LMBD builtin. */ + +/* { dg-options "-O1" } */ + +/* -O1 in the options is significant. Without it zero_extend + operation may not be optimized. */ + +unsigned int +test_lmbd (unsigned char a, unsigned short b) +{ + /* { dg-final { scan-assembler "lmbd\\tr14, r14.w1, r14.b0" } } */ + return __lmbd(b, a); +} +