From: Robin Dapp Date: Mon, 8 Jul 2019 14:40:48 +0000 (+0000) Subject: S/390: Rework shift count handling. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e2839e47894f0b4917ddbd59013071e98f525336;p=gcc.git S/390: Rework shift count handling. Add s390_valid_shift_count to determine the validity of a shift-count operand. This is used to replace increasingly complex substitutions that should have allowed address-style shift-count handling, an and mask as well as no-op subregs on the operand. gcc/ChangeLog: 2019-07-08 Robin Dapp * config/s390/constraints.md: Add new jsc constraint. * config/s390/predicates.md: New predicates. * config/s390/s390-protos.h (s390_valid_shift_count): New function. * config/s390/s390.c (s390_valid_shift_count): New function. (print_shift_count_operand): Use s390_valid_shift_count. (print_operand): Likewise. * config/s390/s390.md: Use new predicate. * config/s390/subst.md: Remove addr_style_op and masked_op substs. * config/s390/vector.md: Use new predicate. 2019-07-08 Robin Dapp * gcc.target/s390/combine-rotate-modulo.c: New test. * gcc.target/s390/combine-shift-rotate-add-mod.c: New test. * gcc.target/s390/vector/combine-shift-vec.c: New test. From-SVN: r273236 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a912a2e8e1c..c4f0503365a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2019-07-08 Robin Dapp + + * config/s390/constraints.md: Add new jsc constraint. + * config/s390/predicates.md: New predicates. + * config/s390/s390-protos.h (s390_valid_shift_count): New function. + * config/s390/s390.c (s390_valid_shift_count): New function. + (print_shift_count_operand): Use s390_valid_shift_count. + (print_operand): Likewise. + * config/s390/s390.md: Use new predicate. + * config/s390/subst.md: Remove addr_style_op and masked_op substs. + * config/s390/vector.md: Use new predicate. + 2019-07-08 Joern Rennecke Avoid clash with system header declaration. 
diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md index 4055cbc7c68..45d41ae8bf8 100644 --- a/gcc/config/s390/constraints.md +++ b/gcc/config/s390/constraints.md @@ -204,6 +204,18 @@ (match_test "s390_decompose_addrstyle_without_index (op, NULL, NULL)" )) +;; Shift count operands are not necessarily legitimate addresses +;; but the predicate shift_count_operand will only allow +;; proper operands. If reload/lra need to change e.g. a spilled register +;; they can still do so via the special handling of address constraints. +;; To avoid further reloading (caused by a non-matching constraint) we +;; always return true here as the predicate's checks are already sufficient. + +(define_address_constraint "jsc" + "Address style operand used as shift count." + (match_test "true" )) + + ;; N -- Multiple letter constraint followed by 4 parameter letters. ;; 0..9,x: number of the part counting from most to least significant ;; S,H,Q: mode of the part diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 92c602e4add..4d2f8b25d83 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -556,3 +556,32 @@ { return memory_operand (op, mode) && !contains_symbol_ref_p (op); }) + +;; Check for a valid shift count operand with an implicit +;; shift truncation mask of 63. + +(define_predicate "shift_count_operand" + (and (match_code "reg, subreg, and, plus, const_int") + (match_test "CONST_INT_P (op) || GET_MODE (op) == E_QImode")) +{ + return s390_valid_shift_count (op, 63); +} +) + +;; This is used as operand predicate. As we do not know +;; the mode of the first operand here and the shift truncation +;; mask depends on the mode, we cannot check the mask. +;; This is supposed to happen in the insn condition which +;; calls s390_valid_shift_count with the proper mode size. 
+;; We need two separate predicates for non-vector and vector +;; shifts since the (less restrictive) insn condition is checked +;; after the more restrictive operand predicate which will +;; disallow the operand before we can check the condition. + +(define_predicate "shift_count_operand_vec" + (and (match_code "reg, subreg, and, plus, const_int") + (match_test "CONST_INT_P (op) || GET_MODE (op) == E_QImode")) +{ + return s390_valid_shift_count (op, 0); +} +) diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index b162b26b344..ae70b2fee18 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -141,6 +141,7 @@ extern void s390_emit_tpf_eh_return (rtx); extern bool s390_legitimate_address_without_index_p (rtx); extern bool s390_decompose_addrstyle_without_index (rtx, rtx *, HOST_WIDE_INT *); +extern bool s390_valid_shift_count (rtx op, HOST_WIDE_INT required_mask = 63); extern int s390_branch_condition_mask (rtx); extern int s390_compare_and_branch_condition_mask (rtx); extern bool s390_extzv_shift_ok (int, int, unsigned HOST_WIDE_INT); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 5ec26a0592b..324d9d23210 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -3131,6 +3131,49 @@ s390_decompose_addrstyle_without_index (rtx op, rtx *base, return true; } +/* Check that OP is a valid shift count operand. + It should be of the following structure: + (subreg (and (plus (reg imm_op)) 2^k-1) 7) + where subreg, and and plus are optional. + + If IMPLICIT_MASK is > 0 and OP contains and + (AND ... immediate) + it is checked whether IMPLICIT_MASK and the immediate match. + Otherwise, no checking is performed. + */ +bool +s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask) +{ + /* Strip subreg. */ + while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op)) + op = XEXP (op, 0); + + /* Check for an and with proper constant. 
*/ + if (GET_CODE (op) == AND) + { + rtx op1 = XEXP (op, 0); + rtx imm = XEXP (op, 1); + + if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1)) + op1 = XEXP (op1, 0); + + if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS)) + return false; + + if (!immediate_operand (imm, GET_MODE (imm))) + return false; + + HOST_WIDE_INT val = INTVAL (imm); + if (implicit_mask > 0 + && (val & implicit_mask) != implicit_mask) + return false; + + op = op1; + } + + /* Check the rest. */ + return s390_decompose_addrstyle_without_index (op, NULL, NULL); +} /* Return true if CODE is a valid address without index. */ @@ -7448,6 +7491,27 @@ print_addrstyle_operand (FILE *file, rtx op) fprintf (file, "(%s)", reg_names[REGNO (base)]); } +/* Print the shift count operand OP to FILE. + OP is an address-style operand in a form which + s390_valid_shift_count permits. Subregs and no-op + and-masking of the operand are stripped. */ + +static void +print_shift_count_operand (FILE *file, rtx op) +{ + /* No checking of the and mask required here. */ + if (!s390_valid_shift_count (op, 0)) + gcc_unreachable (); + + while (op && GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (GET_CODE (op) == AND) + op = XEXP (op, 0); + + print_addrstyle_operand (file, op); +} + /* Assigns the number of NOP halfwords to be emitted before and after the function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL. If hotpatching is disabled for the function, the values are set to zero. 
@@ -7912,7 +7976,7 @@ print_operand (FILE *file, rtx x, int code) break; case 'Y': - print_addrstyle_operand (file, x); + print_shift_count_operand (file, x); return; } diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 79451d7aef3..94a7340401d 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -8937,17 +8937,17 @@ (define_expand "rotl3" [(set (match_operand:GPR 0 "register_operand" "") (rotate:GPR (match_operand:GPR 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")))] + (match_operand:QI 2 "shift_count_operand" "")))] "" "") ; rll, rllg -(define_insn "*rotl3" +(define_insn "*rotl3" [(set (match_operand:GPR 0 "register_operand" "=d") (rotate:GPR (match_operand:GPR 1 "register_operand" "d") - (match_operand:SI 2 "nonmemory_operand" "an")))] + (match_operand:QI 2 "shift_count_operand" "jsc")))] "" - "rll\t%0,%1," + "rll\t%0,%1,%Y2" [(set_attr "op_type" "RSE") (set_attr "atype" "reg") (set_attr "z10prop" "z10_super_E1")]) @@ -8964,18 +8964,18 @@ (define_expand "3" [(set (match_operand:DSI 0 "register_operand" "") (SHIFT:DSI (match_operand:DSI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")))] + (match_operand:QI 2 "shift_count_operand" "")))] "" "") ; ESA 64 bit register pair shift with reg or imm shift count ; sldl, srdl -(define_insn "*di3_31" +(define_insn "*di3_31" [(set (match_operand:DI 0 "register_operand" "=d") (SHIFT:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "an")))] + (match_operand:QI 2 "shift_count_operand" "jsc")))] "!TARGET_ZARCH" - "sdl\t%0," + "sdl\t%0,%Y2" [(set_attr "op_type" "RS") (set_attr "atype" "reg") (set_attr "z196prop" "z196_cracked")]) @@ -8983,19 +8983,20 @@ ; 64 bit register shift with reg or imm shift count ; sll, srl, sllg, srlg, sllk, srlk -(define_insn "*3" +(define_insn "*3" [(set (match_operand:GPR 0 "register_operand" "=d, d") (SHIFT:GPR (match_operand:GPR 1 "register_operand" ", d") - (match_operand:SI 2 
"nonmemory_operand" "an,an")))] + (match_operand:QI 2 "shift_count_operand" "jsc,jsc")))] "" "@ - sl\t%0,<1> - sl\t%0,%1," + sl\t%0,<1>%Y2 + sl\t%0,%1,%Y2" [(set_attr "op_type" "RS,RSY") (set_attr "atype" "reg,reg") (set_attr "cpu_facility" "*,z196") (set_attr "z10prop" "z10_super_E1,*")]) + ; ; ashr(di|si)3 instruction pattern(s). ; Arithmetic right shifts @@ -9004,7 +9005,7 @@ [(parallel [(set (match_operand:DSI 0 "register_operand" "") (ashiftrt:DSI (match_operand:DSI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" ""))) + (match_operand:QI 2 "shift_count_operand" ""))) (clobber (reg:CC CC_REGNUM))])] "" "") @@ -9013,29 +9014,29 @@ ; number of 2 in the subst pattern for the (clobber (match_scratch... ; The right fix should be to support match_scratch in the output ; pattern of a define_subst. -(define_insn "*ashrdi3_31" +(define_insn "*ashrdi3_31" [(set (match_operand:DI 0 "register_operand" "=d, d") (ashiftrt:DI (match_operand:DI 1 "register_operand" "0, 0") - (match_operand:SI 2 "nonmemory_operand" "an,an"))) + (match_operand:QI 2 "shift_count_operand" "jsc,jsc"))) (clobber (reg:CC CC_REGNUM))] "!TARGET_ZARCH" "@ - srda\t%0, - srda\t%0," + srda\t%0,%Y2 + srda\t%0,%Y2" [(set_attr "op_type" "RS") (set_attr "atype" "reg")]) ; sra, srag -(define_insn "*ashr3" +(define_insn "*ashr3" [(set (match_operand:GPR 0 "register_operand" "=d, d") (ashiftrt:GPR (match_operand:GPR 1 "register_operand" ", d") - (match_operand:SI 2 "nonmemory_operand" "an,an"))) + (match_operand:QI 2 "shift_count_operand" "jsc,jsc"))) (clobber (reg:CC CC_REGNUM))] "" "@ - sra\t%0,<1> - sra\t%0,%1," + sra\t%0,<1>%Y2 + sra\t%0,%1,%Y2" [(set_attr "op_type" "RS,RSY") (set_attr "atype" "reg") (set_attr "cpu_facility" "*,z196") diff --git a/gcc/config/s390/subst.md b/gcc/config/s390/subst.md index 0518ed20e77..9c0c87a13be 100644 --- a/gcc/config/s390/subst.md +++ b/gcc/config/s390/subst.md @@ -22,78 +22,6 @@ (define_code_iterator SUBST [rotate ashift lshiftrt ashiftrt]) 
(define_mode_iterator DSI_VI [SI DI V2QI V4QI V8QI V16QI V2HI V4HI V8HI V2SI V4SI V2DI]) -; This expands an register/immediate operand to a register+immediate -; operand to draw advantage of the address style operand format -; providing a addition for free. -(define_subst "addr_style_op_subst" - [(set (match_operand:DSI_VI 0 "" "") - (SUBST:DSI_VI (match_operand:DSI_VI 1 "" "") - (match_operand:SI 2 "" "")))] - "" - [(set (match_dup 0) - (SUBST:DSI_VI (match_dup 1) - (plus:SI (match_operand:SI 2 "register_operand" "a") - (match_operand 3 "const_int_operand" "n"))))]) - -; Use this in the insn name. -(define_subst_attr "addr_style_op" "addr_style_op_subst" "" "_plus") - -; In the subst pattern the additional const int operand will be used -; as displacement. In the normal version %Y is able to print the -; operand either as displacement or as base register. -(define_subst_attr "addr_style_op_ops" "addr_style_op_subst" "%Y2" "%Y3(%2)") - - -; This substitution adds an explicit AND operation to the second -; operand. This way previous operations on the now masked out bits -; might get optimized away. -(define_subst "masked_op_subst" - [(set (match_operand:DSI 0 "" "") - (SUBST:DSI (match_operand:DSI 1 "" "") - (match_operand:SI 2 "" "")))] - "" - [(set (match_dup 0) - (SUBST:DSI (match_dup 1) - (and:SI (match_dup 2) - (match_operand:SI 3 "const_int_6bitset_operand" "jm6"))))]) - -; Use this in the insn name. -(define_subst_attr "masked_op" "masked_op_subst" "" "_and") - - - -; This is like the addr_style_op substitution above but with a CC clobber. 
-(define_subst "addr_style_op_cc_subst" - [(set (match_operand:DSI 0 "" "") - (ashiftrt:DSI (match_operand:DSI 1 "" "") - (match_operand:SI 2 "" ""))) - (clobber (reg:CC CC_REGNUM))] - "REG_P (operands[2])" - [(set (match_dup 0) - (ashiftrt:DSI (match_dup 1) - (plus:SI (match_dup 2) - (match_operand 3 "const_int_operand" "n")))) - (clobber (reg:CC CC_REGNUM))]) - -(define_subst_attr "addr_style_op_cc" "addr_style_op_cc_subst" "" "_plus") -(define_subst_attr "addr_style_op_cc_ops" "addr_style_op_cc_subst" "%Y2" "%Y3(%2)") - - -; This is like the masked_op substitution but with a CC clobber. -(define_subst "masked_op_cc_subst" - [(set (match_operand:DSI 0 "" "") - (ashiftrt:DSI (match_operand:DSI 1 "" "") - (match_operand:SI 2 "" ""))) - (clobber (reg:CC CC_REGNUM))] - "" - [(set (match_dup 0) - (ashiftrt:DSI (match_dup 1) - (and:SI (match_dup 2) - (match_operand:SI 3 "const_int_6bitset_operand" "")))) - (clobber (reg:CC CC_REGNUM))]) -(define_subst_attr "masked_op_cc" "masked_op_cc_subst" "" "_and") - - ; This adds an explicit CC reg set to an operation while keeping the ; set for the operation result as well. 
(define_subst "setcc_subst" diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 140ef474a92..0702e1de835 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -969,21 +969,25 @@ (define_expand "3" [(set (match_operand:VI 0 "register_operand" "") (VEC_SHIFTS:VI (match_operand:VI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")))] + (match_operand:QI 2 "shift_count_operand" "")))] "TARGET_VX") ; verllb, verllh, verllf, verllg ; veslb, veslh, veslf, veslg ; vesrab, vesrah, vesraf, vesrag ; vesrlb, vesrlh, vesrlf, vesrlg -(define_insn "*3" +(define_insn "*3" [(set (match_operand:VI 0 "register_operand" "=v") (VEC_SHIFTS:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:SI 2 "nonmemory_operand" "an")))] - "TARGET_VX" - "\t%v0,%v1," + (match_operand:QI 2 "shift_count_operand_vec" "jsc")))] + "TARGET_VX + && s390_valid_shift_count (operands[2], + GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1) + " + "\t%v0,%v1,%Y2" [(set_attr "op_type" "VRS")]) + ; Shift each element by corresponding vector element ; veslvb, veslvh, veslvf, veslvg diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dbe70b9a98e..b1c6ea25ced 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-07-08 Robin Dapp + + * gcc.target/s390/combine-rotate-modulo.c: New test. + * gcc.target/s390/combine-shift-rotate-add-mod.c: New test. + * gcc.target/s390/vector/combine-shift-vec.c: New test. + 2019-07-08 Richard Biener PR tree-optimization/91108 diff --git a/gcc/testsuite/gcc.target/s390/combine-rotate-modulo.c b/gcc/testsuite/gcc.target/s390/combine-rotate-modulo.c new file mode 100644 index 00000000000..6cbbb552cd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/combine-rotate-modulo.c @@ -0,0 +1,36 @@ +/* Check that we do not emit & 63 via risbg for rotating. 
*/ + +/* { dg-options "-O1 -m64" } */ + +/* { dg-final { scan-assembler-not "risbg" } } */ +/* { dg-final { scan-assembler-not "nilf" } } */ + +long shiftl (long in, unsigned long sh) +{ + sh %= 64; + return (in << sh); +} + +unsigned long shiftll (unsigned long in, unsigned long sh) +{ + sh %= 64; + return (in << sh); +} + +long shiftr (long in, unsigned long sh) +{ + sh %= 64; + return (in >> sh); +} + +unsigned long shiftrl (unsigned long in, unsigned long sh) +{ + sh %= 64; + return (in >> sh); +} + +unsigned long rotlmod (unsigned long in, unsigned long sh) +{ + sh %= 64; + return (in << sh) | (in >> (64 - sh)); +} diff --git a/gcc/testsuite/gcc.target/s390/combine-shift-rotate-add-mod.c b/gcc/testsuite/gcc.target/s390/combine-shift-rotate-add-mod.c new file mode 100644 index 00000000000..dc63bfa1481 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/combine-shift-rotate-add-mod.c @@ -0,0 +1,29 @@ +/* Check shift via address-style displacement. There should not be any + and operations that the instructions perform implicitly anyway.*/ + +/* { dg-options "-O1 -m64" } */ + +/* { dg-final { scan-assembler-not "risbg\t%r.+,.*63" } } */ +/* { dg-final { scan-assembler "rllg\t%r.+,3.%r.+" } } */ +/* { dg-final { scan-assembler "sllg\t%r.+,2.%r.+" } } */ + +unsigned long rotlmodp (unsigned long in, unsigned long sh) +{ + sh = (sh + 3) % 64; + return (in << sh) | (in >> (64 - sh)); +} + +unsigned long shiftmodp (unsigned long in, unsigned long sh) +{ + sh = (sh + 2) % 64; + return (in << sh); +} + +/* We expect a displacement of 1 here since combine simplifies + modulo 256 when substituting into a QImode subreg. 
*/ +/* { dg-final { scan-assembler "sllg\t%r.+,1.%r.+" } } */ +unsigned long shiftp (unsigned long in, unsigned long sh) +{ + sh = sh + 4097; + return (in << sh); +} diff --git a/gcc/testsuite/gcc.target/s390/vector/combine-shift-vec.c b/gcc/testsuite/gcc.target/s390/vector/combine-shift-vec.c new file mode 100644 index 00000000000..1ac9496cf9f --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/combine-shift-vec.c @@ -0,0 +1,107 @@ +/* Check vector shift patterns. */ + +/* { dg-options "-march=z13 -O1 -m64" } */ + +/* { dg-final { scan-assembler "veslb\t%v.+,%v.+,2.%r2" } } */ +/* { dg-final { scan-assembler "veslh\t%v.+,%v.+,3.%r2" } } */ +/* { dg-final { scan-assembler "veslf\t%v.+,%v.+,4.%r2" } } */ +/* { dg-final { scan-assembler "veslg\t%v.+,%v.+,5.%r2" } } */ +/* { dg-final { scan-assembler "vesrab\t%v.+,%v.+,2.%r2" } } */ +/* { dg-final { scan-assembler "vesrah\t%v.+,%v.+,3.%r2" } } */ +/* { dg-final { scan-assembler "vesraf\t%v.+,%v.+,4.%r2" } } */ +/* { dg-final { scan-assembler "vesrag\t%v.+,%v.+,5.%r2" } } */ +/* { dg-final { scan-assembler "vesrlb\t%v.+,%v.+,2.%r2" } } */ +/* { dg-final { scan-assembler "vesrlh\t%v.+,%v.+,3.%r2" } } */ +/* { dg-final { scan-assembler "vesrlf\t%v.+,%v.+,4.%r2" } } */ +/* { dg-final { scan-assembler "vesrlg\t%v.+,%v.+,5.%r2" } } */ +/* { dg-final { scan-assembler-not "ahi" } } */ +/* { dg-final { scan-assembler-not "nilf" } } */ +/* { dg-final { scan-assembler-not "risbg" } } */ + +typedef __attribute__((vector_size(16))) signed char v16qi; + +v16qi vshiftlqi (v16qi in, unsigned int sh) +{ + sh = (sh + 2) % 8; + return (in << sh); +} + +typedef __attribute__((vector_size(16))) signed short v8hi; + +v8hi vshiftlhi (v8hi in, unsigned int sh) +{ + sh = (sh + 3) % 16; + return (in << sh); +} + +typedef __attribute__((vector_size(16))) signed int v4si; + +v4si vshiftlsi (v4si in, unsigned int sh) +{ + sh = (sh + 4) % 32; + return (in << sh); +} + +typedef __attribute__((vector_size(16))) signed long v2di; + +v2di 
vshiftldi (v2di in, unsigned int sh) +{ + sh = (sh + 5) % 64; + return (in << sh); +} + +typedef __attribute__((vector_size(16))) unsigned char uv16qi; + +uv16qi vshiftrqiu (uv16qi in, unsigned int sh) +{ + sh = (sh + 2) % 8; + return (in >> sh); +} + +typedef __attribute__((vector_size(16))) unsigned short uv8hi; + +uv8hi vshiftrhiu (uv8hi in, unsigned int sh) +{ + sh = (sh + 3) % 16; + return (in >> sh); +} + +typedef __attribute__((vector_size(16))) unsigned int uv4si; + +uv4si vshiftrsiu (uv4si in, unsigned int sh) +{ + sh = (sh + 4) % 32; + return (in >> sh); +} + +typedef __attribute__((vector_size(16))) unsigned long uv2di; + +uv2di vshiftrdiu (uv2di in, unsigned int sh) +{ + sh = (sh + 5) % 64; + return (in >> sh); +} + +v16qi vshiftrqi (v16qi in, unsigned int sh) +{ + sh = (sh + 2) % 8; + return (in >> sh); +} + +v8hi vshiftrhi (v8hi in, unsigned int sh) +{ + sh = (sh + 3) % 16; + return (in >> sh); +} + +v4si vshiftrsi (v4si in, unsigned int sh) +{ + sh = (sh + 4) % 32; + return (in >> sh); +} + +v2di vshiftrdi (v2di in, unsigned int sh) +{ + sh = (sh + 5) % 64; + return (in >> sh); +}