From 19fad46721af338b24aff48127ae654673bffa7b Mon Sep 17 00:00:00 2001
From: Andrew Stubbs <ams@codesourcery.com>
Date: Fri, 31 Jul 2020 11:27:24 +0100
Subject: [PATCH] amdgcn: TImode shifts

Implement TImode shifts in the backend.

The middle-end support that does it for other architectures doesn't work
for GCN because BITS_PER_WORD==32, meaning that TImode is quad-word, not
double-word.

gcc/ChangeLog:

	* config/gcn/gcn.md ("<expander>ti3"): New.
---
 gcc/config/gcn/gcn.md | 105 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 8cfb3a85d25..ed98d2d2706 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1538,6 +1538,111 @@
 	[(set_attr "type" "sop2,sop2,vop2")
 	 (set_attr "length" "8")])
 
+;; }}}
+;; {{{ ALU: generic 128-bit binop
+
+; TImode shifts can't be synthesized by the middle-end
+(define_expand "<expander>ti3"
+  [(set (match_operand:TI 0 "register_operand")
+	(vec_and_scalar_nocom:TI
+	  (match_operand:TI 1 "gcn_alu_operand")
+	  (match_operand:SI 2 "gcn_alu_operand")))]
+  ""
+  {
+    rtx dest = operands[0];
+    rtx src = operands[1];
+    rtx shift = operands[2];
+
+    enum {ashr, lshr, ashl} shiftop = <expander>;
+    rtx (*inverse_shift_fn) (rtx, rtx, rtx)
+      = (shiftop == ashl ? gen_lshrdi3 : gen_ashldi3);
+    rtx (*logical_shift_fn) (rtx, rtx, rtx)
+      = (shiftop == ashl ? gen_ashldi3 : gen_lshrdi3);
+
+    /* We shift "from" one subreg "to" the other, according to shiftop.  */
+    int from = (shiftop == ashl ? 0 : 8);
+    int to = (shiftop == ashl ? 8 : 0);
+    rtx destfrom = simplify_gen_subreg (DImode, dest, TImode, from);
+    rtx destto = simplify_gen_subreg (DImode, dest, TImode, to);
+    rtx srcfrom = simplify_gen_subreg (DImode, src, TImode, from);
+    rtx srcto = simplify_gen_subreg (DImode, src, TImode, to);
+
+    int shiftval = (CONST_INT_P (shift) ? INTVAL (shift) : -1);
+    enum {RUNTIME, ZERO, SMALL, LARGE} shiftcomparison
+     = (!CONST_INT_P (shift) ? RUNTIME
+	: shiftval == 0 ? ZERO
+	: shiftval < 64 ? SMALL
+	: LARGE);
+
+    rtx large_label, zero_label, exit_label;
+
+    if (shiftcomparison == RUNTIME)
+      {
+	zero_label = gen_label_rtx ();
+	large_label = gen_label_rtx ();
+	exit_label = gen_label_rtx ();
+
+	rtx cond = gen_rtx_EQ (VOIDmode, shift, const0_rtx);
+	emit_insn (gen_cbranchsi4 (cond, shift, const0_rtx, zero_label));
+
+	rtx sixtyfour = GEN_INT (64);
+	cond = gen_rtx_GE (VOIDmode, shift, sixtyfour);
+	emit_insn (gen_cbranchsi4 (cond, shift, sixtyfour, large_label));
+      }
+
+    if (shiftcomparison == SMALL || shiftcomparison == RUNTIME)
+      {
+	/* Shift both parts by the same amount, then patch in the bits that
+	   cross the boundary.
+	   This does *not* work for zero-length shifts.  */
+	rtx tmpto1 = gen_reg_rtx (DImode);
+	rtx tmpto2 = gen_reg_rtx (DImode);
+	emit_insn (gen_<expander>di3 (destfrom, srcfrom, shift));
+	emit_insn (logical_shift_fn (tmpto1, srcto, shift));
+	rtx lessershiftval = gen_reg_rtx (SImode);
+	emit_insn (gen_subsi3 (lessershiftval, GEN_INT (64), shift));
+	emit_insn (inverse_shift_fn (tmpto2, srcfrom, lessershiftval));
+	emit_insn (gen_iordi3 (destto, tmpto1, tmpto2));
+      }
+
+    if (shiftcomparison == RUNTIME)
+      {
+	emit_jump_insn (gen_jump (exit_label));
+	emit_barrier ();
+
+	emit_label (zero_label);
+      }
+
+    if (shiftcomparison == ZERO || shiftcomparison == RUNTIME)
+      emit_move_insn (dest, src);
+
+    if (shiftcomparison == RUNTIME)
+      {
+	emit_jump_insn (gen_jump (exit_label));
+	emit_barrier ();
+
+	emit_label (large_label);
+      }
+
+    if (shiftcomparison == LARGE || shiftcomparison == RUNTIME)
+      {
+	/* Do the shift within one part, and set the other part appropriately.
+	   Shifts of 128+ bits are an error.  */
+	rtx lessershiftval = gen_reg_rtx (SImode);
+	emit_insn (gen_subsi3 (lessershiftval, shift, GEN_INT (64)));
+	emit_insn (gen_<expander>di3 (destto, srcfrom, lessershiftval));
+	if (shiftop == ashr)
+	  emit_insn (gen_ashrdi3 (destfrom, srcfrom, GEN_INT (63)));
+	else
+	  emit_move_insn (destfrom, const0_rtx);
+      }
+
+    if (shiftcomparison == RUNTIME)
+      emit_label (exit_label);
+
+    DONE;
+  })
+
 ;; }}}
 ;; {{{ Atomics
 
-- 
2.30.2