From c5395d88dfa2123ca5155008c8c8339ded98fd32 Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 9 Nov 2020 17:34:13 +0200 Subject: [PATCH] arc: Improve/add instruction patterns to better use MAC instructions. ARC MPY7+ instructions add MAC instructions for both vector and scalar data types. This patch adds a madd pattern for 16bit data using the 32bit MAC instruction, and dot_prod patterns for v4hi vector types. The 64bit moves are also upgraded by using the vadd2 instruction. 2020-11-09 Claudiu Zissulescu gcc/ * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions. * config/arc/arc.md (movdi_insn): Update pattern to use vadd2 instructions. (movdf_insn): Likewise. (maddhisi4): New pattern. (umaddhisi4): Likewise. * config/arc/simdext.md (mov_int): Update pattern to use vadd2. (sdot_prodv4hi): New pattern. (udot_prodv4hi): Likewise. (arc_vec_mac_hi_v4hi): Update/renamed to arc_vec_mac_v2hiv2si. (arc_vec_mac_v2hiv2si_zero): New pattern. * config/arc/constraints.md (Ral): Accumulator register constraint. 
Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.c | 8 ++++ gcc/config/arc/arc.md | 71 ++++++++++++++++++++++++--- gcc/config/arc/constraints.md | 5 ++ gcc/config/arc/simdext.md | 90 +++++++++++++++++++++++++++-------- 4 files changed, 147 insertions(+), 27 deletions(-) diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 6b96c5e4bf5..2a7b1fb48bc 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -10154,6 +10154,14 @@ arc_split_move (rtx *operands) return; } + if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], mode) + && even_register_operand (operands[1], mode)) + { + emit_move_insn (operands[0], operands[1]); + return; + } + if (TARGET_PLUS_QMACW && GET_CODE (operands[1]) == CONST_VECTOR) { diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index a7f4056c7af..266b7ce7666 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -1322,8 +1322,8 @@ core_3, archs4x, archs4xd, archs4xd_slow" ") (define_insn_and_split "*movdi_insn" - [(set (match_operand:DI 0 "move_dest_operand" "=w, w,r, m") - (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))] + [(set (match_operand:DI 0 "move_dest_operand" "=r, r,r, m") + (match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))] "register_operand (operands[0], DImode) || register_operand (operands[1], DImode) || (satisfies_constraint_Cm3 (operands[1]) @@ -1335,6 +1335,13 @@ core_3, archs4x, archs4xd, archs4xd_slow" default: return \"#\"; + case 0: + if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], DImode) + && even_register_operand (operands[1], DImode)) + return \"vadd2\\t%0,%1,0\"; + return \"#\"; + case 2: if (TARGET_LL64 && memory_operand (operands[1], DImode) @@ -1351,7 +1358,7 @@ core_3, archs4x, archs4xd, archs4xd_slow" return \"#\"; } }" - "reload_completed" + "&& reload_completed" [(const_int 0)] { arc_split_move (operands); @@ -1397,15 +1404,24 @@ core_3, archs4x, archs4xd, archs4xd_slow" "if (prepare_move_operands (operands, DFmode)) 
DONE;") (define_insn_and_split "*movdf_insn" - [(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m") - (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))] - "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)" + [(set (match_operand:DF 0 "move_dest_operand" "=D,r,r,r,r,m") + (match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))] + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" "* { switch (which_alternative) { default: return \"#\"; + + case 2: + if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], DFmode) + && even_register_operand (operands[1], DFmode)) + return \"vadd2\\t%0,%1,0\"; + return \"#\"; + case 4: if (TARGET_LL64 && ((even_register_operand (operands[0], DFmode) @@ -6126,6 +6142,49 @@ core_3, archs4x, archs4xd, archs4xd_slow" [(set_attr "length" "0")]) ;; MAC and DMPY instructions + +; Use MAC instruction to emulate 16bit mac. +(define_expand "maddhisi4" + [(match_operand:SI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "extend_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_PLUS_DMPY" + "{ + rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx accl = gen_lowpart (SImode, acc_reg); + + emit_move_insn (accl, operands[3]); + emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1]))); + emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2]))); + emit_insn (gen_mac (tmp1, tmp2)); + emit_move_insn (operands[0], accl); + DONE; + }") + +; The same for the unsigned variant, but using MACU instruction. 
+(define_expand "umaddhisi4" + [(match_operand:SI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "extend_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_PLUS_DMPY" + "{ + rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx accl = gen_lowpart (SImode, acc_reg); + + emit_move_insn (accl, operands[3]); + emit_insn (gen_rtx_SET (tmp1, gen_rtx_ZERO_EXTEND (SImode, operands[1]))); + emit_insn (gen_rtx_SET (tmp2, gen_rtx_ZERO_EXTEND (SImode, operands[2]))); + emit_insn (gen_macu (tmp1, tmp2)); + emit_move_insn (operands[0], accl); + DONE; + }") + (define_expand "maddsidi4" [(match_operand:DI 0 "register_operand" "") (match_operand:SI 1 "register_operand" "") diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index b7a563a72ad..a2a8e84ac45 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -493,6 +493,11 @@ Condition Codes" (and (match_code "reg") (match_test "cc_register (op, VOIDmode)"))) +(define_constraint "Ral" + "@internal + Accumulator register @code{ACCL} - do not reload into its class" + (and (match_code "reg") + (match_test "REGNO (op) == ACCL_REGNO"))) (define_constraint "Q" "@internal diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md index 0e88b3dd815..d2fc309ea87 100644 --- a/gcc/config/arc/simdext.md +++ b/gcc/config/arc/simdext.md @@ -1400,8 +1400,7 @@ (define_insn_and_split "*mov_insn" [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m") (match_operand:VWH 1 "general_operand" "i,r,m,r"))] - "TARGET_PLUS_QMACW - && (register_operand (operands[0], mode) + "(register_operand (operands[0], mode) || register_operand (operands[1], mode))" "* { @@ -1411,7 +1410,11 @@ return \"#\"; case 1: - return \"vadd2 %0, %1, 0\"; + if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], mode) + && even_register_operand (operands[1], mode)) + return 
\"vadd2\\t%0,%1,0\"; + return \"#\"; case 2: if (TARGET_LL64) @@ -1430,7 +1433,7 @@ arc_split_move (operands); DONE; } - [(set_attr "type" "move,move,load,store") + [(set_attr "type" "move,multi,load,store") (set_attr "predicable" "yes,no,no,no") (set_attr "iscompact" "false,false,false,false") ]) @@ -1612,6 +1615,44 @@ DONE; }) +(define_expand "sdot_prodv4hi" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:V4HI 1 "register_operand" "") + (match_operand:V4HI 2 "register_operand" "") + (match_operand:V2SI 3 "register_operand" "")] + "TARGET_PLUS_MACD" +{ + rtx acc_reg = gen_rtx_REG (V2SImode, ACC_REG_FIRST); + rtx op1_low = gen_lowpart (V2HImode, operands[1]); + rtx op1_high = gen_highpart (V2HImode, operands[1]); + rtx op2_low = gen_lowpart (V2HImode, operands[2]); + rtx op2_high = gen_highpart (V2HImode, operands[2]); + + emit_move_insn (acc_reg, operands[3]); + emit_insn (gen_arc_vec_smac_v2hiv2si_zero (op1_low, op2_low)); + emit_insn (gen_arc_vec_smac_v2hiv2si (operands[0], op1_high, op2_high)); + DONE; +}) + +(define_expand "udot_prodv4hi" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:V4HI 1 "register_operand" "") + (match_operand:V4HI 2 "register_operand" "") + (match_operand:V2SI 3 "register_operand" "")] + "TARGET_PLUS_MACD" +{ + rtx acc_reg = gen_rtx_REG (V2SImode, ACC_REG_FIRST); + rtx op1_low = gen_lowpart (V2HImode, operands[1]); + rtx op1_high = gen_highpart (V2HImode, operands[1]); + rtx op2_low = gen_lowpart (V2HImode, operands[2]); + rtx op2_high = gen_highpart (V2HImode, operands[2]); + + emit_move_insn (acc_reg, operands[3]); + emit_insn (gen_arc_vec_umac_v2hiv2si_zero (op1_low, op2_low)); + emit_insn (gen_arc_vec_umac_v2hiv2si (operands[0], op1_high, op2_high)); + DONE; +}) + (define_insn "arc_vec_mult_lo_v4hi" [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") (mult:V2SI (SE:V2SI (vec_select:V2HI @@ -1704,30 +1745,37 @@ } ) -(define_insn "arc_vec_mac_hi_v4hi" - [(set (match_operand:V2SI 0 
"even_register_operand" "=r,r") +(define_insn "arc_vec_mac_v2hiv2si" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,Ral,r") (plus:V2SI - (reg:V2SI ARCV2_ACC) - (mult:V2SI (SE:V2SI (vec_select:V2HI - (match_operand:V4HI 1 "even_register_operand" "0,r") - (parallel [(const_int 2) (const_int 3)]))) - (SE:V2SI (vec_select:V2HI - (match_operand:V4HI 2 "even_register_operand" "r,r") - (parallel [(const_int 2) (const_int 3)])))))) + (mult:V2SI (SE:V2SI (match_operand:V2HI 1 "register_operand" "0, r,r")) + (SE:V2SI (match_operand:V2HI 2 "register_operand" "r, r,r"))) + (reg:V2SI ARCV2_ACC))) (set (reg:V2SI ARCV2_ACC) (plus:V2SI - (reg:V2SI ARCV2_ACC) - (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 2) (const_int 3)]))) - (SE:V2SI (vec_select:V2HI (match_dup 2) - (parallel [(const_int 2) (const_int 3)])))))) + (mult:V2SI (SE:V2SI (match_dup 1)) + (SE:V2SI (match_dup 2))) + (reg:V2SI ARCV2_ACC))) ] "TARGET_PLUS_MACD" - "vmac2h%? %0, %R1, %R2" + "@ + vmac2h%?\\t%0,%1,%2 + vmac2h%?\\t0,%1,%2 + vmac2h%?\\t%0,%1,%2" [(set_attr "length" "4") (set_attr "type" "multi") - (set_attr "predicable" "yes,no") - (set_attr "cond" "canuse,nocond")]) + (set_attr "predicable" "yes,no,no")]) + +(define_insn "arc_vec_mac_v2hiv2si_zero" + [(set (reg:V2SI ARCV2_ACC) + (plus:V2SI + (mult:V2SI (SE:V2SI (match_operand:V2HI 0 "register_operand" "r")) + (SE:V2SI (match_operand:V2HI 1 "register_operand" "r"))) + (reg:V2SI ARCV2_ACC)))] + "TARGET_PLUS_MACD" + "vmac2h%?\\t0,%0,%1" + [(set_attr "length" "4") + (set_attr "type" "multi")]) ;; Builtins (define_insn "dmach" -- 2.30.2