From 8889fbe5424f437f504901a659ebcc772066a478 Mon Sep 17 00:00:00 2001 From: Monk Chiang Date: Sun, 27 May 2018 16:18:24 +0000 Subject: [PATCH] [NDS32] Optimize movmem and setmem operations. gcc/ * config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns implementation. (unaligned_store_dw): Ditto. * config/nds32/nds32-memory-manipulation.c (nds32_expand_movmemsi_loop_known_size): Refactoring implementation. (nds32_gen_dup_4_byte_to_word_value): Rename to ... (nds32_gen_dup_4_byte_to_word_value_aux): ... this. (emit_setmem_word_loop): Rename to ... (emit_setmem_doubleword_loop): ... this. (nds32_gen_dup_4_byte_to_word_value): New function. (nds32_gen_dup_8_byte_to_double_word_value): New function. (nds32_expand_setmem_loop): Refine implementation. (nds32_expand_setmem_loop_v3m): Ditto. * config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New pattern. Co-Authored-By: Chung-Ju Wu From-SVN: r260805 --- gcc/ChangeLog | 19 ++ gcc/config/nds32/nds32-intrinsic.md | 15 +- gcc/config/nds32/nds32-memory-manipulation.c | 186 ++++++++++++++++--- gcc/config/nds32/nds32-multiple.md | 19 ++ 4 files changed, 205 insertions(+), 34 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 293856b0888..cddfeb7ddbd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2018-05-27 Monk Chiang + Chung-Ju Wu + + * config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns + implementation. + (unaligned_store_dw): Ditto. + * config/nds32/nds32-memory-manipulation.c + (nds32_expand_movmemsi_loop_known_size): Refactoring implementation. + (nds32_gen_dup_4_byte_to_word_value): Rename to ... + (nds32_gen_dup_4_byte_to_word_value_aux): ... this. + (emit_setmem_word_loop): Rename to ... + (emit_setmem_doubleword_loop): ... this. + (nds32_gen_dup_4_byte_to_word_value): New function. + (nds32_gen_dup_8_byte_to_double_word_value): New function. + (nds32_expand_setmem_loop): Refine implementation. + (nds32_expand_setmem_loop_v3m): Ditto. 
+ * config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New + pattern. + 2018-05-27 Chung-Ju Wu * config/nds32/nds32.md (bswapsi2, bswaphi2): New patterns. diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md index 02f72853685..c70a6fcc99b 100644 --- a/gcc/config/nds32/nds32-intrinsic.md +++ b/gcc/config/nds32/nds32-intrinsic.md @@ -1596,22 +1596,17 @@ if (TARGET_ISA_V3M) nds32_expand_unaligned_store (operands, DImode); else - emit_insn (gen_unaligned_store_dw (operands[0], operands[1])); + emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]), + operands[1])); DONE; }) (define_insn "unaligned_store_dw" - [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) - (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] + [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw") + (unspec:DI [(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))] "" { - rtx otherops[3]; - otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); - otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); - otherops[2] = operands[0]; - - output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); - return ""; + return nds32_output_smw_double_word (operands); } [(set_attr "type" "store") (set_attr "length" "4")] diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c index d02aabfc88f..f6140e65130 100644 --- a/gcc/config/nds32/nds32-memory-manipulation.c +++ b/gcc/config/nds32/nds32-memory-manipulation.c @@ -257,8 +257,124 @@ static bool nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, rtx size, rtx alignment) { - return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, - size, alignment); + rtx dst_base_reg, src_base_reg; + rtx dst_itr, src_itr; + rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; + rtx dst_end; + rtx double_word_mode_loop, byte_mode_loop; + rtx tmp; + int start_regno; + bool align_to_4_bytes = (INTVAL 
(alignment) & 3) == 0; + unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); + + if (TARGET_ISA_V3M && !align_to_4_bytes) + return 0; + + if (TARGET_REDUCED_REGS) + start_regno = 2; + else + start_regno = 16; + + dst_itr = gen_reg_rtx (Pmode); + src_itr = gen_reg_rtx (Pmode); + dst_end = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (QImode); + + double_word_mode_loop = gen_label_rtx (); + byte_mode_loop = gen_label_rtx (); + + dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); + src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); + + if (total_bytes < 8) + { + /* Emit total_bytes less than 8 loop version of movmem. + add $dst_end, $dst, $size + move $dst_itr, $dst + .Lbyte_mode_loop: + lbi.bi $tmp, [$src_itr], #1 + sbi.bi $tmp, [$dst_itr], #1 + ! Upper bound not reached. Loop. + bne $dst_itr, $dst_end, .Lbyte_mode_loop */ + + /* add $dst_end, $dst, $size */ + dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, + NULL_RTX, 0, OPTAB_WIDEN); + /* move $dst_itr, $dst + move $src_itr, $src */ + emit_move_insn (dst_itr, dst_base_reg); + emit_move_insn (src_itr, src_base_reg); + + /* .Lbyte_mode_loop: */ + emit_label (byte_mode_loop); + + /* lbi.bi $tmp, [$src_itr], #1 */ + nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); + + /* sbi.bi $tmp, [$dst_itr], #1 */ + nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); + /* ! Upper bound not reached. Loop. + bne $dst_itr, $dst_end, .Lbyte_mode_loop */ + emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, + SImode, 1, byte_mode_loop); + return true; + } + else if (total_bytes % 8 == 0) + { + /* Emit multiple of 8 loop version of movmem. + + add $dst_end, $dst, $size + move $dst_itr, $dst + move $src_itr, $src + + .Ldouble_word_mode_loop: + lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr + smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr + ! the moves will be deleted after register allocation + move $src_itr, $src_itr' + move $dst_itr, $dst_itr' + ! 
Upper bound not reached. Loop. + bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */ + + /* add $dst_end, $dst, $size */ + dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, + NULL_RTX, 0, OPTAB_WIDEN); + + /* move $dst_itr, $dst + move $src_itr, $src */ + emit_move_insn (dst_itr, dst_base_reg); + emit_move_insn (src_itr, src_base_reg); + + /* .Ldouble_word_mode_loop: */ + emit_label (double_word_mode_loop); + /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr + smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ + src_itr_m = src_itr; + dst_itr_m = dst_itr; + srcmem_m = srcmem; + dstmem_m = dstmem; + nds32_emit_mem_move_block (start_regno, 2, + &dst_itr_m, &dstmem_m, + &src_itr_m, &srcmem_m, + true); + /* move $src_itr, $src_itr' + move $dst_itr, $dst_itr' */ + emit_move_insn (dst_itr, dst_itr_m); + emit_move_insn (src_itr, src_itr_m); + + /* ! Upper bound not reached. Loop. + bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */ + emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, + Pmode, 1, double_word_mode_loop); + } + else + { + /* Handle size greater than 8, and not a multiple of 8. */ + return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, + size, alignment); + } + + return true; } static bool @@ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) /* Auxiliary function for expand setmem pattern. 
*/ static rtx -nds32_gen_dup_4_byte_to_word_value (rtx value) +nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) { - rtx value4word = gen_reg_rtx (SImode); - gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); if (CONST_INT_P (value)) @@ -493,7 +607,30 @@ nds32_gen_dup_4_byte_to_word_value (rtx value) } static rtx -emit_setmem_word_loop (rtx itr, rtx size, rtx value) +nds32_gen_dup_4_byte_to_word_value (rtx value) +{ + rtx value4word = gen_reg_rtx (SImode); + nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); + + return value4word; +} + +static rtx +nds32_gen_dup_8_byte_to_double_word_value (rtx value) +{ + rtx value4doubleword = gen_reg_rtx (DImode); + + nds32_gen_dup_4_byte_to_word_value_aux ( + value, nds32_di_low_part_subreg(value4doubleword)); + + emit_move_insn (nds32_di_high_part_subreg(value4doubleword), + nds32_di_low_part_subreg(value4doubleword)); + return value4doubleword; +} + + +static rtx +emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) { rtx word_mode_label = gen_label_rtx (); rtx word_mode_end_label = gen_label_rtx (); @@ -502,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) rtx word_mode_end = gen_reg_rtx (SImode); rtx size_for_word = gen_reg_rtx (SImode); - /* and $size_for_word, $size, #~3 */ + /* and $size_for_word, $size, #~0x7 */ size_for_word = expand_binop (SImode, and_optab, size, - gen_int_mode (~3, SImode), + gen_int_mode (~0x7, SImode), NULL_RTX, 0, OPTAB_WIDEN); emit_move_insn (byte_mode_size, size); @@ -516,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, NULL_RTX, 0, OPTAB_WIDEN); - /* andi $byte_mode_size, $size, 3 */ - byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3), + /* andi $byte_mode_size, $size, 0x7 */ + byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), NULL_RTX, 0, OPTAB_WIDEN); emit_move_insn (byte_mode_size, byte_mode_size_tmp); 
@@ -527,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) /* ! word-mode set loop smw.bim $value4word, [$dst_itr], $value4word, 0 bne $word_mode_end, $dst_itr, .Lword_mode */ - emit_insn (gen_unaligned_store_update_base_w (itr, - itr, - value)); + emit_insn (gen_unaligned_store_update_base_dw (itr, + itr, + value)); emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, Pmode, 1, word_mode_label); @@ -581,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) static bool nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) { - rtx value4word; + rtx value4doubleword; rtx value4byte; rtx dst; rtx byte_mode_size; @@ -624,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ - value4word = nds32_gen_dup_4_byte_to_word_value (value); + value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); /* and $size_for_word, $size, #-4 beqz $size_for_word, .Lword_mode_end @@ -637,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) smw.bim $value4word, [$dst], $value4word, 0 bne $word_mode_end, $dst, .Lword_mode .Lword_mode_end: */ - byte_mode_size = emit_setmem_word_loop (dst, size, value4word); + byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); /* beqz $byte_mode_size, .Lend add $byte_mode_end, $dst, $byte_mode_size @@ -648,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) bne $byte_mode_end, $dst, .Lbyte_mode .Lend: */ - value4byte = simplify_gen_subreg (QImode, value4word, SImode, - subreg_lowpart_offset (QImode, SImode)); + value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, + subreg_lowpart_offset (QImode, DImode)); emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); @@ -666,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) rtx 
byte_loop_size = gen_reg_rtx (SImode); rtx remain_size = gen_reg_rtx (SImode); rtx new_base_reg; - rtx value4byte, value4word; + rtx value4byte, value4doubleword; rtx byte_mode_size; rtx last_byte_loop_label = gen_label_rtx (); size = force_reg (SImode, size); - value4word = nds32_gen_dup_4_byte_to_word_value (value); - value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0); + value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); + value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, + subreg_lowpart_offset (QImode, DImode)); emit_move_insn (byte_loop_size, size); emit_move_insn (byte_loop_base, base_reg); @@ -701,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); /* Set memory word by word. */ - byte_mode_size = emit_setmem_word_loop (new_base_reg, - remain_size, - value4word); + byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, + remain_size, + value4doubleword); emit_move_insn (byte_loop_base, new_base_reg); emit_move_insn (byte_loop_size, byte_mode_size); diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md index a8f77175927..80746b19323 100644 --- a/gcc/config/nds32/nds32-multiple.md +++ b/gcc/config/nds32/nds32-multiple.md @@ -2854,6 +2854,25 @@ (set_attr "length" "4")] ) +(define_expand "unaligned_store_update_base_dw" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8))) + (set (mem:DI (match_dup 1)) + (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])] + "" +{ + /* DO NOT emit unaligned_store_w_m immediately since the web pass doesn't + recognize post_inc; try it again after GCC 5.0. 
+ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ + emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode))); + DONE; +} + [(set_attr "type" "store_multiple") + (set_attr "combo" "2") + (set_attr "length" "4")] +) + (define_insn "*stmsi25" [(match_parallel 0 "nds32_store_multiple_operation" [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) -- 2.30.2