From: John David Anglin
Date: Sat, 8 Nov 2003 20:36:30 +0000 (+0000)
Subject: re PR rtl-optimization/12630 (Various unrecognizable insns and ICEs at -O3)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cdc9103cba6f914981e3ba352d22db578c4aa209;p=gcc.git

re PR rtl-optimization/12630 (Various unrecognizable insns and ICEs at -O3)

	PR optimization/12630
	* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
	Handle length computation for 64-bit moves.
	(compute_clrstr_length, output_block_clear): Implement block clear.
	(output_block_move): Handle 64-bit moves.
	(pa_adjust_insn_length): Use compute_movstr_length and
	compute_clrstr_length.
	* pa.md (movstrsi): Revise operand order and comments. Don't use
	match_scratch.
	(movstrsi_internal): Delete.
	(movstrsi_prereload, movstrsi_postreload): New insns. Define splitter
	and peephole2 patterns to transform prereload to postreload form.
	(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
	clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
	clrstrdi_postreload): New patterns for 64-bit block move, and block
	clear.
	* pa-protos.h (output_block_clear): New prototype.

From-SVN: r73375
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7835708d52c..60d79213f56 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2003-11-08 John David Anglin
+
+	PR optimization/12630
+	* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
+	Handle length computation for 64-bit moves.
+	(compute_clrstr_length, output_block_clear): Implement block clear.
+	(output_block_move): Handle 64-bit moves.
+	(pa_adjust_insn_length): Use compute_movstr_length and
+	compute_clrstr_length.
+	* pa.md (movstrsi): Revise operand order and comments. Don't use
+	match_scratch.
+	(movstrsi_internal): Delete.
+	(movstrsi_prereload, movstrsi_postreload): New insns. Define splitter
+	and peephole2 patterns to transform prereload to postreload form.
+	(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
+	clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
+	clrstrdi_postreload): New patterns for 64-bit block move, and block
+	clear.
+	* pa-protos.h (output_block_clear): New prototype.
+ 2003-11-08 Andreas Schwab * dbxout.c (current_file): Also wrap inside DBX_DEBUGGING_INFO || diff --git a/gcc/config/pa/pa-protos.h b/gcc/config/pa/pa-protos.h index f5f15cb3fd3..4d5ce69b47b 100644 --- a/gcc/config/pa/pa-protos.h +++ b/gcc/config/pa/pa-protos.h @@ -40,6 +40,7 @@ extern const char *output_ior (rtx *); extern const char *output_move_double (rtx *); extern const char *output_fp_move_double (rtx *); extern const char *output_block_move (rtx *, int); +extern const char *output_block_clear (rtx *, int); extern const char *output_cbranch (rtx *, int, int, int, rtx); extern const char *output_lbranch (rtx, rtx); extern const char *output_bb (rtx *, int, int, int, rtx, int); diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 856e3012a22..8c8aebbf31b 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -103,7 +103,8 @@ static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx); static int forward_branch_p (rtx); static int shadd_constant_p (int); static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *); -static int compute_movstrsi_length (rtx); +static int compute_movstr_length (rtx); +static int compute_clrstr_length (rtx); static bool pa_assemble_integer (rtx, unsigned int, int); static void remove_useless_addtr_insns (int); static void store_reg (int, int, int); @@ -2431,8 +2432,8 @@ find_addr_reg (rtx addr) OPERANDS[0] is the destination pointer as a REG, clobbered. OPERANDS[1] is the source pointer as a REG, clobbered. OPERANDS[2] is a register for temporary storage. - OPERANDS[4] is the size as a CONST_INT OPERANDS[3] is a register for temporary storage. + OPERANDS[4] is the size as a CONST_INT OPERANDS[5] is the alignment safe to use, as a CONST_INT. OPERANDS[6] is another temporary register. */ @@ -2442,15 +2443,43 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) int align = INTVAL (operands[5]); unsigned long n_bytes = INTVAL (operands[4]); - /* We can't move more than four bytes at a time because the PA + /* We can't move more than a word at a time because the PA has no longer integer move insns. (Could use fp mem ops?) */ - if (align > 4) - align = 4; + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); /* Note that we know each loop below will execute at least twice (else we would have open-coded the copy). */ switch (align) { + case 8: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 16); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldd,ma 8(%1),%3", operands); + output_asm_insn ("ldd,ma 8(%1),%6", operands); + output_asm_insn ("std,ma %3,8(%0)", operands); + output_asm_insn ("addib,>= -16,%2,.-12", operands); + output_asm_insn ("std,ma %6,8(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! */ + if (n_bytes % 16 != 0) + { + operands[4] = GEN_INT (n_bytes % 8); + if (n_bytes % 16 >= 8) + output_asm_insn ("ldd,ma 8(%1),%3", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("ldd 0(%1),%6", operands); + if (n_bytes % 16 >= 8) + output_asm_insn ("std,ma %3,8(%0)", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("stdby,e %6,%4(%0)", operands); + } + return ""; + case 4: /* Pre-adjust the loop counter. */ operands[4] = GEN_INT (n_bytes - 8); @@ -2536,7 +2565,7 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) count insns rather than emit them. 
*/ static int -compute_movstrsi_length (rtx insn) +compute_movstr_length (rtx insn) { rtx pat = PATTERN (insn); unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); @@ -2545,8 +2574,8 @@ compute_movstrsi_length (rtx insn) /* We can't move more than four bytes at a time because the PA has no longer integer move insns. (Could use fp mem ops?) */ - if (align > 4) - align = 4; + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); /* The basic copying loop. */ n_insns = 6; @@ -2564,6 +2593,148 @@ compute_movstrsi_length (rtx insn) /* Lengths are expressed in bytes now; each insn is 4 bytes. */ return n_insns * 4; } + +/* Emit code to perform a block clear. + + OPERANDS[0] is the destination pointer as a REG, clobbered. + OPERANDS[1] is a register for temporary storage. + OPERANDS[2] is the size as a CONST_INT + OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ + +const char * +output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) +{ + int align = INTVAL (operands[3]); + unsigned long n_bytes = INTVAL (operands[2]); + + /* We can't clear more than a word at a time because the PA + has no longer integer move insns. */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the copy). */ + switch (align) + { + case 8: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 16); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("std,ma %%r0,8(%0)", operands); + output_asm_insn ("addib,>= -16,%1,.-4", operands); + output_asm_insn ("std,ma %%r0,8(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! */ + if (n_bytes % 16 != 0) + { + operands[2] = GEN_INT (n_bytes % 8); + if (n_bytes % 16 >= 8) + output_asm_insn ("std,ma %%r0,8(%0)", operands); + if (n_bytes % 8 != 0) + output_asm_insn ("stdby,e %%r0,%2(%0)", operands); + } + return ""; + + case 4: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + output_asm_insn ("addib,>= -8,%1,.-4", operands); + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! */ + if (n_bytes % 8 != 0) + { + operands[2] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) + output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); + } + return ""; + + case 2: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + output_asm_insn ("addib,>= -4,%1,.-4", operands); + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) + output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %%r0,0(%0)", operands); + } + return ""; + + case 1: + /* Pre-adjust the loop counter. */ + operands[2] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %2,%1", operands); + + /* Loop. */ + output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); + output_asm_insn ("addib,>= -2,%1,.-4", operands); + output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); + + /* Handle the residual. 
*/ + if (n_bytes % 2 != 0) + output_asm_insn ("stb %%r0,0(%0)", operands); + + return ""; + + default: + abort (); + } +} + +/* Count the number of insns necessary to handle this block move. + + Basic structure is the same as emit_block_move, except that we + count insns rather than emit them. */ + +static int +compute_clrstr_length (rtx insn) +{ + rtx pat = PATTERN (insn); + unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); + unsigned int n_insns = 0; + + /* We can't clear more than a word at a time because the PA + has no longer integer move insns. */ + if (align > (TARGET_64BIT ? 8 : 4)) + align = (TARGET_64BIT ? 8 : 4); + + /* The basic loop. */ + n_insns = 4; + + /* Residuals. */ + if (n_bytes % (2 * align) != 0) + { + if ((n_bytes % (2 * align)) >= align) + n_insns++; + + if ((n_bytes % align) != 0) + n_insns++; + } + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; +} const char * @@ -4337,7 +4508,15 @@ pa_adjust_insn_length (rtx insn, int length) && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) - return compute_movstrsi_length (insn) - 4; + return compute_movstr_length (insn) - 4; + /* Block clear pattern. */ + else if (GET_CODE (insn) == INSN + && GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM + && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode) + return compute_clrstr_length (insn) - 4; /* Conditional branch with an unfilled delay slot. */ else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) { diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 79f2e26cd27..aa0bfcbe783 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -2955,20 +2955,20 @@ (set_attr "length" "4")]) ;; The definition of this insn does not really explain what it does, -;; but it should suffice -;; that anything generated as this insn will be recognized as one -;; and that it will not successfully combine with anything. +;; but it should suffice that anything generated as this insn will be +;; recognized as a movstrsi operation, and that it will not successfully +;; combine with anything. (define_expand "movstrsi" [(parallel [(set (match_operand:BLK 0 "" "") (match_operand:BLK 1 "" "")) - (clobber (match_scratch:SI 7 "")) - (clobber (match_scratch:SI 8 "")) (clobber (match_dup 4)) (clobber (match_dup 5)) (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) (use (match_operand:SI 2 "arith_operand" "")) (use (match_operand:SI 3 "const_int_operand" ""))])] - "!TARGET_64BIT" + "!TARGET_64BIT && optimize > 0" " { int size, align; @@ -2990,7 +2990,7 @@ If the size is large in respect to the known alignment, then use the library routines. - If the size is small in repsect to the known alignment, then open + If the size is small in respect to the known alignment, then open code the copy (since that will lead to better scheduling). Else use the block move pattern. */ @@ -3003,8 +3003,7 @@ align = INTVAL (operands[3]); align = align > 4 ? 4 : align; - /* If size/alignment > 8 (eg size is large in respect to alignment), - then use the library routines. */ + /* If size/alignment is large, then use the library routines. 
*/ if (size / align > 16) FAIL; @@ -3022,28 +3021,470 @@ operands[4] = gen_reg_rtx (SImode); operands[5] = gen_reg_rtx (SImode); operands[6] = gen_reg_rtx (SImode); - operands[7] = XEXP (operands[0], 0); - operands[8] = XEXP (operands[1], 0); + operands[7] = gen_reg_rtx (SImode); + operands[8] = gen_reg_rtx (SImode); }") ;; The operand constraints are written like this to support both compile-time -;; and run-time determined byte count. If the count is run-time determined, -;; the register with the byte count is clobbered by the copying code, and -;; therefore it is forced to operand 2. If the count is compile-time -;; determined, we need two scratch registers for the unrolled code. -(define_insn "movstrsi_internal" +;; and run-time determined byte counts. The expander and output_block_move +;; only support compile-time determined counts at this time. +;; +;; If the count is run-time determined, the register with the byte count +;; is clobbered by the copying code, and therefore it is forced to operand 2. +;; +;; We used to clobber operands 0 and 1. However, a change to regrename.c +;; broke this semantic for pseudo registers. We can't use match_scratch +;; as this requires two registers in the class R1_REGS when the MEMs for +;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are +;; forced to internally copy operands 0 and 1 to operands 7 and 8, +;; respectively. We then split or peephole optimize after reload. +(define_insn "movstrsi_prereload" [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) (mem:BLK (match_operand:SI 1 "register_operand" "r,r"))) - (clobber (match_scratch:SI 7 "=0,0")) - (clobber (match_scratch:SI 8 "=1,1")) (clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp - (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp + (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1 (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_operand:SI 7 "register_operand" "=&r,&r")) ;item tmp3 + (clobber (match_operand:SI 8 "register_operand" "=&r,&r")) ;item tmp4 (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count (use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment "!TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" "")) + (mem:BLK (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "register_operand" "")) + (clobber (match_operand:SI 3 "register_operand" "")) + (clobber (match_operand:SI 6 "register_operand" "")) + (clobber (match_operand:SI 7 "register_operand" "")) + (clobber (match_operand:SI 8 "register_operand" "")) + (use (match_operand:SI 4 "arith_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))])] + "!TARGET_64BIT && reload_completed && !flag_peephole2" + [(set (match_dup 7) (match_dup 0)) + (set (match_dup 8) (match_dup 1)) + (parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8))) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + "") + +(define_peephole2 + [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" "")) + (mem:BLK (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "register_operand" "")) + (clobber (match_operand:SI 3 "register_operand" "")) + (clobber (match_operand:SI 6 "register_operand" "")) + (clobber 
(match_operand:SI 7 "register_operand" "")) + (clobber (match_operand:SI 8 "register_operand" "")) + (use (match_operand:SI 4 "arith_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))])] + "!TARGET_64BIT" + [(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8))) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + " +{ + if (dead_or_set_p (curr_insn, operands[0])) + operands[7] = operands[0]; + else + emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0])); + + if (dead_or_set_p (curr_insn, operands[1])) + operands[8] = operands[1]; + else + emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1])); +}") + +(define_insn "movstrsi_postreload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:SI 1 "register_operand" "r,r"))) + (clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:SI 5 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "!TARGET_64BIT && reload_completed" "* return output_block_move (operands, !which_alternative);" [(set_attr "type" "multi,multi")]) + +(define_expand "movstrdi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_operand:DI 2 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* HP provides very fast block move library routine for the PA; + this routine includes: + + 4x4 byte at a time block moves, + 1x4 byte at a time with alignment checked at runtime with + attempts to align the source and destination as needed + 1x1 byte loop + + With that in mind, here's the heuristics to try and guess when + the inlined block move will be better than the library block + move: + + If the size isn't constant, then always use the library routines. + + If the size is large in respect to the known alignment, then use + the library routines. + + If the size is small in respect to the known alignment, then open + code the copy (since that will lead to better scheduling). + + Else use the block move pattern. */ + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + + size = INTVAL (operands[2]); + align = INTVAL (operands[3]); + align = align > 8 ? 8 : align; + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO) + FAIL; + + /* Fall through means we're going to use our block move pattern. 
*/ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (DImode, XEXP (operands[0], 0))); + operands[1] + = replace_equiv_address (operands[1], + copy_to_mode_reg (DImode, XEXP (operands[1], 0))); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); + operands[6] = gen_reg_rtx (DImode); + operands[7] = gen_reg_rtx (DImode); + operands[8] = gen_reg_rtx (DImode); +}") + +;; The operand constraints are written like this to support both compile-time +;; and run-time determined byte counts. The expander and output_block_move +;; only support compile-time determined counts at this time. +;; +;; If the count is run-time determined, the register with the byte count +;; is clobbered by the copying code, and therefore it is forced to operand 2. +;; +;; We used to clobber operands 0 and 1. However, a change to regrename.c +;; broke this semantic for pseudo registers. We can't use match_scratch +;; as this requires two registers in the class R1_REGS when the MEMs for +;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are +;; forced to internally copy operands 0 and 1 to operands 7 and 8, +;; respectively. We then split or peephole optimize after reload. +(define_insn "movstrdi_prereload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:DI 1 "register_operand" "r,r"))) + (clobber (match_operand:DI 2 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_operand:DI 7 "register_operand" "=&r,&r")) ;item tmp3 + (clobber (match_operand:DI 8 "register_operand" "=&r,&r")) ;item tmp4 + (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:DI 5 "const_int_operand" "n,n"))] ;alignment + "TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" "")) + (mem:BLK (match_operand:DI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "register_operand" "")) + (clobber (match_operand:DI 3 "register_operand" "")) + (clobber (match_operand:DI 6 "register_operand" "")) + (clobber (match_operand:DI 7 "register_operand" "")) + (clobber (match_operand:DI 8 "register_operand" "")) + (use (match_operand:DI 4 "arith_operand" "")) + (use (match_operand:DI 5 "const_int_operand" ""))])] + "TARGET_64BIT && reload_completed && !flag_peephole2" + [(set (match_dup 7) (match_dup 0)) + (set (match_dup 8) (match_dup 1)) + (parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8))) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber (match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + "") + +(define_peephole2 + [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" "")) + (mem:BLK (match_operand:DI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "register_operand" "")) + (clobber (match_operand:DI 3 "register_operand" "")) + (clobber (match_operand:DI 6 "register_operand" "")) + (clobber (match_operand:DI 7 "register_operand" "")) + (clobber (match_operand:DI 8 "register_operand" "")) + (use (match_operand:DI 4 "arith_operand" "")) + (use (match_operand:DI 5 "const_int_operand" ""))])] + "TARGET_64BIT" + [(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8))) + (clobber (match_dup 2)) + (clobber (match_dup 3)) + (clobber (match_dup 6)) + (clobber 
(match_dup 7)) + (clobber (match_dup 8)) + (use (match_dup 4)) + (use (match_dup 5)) + (const_int 0)])] + " +{ + if (dead_or_set_p (curr_insn, operands[0])) + operands[7] = operands[0]; + else + emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0])); + + if (dead_or_set_p (curr_insn, operands[1])) + operands[8] = operands[1]; + else + emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1])); +}") + +(define_insn "movstrdi_postreload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:DI 1 "register_operand" "r,r"))) + (clobber (match_operand:DI 2 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1 + (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2 + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count + (use (match_operand:DI 5 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "TARGET_64BIT && reload_completed" + "* return output_block_move (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +(define_expand "clrstrsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (const_int 0)) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (use (match_operand:SI 1 "arith_operand" "")) + (use (match_operand:SI 2 "const_int_operand" ""))])] + "!TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[1]) != CONST_INT) + FAIL; + + size = INTVAL (operands[1]); + align = INTVAL (operands[2]); + align = align > 4 ? 4 : align; + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO) + FAIL; + + /* Fall through means we're going to use our block clear pattern. 
*/ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (SImode, XEXP (operands[0], 0))); + operands[3] = gen_reg_rtx (SImode); + operands[4] = gen_reg_rtx (SImode); +}") + +(define_insn "clrstrsi_prereload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_operand:SI 4 "register_operand" "=&r,&r")) ;tmp1 + (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:SI 3 "const_int_operand" "n,n"))] ;alignment + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" "")) + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "")) + (clobber (match_operand:SI 4 "register_operand" "")) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "!TARGET_64BIT && reload_completed && !flag_peephole2" + [(set (match_dup 4) (match_dup 0)) + (parallel [(set (mem:BLK (match_dup 4)) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + "") + +(define_peephole2 + [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" "")) + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "")) + (clobber (match_operand:SI 4 "register_operand" "")) + (use (match_operand:SI 2 "arith_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))])] + "!TARGET_64BIT" + [(parallel [(set (mem:BLK (match_dup 4)) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + " +{ + if (dead_or_set_p (curr_insn, operands[0])) + operands[4] = operands[0]; + else + emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0])); +}") + +(define_insn "clrstrsi_postreload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) + (const_int 0)) + (clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_dup 0)) + (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:SI 3 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "!TARGET_64BIT && reload_completed" + "* return output_block_clear (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +(define_expand "clrstrdi" + [(parallel [(set (match_operand:BLK 0 "" "") + (const_int 0)) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (use (match_operand:DI 1 "arith_operand" "")) + (use (match_operand:DI 2 "const_int_operand" ""))])] + "TARGET_64BIT && optimize > 0" + " +{ + int size, align; + + /* Undetermined size, use the library routine. */ + if (GET_CODE (operands[1]) != CONST_INT) + FAIL; + + size = INTVAL (operands[1]); + align = INTVAL (operands[2]); + align = align > 8 ? 8 : align; + + /* If size/alignment is large, then use the library routines. */ + if (size / align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size / align < MOVE_RATIO) + FAIL; + + /* Fall through means we're going to use our block clear pattern. 
*/ + operands[0] + = replace_equiv_address (operands[0], + copy_to_mode_reg (DImode, XEXP (operands[0], 0))); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}") + +(define_insn "clrstrdi_prereload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_operand:DI 4 "register_operand" "=&r,&r")) ;item tmp1 + (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:DI 3 "const_int_operand" "n,n"))] ;alignment + "TARGET_64BIT" + "#" + [(set_attr "type" "multi,multi")]) + +(define_split + [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" "")) + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 4 "register_operand" "")) + (use (match_operand:DI 2 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT && reload_completed && !flag_peephole2" + [(set (match_dup 4) (match_dup 0)) + (parallel [(set (mem:BLK (match_dup 4)) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + "") + +(define_peephole2 + [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" "")) + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "")) + (clobber (match_operand:DI 4 "register_operand" "")) + (use (match_operand:DI 2 "arith_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))])] + "TARGET_64BIT" + [(parallel [(set (mem:BLK (match_dup 4)) (const_int 0)) + (clobber (match_dup 1)) + (clobber (match_dup 4)) + (use (match_dup 2)) + (use (match_dup 3)) + (const_int 0)])] + " +{ + if (dead_or_set_p (curr_insn, operands[0])) + operands[4] = operands[0]; + else + emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0])); +}") + +(define_insn "clrstrdi_postreload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (const_int 0)) + (clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp + (clobber (match_dup 0)) + (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count + (use (match_operand:DI 3 "const_int_operand" "n,n")) ;alignment + (const_int 0)] + "TARGET_64BIT && reload_completed" + "* return output_block_clear (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) ;; Floating point move insns
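
The length computation for the new block clear pattern is small enough to check by hand: compute_clrstr_length counts four insns for the ldi/store/addib/store loop, one more full-width store when the residual of the two-store loop is at least align bytes, and one stby/stdby store for any remaining tail, then multiplies by four bytes per insn; pa_adjust_insn_length returns that total minus 4 for the block clear pattern.  The sketch below restates this arithmetic outside of GCC so a given size/alignment pair can be spot-checked.  The function clrstr_length_in_bytes, its parameters, and the example values are illustrative only and are not part of the patch; the real function extracts the alignment and byte count from the insn's PATTERN.

/* Illustrative sketch only -- not part of the patch.  It restates the
   arithmetic of compute_clrstr_length with explicit parameters so the
   length of a clrstrsi/clrstrdi insn can be checked by hand.  */

#include <stdio.h>

static unsigned int
clrstr_length_in_bytes (unsigned int align, unsigned long n_bytes,
			int target_64bit)
{
  unsigned int n_insns;

  /* Stores are at most one word wide: 8 bytes for TARGET_64BIT, else 4.  */
  if (align > (target_64bit ? 8u : 4u))
    align = target_64bit ? 8u : 4u;

  /* The basic clearing loop: ldi, st[wd],ma, addib, st[wd],ma.  */
  n_insns = 4;

  /* Residual stores after the loop, which clears 2 * align bytes per
     iteration.  */
  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) >= align)
	n_insns++;		/* one extra full-width store */

      if (n_bytes % align != 0)
	n_insns++;		/* stby/stdby for the remaining tail */
    }

  /* Each PA insn is 4 bytes.  */
  return n_insns * 4;
}

int
main (void)
{
  /* Clearing 19 bytes with 4-byte alignment on 32-bit PA: the emitted
     sequence is ldi, stw,ma, addib, stw,ma, stby,e -- five insns, so
     the expected length is 20 bytes.  */
  printf ("%u\n", clrstr_length_in_bytes (4, 19, 0));
  return 0;
}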