From 15c77315001af0842a537ff748d1a0995ad38545 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 30 Sep 2017 10:10:15 +0200 Subject: [PATCH] re PR target/82361 (Useless "mov eax, eax" in generated code) PR target/82361 * config/i386/i386.md (TARGET_USE_8BIT_IDIV zext divmodsi4 splitter): New define_split. (divmodsi4_zext_1, divmodsi4_zext_2, *divmodsi4_zext_1, *divmodsi4_zext_2): New define_insn_and_split. (*divmodsi4_noext_zext_1, *divmodsi4_noext_zext_2): New define_insn. (TARGET_USE_8BIT_IDIV zext udivmodsi4 splitter): New define_split. (udivmodsi4_zext_1, udivmodsi4_zext_2, *udivmodsi4_zext_1, *udivmodsi4_zext_2, *udivmodsi4_pow2_zext_1, *udivmodsi4_pow2_zext_2): New define_insn_and_split. (*udivmodsi4_noext_zext_1, *udivmodsi4_noext_zext_2): New define_insn. * config/i386/i386.c (ix86_split_idivmod): Handle operands[0] or operands[1] having DImode when mode is SImode. * gcc.target/i386/pr82361-1.c: New test. * gcc.target/i386/pr82361-2.c: New test. From-SVN: r253317 --- gcc/ChangeLog | 14 + gcc/config/i386/i386.c | 29 +- gcc/config/i386/i386.md | 406 ++++++++++++++++++++++ gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.target/i386/pr82361-1.c | 53 +++ gcc/testsuite/gcc.target/i386/pr82361-2.c | 10 + 6 files changed, 514 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr82361-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82361-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7cae1deeef8..65cfd80bfaf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,19 @@ 2017-09-30 Jakub Jelinek + PR target/82361 + * config/i386/i386.md + (TARGET_USE_8BIT_IDIV zext divmodsi4 splitter): New define_split. + (divmodsi4_zext_1, divmodsi4_zext_2, *divmodsi4_zext_1, + *divmodsi4_zext_2): New define_insn_and_split. + (*divmodsi4_noext_zext_1, *divmodsi4_noext_zext_2): New define_insn. + (TARGET_USE_8BIT_IDIV zext udivmodsi4 splitter): New define_split. 
+ (udivmodsi4_zext_1, udivmodsi4_zext_2, *udivmodsi4_zext_1, + *udivmodsi4_zext_2, *udivmodsi4_pow2_zext_1, *udivmodsi4_pow2_zext_2): + New define_insn_and_split. + (*udivmodsi4_noext_zext_1, *udivmodsi4_noext_zext_2): New define_insn. + * config/i386/i386.c (ix86_split_idivmod): Handle operands[0] or + operands[1] having DImode when mode is SImode. + * config/i386/i386.c (ix86_split_idivmod): Use mode instead of always SImode for DIV and MOD in REG_EQUAL notes. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4a212153e4b..519336e0ed1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -21927,9 +21927,22 @@ ix86_split_idivmod (machine_mode mode, rtx operands[], switch (mode) { case E_SImode: - gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; + if (GET_MODE (operands[0]) == SImode) + { + if (GET_MODE (operands[1]) == SImode) + gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; + else + gen_divmod4_1 + = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; + gen_zero_extend = gen_zero_extendqisi2; + } + else + { + gen_divmod4_1 + = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; + gen_zero_extend = gen_zero_extendqidi2; + } gen_test_ccno_1 = gen_testsi_ccno_1; - gen_zero_extend = gen_zero_extendqisi2; break; case E_DImode: gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; @@ -21988,16 +22001,24 @@ ix86_split_idivmod (machine_mode mode, rtx operands[], div = gen_rtx_UDIV (mode, operands[2], operands[3]); mod = gen_rtx_UMOD (mode, operands[2], operands[3]); } + if (mode == SImode) + { + if (GET_MODE (operands[0]) != SImode) + div = gen_rtx_ZERO_EXTEND (DImode, div); + if (GET_MODE (operands[1]) != SImode) + mod = gen_rtx_ZERO_EXTEND (DImode, mod); + } /* Extract remainder from AH. 
*/ - tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8)); + tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), + tmp0, GEN_INT (8), GEN_INT (8)); if (REG_P (operands[1])) insn = emit_move_insn (operands[1], tmp1); else { /* Need a new scratch register since the old one has result of 8bit divide. */ - scratch = gen_reg_rtx (mode); + scratch = gen_reg_rtx (GET_MODE (operands[1])); emit_move_insn (scratch, tmp1); insn = emit_move_insn (operands[1], scratch); } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8786ee4036a..99497a9f654 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -7635,6 +7635,36 @@ [(const_int 0)] "ix86_split_idivmod (mode, operands, true); DONE;") +(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 1 "register_operand") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, true); DONE;") + +(define_split + [(set (match_operand:DI 1 "register_operand") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 0 "register_operand") + (div:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, true); DONE;") + (define_insn_and_split "divmod4_1" [(set (match_operand:SWI48 0 "register_operand" "=a") (div:SWI48 (match_operand:SWI48 2 "register_operand" "0") @@ -7670,6 +7700,79 @@ [(set_attr "type" "multi") (set_attr "mode" "")]) +(define_insn_and_split "divmodsi4_zext_1" + [(set (match_operand:DI 0 
"register_operand" "=a") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (zero_extend:DI (div:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (mod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. */ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "divmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 6) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) + (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (div:SI (match_dup 2) (match_dup 3))) + (use (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + operands[6] = gen_lowpart (SImode, operands[1]); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = 
operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. */ + emit_move_insn (operands[6], operands[2]); + operands[4] = operands[6]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn_and_split "*divmod4" [(set (match_operand:SWIM248 0 "register_operand" "=a") (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7705,6 +7808,77 @@ [(set_attr "type" "multi") (set_attr "mode" "")]) +(define_insn_and_split "*divmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (zero_extend:DI (div:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (mod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. 
*/ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*divmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(parallel [(set (match_dup 6) + (ashiftrt:SI (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) + (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (div:SI (match_dup 2) (match_dup 3))) + (use (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1); + operands[6] = gen_lowpart (SImode, operands[1]); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. 
*/ + emit_move_insn (operands[6], operands[2]); + operands[4] = operands[6]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn "*divmod4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7718,6 +7892,34 @@ [(set_attr "type" "idiv") (set_attr "mode" "")]) +(define_insn "*divmodsi4_noext_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (div:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=d") + (mod:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "idiv{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_insn "*divmodsi4_noext_zext_2" + [(set (match_operand:DI 1 "register_operand" "=d") + (zero_extend:DI + (mod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "idiv{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + (define_expand "divmodqi4" [(parallel [(set (match_operand:QI 0 "register_operand") (div:QI @@ -7808,6 +8010,38 @@ [(const_int 0)] "ix86_split_idivmod (mode, operands, false); DONE;") +(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 1 "register_operand") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, 
operands, false); DONE;") + +(define_split + [(set (match_operand:DI 1 "register_operand") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonimmediate_operand")))) + (set (match_operand:SI 0 "register_operand") + (udiv:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (SImode, operands, false); DONE;") + (define_insn_and_split "udivmod4_1" [(set (match_operand:SWI48 0 "register_operand" "=a") (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0") @@ -7830,6 +8064,52 @@ [(set_attr "type" "multi") (set_attr "mode" "")]) +(define_insn_and_split "udivmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (umod:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 0)) + (parallel [(set (match_dup 0) + (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (umod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "udivmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 4) (const_int 0)) + 
(parallel [(set (match_dup 1) + (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (udiv:SI (match_dup 2) (match_dup 3))) + (use (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (SImode, operands[1]);" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn_and_split "*udivmod4" [(set (match_operand:SWIM248 0 "register_operand" "=a") (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7851,6 +8131,50 @@ [(set_attr "type" "multi") (set_attr "mode" "")]) +(define_insn_and_split "*udivmodsi4_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=&d") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 0)) + (parallel [(set (match_dup 0) + (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 1) + (umod:SI (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*udivmodsi4_zext_2" + [(set (match_operand:DI 1 "register_operand" "=&d") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "reload_completed" + [(set (match_dup 4) (const_int 0)) + (parallel [(set (match_dup 1) + (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3)))) + (set (match_dup 0) + (udiv:SI (match_dup 2) (match_dup 3))) + (use (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (SImode, operands[1]);" + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + 
;; Optimize division or modulo by constant power of 2, if the constant ;; materializes only after expansion. (define_insn_and_split "*udivmod4_pow2" @@ -7877,6 +8201,60 @@ [(set_attr "type" "multi") (set_attr "mode" "")]) +(define_insn_and_split "*udivmodsi4_pow2_zext_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "const_int_operand" "n")))) + (set (match_operand:SI 1 "register_operand" "=r") + (umod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) + && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" + "#" + "&& 1" + [(set (match_dup 1) (match_dup 2)) + (parallel [(set (match_dup 0) + (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4)))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] +{ + int v = exact_log2 (UINTVAL (operands[3])); + operands[4] = GEN_INT (v); + operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1); +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*udivmodsi4_pow2_zext_2" + [(set (match_operand:DI 1 "register_operand" "=r") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "const_int_operand" "n")))) + (set (match_operand:SI 0 "register_operand" "=r") + (udiv:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000)) + && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0" + "#" + "&& 1" + [(set (match_dup 1) (match_dup 2)) + (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 1) + (zero_extend:DI (and:SI (match_dup 1) (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] +{ + int v 
= exact_log2 (UINTVAL (operands[3])); + operands[4] = GEN_INT (v); + operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1); +} + [(set_attr "type" "multi") + (set_attr "mode" "SI")]) + (define_insn "*udivmod4_noext" [(set (match_operand:SWIM248 0 "register_operand" "=a") (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") @@ -7890,6 +8268,34 @@ [(set_attr "type" "idiv") (set_attr "mode" "")]) +(define_insn "*udivmodsi4_noext_zext_1" + [(set (match_operand:DI 0 "register_operand" "=a") + (zero_extend:DI + (udiv:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 1 "register_operand" "=d") + (umod:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "div{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_insn "*udivmodsi4_noext_zext_2" + [(set (match_operand:DI 1 "register_operand" "=d") + (zero_extend:DI + (umod:SI (match_operand:SI 2 "register_operand" "0") + (match_operand:SI 3 "nonimmediate_operand" "rm")))) + (set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_dup 2) (match_dup 3))) + (use (match_operand:SI 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "div{l}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + (define_expand "udivmodqi4" [(parallel [(set (match_operand:QI 0 "register_operand") (udiv:QI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e5c66005808..ebcfb1b5181 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-09-30 Jakub Jelinek + + PR target/82361 + * gcc.target/i386/pr82361-1.c: New test. + * gcc.target/i386/pr82361-2.c: New test. 
+ 2017-09-29 Yury Gribov PR middle-end/82319 diff --git a/gcc/testsuite/gcc.target/i386/pr82361-1.c b/gcc/testsuite/gcc.target/i386/pr82361-1.c new file mode 100644 index 00000000000..fbef3c928ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82361-1.c @@ -0,0 +1,53 @@ +/* PR target/82361 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mtune=generic -masm=att -mno-8bit-idiv" } */ +/* We should be able to optimize all %eax to %rax zero extensions, because + div and idiv instructions with 32-bit operands zero-extend both results. */ +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage + one. */ +/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */ + +void +f1 (unsigned int a, unsigned int b) +{ + unsigned long long c = a / b; + unsigned long long d = a % b; + asm volatile ("" : : "r" (c), "r" (d)); +} + +void +f2 (int a, int b) +{ + unsigned long long c = (unsigned int) (a / b); + unsigned long long d = (unsigned int) (a % b); + asm volatile ("" : : "r" (c), "r" (d)); +} + +void +f3 (unsigned int a, unsigned int b) +{ + unsigned long long c = a / b; + asm volatile ("" : : "r" (c)); +} + +void +f4 (int a, int b) +{ + unsigned long long c = (unsigned int) (a / b); + asm volatile ("" : : "r" (c)); +} + +void +f5 (unsigned int a, unsigned int b) +{ + unsigned long long d = a % b; + asm volatile ("" : : "r" (d)); +} + +void +f6 (int a, int b) +{ + unsigned long long d = (unsigned int) (a % b); + asm volatile ("" : : "r" (d)); +} diff --git a/gcc/testsuite/gcc.target/i386/pr82361-2.c b/gcc/testsuite/gcc.target/i386/pr82361-2.c new file mode 100644 index 00000000000..c1e484d6e11 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82361-2.c @@ -0,0 +1,10 @@ +/* PR target/82361 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mtune=generic -masm=att -m8bit-idiv" } */ +/* We should be able to optimize all %eax to %rax zero extensions, 
because + div and idiv instructions with 32-bit operands zero-extend both results. */ +/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */ +/* Ditto %edx to %rdx zero extensions. */ +/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */ + +#include "pr82361-1.c" -- 2.30.2