From bb0f9c0249ee6a1b53e6ae8bdd9d3543991c7291 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Fri, 9 Dec 2016 20:31:06 +0100 Subject: [PATCH] rs6000: clz/ctz/ffs improvement (PR78683) On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for the ctz sequences than we do today. C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the same fixed value (only dependent on TARGET_* options). PR target/78683 * config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. (CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. Handle TARGET_POPCNTD the same as TARGET_CTZ. * config/rs6000/rs6000.md (ctz<mode>2): Reimplement. (ffs<mode>2): Reimplement. From-SVN: r243499 --- gcc/ChangeLog | 10 ++++++ gcc/config/rs6000/rs6000.h | 11 ++++--- gcc/config/rs6000/rs6000.md | 62 +++++++++++++++++++------------------ 3 files changed, 48 insertions(+), 35 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 96853f280a5..269f785d7a1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2016-12-09 Segher Boessenkool + + PR target/78683 + * config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use + GET_MODE_BITSIZE. Return 2. + (CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. Handle + TARGET_POPCNTD the same as TARGET_CTZ. + * config/rs6000/rs6000.md (ctz<mode>2): Reimplement. + (ffs<mode>2): Reimplement. + 2016-12-09 Andre Vieira PR rtl-optimization/78255 diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 5d56927f6f7..fe314bff8ca 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2199,14 +2199,15 @@ do { \ /* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((VALUE) = ((MODE) == SImode ? 32 : 64), 1) + ((VALUE) = GET_MODE_BITSIZE (MODE), 2) /* The CTZ patterns that are implemented in terms of CLZ return -1 for input of - zero. The hardware instructions added in Power9 return 32 or 64. 
*/ + zero. The hardware instructions added in Power9 and the sequences using + popcount return 32 or 64. */ #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ - ((!TARGET_CTZ) \ - ? ((VALUE) = -1, 1) \ - : ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)) + (TARGET_CTZ || TARGET_POPCNTD \ + ? ((VALUE) = GET_MODE_BITSIZE (MODE), 2) \ + : ((VALUE) = -1, 2)) /* Specify the machine mode that pointers have. After generation of rtl, the compiler makes no further distinction diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4726d73d573..777b996991b 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2220,17 +2220,8 @@ [(set_attr "type" "cntlz")]) (define_expand "ctz<mode>2" - [(set (match_dup 2) - (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" ""))) - (set (match_dup 3) - (and:GPR (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (clz:GPR (match_dup 3))) - (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "") - (minus:GPR (match_dup 5) - (match_dup 4))) - (clobber (reg:GPR CA_REGNO))])] + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))] "" { if (TARGET_CTZ) @@ -2239,10 +2230,26 @@ DONE; } - operands[2] = gen_reg_rtx (<MODE>mode); - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); - operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1); + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + + if (TARGET_POPCNTD) + { + emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx)); + emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1])); + emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2)); + emit_insn (gen_popcntd<mode>2 (operands[0], tmp3)); + } + else + { + emit_insn (gen_neg<mode>2 (tmp1, operands[1])); + emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1)); + emit_insn (gen_clz<mode>2 (tmp3, tmp2)); + emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3)); + } + + DONE; }) (define_insn "ctz<mode>2_hw" @@ -2253,23 +2260,18 @@ [(set_attr "type" "cntlz")]) 
(define_expand "ffs<mode>2" - [(set (match_dup 2) - (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" ""))) - (set (match_dup 3) - (and:GPR (match_dup 1) - (match_dup 2))) - (set (match_dup 4) - (clz:GPR (match_dup 3))) - (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "") - (minus:GPR (match_dup 5) - (match_dup 4))) - (clobber (reg:GPR CA_REGNO))])] + [(set (match_operand:GPR 0 "gpc_reg_operand") + (ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))] "" { - operands[2] = gen_reg_rtx (<MODE>mode); - operands[3] = gen_reg_rtx (<MODE>mode); - operands[4] = gen_reg_rtx (<MODE>mode); - operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx tmp3 = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (tmp1, operands[1])); + emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1)); + emit_insn (gen_clz<mode>2 (tmp3, tmp2)); + emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3)); + DONE; }) -- 2.30.2