From: Matt Turner Date: Thu, 11 Feb 2016 20:27:02 +0000 (-0800) Subject: i965: Lower min/max after optimization on Gen4/5. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2f2c00c7279e7c43e520e21de1781f8cec263e92;p=mesa.git i965: Lower min/max after optimization on Gen4/5. Gen4/5's SEL instruction cannot use conditional modifiers, so min/max are implemented as CMP + SEL. Handling that after optimization lets us CSE more. On Ironlake: total instructions in shared programs: 6426035 -> 6422753 (-0.05%) instructions in affected programs: 326604 -> 323322 (-1.00%) helped: 1411 total cycles in shared programs: 129184700 -> 129101586 (-0.06%) cycles in affected programs: 18950290 -> 18867176 (-0.44%) helped: 2419 HURT: 328 Reviewed-by: Francisco Jerez --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 41b10a45105..b5060408d95 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3476,6 +3476,36 @@ fs_visitor::lower_integer_multiplication() return progress; } +bool +fs_visitor::lower_minmax() +{ + assert(devinfo->gen < 6); + + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + const fs_builder ibld(this, block, inst); + + if (inst->opcode == BRW_OPCODE_SEL && + inst->predicate == BRW_PREDICATE_NONE) { + /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of + * the original SEL.L/GE instruction + */ + ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1], + inst->conditional_mod); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + static void setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, fs_reg *dst, fs_reg color, unsigned components) @@ -5138,6 +5168,13 @@ fs_visitor::optimize() OPT(opt_combine_constants); OPT(lower_integer_multiplication); + if (devinfo->gen <= 5 && OPT(lower_minmax)) { + OPT(opt_cmod_propagation); + OPT(opt_cse); + OPT(opt_copy_propagate); + OPT(dead_code_eliminate); + } + lower_uniform_pull_constant_loads(); validate(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7c3d85c2688..7446ca10cd1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -175,6 +175,7 @@ public: bool lower_load_payload(); bool lower_logical_sends(); bool lower_integer_multiplication(); + bool lower_minmax(); bool lower_simd_width(); bool opt_combine_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index dd3c383a17d..bb94c3d7071 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -375,14 +375,8 @@ namespace brw { { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 1d5d316ace9..cf2e782c630 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -950,28 +950,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_fmin: case nir_op_imin: case nir_op_umin: - if (devinfo->gen >= 6) { - inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_L; - } else { - bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L); - inst = bld.SEL(result, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; - } + inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; inst->saturate = instr->dest.saturate; break; case nir_op_fmax: case nir_op_imax: case nir_op_umax: - if (devinfo->gen >= 6) { - inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_GE; - } else { - bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE); - inst = bld.SEL(result, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; - } + inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_GE; inst->saturate = instr->dest.saturate; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e6c580eb899..a826fce6d54 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -26,6 +26,7 @@ #include "brw_cfg.h" #include "brw_vs.h" #include "brw_nir.h" +#include "brw_vec4_builder.h" #include "brw_vec4_live_variables.h" #include "brw_dead_control_flow.h" #include "program/prog_parameter.h" @@ -1632,6 +1633,36 @@ vec4_vs_visitor::setup_payload(void) this->first_non_payload_grf = reg; } +bool +vec4_visitor::lower_minmax() +{ + assert(devinfo->gen < 6); + + bool progress = false; + + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + const vec4_builder ibld(this, block, inst); + + if (inst->opcode == BRW_OPCODE_SEL && + inst->predicate == BRW_PREDICATE_NONE) { + /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of + * the original SEL.L/GE instruction + */ + ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1], + inst->conditional_mod); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + src_reg vec4_visitor::get_timestamp() { @@ -1904,6 +1935,13 @@ vec4_visitor::run() OPT(dead_code_eliminate); } + if (devinfo->gen <= 5 && OPT(lower_minmax)) { + OPT(opt_cmod_propagation); + OPT(opt_cse); + OPT(opt_copy_propagation); + OPT(dead_code_eliminate); + } + if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 14a5f0e428f..633f13c2c97 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -307,6 +307,8 @@ public: void resolve_ud_negate(src_reg *reg); + bool lower_minmax(); + src_reg get_timestamp(); void dump_instruction(backend_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h index 5d4b4520322..a0b390b0fd5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h @@ -303,14 +303,8 @@ namespace brw { emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6a8c76c4ea0..cfd4d9b5d00 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -679,18 +679,8 @@ vec4_instruction * vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { - vec4_instruction *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(dst, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } - + vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1); + inst->conditional_mod = conditionalmod; return inst; }