From: Ian Romanick Date: Wed, 11 Jun 2014 01:07:50 +0000 (-0700) Subject: i965/vec4: Emit smarter code for b2f of a comparison X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=63117ac32948310c87e30f67b475a07f98884633;p=mesa.git i965/vec4: Emit smarter code for b2f of a comparison Previously we would emit the comparison, emit an AND to mask off extra bits from the comparison result, then convert the result to float. Now, do the comparison, then use a cleverly constructed SEL to pick either 0.0f or 1.0f. No piglit regressions on Ivybridge. total instructions in shared programs: 1642311 -> 1639449 (-0.17%) instructions in affected programs: 136533 -> 133671 (-2.10%) GAINED: 0 LOST: 0 Programs that are affected appear to save between 1 and 5 instructions (just by skimming the output from shader-db report.py). v2: s/b2i/b2f/ in commit subject (noticed by Chris Forbes). Remove extraneous fix_3src_operand (suggested by Matt). The latter change required swapping the order of the operands and using predicate_inverse. 
Signed-off-by: Ian Romanick Reviewed-by: Matt Turner --- diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 614ff0b7faa..20d717a77fb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -587,6 +587,7 @@ public: bool try_emit_sat(ir_expression *ir); bool try_emit_mad(ir_expression *ir); + bool try_emit_b2f_of_compare(ir_expression *ir); void resolve_ud_negate(src_reg *reg); src_reg get_timestamp(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 249072c64b7..7fd8c2b3761 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1126,6 +1126,48 @@ vec4_visitor::try_emit_mad(ir_expression *ir) return true; } +/** + * Try to emit a b2f whose operand is a comparison as a CMP followed by a + * predicated SEL. + * + * Returns false, before visiting any operand or emitting any instruction, + * when operand 0 of \c ir is not an expression or its operation is not one + * of the six comparisons listed in the switch below. Otherwise emits the + * CMP and SEL, leaves the value in this->result, and returns true. + */ +bool +vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) +{ + ir_expression *const cmp = ir->operands[0]->as_expression(); + + if (cmp == NULL) + return false; + + /* Only these comparison operations are handled here; for anything else + * the caller continues with its generic path. + */ + switch (cmp->operation) { + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + break; + + default: + return false; + } + + /* Visit each comparison operand and capture the register it produced. */ + cmp->operands[0]->accept(this); + const src_reg cmp_src0 = this->result; + + cmp->operands[1]->accept(this); + const src_reg cmp_src1 = this->result; + + /* The result register is allocated with the type of the b2f expression. */ + this->result = src_reg(this, ir->type); + + emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1, + brw_conditional_for_comparison(cmp->operation))); + + /* If the comparison is false, this->result will just happen to be zero. + * The SEL below therefore lists this->result first and 1.0f second and + * sets predicate_inverse, so that the result is either that zero or 1.0f. + * NOTE(review): this relies on the CMP writing zero to its destination + * when the comparison fails -- confirm against the hardware documentation. 
+ */ vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result), + this->result, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; + + return true; +} + void vec4_visitor::emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1) @@ -1202,6 +1244,11 @@ vec4_visitor::visit(ir_expression *ir) return; } + if (ir->operation == ir_unop_b2f) { + if (try_emit_b2f_of_compare(ir)) + return; + } + for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = BAD_FILE; ir->operands[operand]->accept(this);