From 7642c1de6b65b7dfd9e39904291cc9737cd54b56 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 08:09:10 -0700 Subject: [PATCH] i965/vs: Avoid generating a MOV for most ir_assignment handling. Removes an average of 11.5% of instructions in 54% of vertex shaders in shader-db. --- src/mesa/drivers/dri/i965/brw_vec4.h | 6 ++ .../drivers/dri/i965/brw_vec4_visitor.cpp | 67 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 082021513d2..620b05570a6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -388,6 +388,12 @@ public: vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1, src_reg src2); + bool try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst); + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ void visit_instructions(const exec_list *list); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b1792a8ee16..ae733810757 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1342,6 +1342,63 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, src->reg_offset++; } + +/* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ +bool +vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst) +{ + /* This could be supported, but it would take more smarts. */ + if (ir->condition) + return false; + + if (pre_rhs_inst == last_rhs_inst) + return false; /* No instructions generated to work with. */ + + /* Make sure the last instruction generated our source reg. */ + if (src.file != GRF || + src.file != last_rhs_inst->dst.file || + src.reg != last_rhs_inst->dst.reg || + src.reg_offset != last_rhs_inst->dst.reg_offset || + src.reladdr || + src.abs || + src.negate || + last_rhs_inst->predicate != BRW_PREDICATE_NONE) + return false; + + /* Check that that last instruction fully initialized the channels + * we want to use, in the order we want to use them. We could + * potentially reswizzle the operands of many instructions so that + * we could handle out of order channels, but don't yet. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + if (!(last_rhs_inst->dst.writemask & (1 << i))) + return false; + + if (BRW_GET_SWZ(src.swizzle, i) != i) + return false; + } + } + + /* Success! Rewrite the instruction. */ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + void vec4_visitor::visit(ir_assignment *ir) { @@ -1363,7 +1420,13 @@ vec4_visitor::visit(ir_assignment *ir) /* Now we're down to just a scalar/vector with writemasks. */ int i; + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + src_reg src = this->result; int swizzles[4]; @@ -1396,6 +1459,10 @@ vec4_visitor::visit(ir_assignment *ir) src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], swizzles[2], swizzles[3]); + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + if (ir->condition) { emit_bool_to_cond_code(ir->condition); } -- 2.30.2