From 263a7e4cd992738814575b04d2de24ca0a0ad08a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 6 Jun 2013 10:14:41 -0700 Subject: [PATCH] i965/vs: Use the MAD instruction when possible. This is different from how we do it in the FS - we are using MAD even when some of the args are constants, because with the relatively unrestrained ability to schedule a MOV to prepare a temporary with that data, we can get lower latency for the sequence of instructions. No significant performance difference on GLB2.7 trex (n=33/34), though it doesn't have that many MADs. I noticed MAD opportunities while reading the code for the DOTA2 bug. Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + .../dri/i965/brw_vec4_copy_propagation.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 4 ++ .../drivers/dri/i965/brw_vec4_visitor.cpp | 37 +++++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index e6e59bc9af9..a72d6941394 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -468,6 +468,7 @@ public: int base_offset); bool try_emit_sat(ir_expression *ir); + bool try_emit_mad(ir_expression *ir, int mul_arg); void resolve_ud_negate(src_reg *reg); src_reg get_timestamp(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 39eef4b0d65..1a667ebf2b2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -216,6 +216,7 @@ vec4_visitor::try_copy_propagation(struct intel_context *intel, return false; bool is_3src_inst = (inst->opcode == BRW_OPCODE_LRP || + inst->opcode == BRW_OPCODE_MAD || inst->opcode == BRW_OPCODE_BFE || inst->opcode == BRW_OPCODE_BFI2); if (is_3src_inst && value.file == UNIFORM) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 91101f29b0d..fbb93db6cdd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -772,6 +772,10 @@ vec4_generator::generate_code(exec_list *instructions) brw_set_acc_write_control(p, 0); break; + case BRW_OPCODE_MAD: + brw_MAD(p, dst, src[0], src[1], src[2]); + break; + case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 33c1b2483c4..451f7d5991b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1250,6 +1250,38 @@ vec4_visitor::try_emit_sat(ir_expression *ir) return true; } +bool +vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg) +{ + /* 3-src instructions were introduced in gen6. */ + if (intel->gen < 6) + return false; + + /* MAD can only handle floating-point data. */ + if (ir->type->base_type != GLSL_TYPE_FLOAT) + return false; + + ir_rvalue *nonmul = ir->operands[1 - mul_arg]; + ir_expression *mul = ir->operands[mul_arg]->as_expression(); + + if (!mul || mul->operation != ir_binop_mul) + return false; + + nonmul->accept(this); + src_reg src0 = fix_3src_operand(this->result); + + mul->operands[0]->accept(this); + src_reg src1 = fix_3src_operand(this->result); + + mul->operands[1]->accept(this); + src_reg src2 = fix_3src_operand(this->result); + + this->result = src_reg(this, ir->type); + emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2); + + return true; +} + void vec4_visitor::emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1) @@ -1293,6 +1325,11 @@ vec4_visitor::visit(ir_expression *ir) if (try_emit_sat(ir)) return; + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) + return; + } + for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = BAD_FILE; ir->operands[operand]->accept(this); -- 2.30.2