From 0c1d87b0d7e2c9f1ae6e838a8fa7f074557e45f0 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 25 Apr 2013 11:03:38 -0700 Subject: [PATCH] i965/vs: Add support for LRP instruction. Only 13 affected programs in shader-db, but they were all helped. total instructions in shared programs: 368877 -> 368851 (-0.01%) instructions in affected programs: 1576 -> 1550 (-1.65%) Reviewed-by: Chris Forbes Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +-- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 3 +++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 4 ++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 +++++++++++++- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index b3bd1b97667..5addff67318 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -152,8 +152,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) */ brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir); do_mat_op_to_vec(shader->ir); - const int lrp_to_arith = (intel->gen < 6 || stage != MESA_SHADER_FRAGMENT) - ? LRP_TO_ARITH : 0; + const int lrp_to_arith = intel->gen < 6 ? LRP_TO_ARITH : 0; lower_instructions(shader->ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c28092244a4..d34ed35ebc6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -386,6 +386,7 @@ public: vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); + vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x); int implied_mrf_writes(vec4_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 51ee4750490..f2c6cd60911 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -215,6 +215,9 @@ vec4_visitor::try_copy_propagation(struct intel_context *intel, if (has_source_modifiers && !can_do_source_mods(inst)) return false; + if (inst->opcode == BRW_OPCODE_LRP && value.file == UNIFORM) + return false; + /* We can't copy-propagate a UD negation into a condmod * instruction, because the condmod ends up looking at the 33-bit * signed accumulator value instead of the 32-bit value we wanted diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index c9963bff80a..96b4965045b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -838,6 +838,10 @@ vec4_generator::generate_code(exec_list *instructions) brw_F16TO32(p, dst, src[0]); break; + case BRW_OPCODE_LRP: + brw_LRP(p, dst, src[0], src[1], src[2]); + break; + case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 69e805d1e1a..88c435ca292 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -107,6 +107,14 @@ vec4_visitor::emit(enum opcode opcode) src0, src1); \ } +#define ALU3(op) \ + vec4_instruction * \ + vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\ + { \ + return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \ + src0, src1, src2); \ + } + ALU1(NOT) ALU1(MOV) ALU1(FRC) @@ -127,6 +135,7 @@ ALU2(DPH) ALU2(SHL) ALU2(SHR) ALU2(ASR) +ALU3(LRP) /** Gen4 predicated IF. */ vec4_instruction * @@ -1619,7 +1628,10 @@ vec4_visitor::visit(ir_expression *ir) } case ir_triop_lrp: - assert(!"not reached: should be handled by lrp_to_arith"); + op[0] = fix_3src_operand(op[0]); + op[1] = fix_3src_operand(op[1]); + op[2] = fix_3src_operand(op[2]); + emit(LRP(result_dst, op[0], op[1], op[2])); break; case ir_quadop_vector: -- 2.30.2