From d5f7aebac2b1afbc5023cd114174860d8d763d06 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 4 Apr 2013 14:10:18 -0700
Subject: [PATCH] i965/vs: Use GRFs for pull constant offsets on gen7.

This allows the computation of the offset to get written directly into the
message source.

shader-db results:
total instructions in shared programs: 3308390 -> 3283025 (-0.77%)
instructions in affected programs: 442998 -> 417633 (-5.73%)

No difference in GLB2.7 low res (n=9).

Reviewed-by: Matt Turner
---
Note: the line breaks and indentation of this patch were reconstructed from a
whitespace-collapsed copy. Hunk line counts were checked against the @@
headers, but the exact indentation (tabs vs. spaces) and a few blank-line
positions could not be verified, so it may need `git apply --ignore-whitespace`
or manual fix-up to apply.

 src/mesa/drivers/dri/i965/brw_defines.h       |  1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp      |  2 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |  8 +++-
 src/mesa/drivers/dri/i965/brw_vec4.h          |  4 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   | 45 ++++++++++++-------
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 18 ++++++--
 6 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3d07c36b747..a13f9dc57ab 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -739,6 +739,7 @@ enum opcode {
    VS_OPCODE_SCRATCH_READ,
    VS_OPCODE_SCRATCH_WRITE,
    VS_OPCODE_PULL_CONSTANT_LOAD,
+   VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
 };
 
 #define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1a520391ec1..b3bd1b97667 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op)
       return "scratch_write";
    case VS_OPCODE_PULL_CONSTANT_LOAD:
       return "pull_constant_load";
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return "pull_constant_load_gen7";
 
    default:
       /* Yes, this leaks. It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e470ac80ab5..67dd17a45e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -230,7 +230,13 @@ vec4_instruction::is_math()
 bool
 vec4_instruction::is_send_from_grf()
 {
-   return opcode == SHADER_OPCODE_SHADER_TIME_ADD;
+   switch (opcode) {
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return true;
+   default:
+      return false;
+   }
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 8f130e15428..e286925a0ea 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -544,6 +544,10 @@ private:
                                     struct brw_reg dst,
                                     struct brw_reg index,
                                     struct brw_reg offset);
+   void generate_pull_constant_load_gen7(vec4_instruction *inst,
+                                         struct brw_reg dst,
+                                         struct brw_reg surf_index,
+                                         struct brw_reg offset);
 
    struct brw_context *brw;
    struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e378f7fd5f0..963901c029a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                             struct brw_reg index,
                                             struct brw_reg offset)
 {
+   assert(intel->gen <= 7);
    assert(index.file == BRW_IMMEDIATE_VALUE &&
           index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.dw1.ud;
 
-   if (intel->gen == 7) {
-      gen6_resolve_implied_move(p, &offset, inst->base_mrf);
-      brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
-      brw_set_dest(p, insn, dst);
-      brw_set_src0(p, insn, offset);
-      brw_set_sampler_message(p, insn,
-                              surf_index,
-                              0, /* LD message ignores sampler unit */
-                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
-                              1, /* rlen */
-                              1, /* mlen */
-                              false, /* no header */
-                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                              0);
-      return;
-   }
-
    struct brw_reg header = brw_vec8_grf(0, 0);
 
    gen6_resolve_implied_move(p, &header, inst->base_mrf);
@@ -613,6 +597,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                       1 /* rlen */);
 }
 
+void
+vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg surf_index,
+                                                 struct brw_reg offset)
+{
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+          surf_index.type == BRW_REGISTER_TYPE_UD);
+
+   brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, offset);
+   brw_set_sampler_message(p, insn,
+                           surf_index.dw1.ud,
+                           0, /* LD message ignores sampler unit */
+                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
+                           1, /* rlen */
+                           1, /* mlen */
+                           false, /* no header */
+                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+                           0);
+}
+
 void
 vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
                                         struct brw_reg dst,
@@ -673,6 +680,10 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
       generate_pull_constant_load(inst, dst, src[0], src[1]);
       break;
 
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+      break;
+
    case SHADER_OPCODE_SHADER_TIME_ADD:
       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ca1cfe8b879..ed8e65deb33 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2880,10 +2880,20 @@ vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
    src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
    vec4_instruction *load;
 
-   load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
-                                        temp, index, offset);
-   load->base_mrf = 14;
-   load->mlen = 1;
+   if (intel->gen >= 7) {
+      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+      grf_offset.type = offset.type;
+      emit_before(inst, MOV(grf_offset, offset));
+
+      load = new(mem_ctx) vec4_instruction(this,
+                                           VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+                                           temp, index, src_reg(grf_offset));
+   } else {
+      load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+                                           temp, index, offset);
+      load->base_mrf = 14;
+      load->mlen = 1;
+   }
    emit_before(inst, load);
 }
 
-- 
2.30.2