From 5c5218ea6163f694a256562df1d73a108396e40d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Mar 2013 15:28:11 -0700 Subject: [PATCH] i965/fs: Switch shader_time writes to using GRFs. This avoids conflicts between shader_time and FB writes, so we can include more of the program under our profiling. This does mean hiding more of the message setup from the optimizer, which doesn't have a way to handle multi-reg sends from GRFs. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu.h | 2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++-- src/mesa/drivers/dri/i965/brw_fs.cpp | 29 ++++++++-------- src/mesa/drivers/dri/i965/brw_fs.h | 5 +++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 37 ++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 +- 6 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 60ce231bac5..6ef1f83b5ca 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -305,7 +305,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, GLuint offset); void brw_shader_time_add(struct brw_compile *p, - int mrf, + struct brw_reg payload, uint32_t surf_index); /* If/else/endif. Works by manipulating the execution flags on each diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 23556264269..2578bf89cca 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2436,7 +2436,7 @@ brw_svb_write(struct brw_compile *p, * messages. */ void brw_shader_time_add(struct brw_compile *p, - int base_mrf, + struct brw_reg payload, uint32_t surf_index) { struct intel_context *intel = &p->brw->intel; @@ -2453,8 +2453,8 @@ void brw_shader_time_add(struct brw_compile *p, */ brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0)); - brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - base_mrf, 0)); + brw_set_src0(p, send, brw_vec1_reg(payload.file, + payload.nr, 0)); uint32_t sfid, msg_type; if (intel->is_haswell) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 01a1ec06ac0..9ea7339f581 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -377,6 +377,7 @@ bool fs_inst::is_send_from_grf() { return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 || + opcode == SHADER_OPCODE_SHADER_TIME_ADD || (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD && src[1].file == GRF)); } @@ -607,19 +608,16 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type, { int shader_time_index = brw_get_shader_time_index(brw, prog, &fp->Base, type); - int base_mrf = 6; + fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE); - fs_reg offset_mrf = fs_reg(MRF, base_mrf); - offset_mrf.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE))); - - fs_reg time_mrf = fs_reg(MRF, base_mrf + 1); - time_mrf.type = BRW_REGISTER_TYPE_UD; - emit(MOV(time_mrf, value)); + fs_reg payload; + if (dispatch_width == 8) + payload = fs_reg(this, glsl_type::uvec2_type); + else + payload = fs_reg(this, glsl_type::uint_type); - fs_inst *inst = emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD)); - inst->base_mrf = base_mrf; - inst->mlen = 2; + emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD, + fs_reg(), payload, offset, value)); } void @@ -735,8 +733,6 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: return 1; - case SHADER_OPCODE_SHADER_TIME_ADD: - return 0; case FS_OPCODE_FB_WRITE: return 2; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: @@ -1359,6 +1355,13 @@ fs_visitor::split_virtual_grfs() if (inst->regs_written() > 1) { split_grf[inst->dst.reg] = false; } + + /* If we're sending from a GRF, don't split it, on the assumption that + * the send is reading the whole thing. + */ + if (inst->is_send_from_grf()) { + split_grf[inst->src[0].reg] = false; + } } /* Allocate new space for split regs. Note that the virtual diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1fabec45cf8..d9d17a2520e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -559,6 +559,11 @@ private: struct brw_reg dst, struct brw_reg src); + void generate_shader_time_add(fs_inst *inst, + struct brw_reg payload, + struct brw_reg offset, + struct brw_reg value); + void patch_discard_jumps_to_fb_writes(); struct brw_context *brw; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 712fef6e093..5db481c60ee 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -971,6 +971,41 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, brw_F16TO32(p, dst, src_w); } +void +fs_generator::generate_shader_time_add(fs_inst *inst, + struct brw_reg payload, + struct brw_reg offset, + struct brw_reg value) +{ + assert(intel->gen >= 7); + brw_push_insn_state(p); + brw_set_mask_control(p, true); + + assert(payload.file == BRW_GENERAL_REGISTER_FILE); + struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0), + offset.type); + struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0), + value.type); + + assert(offset.file == BRW_IMMEDIATE_VALUE); + if (value.file == BRW_GENERAL_REGISTER_FILE) { + value.width = BRW_WIDTH_1; + value.hstride = BRW_HORIZONTAL_STRIDE_0; + value.vstride = BRW_VERTICAL_STRIDE_0; + } else { + assert(value.file == BRW_IMMEDIATE_VALUE); + } + + /* Trying to deal with setup of the params from the IR is crazy in the FS8 + * case, and we don't really care about squeezing every bit of performance + * out of this path, so we just emit the MOVs from here. + */ + brw_MOV(p, payload_offset, offset); + brw_MOV(p, payload_value, value); + brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME); + brw_pop_insn_state(p); +} + void fs_generator::generate_code(exec_list *instructions) { @@ -1291,7 +1326,7 @@ fs_generator::generate_code(exec_list *instructions) break; case SHADER_OPCODE_SHADER_TIME_ADD: - brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME); + generate_shader_time_add(inst, src[0], src[1], src[2]); break; case FS_OPCODE_SET_SIMD4X2_OFFSET: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index b73711cf7ad..54f3efd08ec 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -674,7 +674,8 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction, break; case SHADER_OPCODE_SHADER_TIME_ADD: - brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_VS_SHADER_TIME); + brw_shader_time_add(p, brw_message_reg(inst->base_mrf), + SURF_INDEX_VS_SHADER_TIME); break; default: -- 2.30.2