From c7893dc3c590b86787d8118e3920debaea3f16da Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 19 Jun 2015 14:46:03 -0700 Subject: [PATCH] i965: Use a single index per shader for shader_time. Previously, each shader took 3 shader time indices which were potentially at arbirary points in the shader time buffer. Now, each shader gets a single index which refers to 3 consecutive locations in the buffer. This simplifies some of the logic at the cost of having a magic 3 a few places. Reviewed-by: Kenneth Graunke Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_context.h | 14 +--- src/mesa/drivers/dri/i965/brw_fs.cpp | 28 +++----- src/mesa/drivers/dri/i965/brw_fs.h | 3 +- src/mesa/drivers/dri/i965/brw_program.c | 67 ++++++------------- src/mesa/drivers/dri/i965/brw_vec4.cpp | 18 ++--- src/mesa/drivers/dri/i965/brw_vec4.h | 10 +-- .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 3 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 8 +-- .../drivers/dri/i965/brw_vec4_vs_visitor.cpp | 2 +- .../dri/i965/test_vec4_copy_propagation.cpp | 3 +- .../dri/i965/test_vec4_register_coalesce.cpp | 3 +- 11 files changed, 55 insertions(+), 104 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d8fcfffcd10..a7d83f8d7b4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -821,20 +821,10 @@ struct brw_tracked_state { enum shader_time_shader_type { ST_NONE, ST_VS, - ST_VS_WRITTEN, - ST_VS_RESET, ST_GS, - ST_GS_WRITTEN, - ST_GS_RESET, ST_FS8, - ST_FS8_WRITTEN, - ST_FS8_RESET, ST_FS16, - ST_FS16_WRITTEN, - ST_FS16_RESET, ST_CS, - ST_CS_WRITTEN, - ST_CS_RESET, }; struct brw_vertex_buffer { @@ -979,6 +969,8 @@ enum brw_predicate_state { BRW_PREDICATE_STATE_USE_BIT }; +struct shader_times; + /** * brw_context is derived from gl_context. */ @@ -1503,7 +1495,7 @@ struct brw_context const char **names; int *ids; enum shader_time_shader_type *types; - uint64_t *cumulative; + struct shader_times *cumulative; int num_entries; int max_entries; double report_time; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0c11a9ef65f..2839b9f7b89 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -578,38 +578,30 @@ fs_visitor::emit_shader_time_begin() void fs_visitor::emit_shader_time_end() { - enum shader_time_shader_type type, written_type, reset_type; + enum shader_time_shader_type type; switch (stage) { case MESA_SHADER_VERTEX: type = ST_VS; - written_type = ST_VS_WRITTEN; - reset_type = ST_VS_RESET; break; case MESA_SHADER_GEOMETRY: type = ST_GS; - written_type = ST_GS_WRITTEN; - reset_type = ST_GS_RESET; break; case MESA_SHADER_FRAGMENT: if (dispatch_width == 8) { type = ST_FS8; - written_type = ST_FS8_WRITTEN; - reset_type = ST_FS8_RESET; } else { assert(dispatch_width == 16); type = ST_FS16; - written_type = ST_FS16_WRITTEN; - reset_type = ST_FS16_RESET; } break; case MESA_SHADER_COMPUTE: type = ST_CS; - written_type = ST_CS_WRITTEN; - reset_type = ST_CS_RESET; break; default: unreachable("fs_visitor::emit_shader_time_end missing code"); } + int shader_time_index = brw_get_shader_time_index(brw, shader_prog, prog, + type); /* Insert our code just before the final SEND with EOT. */ exec_node *end = this->instructions.get_tail(); @@ -639,20 +631,20 @@ fs_visitor::emit_shader_time_end() * trying to determine the time taken for single instructions. */ ibld.ADD(diff, diff, fs_reg(-2u)); - SHADER_TIME_ADD(ibld, type, diff); - SHADER_TIME_ADD(ibld, written_type, fs_reg(1u)); + SHADER_TIME_ADD(ibld, shader_time_index, 0, diff); + SHADER_TIME_ADD(ibld, shader_time_index, 1, fs_reg(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u)); + SHADER_TIME_ADD(ibld, shader_time_index, 2, fs_reg(1u)); ibld.emit(BRW_OPCODE_ENDIF); } void fs_visitor::SHADER_TIME_ADD(const fs_builder &bld, - enum shader_time_shader_type type, fs_reg value) + int shader_time_index, int shader_time_subindex, + fs_reg value) { - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE); + int index = shader_time_index * 3 + shader_time_subindex; + fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE); fs_reg payload; if (dispatch_width == 8) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cffedc0c6d1..55a97228bb4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -278,7 +278,8 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); void SHADER_TIME_ADD(const brw::fs_builder &bld, - enum shader_time_shader_type type, fs_reg value); + int shader_time_index, int shader_time_subindex, + fs_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg dst, fs_reg offset, fs_reg src0, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 414eab9c002..2327af77ad3 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -287,18 +287,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->MemoryBarrier = brw_memory_barrier; } +struct shader_times { + uint64_t time; + uint64_t written; + uint64_t reset; +}; + void brw_init_shader_time(struct brw_context *brw) { - const int max_entries = 4096; - brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time", - max_entries * SHADER_TIME_STRIDE, - 4096); + const int max_entries = 2048; + brw->shader_time.bo = + drm_intel_bo_alloc(brw->bufmgr, "shader time", + max_entries * SHADER_TIME_STRIDE * 3, 4096); brw->shader_time.names = rzalloc_array(brw, const char *, max_entries); brw->shader_time.ids = rzalloc_array(brw, int, max_entries); brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, max_entries); - brw->shader_time.cumulative = rzalloc_array(brw, uint64_t, + brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times, max_entries); brw->shader_time.max_entries = max_entries; } @@ -318,27 +324,6 @@ compare_time(const void *a, const void *b) return 1; } -static void -get_written_and_reset(struct brw_context *brw, int i, - uint64_t *written, uint64_t *reset) -{ - enum shader_time_shader_type type = brw->shader_time.types[i]; - assert(type == ST_VS || type == ST_GS || type == ST_FS8 || - type == ST_FS16 || type == ST_CS); - - /* Find where we recorded written and reset. */ - int wi, ri; - - for (wi = i; brw->shader_time.types[wi] != type + 1; wi++) - ; - - for (ri = i; brw->shader_time.types[ri] != type + 2; ri++) - ; - - *written = brw->shader_time.cumulative[wi]; - *reset = brw->shader_time.cumulative[ri]; -} - static void print_shader_time_line(const char *stage, const char *name, int shader_num, uint64_t time, uint64_t total) @@ -374,26 +359,13 @@ brw_report_shader_time(struct brw_context *brw) sorted[i] = &scaled[i]; switch (type) { - case ST_VS_WRITTEN: - case ST_VS_RESET: - case ST_GS_WRITTEN: - case ST_GS_RESET: - case ST_FS8_WRITTEN: - case ST_FS8_RESET: - case ST_FS16_WRITTEN: - case ST_FS16_RESET: - case ST_CS_WRITTEN: - case ST_CS_RESET: - /* We'll handle these when along with the time. */ - scaled[i] = 0; - continue; - case ST_VS: case ST_GS: case ST_FS8: case ST_FS16: case ST_CS: - get_written_and_reset(brw, i, &written, &reset); + written = brw->shader_time.cumulative[i].written; + reset = brw->shader_time.cumulative[i].reset; break; default: @@ -405,7 +377,7 @@ brw_report_shader_time(struct brw_context *brw) break; } - uint64_t time = brw->shader_time.cumulative[i]; + uint64_t time = brw->shader_time.cumulative[i].time; if (written) { scaled[i] = time / written * (written + reset); } else { @@ -491,16 +463,19 @@ brw_collect_shader_time(struct brw_context *brw) * overhead compared to the cost of tracking the time in the first place. */ drm_intel_bo_map(brw->shader_time.bo, true); - - uint32_t *times = brw->shader_time.bo->virtual; + void *bo_map = brw->shader_time.bo->virtual; for (int i = 0; i < brw->shader_time.num_entries; i++) { - brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4]; + uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE; + + brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4]; + brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4]; + brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4]; } /* Zero the BO out to clear it out for our next collection. */ - memset(times, 0, brw->shader_time.bo->size); + memset(bo_map, 0, brw->shader_time.bo->size); drm_intel_bo_unmap(brw->shader_time.bo); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 82f055f229e..234ee188c27 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1676,20 +1676,21 @@ vec4_visitor::emit_shader_time_end() */ emit(ADD(diff, src_reg(diff), src_reg(-2u))); - emit_shader_time_write(st_base, src_reg(diff)); - emit_shader_time_write(st_written, src_reg(1u)); + int shader_time_index = + brw_get_shader_time_index(brw, shader_prog, prog, st_type); + + emit_shader_time_write(shader_time_index, 0, src_reg(diff)); + emit_shader_time_write(shader_time_index, 1, src_reg(1u)); emit(BRW_OPCODE_ELSE); - emit_shader_time_write(st_reset, src_reg(1u)); + emit_shader_time_write(shader_time_index, 2, src_reg(1u)); emit(BRW_OPCODE_ENDIF); } void -vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, +vec4_visitor::emit_shader_time_write(int shader_time_index, + int shader_time_subindex, src_reg value) { - int shader_time_index = - brw_get_shader_time_index(brw, shader_prog, prog, type); - dst_reg dst = dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2)); @@ -1698,7 +1699,8 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, time.reg_offset++; offset.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE))); + int index = shader_time_index * 3 + shader_time_subindex; + emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; emit(MOV(time, src_reg(value))); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 2228d478d09..8d332af17f4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -85,9 +85,7 @@ public: gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset); + shader_time_shader_type st_type); ~vec4_visitor(); dst_reg dst_null_f() @@ -345,7 +343,7 @@ public: void emit_shader_time_begin(); void emit_shader_time_end(); - void emit_shader_time_write(enum shader_time_shader_type type, + void emit_shader_time_write(int shader_time_index, int shader_time_subindex, src_reg value); void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, @@ -413,9 +411,7 @@ private: */ const bool no_spills; - const shader_time_shader_type st_base; - const shader_time_shader_type st_written; - const shader_time_shader_type st_reset; + const shader_time_shader_type st_type; }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index b047aa18e0a..d3754de0ca3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -41,8 +41,7 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, bool no_spills) : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, - no_spills, - ST_GS, ST_GS_WRITTEN, ST_GS_RESET), + no_spills, ST_GS), c(c) { } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 669f76973ea..5ae572b4c41 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3688,9 +3688,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, gl_shader_stage stage, void *mem_ctx, bool no_spills, - shader_time_shader_type st_base, - shader_time_shader_type st_written, - shader_time_shader_type st_reset) + shader_time_shader_type st_type) : backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage), c(c), key(key), @@ -3700,9 +3698,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), no_spills(no_spills), - st_base(st_base), - st_written(st_written), - st_reset(st_reset) + st_type(st_type) { this->failed = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 4baf73ebde1..731176afd18 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -221,7 +221,7 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw, &vs_compile->key.base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, - ST_VS, ST_VS_WRITTEN, ST_VS_RESET), + ST_VS), vs_compile(vs_compile), vs_prog_data(vs_prog_data) { diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 2ef52e9fd6b..8a867366517 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -48,8 +48,7 @@ public: struct gl_shader_program *shader_prog) : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, - ST_NONE, ST_NONE, ST_NONE) + false /* no_spills */, ST_NONE) { } diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index c8c67574e95..87ebdfa6e9c 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -51,8 +51,7 @@ public: struct gl_shader_program *shader_prog) : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, MESA_SHADER_VERTEX, NULL, - false /* no_spills */, - ST_NONE, ST_NONE, ST_NONE) + false /* no_spills */, ST_NONE) { } -- 2.30.2