From 6579f562c307d12a2654b511a7ef85f7b4cddeae Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 26 Mar 2020 16:27:32 -0700 Subject: [PATCH] intel/ir: Use brw::performance object instead of CFG cycle counts for codegen stats. These should be more accurate than the current cycle counts, since among other things they consider the effect of post-scheduling passes like the software scoreboard on TGL. In addition it will enable us to clean up some of the now redundant cycle-count estimation functionality in the instruction scheduler. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.cpp | 16 ++++++++++------ src/intel/compiler/brw_fs.h | 1 + src/intel/compiler/brw_fs_generator.cpp | 7 ++++--- src/intel/compiler/brw_shader.cpp | 7 +++++-- src/intel/compiler/brw_vec4.cpp | 7 +++++-- src/intel/compiler/brw_vec4.h | 1 + src/intel/compiler/brw_vec4_generator.cpp | 10 ++++++---- src/intel/compiler/brw_vec4_gs_visitor.cpp | 11 ++++++++--- src/intel/compiler/brw_vec4_tcs.cpp | 7 +++++-- 9 files changed, 45 insertions(+), 22 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 57d12139233..ccefdb081fd 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -8845,21 +8845,24 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, if (simd8_cfg) { prog_data->dispatch_8 = true; - g.generate_code(simd8_cfg, 8, v8->shader_stats, stats); + g.generate_code(simd8_cfg, 8, v8->shader_stats, + v8->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; } if (simd16_cfg) { prog_data->dispatch_16 = true; - prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16, - v16->shader_stats, stats); + prog_data->prog_offset_16 = g.generate_code( + simd16_cfg, 16, v16->shader_stats, + v16->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; } if (simd32_cfg) { prog_data->dispatch_32 = true; - prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32, - v32->shader_stats, stats); + prog_data->prog_offset_32 = g.generate_code( + simd32_cfg, 32, v32->shader_stats, + v32->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; } @@ -9118,7 +9121,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, g.enable_debug(name); } - g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats, stats); + g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats, + v->performance_analysis.require(), stats); ret = g.get_assembly(); } diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4e8f8ccac78..b9783eb2285 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -477,6 +477,7 @@ public: void enable_debug(const char *shader_name); int generate_code(const cfg_t *cfg, int dispatch_width, struct shader_stats shader_stats, + const brw::performance &perf, struct brw_compile_stats *stats); const unsigned *get_assembly(); diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index d7464c95297..dc524a61e1d 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1715,6 +1715,7 @@ fs_generator::enable_debug(const char *shader_name) int fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, struct shader_stats shader_stats, + const brw::performance &perf, struct brw_compile_stats *stats) { /* align to 64 byte boundary. */ @@ -2462,7 +2463,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, "Compacted %d to %d bytes (%.0f%%)\n", shader_name, sha1buf, dispatch_width, before_size / 16, - loop_count, cfg->cycle_count, + loop_count, perf.latency, spill_count, fill_count, send_count, shader_stats.scheduler_mode, shader_stats.promoted_constants, @@ -2487,7 +2488,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, "compacted %d to %d bytes.", _mesa_shader_stage_to_abbrev(stage), dispatch_width, before_size / 16 - nop_count, - loop_count, cfg->cycle_count, + loop_count, perf.latency, spill_count, fill_count, send_count, shader_stats.scheduler_mode, shader_stats.promoted_constants, @@ -2497,7 +2498,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, stats->instructions = before_size / 16 - nop_count; stats->sends = send_count; stats->loops = loop_count; - stats->cycles = cfg->cycle_count; + stats->cycles = perf.latency; stats->spills = spill_count; stats->fills = fill_count; } diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index b0a75f067a6..470d77227ab 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1371,7 +1371,8 @@ brw_compile_tes(const struct brw_compiler *compiler, nir->info.name)); } - g.generate_code(v.cfg, 8, v.shader_stats, stats); + g.generate_code(v.cfg, 8, v.shader_stats, + v.performance_analysis.require(), stats); assembly = g.get_assembly(); } else { @@ -1387,7 +1388,9 @@ brw_compile_tes(const struct brw_compiler *compiler, v.dump_instructions(); assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, - &prog_data->base, v.cfg, stats); + &prog_data->base, v.cfg, + v.performance_analysis.require(), + stats); } return assembly; diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 6f05046bca6..864ece7e636 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2999,7 +2999,8 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, g.enable_debug(debug_name); } - g.generate_code(v.cfg, 8, v.shader_stats, stats); + g.generate_code(v.cfg, 8, v.shader_stats, + v.performance_analysis.require(), stats); assembly = g.get_assembly(); } @@ -3017,7 +3018,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, &prog_data->base, - v.cfg, stats); + v.cfg, + v.performance_analysis.require(), + stats); } return assembly; diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index aa93b05d5af..73c18b6748c 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -48,6 +48,7 @@ brw_vec4_generate_assembly(const struct brw_compiler *compiler, const nir_shader *nir, struct brw_vue_prog_data *prog_data, const struct cfg_t *cfg, + const brw::performance &perf, struct brw_compile_stats *stats); #ifdef __cplusplus diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index bdc45371c4e..e95a70d8605 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1498,6 +1498,7 @@ generate_code(struct brw_codegen *p, const nir_shader *nir, struct brw_vue_prog_data *prog_data, const struct cfg_t *cfg, + const performance &perf, struct brw_compile_stats *stats) { const struct gen_device_info *devinfo = p->devinfo; @@ -2220,7 +2221,7 @@ generate_code(struct brw_codegen *p, fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d " "spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n", - stage_abbrev, before_size / 16, loop_count, cfg->cycle_count, + stage_abbrev, before_size / 16, loop_count, perf.latency, spill_count, fill_count, send_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); @@ -2239,14 +2240,14 @@ generate_code(struct brw_codegen *p, "%d:%d spills:fills, %u sends, " "compacted %d to %d bytes.", stage_abbrev, before_size / 16, - loop_count, cfg->cycle_count, spill_count, + loop_count, perf.latency, spill_count, fill_count, send_count, before_size, after_size); if (stats) { stats->dispatch_width = 0; stats->instructions = before_size / 16; stats->sends = send_count; stats->loops = loop_count; - stats->cycles = cfg->cycle_count; + stats->cycles = perf.latency; stats->spills = spill_count; stats->fills = fill_count; } @@ -2259,13 +2260,14 @@ brw_vec4_generate_assembly(const struct brw_compiler *compiler, const nir_shader *nir, struct brw_vue_prog_data *prog_data, const struct cfg_t *cfg, + const performance &perf, struct brw_compile_stats *stats) { struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen); brw_init_codegen(compiler->devinfo, p, mem_ctx); brw_set_default_access_mode(p, BRW_ALIGN_16); - generate_code(p, compiler, log_data, nir, prog_data, cfg, stats); + generate_code(p, compiler, log_data, nir, prog_data, cfg, perf, stats); return brw_get_program(p, &prog_data->base.program_size); } diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index a0b78eb4d48..ce341806c7b 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -865,7 +865,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, label, shader->info.name); g.enable_debug(name); } - g.generate_code(v.cfg, 8, v.shader_stats, stats); + g.generate_code(v.cfg, 8, v.shader_stats, + v.performance_analysis.require(), stats); return g.get_assembly(); } } @@ -897,7 +898,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, ralloc_free(param); return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, &prog_data->base, - v.cfg, stats); + v.cfg, + v.performance_analysis.require(), + stats); } else { /* These variables could be modified by the execution of the GS * visitor if it packed the uniforms in the push constant buffer. @@ -960,7 +963,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, - &prog_data->base, gs->cfg, stats); + &prog_data->base, gs->cfg, + gs->performance_analysis.require(), + stats); } delete gs; diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index 26dc4f18040..852c5a9865c 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -480,7 +480,8 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir->info.name)); } - g.generate_code(v.cfg, 8, v.shader_stats, stats); + g.generate_code(v.cfg, 8, v.shader_stats, + v.performance_analysis.require(), stats); assembly = g.get_assembly(); } else { @@ -497,7 +498,9 @@ brw_compile_tcs(const struct brw_compiler *compiler, assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, - &prog_data->base, v.cfg, stats); + &prog_data->base, v.cfg, + v.performance_analysis.require(), + stats); } return assembly; -- 2.30.2