From: Connor Abbott Date: Sat, 6 Jun 2015 14:55:21 +0000 (-0400) Subject: i965: dump scheduling cycle estimates X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=45cd76e342d1e8ecea38e2048b96cf5be3a30fab;p=mesa.git i965: dump scheduling cycle estimates The heuristic we're using is rather lame, since it assumes everything is non-uniform and loops execute 10 times, but it should be enough for measuring improvements in the scheduler that don't result in a change in the number of instructions. v2: - Switch loops and cycle counts to be compatible with older shader-db. - Make loop heuristic 10x to match with spilling code. Reviewed-by: Jason Ekstrand --- diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index a06b0aa1cd0..69e39e8964d 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -90,6 +90,8 @@ struct bblock_t { struct exec_list parents; struct exec_list children; int num; + + unsigned cycle_count; }; static inline struct backend_instruction * @@ -285,6 +287,8 @@ struct cfg_t { int num_blocks; bool idom_dirty; + + unsigned cycle_count; }; /* Note that this is implemented with a double for loop -- break will diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 139d1dd17ec..58bd23f6cbc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2269,9 +2269,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) if (unlikely(debug_flag)) { fprintf(stderr, "Native code for %s\n" - "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d" + "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. 
Compacted %d to %d" " bytes (%.0f%%)\n", - shader_name, dispatch_width, before_size / 16, loop_count, + shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count, spill_count, fill_count, promoted_constants, before_size, after_size, 100.0f * (before_size - after_size) / before_size); @@ -2281,12 +2281,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } compiler->shader_debug_log(log_data, - "%s SIMD%d shader: %d inst, %d loops, " + "%s SIMD%d shader: %d inst, %d loops, %u cycles, " "%d:%d spills:fills, Promoted %u constants, " "compacted %d to %d bytes.\n", stage_abbrev, dispatch_width, before_size / 16, - loop_count, spill_count, fill_count, - promoted_constants, before_size, after_size); + loop_count, cfg->cycle_count, spill_count, + fill_count, promoted_constants, before_size, + after_size); return start_offset; } diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 094c47aba37..46da3251ea7 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -1467,6 +1467,24 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (block->end()->opcode == BRW_OPCODE_NOP) block->end()->remove(block); assert(instructions_to_schedule == 0); + + block->cycle_count = time; +} + +static unsigned get_cycle_count(cfg_t *cfg) +{ + unsigned count = 0, multiplier = 1; + foreach_block(block, cfg) { + if (block->start()->opcode == BRW_OPCODE_DO) + multiplier *= 10; /* assume that loops execute ~10 times */ + + count += block->cycle_count * multiplier; + + if (block->end()->opcode == BRW_OPCODE_WHILE) + multiplier /= 10; + } + + return count; } void @@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg) post_reg_alloc); bs->dump_instructions(); } + + cfg->cycle_count = get_cycle_count(cfg); } void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d74b1b2939d..8bc21df5ffc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1558,10 +1558,10 @@ generate_code(struct brw_codegen *p, nir->info.label ? nir->info.label : "unnamed", _mesa_shader_stage_to_string(nir->stage), nir->info.name); - fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d" - " bytes (%.0f%%)\n", + fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. " + "Compacted %d to %d bytes (%.0f%%)\n", stage_abbrev, - before_size / 16, loop_count, before_size, after_size, + before_size / 16, loop_count, cfg->cycle_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, @@ -1570,9 +1570,10 @@ generate_code(struct brw_codegen *p, } compiler->shader_debug_log(log_data, - "%s vec4 shader: %d inst, %d loops, " + "%s vec4 shader: %d inst, %d loops, %u cycles, " "compacted %d to %d bytes.\n", - stage_abbrev, before_size / 16, loop_count, + stage_abbrev, before_size / 16, + loop_count, cfg->cycle_count, before_size, after_size); }