This make shader-db's report.py work on Haswell and earlier platforms.
The problem is that the script would detect the "sends" output for
scalar shaders and expect in in vec4 shaders too. When it didn't find
it, the script would fail with:
Traceback (most recent call last):
File "./report.py", line 351, in <module>
main()
File "./report.py", line 182, in main
before_count = before[p][m]
KeyError: 'sends'
Fixes: f192741ddd8 ("intel/compiler: Report the number of non-spill/fill SEND messages")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
bool debug_flag = INTEL_DEBUG &
intel_debug_flag_for_shader_stage(nir->info.stage);
struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
bool debug_flag = INTEL_DEBUG &
intel_debug_flag_for_shader_stage(nir->info.stage);
struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
+
+ /* `send_count` explicitly does not include spills or fills, as we'd
+ * like to use it as a metric for intentional memory access or other
+ * shared function use. Otherwise, subtle changes to scheduling or
+ * register allocation could cause it to fluctuate wildly - and that
+ * effect is already counted in spill/fill counts.
+ */
int spill_count = 0, fill_count = 0;
int spill_count = 0, fill_count = 0;
+ int loop_count = 0, send_count = 0;
foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
struct brw_reg src[3], dst;
foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
struct brw_reg src[3], dst;
generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
} else {
generate_math1_gen4(p, inst, dst, src[0]);
generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
} else {
generate_math1_gen4(p, inst, dst, src[0]);
generate_math_gen6(p, inst, dst, src[0], src[1]);
} else {
generate_math2_gen4(p, inst, dst, src[0], src[1]);
generate_math_gen6(p, inst, dst, src[0], src[1]);
} else {
generate_math2_gen4(p, inst, dst, src[0], src[1]);
case SHADER_OPCODE_SAMPLEINFO:
generate_tex(p, prog_data, nir->info.stage,
inst, dst, src[0], src[1], src[2]);
case SHADER_OPCODE_SAMPLEINFO:
generate_tex(p, prog_data, nir->info.stage,
inst, dst, src[0], src[1], src[2]);
break;
case SHADER_OPCODE_GET_BUFFER_SIZE:
generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
break;
case SHADER_OPCODE_GET_BUFFER_SIZE:
generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_URB_WRITE:
generate_vs_urb_write(p, inst);
break;
case VS_OPCODE_URB_WRITE:
generate_vs_urb_write(p, inst);
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
case VS_OPCODE_PULL_CONSTANT_LOAD:
generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
case VS_OPCODE_PULL_CONSTANT_LOAD:
generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
break;
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
case GS_OPCODE_URB_WRITE:
generate_gs_urb_write(p, inst);
case GS_OPCODE_URB_WRITE:
generate_gs_urb_write(p, inst);
break;
case GS_OPCODE_URB_WRITE_ALLOCATE:
generate_gs_urb_write_allocate(p, inst);
break;
case GS_OPCODE_URB_WRITE_ALLOCATE:
generate_gs_urb_write_allocate(p, inst);
break;
case GS_OPCODE_SVB_WRITE:
generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_SVB_WRITE:
generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_SVB_SET_DST_INDEX:
break;
case GS_OPCODE_SVB_SET_DST_INDEX:
case GS_OPCODE_THREAD_END:
generate_gs_thread_end(p, inst);
case GS_OPCODE_THREAD_END:
generate_gs_thread_end(p, inst);
break;
case GS_OPCODE_SET_WRITE_OFFSET:
break;
case GS_OPCODE_SET_WRITE_OFFSET:
case GS_OPCODE_FF_SYNC:
generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
case GS_OPCODE_FF_SYNC:
generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
break;
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0],
prog_data->base.binding_table.shader_time_start);
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0],
prog_data->base.binding_table.shader_time_start);
break;
case VEC4_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null(), inst->header_size);
break;
case VEC4_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null(), inst->header_size);
break;
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
break;
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].ud);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].ud);
break;
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
src[2].ud, inst->header_size);
break;
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
src[2].ud, inst->header_size);
break;
case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
break;
case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
case TCS_OPCODE_URB_WRITE:
generate_tcs_urb_write(p, inst, src[0]);
case TCS_OPCODE_URB_WRITE:
generate_tcs_urb_write(p, inst, src[0]);
break;
case VEC4_OPCODE_URB_READ:
generate_vec4_urb_read(p, inst, dst, src[0]);
break;
case VEC4_OPCODE_URB_READ:
generate_vec4_urb_read(p, inst, dst, src[0]);
break;
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
break;
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
case TCS_OPCODE_RELEASE_INPUT:
generate_tcs_release_input(p, dst, src[0], src[1]);
case TCS_OPCODE_RELEASE_INPUT:
generate_tcs_release_input(p, dst, src[0], src[1]);
break;
case TCS_OPCODE_THREAD_END:
generate_tcs_thread_end(p, inst);
break;
case TCS_OPCODE_THREAD_END:
generate_tcs_thread_end(p, inst);
break;
case SHADER_OPCODE_BARRIER:
brw_barrier(p, src[0]);
brw_WAIT(p);
break;
case SHADER_OPCODE_BARRIER:
brw_barrier(p, src[0]);
brw_WAIT(p);
break;
case SHADER_OPCODE_MOV_INDIRECT:
break;
case SHADER_OPCODE_MOV_INDIRECT:
sha1buf);
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
sha1buf);
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
- "spills:fills. Compacted %d to %d bytes (%.0f%%)\n",
+ "spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
- spill_count, fill_count, before_size, after_size,
+ spill_count, fill_count, send_count, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
/* overriding the shader makes disasm_info invalid */
100.0f * (before_size - after_size) / before_size);
/* overriding the shader makes disasm_info invalid */
compiler->shader_debug_log(log_data,
"%s vec4 shader: %d inst, %d loops, %u cycles, "
compiler->shader_debug_log(log_data,
"%s vec4 shader: %d inst, %d loops, %u cycles, "
- "%d:%d spills:fills, compacted %d to %d bytes.",
+ "%d:%d spills:fills, %u sends, "
+ "compacted %d to %d bytes.",
stage_abbrev, before_size / 16,
loop_count, cfg->cycle_count, spill_count,
stage_abbrev, before_size / 16,
loop_count, cfg->cycle_count, spill_count,
- fill_count, before_size, after_size);
+ fill_count, send_count, before_size, after_size);
if (stats) {
stats->dispatch_width = 0;
stats->instructions = before_size / 16;
if (stats) {
stats->dispatch_width = 0;
stats->instructions = before_size / 16;