break;
}
+ /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type
+ * is set as part of the message descriptor. On gen4, the PRM seems to
+ * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
+ * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is
+ * gone from the message descriptor entirely and you just get UINT32 all
+ * the time regardless. Since we can really only do non-UINT32 on gen4,
+ * just stomp it to UINT32 all the time.
+ */
+ if (inst->opcode == SHADER_OPCODE_TXS)
+ return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+
uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
inst->opcode == SHADER_OPCODE_TG4_OFFSET)
? prog_data->base.binding_table.gather_texture_start
const nir_shader *nir,
struct brw_vue_prog_data *prog_data,
const struct cfg_t *cfg,
+ const performance &perf,
struct brw_compile_stats *stats)
{
const struct gen_device_info *devinfo = p->devinfo;
send_count++;
break;
- case SHADER_OPCODE_MEMORY_FENCE: {
- const unsigned sends =
- brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false,
- /* bti */ 0);
- send_count += sends;
+ case SHADER_OPCODE_MEMORY_FENCE:
+ brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND,
+ brw_message_target(inst->sfid),
+ /* commit_enable */ false,
+ /* bti */ 0);
+ send_count++;
break;
- }
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
const struct brw_reg mask =
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
"spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
- stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
+ stage_abbrev, before_size / 16, loop_count, perf.latency,
spill_count, fill_count, send_count, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
/* overriding the shader makes disasm_info invalid */
if (!brw_try_override_assembly(p, 0, sha1buf)) {
- dump_assembly(p->store, disasm_info);
+ dump_assembly(p->store, 0, p->next_insn_offset,
+ disasm_info, perf.block_latency);
} else {
fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf);
}
"%d:%d spills:fills, %u sends, "
"compacted %d to %d bytes.",
stage_abbrev, before_size / 16,
- loop_count, cfg->cycle_count, spill_count,
+ loop_count, perf.latency, spill_count,
fill_count, send_count, before_size, after_size);
if (stats) {
stats->dispatch_width = 0;
stats->instructions = before_size / 16;
stats->sends = send_count;
stats->loops = loop_count;
- stats->cycles = cfg->cycle_count;
+ stats->cycles = perf.latency;
stats->spills = spill_count;
stats->fills = fill_count;
}
const nir_shader *nir,
struct brw_vue_prog_data *prog_data,
const struct cfg_t *cfg,
+ const performance &perf,
struct brw_compile_stats *stats)
{
struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
brw_init_codegen(compiler->devinfo, p, mem_ctx);
brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(p, compiler, log_data, nir, prog_data, cfg, stats);
+ generate_code(p, compiler, log_data, nir, prog_data, cfg, perf, stats);
+
+ assert(prog_data->base.const_data_size == 0);
+ if (nir->constant_data_size > 0) {
+ prog_data->base.const_data_size = nir->constant_data_size;
+ prog_data->base.const_data_offset =
+ brw_append_data(p, nir->constant_data, nir->constant_data_size, 32);
+ }
return brw_get_program(p, &prog_data->base.program_size);
}