this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
}
+/*
+ * Build up an array of indices into the urb_setup array that
+ * references the active entries of the urb_setup array.
+ * Used to accelerate walking the active entries of the urb_setup array
+ * on each upload.
+ */
+void
+brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
+{
+ /* Make sure uint8_t is sufficient */
+ STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
+ uint8_t index = 0;
+ for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+ /* A non-negative urb_setup entry marks an active varying slot. */
+ if (wm_prog_data->urb_setup[attr] >= 0) {
+ wm_prog_data->urb_setup_attribs[index++] = attr;
+ }
+ }
+ /* Number of valid entries written into urb_setup_attribs. */
+ wm_prog_data->urb_setup_attribs_count = index;
+}
+
static void
calculate_urb_setup(const struct gen_device_info *devinfo,
const struct brw_wm_prog_key *key,
}
prog_data->num_varying_inputs = urb_next;
+
+ brw_compute_urb_setup_index(prog_data);
}
void
if (devinfo->gen >= 7)
return false;
- calculate_live_intervals();
+ const fs_live_variables &live = live_analysis.require();
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
int ip = next_ip;
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
- if (live_intervals->vgrf_end[inst->src[0].nr] > ip)
+ if (live.vgrf_end[inst->src[0].nr] > ip)
continue;
/* Found a move of a GRF to a MRF. Let's see if we can go rewrite the
}
+/* Convenience overload: dump all instructions with no file name, which the
+ * (const char *) variant maps to stderr.
+ */
void
-fs_visitor::dump_instructions()
+fs_visitor::dump_instructions() const
{
dump_instructions(NULL);
}
void
-fs_visitor::dump_instructions(const char *name)
+fs_visitor::dump_instructions(const char *name) const
{
FILE *file = stderr;
if (name && geteuid() != 0) {
}
if (cfg) {
- calculate_register_pressure();
- int ip = 0, max_pressure = 0;
+ const register_pressure &rp = regpressure_analysis.require();
+ unsigned ip = 0, max_pressure = 0;
foreach_block_and_inst(block, backend_instruction, inst, cfg) {
- max_pressure = MAX2(max_pressure, regs_live_at_ip[ip]);
- fprintf(file, "{%3d} %4d: ", regs_live_at_ip[ip], ip);
+ max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip]);
+ fprintf(file, "{%3d} %4d: ", rp.regs_live_at_ip[ip], ip);
dump_instruction(inst, file);
ip++;
}
}
+/* Convenience overload: dump a single instruction to stderr. */
void
-fs_visitor::dump_instruction(backend_instruction *be_inst)
+fs_visitor::dump_instruction(const backend_instruction *be_inst) const
{
dump_instruction(be_inst, stderr);
}
void
-fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
+fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) const
{
- fs_inst *inst = (fs_inst *)be_inst;
+ const fs_inst *inst = (const fs_inst *)be_inst;
if (inst->predicate) {
fprintf(file, "(%cf%d.%d) ",
payload.num_regs = 1;
}
+/* Analysis-pass result: per-IP register pressure for the program in *v.
+ * regs_live_at_ip[ip] is the sum of the sizes of all VGRFs live at that
+ * instruction; the array is owned by this object (freed in the destructor).
+ */
-void
-fs_visitor::calculate_register_pressure()
+brw::register_pressure::register_pressure(const fs_visitor *v)
{
- invalidate_analysis(DEPENDENCY_EVERYTHING);
- calculate_live_intervals();
+ const fs_live_variables &live = v->live_analysis.require();
+ /* The last block's end_ip is the highest instruction index, so +1 gives
+ * the instruction count; an empty CFG yields zero.
+ */
+ const unsigned num_instructions = v->cfg->num_blocks ?
+ v->cfg->blocks[v->cfg->num_blocks - 1]->end_ip + 1 : 0;
- unsigned num_instructions = 0;
- foreach_block(block, cfg)
- num_instructions += block->instructions.length();
+ /* The trailing () value-initializes, i.e. zeroes, the array. */
+ regs_live_at_ip = new unsigned[num_instructions]();
- regs_live_at_ip = rzalloc_array(mem_ctx, int, num_instructions);
-
+ /* Accumulate each VGRF's size over every IP in its live range. */
- for (unsigned reg = 0; reg < alloc.count; reg++) {
- for (int ip = live_intervals->vgrf_start[reg];
- ip <= live_intervals->vgrf_end[reg]; ip++)
- regs_live_at_ip[ip] += alloc.sizes[reg];
+ for (unsigned reg = 0; reg < v->alloc.count; reg++) {
+ for (int ip = live.vgrf_start[reg]; ip <= live.vgrf_end[reg]; ip++)
+ regs_live_at_ip[ip] += v->alloc.sizes[reg];
}
}
+/* Release the pressure array allocated in the constructor. */
+brw::register_pressure::~register_pressure()
+{
+ delete[] regs_live_at_ip;
+}
+
void
fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
{
backend_shader::invalidate_analysis(c);
+ /* Propagate the invalidation to the cached analysis-pass objects so they
+ * recompute on their next require().
+ */
+ live_analysis.invalidate(c);
+ regpressure_analysis.invalidate(c);
}
void
unsigned depth = 0;
bool progress = false;
- calculate_live_intervals();
+ const fs_live_variables &live_vars = live_analysis.require();
/* Scan the program backwards in order to be able to easily determine
* whether the flag register is live at any point.
*/
foreach_block_reverse_safe(block, cfg) {
- BITSET_WORD flag_liveout = live_intervals->block_data[block->num]
+ BITSET_WORD flag_liveout = live_vars.block_data[block->num]
.flag_liveout[0];
- STATIC_ASSERT(ARRAY_SIZE(live_intervals->block_data[0].flag_liveout) == 1);
+ STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);
foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
if (!inst->predicate && inst->exec_size >= 8)
break;
}
+ /* Scheduling may create additional opportunities for CMOD propagation,
+ * so let's do it again. If CMOD propagation made any progress,
+ * eliminate dead code one more time.
+ */
+ bool progress = false;
+ const int iteration = 99;
+ int pass_num = 0;
+
+ if (OPT(opt_cmod_propagation)) {
+ /* dead_code_eliminate "undoes" the fixing done by
+ * fixup_3src_null_dest, so we have to do it again if
+ * dead_code_eliminate makes any progress.
+ */
+ if (OPT(dead_code_eliminate))
+ fixup_3src_null_dest();
+ }
+
+
/* We only allow spilling for the last schedule mode and only if the
* allow_spilling parameter and dispatch width work out ok.
*/
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
wm_prog_data->num_varying_inputs = 1;
+
+ brw_compute_urb_setup_index(wm_prog_data);
}
bool
brw_compute_flat_inputs(prog_data, shader);
cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
+ struct shader_stats v8_shader_stats, v16_shader_stats, v32_shader_stats;
fs_visitor v8(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader, 8,
return NULL;
} else if (likely(!(INTEL_DEBUG & DEBUG_NO8))) {
simd8_cfg = v8.cfg;
+ v8_shader_stats = v8.shader_stats;
prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
prog_data->reg_blocks_8 = brw_register_blocks(v8.grf_used);
}
v16.fail_msg);
} else {
simd16_cfg = v16.cfg;
+ v16_shader_stats = v16.shader_stats;
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
}
v32.fail_msg);
} else {
simd32_cfg = v32.cfg;
+ v32_shader_stats = v32.shader_stats;
prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
}
}
fs_generator g(compiler, log_data, mem_ctx, &prog_data->base,
- v8.shader_stats, v8.runtime_check_aads_emit,
- MESA_SHADER_FRAGMENT);
+ v8.runtime_check_aads_emit, MESA_SHADER_FRAGMENT);
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
if (simd8_cfg) {
prog_data->dispatch_8 = true;
- g.generate_code(simd8_cfg, 8, stats);
+ g.generate_code(simd8_cfg, 8, v8_shader_stats, stats);
stats = stats ? stats + 1 : NULL;
}
if (simd16_cfg) {
prog_data->dispatch_16 = true;
- prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16, stats);
+ prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16, v16_shader_stats, stats);
stats = stats ? stats + 1 : NULL;
}
if (simd32_cfg) {
prog_data->dispatch_32 = true;
- prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32, stats);
+ prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32, v32_shader_stats, stats);
stats = stats ? stats + 1 : NULL;
}
*error_str = ralloc_strdup(mem_ctx, fail_msg);
} else {
fs_generator g(compiler, log_data, mem_ctx, &prog_data->base,
- v->shader_stats, v->runtime_check_aads_emit,
- MESA_SHADER_COMPUTE);
+ v->runtime_check_aads_emit, MESA_SHADER_COMPUTE);
if (INTEL_DEBUG & DEBUG_CS) {
char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
src_shader->info.label ?
g.enable_debug(name);
}
- g.generate_code(v->cfg, prog_data->simd_size, stats);
+ g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats, stats);
ret = g.get_assembly();
}