From 2ff0ff880c14f246a419ae3949b2462617e485e1 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 1 Sep 2014 13:35:04 -0700 Subject: [PATCH] i965/fs: Don't use instruction list after calculating the cfg. The only trick is changing a break into a return true in register coalescing, since the macro is actually a double loop, and break will do something different than you expect. (Wish I'd realized that earlier!) Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 22 +++++++++---------- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 ++++----- .../dri/i965/brw_fs_register_coalesce.cpp | 12 +++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5c78b7ae3c7..e3bb807f9cb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1478,7 +1478,7 @@ fs_visitor::assign_curb_setup() prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == UNIFORM) { int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; @@ -1607,7 +1607,7 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == HW_REG); inst->src[2].fixed_hw_reg.nr += urb_start; @@ -1668,7 +1668,7 @@ fs_visitor::split_virtual_grfs() false; } - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { /* If there's a SEND message that requires contiguous destination * registers, no splitting is allowed. */ @@ -1703,7 +1703,7 @@ fs_visitor::split_virtual_grfs() } } - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == GRF && split_grf[inst->dst.reg] && inst->dst.reg_offset != 0) { @@ -1743,7 +1743,7 @@ fs_visitor::compact_virtual_grfs() int remap_table[this->virtual_grf_count]; memset(remap_table, -1, sizeof(remap_table)); - foreach_in_list(const fs_inst, inst, &instructions) { + foreach_block_and_inst(block, const fs_inst, inst, cfg) { if (inst->dst.file == GRF) remap_table[inst->dst.reg] = 0; @@ -1767,7 +1767,7 @@ fs_visitor::compact_virtual_grfs() this->virtual_grf_count = new_index; /* Patch all the instructions to use the newly renumbered registers */ - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == GRF) inst->dst.reg = remap_table[inst->dst.reg]; @@ -1831,7 +1831,7 @@ fs_visitor::move_uniform_array_access_to_pull_constants() * Note that we don't move constant-indexed accesses to arrays. No * testing has been done of the performance impact of this choice. */ - foreach_in_list_safe(fs_inst, inst, &instructions) { + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { for (int i = 0 ; i < inst->sources; i++) { if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr) continue; @@ -1879,7 +1879,7 @@ fs_visitor::assign_constant_locations() is_live[i] = false; } - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file != UNIFORM) continue; @@ -1999,7 +1999,7 @@ fs_visitor::opt_algebraic() { bool progress = false; - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_MUL: if (inst->src[1].file != IMM) @@ -2112,7 +2112,7 @@ fs_visitor::opt_register_renaming() int remap[virtual_grf_count]; memset(remap, -1, sizeof(int) * virtual_grf_count); - foreach_in_list(fs_inst, inst, &this->instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) { depth++; } else if (inst->opcode == BRW_OPCODE_ENDIF || @@ -2827,7 +2827,7 @@ fs_visitor::dump_instructions(const char *name) } int ip = 0, max_pressure = 0; - foreach_in_list(backend_instruction, inst, &instructions) { + foreach_block_and_inst(block, backend_instruction, inst, cfg) { max_pressure = MAX2(max_pressure, regs_live_at_ip[ip]); fprintf(file, "{%3d} %4d: ", regs_live_at_ip[ip], ip); dump_instruction(inst, file); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index eb443d22d83..24c3d3a21d6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -55,7 +55,7 @@ fs_visitor::assign_regs_trivial() } this->grf_used = hw_reg_mapping[this->virtual_grf_count]; - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { assign_reg(hw_reg_mapping, &inst->dst, reg_width); for (i = 0; i < inst->sources; i++) { assign_reg(hw_reg_mapping, &inst->src[i], reg_width); @@ -242,7 +242,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, int payload_last_use_ip[payload_node_count]; memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip)); int ip = 0; - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_DO: loop_depth++; @@ -362,7 +362,7 @@ fs_visitor::get_used_mrfs(bool *mrf_used) memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool)); - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == MRF) { int reg = inst->dst.reg & ~BRW_MRF_COMPR4; mrf_used[reg] = true; @@ -520,7 +520,7 @@ fs_visitor::assign_regs(bool allow_spilling) reg_width); } - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { assign_reg(hw_reg_mapping, &inst->dst, reg_width); for (int i = 0; i < inst->sources; i++) { assign_reg(hw_reg_mapping, &inst->src[i], reg_width); @@ -578,7 +578,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. */ - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == GRF) { spill_costs[inst->src[i].reg] += loop_scale; diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index b4abb4caa7e..318bfa610d4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -113,7 +113,7 @@ is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes) static bool can_coalesce_vars(brw::fs_live_variables *live_intervals, - const exec_list *instructions, const fs_inst *inst, + const cfg_t *cfg, const fs_inst *inst, int var_to, int var_from) { if (!live_intervals->vars_interfere(var_from, var_to)) @@ -132,7 +132,7 @@ can_coalesce_vars(brw::fs_live_variables *live_intervals, int start_ip = MIN2(start_to, start_from); int scan_ip = -1; - foreach_in_list(fs_inst, scan_inst, instructions) { + foreach_block_and_inst(block, fs_inst, scan_inst, cfg) { scan_ip++; if (scan_ip < start_ip) @@ -145,7 +145,7 @@ can_coalesce_vars(brw::fs_live_variables *live_intervals, continue; if (scan_ip > live_intervals->end[var_to]) - break; + return true; if (scan_inst->dst.equals(inst->dst) || scan_inst->dst.equals(inst->src[0])) @@ -170,7 +170,7 @@ fs_visitor::register_coalesce() int var_to[MAX_SAMPLER_MESSAGE_SIZE]; int var_from[MAX_SAMPLER_MESSAGE_SIZE]; - foreach_in_list(fs_inst, inst, &instructions) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { if (!is_coalesce_candidate(inst, virtual_grf_sizes)) continue; @@ -216,7 +216,7 @@ fs_visitor::register_coalesce() var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i]; var_from[i] = live_intervals->var_from_vgrf[reg_from] + i; - if (!can_coalesce_vars(live_intervals, &instructions, inst, + if (!can_coalesce_vars(live_intervals, cfg, inst, var_to[i], var_from[i])) { can_coalesce = false; reg_from = -1; @@ -241,7 +241,7 @@ fs_visitor::register_coalesce() } } - foreach_in_list(fs_inst, scan_inst, &instructions) { + foreach_block_and_inst(block, fs_inst, scan_inst, cfg) { for (int i = 0; i < src_size; i++) { if (mov[i] || was_load_payload) { if (scan_inst->dst.file == GRF && -- 2.30.2