From: Eric Anholt Date: Tue, 30 Apr 2013 22:00:40 +0000 (-0700) Subject: i965/fs: Make virtual grf live intervals actually cover their used range. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e290372542d0475e612e4d10a27b22eae3158ecd;p=mesa.git i965/fs: Make virtual grf live intervals actually cover their used range. Previously, we would sometimes not consider a write to a register to extend the end of the interval, nor would we consider a read before a write to extend the start. This made for a bunch of complicated logic related to how to treat the results when dead code might be present. Instead, just extend the interval and fix dead code elimination to know how to remove it. Interestingly, this actually results in a tiny bit more optimization: total instructions in shared programs: 1391220 -> 1390799 (-0.03%) instructions in affected programs: 14037 -> 13616 (-3.00%) v2: Fix a theoretical problem with the simd16 workaround if dst == src, where we would revert the bump of the live range. Reviewed-by: Ian Romanick (v1) --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 778a69e7091..baaa25c1347 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1456,8 +1456,8 @@ fs_visitor::compact_virtual_grfs() remap_table[i] = new_index; virtual_grf_sizes[new_index] = virtual_grf_sizes[i]; if (live_intervals_valid) { - virtual_grf_use[new_index] = virtual_grf_use[i]; - virtual_grf_def[new_index] = virtual_grf_def[i]; + virtual_grf_start[new_index] = virtual_grf_start[i]; + virtual_grf_end[new_index] = virtual_grf_end[i]; } ++new_index; } @@ -1771,10 +1771,8 @@ fs_visitor::opt_algebraic() } /** - * Must be called after calculate_live_intervales() to remove unused - * writes to registers -- register allocation will fail otherwise - * because something deffed but not used won't be considered to - * interfere with other regs. + * Removes any instructions writing a VGRF where that VGRF is not used by any + * later instruction. */ bool fs_visitor::dead_code_eliminate() @@ -1787,9 +1785,12 @@ fs_visitor::dead_code_eliminate() foreach_list_safe(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; - if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { - inst->remove(); - progress = true; + if (inst->dst.file == GRF) { + assert(this->virtual_grf_end[inst->dst.reg] >= pc); + if (this->virtual_grf_end[inst->dst.reg] == pc) { + inst->remove(); + progress = true; + } } pc++; @@ -2201,7 +2202,7 @@ fs_visitor::compute_to_mrf() /* Can't compute-to-MRF this GRF if someone else was going to * read it later. */ - if (this->virtual_grf_use[inst->src[0].reg] > ip) + if (this->virtual_grf_end[inst->src[0].reg] > ip) continue; /* Found a move of a GRF to a MRF. Let's see if we can go diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9a2bcc07685..3d44daf8545 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -430,8 +430,8 @@ public: int *virtual_grf_sizes; int virtual_grf_count; int virtual_grf_array_size; - int *virtual_grf_def; - int *virtual_grf_use; + int *virtual_grf_start; + int *virtual_grf_end; bool live_intervals_valid; /* This is the map from UNIFORM hw_reg + reg_offset as generated by diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index b5c220090cf..9b60d9be41b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) /* Kill any AEB entries using registers that don't get reused any * more -- a sure sign they'll fail operands_match(). */ - if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) { + if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) { entry->remove(); ralloc_free(entry); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index fdcfac61194..3daf8fa7786 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals() if (this->live_intervals_valid) return; - int *def = ralloc_array(mem_ctx, int, num_vars); - int *use = ralloc_array(mem_ctx, int, num_vars); - ralloc_free(this->virtual_grf_def); - ralloc_free(this->virtual_grf_use); - this->virtual_grf_def = def; - this->virtual_grf_use = use; + int *start = ralloc_array(mem_ctx, int, num_vars); + int *end = ralloc_array(mem_ctx, int, num_vars); + ralloc_free(this->virtual_grf_start); + ralloc_free(this->virtual_grf_end); + this->virtual_grf_start = start; + this->virtual_grf_end = end; for (int i = 0; i < num_vars; i++) { - def[i] = MAX_INSTRUCTION; - use[i] = -1; + start[i] = MAX_INSTRUCTION; + end[i] = -1; } /* Start by setting up the intervals with no knowledge of control @@ -189,8 +189,7 @@ fs_visitor::calculate_live_intervals() for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; - - use[reg] = ip; + int end_ip = ip; /* In most cases, a register can be written over safely by the * same instruction that is its last use. For a single @@ -220,15 +219,19 @@ fs_visitor::calculate_live_intervals() if (dispatch_width == 16 && (inst->src[i].smear || (this->pixel_x.reg == reg || this->pixel_y.reg == reg))) { - use[reg]++; + end_ip++; } + + start[reg] = MIN2(start[reg], ip); + end[reg] = MAX2(end[reg], end_ip); } } if (inst->dst.file == GRF) { int reg = inst->dst.reg; - def[reg] = MIN2(def[reg], ip); + start[reg] = MIN2(start[reg], ip); + end[reg] = MAX2(end[reg], ip); } ip++; @@ -241,60 +244,23 @@ fs_visitor::calculate_live_intervals() for (int b = 0; b < cfg.num_blocks; b++) { for (int i = 0; i < num_vars; i++) { if (BITSET_TEST(livevars.bd[b].livein, i)) { - def[i] = MIN2(def[i], cfg.blocks[b]->start_ip); - use[i] = MAX2(use[i], cfg.blocks[b]->start_ip); + start[i] = MIN2(start[i], cfg.blocks[b]->start_ip); + end[i] = MAX2(end[i], cfg.blocks[b]->start_ip); } if (BITSET_TEST(livevars.bd[b].liveout, i)) { - def[i] = MIN2(def[i], cfg.blocks[b]->end_ip); - use[i] = MAX2(use[i], cfg.blocks[b]->end_ip); + start[i] = MIN2(start[i], cfg.blocks[b]->end_ip); + end[i] = MAX2(end[i], cfg.blocks[b]->end_ip); } } } this->live_intervals_valid = true; - - /* Note in the non-control-flow code above, that we only take def[] as the - * first store, and use[] as the last use. We use this in dead code - * elimination, to determine when a store never gets used. However, we - * also use these arrays to answer the virtual_grf_interferes() question - * (live interval analysis), which is used for register coalescing and - * register allocation. - * - * So, there's a conflict over what the array should mean: if use[] - * considers a def after the last use, then the dead code elimination pass - * never does anything (and it's an important pass!). But if we don't - * include dead code, then virtual_grf_interferes() lies and we'll do - * horrible things like coalesce the register that is dead-code-written - * into another register that was live across the dead write (causing the - * use of the second register to take the dead write's source value instead - * of the coalesced MOV's source value). - * - * To resolve the conflict, immediately after calculating live intervals, - * detect dead code, nuke it, and if we changed anything, calculate again - * before returning to the caller. Now we happen to produce def[] and - * use[] arrays that will work for virtual_grf_interferes(). - */ - if (dead_code_eliminate()) - calculate_live_intervals(); } bool fs_visitor::virtual_grf_interferes(int a, int b) { - int a_def = this->virtual_grf_def[a], a_use = this->virtual_grf_use[a]; - int b_def = this->virtual_grf_def[b], b_use = this->virtual_grf_use[b]; - - /* If there's dead code (def but not use), it would break our test - * unless we consider it used. - */ - if ((a_use == -1 && a_def != MAX_INSTRUCTION) || - (b_use == -1 && b_def != MAX_INSTRUCTION)) { - return true; - } - - int start = MAX2(a_def, b_def); - int end = MIN2(a_use, b_use); - - return start < end; + return !(virtual_grf_end[a] <= virtual_grf_start[b] || + virtual_grf_end[b] <= virtual_grf_start[a]); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index fa1a93820d2..acd98466860 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -316,8 +316,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * in order to not have to worry about the uniform issue described in * calculate_live_intervals(). */ - if (this->virtual_grf_def[j] <= payload_last_use_ip[i] || - this->virtual_grf_use[j] <= payload_last_use_ip[i]) { + if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) { ra_add_node_interference(g, first_payload_node + i, j); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index d2bac2a3def..b7bbaabf699 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2451,8 +2451,8 @@ fs_visitor::fs_visitor(struct brw_context *brw, this->virtual_grf_sizes = NULL; this->virtual_grf_count = 0; this->virtual_grf_array_size = 0; - this->virtual_grf_def = NULL; - this->virtual_grf_use = NULL; + this->virtual_grf_start = NULL; + this->virtual_grf_end = NULL; this->live_intervals_valid = false; this->force_uncompressed_stack = 0;