From 92ab06e782c31fe0209e5d0181967a2ff6739c9b Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 4 May 2017 10:35:29 +0200 Subject: [PATCH] st/glsl_to_tgsi: fix renumber_registers() in presence of dead code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The TGSI DCE pass doesn't eliminate dead assignments like MOV TEMP[0], TEMP[1] in presence of loops because it assumes that the visitor doesn't emit dead code. This assumption is actually wrong and this situation happens. However, it appears that the merge_registers() pass accidentally takes care of this for some weird reasons. But since this pass has been disabled for RadeonSI and Nouveau, the renumber_registers() pass which is called *after*, can't do its job correctly. This is because it assumes that no dead code is present. But if there is still a dead assignment, it might re-use the TEMP register id incorrectly and emits wrong code. This patches fixes the issue by recording writes instead of reads, and this has the advantage to be faster. This should fix Unigine Heaven on RadeonSI and Nouveau. shader-db results with RadeonSI: 47109 shaders in 29632 tests Totals: SGPRS: 1923308 -> 1923316 (0.00 %) VGPRS: 1133843 -> 1133847 (0.00 %) Spilled SGPRs: 2516 -> 2518 (0.08 %) Spilled VGPRs: 65 -> 65 (0.00 %) Private memory VGPRs: 1184 -> 1184 (0.00 %) Scratch size: 1308 -> 1308 (0.00 %) dwords per thread Code Size: 60095968 -> 60096256 (0.00 %) bytes LDS: 1077 -> 1077 (0.00 %) blocks Max Waves: 431889 -> 431889 (0.00 %) Wait states: 0 -> 0 (0.00 %) It's still interesting to disable the merge_registers() pass. Signed-off-by: Samuel Pitoiset Reviewed-by: Nicolai Hähnle --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 39 +++++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9858673ff44..81c1d00dfbc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -559,6 +559,7 @@ public: void rename_temp_registers(int num_renames, struct rename_reg_pair *renames); void get_first_temp_read(int *first_reads); + void get_first_temp_write(int *first_writes); void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); void get_last_temp_write(int *last_writes); @@ -4758,6 +4759,33 @@ glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_p } } +void +glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (j = 0; j < num_inst_dst_regs(inst); j++) { + if (inst->dst[j].file == PROGRAM_TEMPORARY) { + if (first_writes[inst->dst[j].index] == -1) + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + i++; + } +} + void glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) { @@ -5347,16 +5375,17 @@ glsl_to_tgsi_visitor::renumber_registers(void) { int i = 0; int new_index = 0; - int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); int num_renames = 0; + for (i = 0; i < this->next_temp; i++) { - first_reads[i] = -1; + first_writes[i] = -1; } - get_first_temp_read(first_reads); + get_first_temp_write(first_writes); for (i = 0; i < this->next_temp; i++) { - if (first_reads[i] < 0) continue; + if (first_writes[i] < 0) continue; if (i != new_index) { renames[num_renames].old_reg = i; renames[num_renames].new_reg = new_index; @@ -5368,7 +5397,7 @@ glsl_to_tgsi_visitor::renumber_registers(void) rename_temp_registers(num_renames, renames); this->next_temp = new_index; ralloc_free(renames); - ralloc_free(first_reads); + ralloc_free(first_writes); } /* ------------------------- TGSI conversion stuff -------------------------- */ -- 2.30.2