From: Dave Airlie Date: Thu, 27 Aug 2015 00:46:33 +0000 (+0100) Subject: st/mesa: reduce time spent in calculating temp read/writes X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=aee73f2942eff2ffb4a0497ac81f01a3b00294b8;p=mesa.git st/mesa: reduce time spent in calculating temp read/writes The glsl->tgsi convertor does some temporary register reduction however in profiling shader-db this shows up quite highly, so optimise things to reduce the number of loops through all the instructions we do. This drops merge_registers from 4-5% on the profile to 1%. I think this can be reduced further by possibly optimising the renumber pass. Acked-by: Marek Olšák --- diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index db6a5a417ea..9174b41d8c9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -480,10 +480,9 @@ public: void simplify_cmp(void); void rename_temp_register(int index, int new_index); - int get_first_temp_read(int index); - int get_first_temp_write(int index); - int get_last_temp_read(int index); - int get_last_temp_write(int index); + void get_first_temp_read(int *first_reads); + void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); + void get_last_temp_write(int *last_writes); void copy_propagate(void); int eliminate_dead_code(void); @@ -3714,8 +3713,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) } } -int -glsl_to_tgsi_visitor::get_first_temp_read(int index) +void +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) { int depth = 0; /* loop depth */ int loop_start = -1; /* index of the first active BGNLOOP (if any) */ @@ -3723,15 +3722,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { for (j = 0; j < num_inst_src_regs(inst); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY && - inst->src[j].index == index) { - return (depth == 0) ? i : loop_start; + if (inst->src[j].file == PROGRAM_TEMPORARY) { + if (first_reads[inst->src[j].index] == -1) + first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start; } } for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && - inst->tex_offsets[j].index == index) { - return (depth == 0) ? i : loop_start; + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { + if (first_reads[inst->tex_offsets[j].index] == -1) + first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start; } } if (inst->op == TGSI_OPCODE_BGNLOOP) { @@ -3744,91 +3743,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) assert(depth >= 0); i++; } - return -1; } -int -glsl_to_tgsi_visitor::get_first_temp_write(int index) +void +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes) { int depth = 0; /* loop depth */ int loop_start = -1; /* index of the first active BGNLOOP (if any) */ - int i = 0; - unsigned j; - + unsigned i = 0, j; + int k; foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { + for (j = 0; j < num_inst_src_regs(inst); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY) + last_reads[inst->src[j].index] = (depth == 0) ? i : -2; + } for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) { - return (depth == 0) ? i : loop_start; - } + if (inst->dst[j].file == PROGRAM_TEMPORARY) + if (first_writes[inst->dst[j].index] == -1) + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + } + for (j = 0; j < inst->tex_offset_num_offset; j++) { + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) + last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; } if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; } else if (inst->op == TGSI_OPCODE_ENDLOOP) { - if (--depth == 0) + if (--depth == 0) { loop_start = -1; - } - assert(depth >= 0); - i++; - } - return -1; -} - -int -glsl_to_tgsi_visitor::get_last_temp_read(int index) -{ - int depth = 0; /* loop depth */ - int last = -1; /* index of last instruction that reads the temporary */ - unsigned i = 0, j; - - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { - for (j = 0; j < num_inst_src_regs(inst); j++) { - if (inst->src[j].file == PROGRAM_TEMPORARY && - inst->src[j].index == index) { - last = (depth == 0) ? i : -2; + for (k = 0; k < this->next_temp; k++) { + if (last_reads[k] == -2) { + last_reads[k] = i; + } + } } } - for (j = 0; j < inst->tex_offset_num_offset; j++) { - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && - inst->tex_offsets[j].index == index) - last = (depth == 0) ? i : -2; - } - if (inst->op == TGSI_OPCODE_BGNLOOP) - depth++; - else if (inst->op == TGSI_OPCODE_ENDLOOP) - if (--depth == 0 && last == -2) - last = i; assert(depth >= 0); i++; } - assert(last >= -1); - return last; } -int -glsl_to_tgsi_visitor::get_last_temp_write(int index) +void +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) { int depth = 0; /* loop depth */ - int last = -1; /* index of last instruction that writes to the temporary */ - int i = 0; + int i = 0, k; unsigned j; foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) - last = (depth == 0) ? i : -2; + if (inst->dst[j].file == PROGRAM_TEMPORARY) + last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; } if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; else if (inst->op == TGSI_OPCODE_ENDLOOP) - if (--depth == 0 && last == -2) - last = i; + if (--depth == 0) { + for (k = 0; k < this->next_temp; k++) { + if (last_writes[k] == -2) { + last_writes[k] = i; + } + } + } assert(depth >= 0); i++; } - assert(last >= -1); - return last; } /* @@ -4264,9 +4245,10 @@ glsl_to_tgsi_visitor::merge_registers(void) * into an array so that we don't have to traverse the instruction list as * much. */ for (i = 0; i < this->next_temp; i++) { - last_reads[i] = get_last_temp_read(i); - first_writes[i] = get_first_temp_write(i); + last_reads[i] = -1; + first_writes[i] = -1; } + get_last_temp_read_first_temp_write(last_reads, first_writes); /* Start looking for registers with non-overlapping usages that can be * merged together. */ @@ -4307,15 +4289,21 @@ glsl_to_tgsi_visitor::renumber_registers(void) { int i = 0; int new_index = 0; + int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); + + for (i = 0; i < this->next_temp; i++) + first_reads[i] = -1; + get_first_temp_read(first_reads); for (i = 0; i < this->next_temp; i++) { - if (get_first_temp_read(i) < 0) continue; + if (first_reads[i] < 0) continue; if (i != new_index) rename_temp_register(i, new_index); new_index++; } this->next_temp = new_index; + ralloc_free(first_reads); } /** @@ -5790,14 +5778,31 @@ get_mesa_program(struct gl_context *ctx, #if 0 /* Print out some information (for debugging purposes) used by the * optimization passes. */ - for (i = 0; i < v->next_temp; i++) { - int fr = v->get_first_temp_read(i); - int fw = v->get_first_temp_write(i); - int lr = v->get_last_temp_read(i); - int lw = v->get_last_temp_write(i); - - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); - assert(fw <= fr); + { + int i; + int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); + int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); + + for (i = 0; i < v->next_temp; i++) { + first_writes[i] = -1; + first_reads[i] = -1; + last_writes[i] = -1; + last_reads[i] = -1; + } + v->get_first_temp_read(first_reads); + v->get_last_temp_read_first_temp_write(last_reads, first_writes); + v->get_last_temp_write(last_writes); + for (i = 0; i < v->next_temp; i++) + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i], + first_writes[i], + last_reads[i], + last_writes[i]); + ralloc_free(first_writes); + ralloc_free(first_reads); + ralloc_free(last_writes); + ralloc_free(last_reads); } #endif