From ce6be334bbf7f44c71ad5d190f9fb075d2f9a38c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 6 Dec 2011 12:30:03 -0800 Subject: [PATCH] i965: Don't make consumers of brw_WHILE do pre-gen6 BREAK/CONT patching. The EU code itself can just do this work, since all the consumers were duplicating it. Reviewed-by: Yuanhan Liu --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 35 +++++++++++++++++++-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 33 +++---------------- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 33 +++---------------- src/mesa/drivers/dri/i965/brw_vs_emit.c | 30 +++--------------- 4 files changed, 45 insertions(+), 86 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 5f92075a1b5..30ae3bb2ac0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1343,7 +1343,35 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) } } - +/** + * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE + * instruction here. + * + * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop + * nesting, since it can always just point to the end of the block/current loop. + */ +static void +brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *do_inst = get_inner_do_insn(p); + struct brw_instruction *inst; + int br = (intel->gen == 5) ? 2 : 1; + + for (inst = while_inst - 1; inst != do_inst; inst--) { + /* If the jump count is != 0, that means that this instruction has already + * been patched because it's part of a loop inside of the one we're + * patching. + */ + if (inst->header.opcode == BRW_OPCODE_BREAK && + inst->bits3.if_else.jump_count == 0) { + inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1); + } else if (inst->header.opcode == BRW_OPCODE_CONTINUE && + inst->bits3.if_else.jump_count == 0) { + inst->bits3.if_else.jump_count = br * (while_inst - inst); + } + } +} struct brw_instruction *brw_WHILE(struct brw_compile *p) { @@ -1352,7 +1380,6 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) GLuint br = 1; do_insn = get_inner_do_insn(p); - p->loop_stack_depth--; if (intel->gen >= 5) br = 2; @@ -1396,11 +1423,15 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p) insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; + + brw_patch_break_cont(p, insn); } } insn->header.compression_control = BRW_COMPRESSION_NONE; p->current->header.predicate_control = BRW_PREDICATE_NONE; + p->loop_stack_depth--; + return insn; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index ded58a2db91..91e6961982f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -658,8 +658,6 @@ fs_visitor::generate_code() int loop_stack_array_size = 16; int loop_stack_depth = 0; - brw_instruction **loop_stack = - rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); int *if_depth_in_loop = rzalloc_array(this->mem_ctx, int, loop_stack_array_size); @@ -795,11 +793,10 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + brw_DO(p, BRW_EXECUTE_8); + loop_stack_depth++; if (loop_stack_array_size <= loop_stack_depth) { loop_stack_array_size *= 2; - loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, - loop_stack_array_size); if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, loop_stack_array_size); } @@ -819,31 +816,10 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case BRW_OPCODE_WHILE: { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen >= 5) - br = 2; - + case BRW_OPCODE_WHILE: assert(loop_stack_depth > 0); loop_stack_depth--; - inst0 = inst1 = brw_WHILE(p); - if (intel->gen < 6) { - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - } + brw_WHILE(p); break; case SHADER_OPCODE_RCP: @@ -947,7 +923,6 @@ fs_visitor::generate_code() printf("\n"); } - ralloc_free(loop_stack); ralloc_free(if_depth_in_loop); brw_set_uip_jip(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index a7eba216cf9..8ea5c14075c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -800,8 +800,6 @@ vec4_visitor::generate_code() int loop_stack_array_size = 16; int loop_stack_depth = 0; - brw_instruction **loop_stack = - rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); int *if_depth_in_loop = rzalloc_array(this->mem_ctx, int, loop_stack_array_size); @@ -931,11 +929,10 @@ vec4_visitor::generate_code() break; case BRW_OPCODE_DO: - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + brw_DO(p, BRW_EXECUTE_8); + loop_stack_depth++; if (loop_stack_array_size <= loop_stack_depth) { loop_stack_array_size *= 2; - loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, - loop_stack_array_size); if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, loop_stack_array_size); } @@ -955,31 +952,10 @@ vec4_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case BRW_OPCODE_WHILE: { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen >= 5) - br = 2; - + case BRW_OPCODE_WHILE: assert(loop_stack_depth > 0); loop_stack_depth--; - inst0 = inst1 = brw_WHILE(p); - if (intel->gen < 6) { - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - } + brw_WHILE(p); break; default: @@ -1007,7 +983,6 @@ vec4_visitor::generate_code() printf("\n"); } - ralloc_free(loop_stack); ralloc_free(if_depth_in_loop); brw_set_uip_jip(p); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 05f3ecba1d4..62adc54c524 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1844,7 +1844,6 @@ void brw_old_vs_emit(struct brw_vs_compile *c ) struct intel_context *intel = &brw->intel; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, loop_depth = 0; - struct brw_instruction *loop_inst[MAX_LOOP_DEPTH] = { 0 }; int if_depth_in_loop[MAX_LOOP_DEPTH]; const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; @@ -2095,7 +2094,8 @@ void brw_old_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_BGNLOOP: clear_current_const(c); - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + brw_DO(p, BRW_EXECUTE_8); + loop_depth++; if_depth_in_loop[loop_depth] = 0; break; case OPCODE_BRK: @@ -2113,32 +2113,10 @@ void brw_old_vs_emit(struct brw_vs_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: { + case OPCODE_ENDLOOP: clear_current_const(c); - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - loop_depth--; - - if (intel->gen == 5) - br = 2; - - inst0 = inst1 = brw_WHILE(p); - - if (intel->gen < 6) { - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - } + brw_WHILE(p); break; case OPCODE_BRA: -- 2.30.2