From 32118cfe37495738ed5931c6b1a71b8ee2ad189c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 6 Dec 2011 12:13:32 -0800 Subject: [PATCH] i965: Don't make consumers of brw_DO()/brw_WHILE() track loop start. This is a similar cleanup to what we did for brw_IF(), brw_ELSE(), brw_ENDIF() handling. Reviewed-by: Yuanhan Liu --- src/mesa/drivers/dri/i965/brw_clip_line.c | 5 ++-- src/mesa/drivers/dri/i965/brw_clip_tri.c | 15 ++++------ src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 14 ++++----- src/mesa/drivers/dri/i965/brw_eu.c | 4 +++ src/mesa/drivers/dri/i965/brw_eu.h | 12 ++++++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 30 +++++++++++++++++-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- 9 files changed, 58 insertions(+), 28 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 614849a1917..6cf2bd293cf 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_indirect newvtx0 = brw_indirect(2, 0); struct brw_indirect newvtx1 = brw_indirect(3, 0); struct brw_indirect plane_ptr = brw_indirect(4, 0); - struct brw_instruction *plane_loop; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map, VERT_RESULT_HPOS); @@ -160,7 +159,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); - plane_loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ @@ -245,7 +244,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); } - brw_WHILE(p, plane_loop); + brw_WHILE(p); brw_ADD(p, 
c->reg.t, c->reg.t0, c->reg.t1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 12d67242ef3..a29f8e05b34 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -232,8 +232,6 @@ void brw_clip_tri( struct brw_clip_compile *c ) struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *vertex_loop; GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map, VERT_RESULT_HPOS); @@ -244,7 +242,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); - plane_loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ @@ -266,7 +264,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); - vertex_loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ @@ -364,7 +362,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); } - brw_WHILE(p, vertex_loop); + brw_WHILE(p); /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] * inlist = outlist @@ -396,7 +394,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); } - brw_WHILE(p, plane_loop); + brw_WHILE(p); } @@ -404,7 +402,6 @@ void brw_clip_tri( struct brw_clip_compile *c ) void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) { struct brw_compile *p = &c->func; - struct brw_instruction *loop; /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) */ 
@@ -429,7 +426,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)); @@ -440,7 +437,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); } - brw_WHILE(p, loop); + brw_WHILE(p); brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index 01f14b091be..03c7d428bd2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -273,7 +273,6 @@ static void emit_lines(struct brw_clip_compile *c, bool do_offset) { struct brw_compile *p = &c->func; - struct brw_instruction *loop; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v1 = brw_indirect(1, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); @@ -285,7 +284,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); @@ -295,7 +294,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_set_conditionalmod(p, BRW_CONDITIONAL_G); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); } - brw_WHILE(p, loop); + brw_WHILE(p); } /* v1ptr = &inlist[nr_verts] @@ -307,7 +306,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); brw_MOV(p, 
deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); - loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); @@ -333,7 +332,7 @@ static void emit_lines(struct brw_clip_compile *c, brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); } - brw_WHILE(p, loop); + brw_WHILE(p); } @@ -342,7 +341,6 @@ static void emit_points(struct brw_clip_compile *c, bool do_offset ) { struct brw_compile *p = &c->func; - struct brw_instruction *loop; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); @@ -350,7 +348,7 @@ static void emit_points(struct brw_clip_compile *c, brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - loop = brw_DO(p, BRW_EXECUTE_1); + brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); @@ -376,7 +374,7 @@ static void emit_points(struct brw_clip_compile *c, brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); } - brw_WHILE(p, loop); + brw_WHILE(p); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b5a858b78a4..c0126ff9ffb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -193,6 +193,10 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) p->if_stack_array_size = 16; p->if_stack = rzalloc_array(mem_ctx, struct brw_instruction *, p->if_stack_array_size); + + p->loop_stack_depth = 0; + p->loop_stack_array_size = 16; + p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 481fbf1c7b6..8d06fefb573 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ 
b/src/mesa/drivers/dri/i965/brw_eu.h @@ -128,6 +128,15 @@ struct brw_compile { int if_stack_depth; int if_stack_array_size; + /** + * loop_stack contains the instruction pointers of the starts of loops which + * must be patched (and popped) once the matching WHILE instruction is + * encountered. + */ + int *loop_stack; + int loop_stack_depth; + int loop_stack_array_size; + struct brw_glsl_label *first_label; /**< linked list of labels */ struct brw_glsl_call *first_call; /**< linked list of CALs */ }; @@ -1015,8 +1024,7 @@ void brw_ENDIF(struct brw_compile *p); struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size); -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *patch_insn); +struct brw_instruction *brw_WHILE(struct brw_compile *p); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6247e4c4b39..5f92075a1b5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -911,6 +911,25 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst) } } +static void +push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) +{ + if (p->loop_stack_array_size <= p->loop_stack_depth) { + p->loop_stack_array_size *= 2; + p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, + p->loop_stack_array_size); + } + + p->loop_stack[p->loop_stack_depth] = inst - p->store; + p->loop_stack_depth++; +} + +static struct brw_instruction * +get_inner_do_insn(struct brw_compile *p) +{ + return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; +} + /* EU takes the value from the flag register and pushes it onto some * sort of a stack (presumably merging with any flag value already on * the stack).
Within an if block, the flags at the top of the stack @@ -1301,10 +1320,13 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct intel_context *intel = &p->brw->intel; if (intel->gen >= 6 || p->single_program_flow) { + push_loop_stack(p, &p->store[p->nr_insn]); return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + push_loop_stack(p, insn); + /* Override the defaults for this instruction: */ brw_set_dest(p, insn, brw_null_reg()); @@ -1323,13 +1345,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) +struct brw_instruction *brw_WHILE(struct brw_compile *p) { struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; + struct brw_instruction *insn, *do_insn; GLuint br = 1; + do_insn = get_inner_do_insn(p); + p->loop_stack_depth--; + if (intel->gen >= 5) br = 2; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 2f5a026c54f..ded58a2db91 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -828,7 +828,7 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; - inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + inst0 = inst1 = brw_WHILE(p); if (intel->gen < 6) { /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_stack[loop_stack_depth]) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index ef1ca3dd84a..a7eba216cf9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -964,7 +964,7 @@ vec4_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; - inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + inst0 = inst1 = brw_WHILE(p); if (intel->gen < 6) 
{ /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_stack[loop_stack_depth]) { diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 6c96a48ce50..05f3ecba1d4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -2123,7 +2123,7 @@ void brw_old_vs_emit(struct brw_vs_compile *c ) if (intel->gen == 5) br = 2; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + inst0 = inst1 = brw_WHILE(p); if (intel->gen < 6) { /* patch all the BREAK/CONT instructions from last BEGINLOOP */ -- 2.30.2