From a9e8c10bd76f9a94b878b76bb5ae696beeaae2e0 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Sat, 10 Aug 2013 21:13:33 -0700 Subject: [PATCH] i965: Combine 4 boolean args of brw_urb_WRITE into a flags bitfield. The arguments to brw_urb_WRITE() were getting pretty unwieldy, and we have to add more flags to support geometry shaders anyhow. Also plumb these flags through brw_clip_emit_vue(), brw_set_urb_message(), and the vec4_instruction class. Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_clip.h | 3 +- src/mesa/drivers/dri/i965/brw_clip_line.c | 4 +- src/mesa/drivers/dri/i965/brw_clip_tri.c | 6 +-- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 6 +-- src/mesa/drivers/dri/i965/brw_clip_util.c | 19 ++++----- src/mesa/drivers/dri/i965/brw_eu.h | 42 +++++++++++++++++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 32 ++++++-------- src/mesa/drivers/dri/i965/brw_gs_emit.c | 6 +-- src/mesa/drivers/dri/i965/brw_sf_emit.c | 20 ++------- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 5 +-- .../drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 12 files changed, 76 insertions(+), 71 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index f26d75d7128..5af0ad3429b 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -173,8 +173,7 @@ void brw_clip_init_planes( struct brw_clip_compile *c ); void brw_clip_emit_vue(struct brw_clip_compile *c, struct brw_indirect vert, - bool allocate, - bool eot, + unsigned flags, GLuint header); void brw_clip_kill_thread(struct brw_clip_compile *c); diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 8466b1c74c8..5238598132b 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -282,10 +282,10 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false); brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false); - brw_clip_emit_vue(c, newvtx0, 1, 0, + brw_clip_emit_vue(c, newvtx0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START); - brw_clip_emit_vue(c, newvtx1, 0, 1, + brw_clip_emit_vue(c, newvtx1, BRW_URB_WRITE_EOT_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 1eeb99510a2..995a8909ccf 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -466,7 +466,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - brw_clip_emit_vue(c, v0, 1, 0, + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START)); @@ -475,7 +475,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) brw_DO(p, BRW_EXECUTE_1); { - brw_clip_emit_vue(c, v0, 1, 0, + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)); brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); @@ -486,7 +486,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) } brw_WHILE(p); - brw_clip_emit_vue(c, v0, 0, 1, + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_EOT_COMPLETE, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END)); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index af327d68ce7..644c99aebf1 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -319,10 +319,10 @@ static void emit_lines(struct brw_clip_compile *c, brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); { - brw_clip_emit_vue(c, v0, 1, 0, + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START); - brw_clip_emit_vue(c, v1, 1, 0, + brw_clip_emit_vue(c, v1, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END); } @@ -364,7 +364,7 @@ static void emit_points(struct brw_clip_compile *c, if (do_offset) apply_one_offset(c, v0); - brw_clip_emit_vue(c, v0, 1, 0, + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 62172ecf6b8..d5c50d7a890 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -313,15 +313,18 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, void brw_clip_emit_vue(struct brw_clip_compile *c, struct brw_indirect vert, - bool allocate, - bool eot, + unsigned flags, GLuint header) { struct brw_compile *p = &c->func; + bool allocate = flags & BRW_URB_WRITE_ALLOCATE; brw_clip_ff_sync(c); - assert(!(allocate && eot)); + /* Any URB entry that is allocated must subsequently be used or discarded, + * so it doesn't make sense to mark EOT and ALLOCATE at the same time. + */ + assert(!(allocate && (flags & BRW_URB_WRITE_EOT))); /* Copy the vertex from vertn into m1..mN+1: */ @@ -343,12 +346,9 @@ void brw_clip_emit_vue(struct brw_clip_compile *c, allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 0, c->reg.R0, - allocate, - 1, /* used */ + flags, c->nr_regs + 1, /* msg length */ allocate ? 1 : 0, /* response_length */ - eot, /* eot */ - 1, /* writes_complete */ 0, /* urb offset */ BRW_URB_SWIZZLE_NONE); } @@ -367,12 +367,9 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 0, c->reg.R0, - 0, /* allocate */ - 0, /* used */ + BRW_URB_WRITE_UNUSED | BRW_URB_WRITE_EOT_COMPLETE, 1, /* msg len */ 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ 0, BRW_URB_SWIZZLE_NONE); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 0e08e89eecc..ae4cab56637 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -228,16 +228,50 @@ void brw_set_dp_write_message(struct brw_compile *p, GLuint end_of_thread, GLuint send_commit_msg); +enum brw_urb_write_flags { + /** + * Causes a new URB entry to be allocated, and its address stored in the + * destination register (gen < 7). + */ + BRW_URB_WRITE_ALLOCATE = 0x1, + + /** + * Causes the current URB entry to be deallocated (gen < 7). + */ + BRW_URB_WRITE_UNUSED = 0x2, + + /** + * Causes the thread to terminate. + */ + BRW_URB_WRITE_EOT = 0x4, + + /** + * Indicates that the given URB entry is complete, and may be sent further + * down the 3D pipeline (gen < 7). + */ + BRW_URB_WRITE_COMPLETE = 0x8, + + /** + * Convenient combination of flags: end the thread while simultaneously + * marking the given URB entry as complete. + */ + BRW_URB_WRITE_EOT_COMPLETE = BRW_URB_WRITE_EOT | BRW_URB_WRITE_COMPLETE, + + /** + * Convenient combination of flags: mark the given URB entry as complete + * and simultaneously allocate a new one. + */ + BRW_URB_WRITE_ALLOCATE_COMPLETE = + BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE, +}; + void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, - bool allocate, - bool used, + unsigned flags, GLuint msg_length, GLuint response_length, - bool eot, - bool writes_complete, GLuint offset, GLuint swizzle); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 204cea2437e..622b22f7981 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -515,19 +515,17 @@ static void brw_set_ff_sync_message(struct brw_compile *p, static void brw_set_urb_message( struct brw_compile *p, struct brw_instruction *insn, - bool allocate, - bool used, + unsigned flags, GLuint msg_length, GLuint response_length, - bool end_of_thread, - bool complete, GLuint offset, GLuint swizzle_control ) { struct brw_context *brw = p->brw; brw_set_message_descriptor(p, insn, BRW_SFID_URB, - msg_length, response_length, true, end_of_thread); + msg_length, response_length, true, + flags & BRW_URB_WRITE_EOT); if (brw->gen == 7) { insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ insn->bits3.urb_gen7.offset = offset; @@ -535,21 +533,21 @@ static void brw_set_urb_message( struct brw_compile *p, insn->bits3.urb_gen7.swizzle_control = swizzle_control; /* per_slot_offset = 0 makes it ignore offsets in message header */ insn->bits3.urb_gen7.per_slot_offset = 0; - insn->bits3.urb_gen7.complete = complete; + insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; } else if (brw->gen >= 5) { insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ insn->bits3.urb_gen5.offset = offset; insn->bits3.urb_gen5.swizzle_control = swizzle_control; - insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = used; /* ? */ - insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0; + insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1; + insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; } else { insn->bits3.urb.opcode = 0; /* ? */ insn->bits3.urb.offset = offset; insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? */ - insn->bits3.urb.complete = complete; + insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0; + insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1; + insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; } } @@ -2215,12 +2213,9 @@ void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, - bool allocate, - bool used, + unsigned flags, GLuint msg_length, GLuint response_length, - bool eot, - bool writes_complete, GLuint offset, GLuint swizzle) { @@ -2254,12 +2249,9 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_urb_message(p, insn, - allocate, - used, + flags, msg_length, response_length, - eot, - writes_complete, offset, swizzle); } diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 6034a9df0b1..fff3585aec4 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -185,12 +185,10 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 0, c->reg.header, - allocate, - 1, /* used */ + allocate ? BRW_URB_WRITE_ALLOCATE_COMPLETE + : BRW_URB_WRITE_EOT_COMPLETE, c->nr_regs + 1, /* msg length */ allocate ? 1 : 0, /* response length */ - allocate ? 0 : 1, /* eot */ - 1, /* writes_complete */ 0, /* urb offset */ BRW_URB_SWIZZLE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 0131de57c2d..d329bef1aef 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -491,12 +491,9 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) brw_null_reg(), 0, brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ - 0, /* allocate */ - 1, /* used */ + last ? BRW_URB_WRITE_EOT_COMPLETE : 0, 4, /* msg len */ 0, /* response len */ - last, /* eot */ - last, /* writes complete */ i*4, /* offset */ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ } @@ -565,12 +562,9 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) brw_null_reg(), 0, brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ + last ? BRW_URB_WRITE_EOT_COMPLETE : 0, 4, /* msg len */ 0, /* response len */ - last, /* eot */ - last, /* writes complete */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } @@ -655,12 +649,9 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) brw_null_reg(), 0, brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ + last ? BRW_URB_WRITE_EOT_COMPLETE : 0, 4, /* msg len */ 0, /* response len */ - last, /* eot */ - last, /* writes complete */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } @@ -715,12 +706,9 @@ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) brw_null_reg(), 0, brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ + last ? BRW_URB_WRITE_EOT_COMPLETE : 0, 4, /* msg len */ 0, /* response len */ - last, /* eot */ - last, /* writes complete */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 171f14dbfa7..a398f71a500 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -222,7 +222,7 @@ public: int target; /**< MRT target. */ bool shadow_compare; - bool eot; + unsigned urb_write_flags; bool header_present; int mlen; /**< SEND message length */ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 53b4bf2b307..89831de43f5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -405,12 +405,9 @@ vec4_generator::generate_urb_write(vec4_instruction *inst) brw_null_reg(), /* dest */ inst->base_mrf, /* starting mrf reg nr */ brw_vec8_grf(0, 0), /* src */ - false, /* allocate */ - true, /* used */ + inst->urb_write_flags, inst->mlen, 0, /* response len */ - inst->eot, /* eot */ - inst->eot, /* writes complete */ inst->offset, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c3070495da1..72841e7e7fa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2788,7 +2788,7 @@ vec4_vs_visitor::emit_urb_write_opcode(bool complete) } vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); - inst->eot = complete; + inst->urb_write_flags = complete ? BRW_URB_WRITE_EOT_COMPLETE : 0; return inst; } -- 2.30.2