From 0edb084f9d0444c451a08fd2ed7daee2eb8a6f4a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 1 Jul 2015 17:01:24 -0700 Subject: [PATCH] i965/gs: Move vertex_count != 0 check up a level; skip one caller. Paul's original code had emit_control_data_bits() skip the URB write if vertex_count was 0. This meant wrapping every control data write in a conditional write. We accumulate control data bits in a single UD (32-bit) register. For simple shaders that don't emit many vertices, the control data header will be <= 32-bits long, so we only need to write it once at the end of the shader. For shaders with larger headers, we write out batches of control data bits at EmitVertex(), when (vertex_count * bits_per_vertex) % 32 == 0. On the first EmitVertex() call, the above expression will evaluate to true simply because vertex_count == 0. But we want to avoid emitting the control data bits, because we haven't accumulated 32-bits worth yet. In other words, the vertex_count != 0 check is really only necessary in the EmitVertex() batching case, not the end-of-thread case. This saves a CMP/IF/ENDIF in every shader that uses EndPrimitive() or multiple streams. The only downside is that a shader which emits no vertices at all will execute an additional URB write---but such shaders are pointless and not worth optimizing. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 2f948ee73c0..55408eb0b0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -348,11 +348,6 @@ vec4_gs_visitor::emit_control_data_bits() if (c->control_data_header_size_bits > 128) urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET; - /* If vertex_count is 0, then no control data bits have been accumulated - * yet, so we should do nothing. - */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ)); - emit(IF(BRW_PREDICATE_NORMAL)); { /* If we are using either channel masks or a per-slot offset, then we * need to figure out which DWORD we are trying to write to, using the @@ -431,7 +426,6 @@ vec4_gs_visitor::emit_control_data_bits() inst->base_mrf = base_mrf; inst->mlen = 2; } - emit(BRW_OPCODE_ENDIF); } void @@ -531,9 +525,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) emit(AND(dst_null_d(), this->vertex_count, (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(IF(BRW_PREDICATE_NORMAL)); { + /* If vertex_count is 0, then no control data bits have been + * accumulated yet, so we skip emitting them. + */ + emit(CMP(dst_null_d(), this->vertex_count, 0u, + BRW_CONDITIONAL_NEQ)); + emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); + emit(BRW_OPCODE_ENDIF); /* Reset control_data_bits to 0 so we can start accumulating a new * batch. -- 2.30.2