i965/gs: Use new NIR intrinsics.
author Kenneth Graunke <kenneth@whitecape.org>
Wed, 5 Aug 2015 16:16:59 +0000 (09:16 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Wed, 23 Sep 2015 18:00:00 +0000 (11:00 -0700)
By performing the vertex counting in NIR, we're able to elide a ton of
useless safety checks around every EmitVertex() call:

total instructions in shared programs: 3952 -> 3720 (-5.87%)
instructions in affected programs:     3491 -> 3259 (-6.65%)
helped:                                11
HURT:                                  0

Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621)
on Haswell GT3e at 1024x768.

This should also make it easier to implement Broadwell's "Static Vertex
Count" feature someday.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp

index b47b87e07dd58d7c0d485610b09ba8000cc87599..1d4f6ab2ccd5e257a4c11889f1178d92db1c96c9 100644 (file)
@@ -96,6 +96,11 @@ brw_create_nir(struct brw_context *brw,
    }
    nir_validate_shader(nir);
 
+   if (stage == MESA_SHADER_GEOMETRY) {
+      nir_lower_gs_intrinsics(nir);
+      nir_validate_shader(nir);
+   }
+
    nir_lower_global_vars_to_local(nir);
    nir_validate_shader(nir);
 
index 8a8dd571e74921f6c14dc6b9d6eec57954594f8c..4f4e1e12fabf0e8897fd0d579ddee5cc7b2f3408 100644 (file)
@@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    src_reg src;
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_emit_vertex: {
+   case nir_intrinsic_emit_vertex_with_counter: {
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       int stream_id = instr->const_index[0];
       gs_emit_vertex(stream_id);
       break;
    }
 
-   case nir_intrinsic_end_primitive:
+   case nir_intrinsic_end_primitive_with_counter:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       gs_end_primitive();
       break;
 
+   case nir_intrinsic_set_vertex_count:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
+      break;
+
    case nir_intrinsic_load_invocation_id: {
       src_reg invocation_id =
          src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
index b9694f67787225ab26d5951b0a04a4dd49d3b56f..7a5b945650c9eb1c7ad724a42ef9d9ad45d3afa3 100644 (file)
@@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
    if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
    {
       /* If we're outputting 32 control data bits or less, then we can wait
        * until the shader is over to output them all.  Otherwise we need to
@@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
           this->current_annotation = "emit vertex: Stream control data bits";
           set_stream_control_data_bits(stream_id);
       }
-
-      this->current_annotation = "emit vertex: increment vertex count";
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-               src_reg(1u)));
    }
-   emit(BRW_OPCODE_ENDIF);
 
    this->current_annotation = NULL;
 }
@@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
 void
 vec4_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
 
 void
index 68e443d38a5399ceb55b67805e9eb31efaf4110d..5cfff7b62ba8c61ad6b47a09c1eddc7f35595aa9 100644 (file)
@@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog()
 void
 gen6_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
+
 void
 gen6_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "gen6 emit vertex";
-   /* Honor max_vertex layout indication in geometry shader by ignoring any
-    * vertices coming after c->gp->program.VerticesOut.
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
-            BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
+
    {
       /* Buffer all output slots for this vertex in vertex_output */
       for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
       }
       emit(ADD(dst_reg(this->vertex_output_offset),
                this->vertex_output_offset, 1u));
-
-      /* Update vertex count */
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
    }
-   emit(BRW_OPCODE_ENDIF);
 }
 
 void