i965: Separate gl_InstanceID and gl_VertexID uploading.
author: Kenneth Graunke <kenneth@whitecape.org>
Wed, 10 Sep 2014 22:41:40 +0000 (15:41 -0700)
committer: Ian Romanick <ian.d.romanick@intel.com>
Fri, 12 Sep 2014 23:35:35 +0000 (16:35 -0700)
We always uploaded them together, mostly out of laziness - both required
an additional vertex element.  However, gl_VertexID now also requires an
additional vertex buffer for storing gl_BaseVertex; for non-indirect
draws this also means uploading (a small amount of) data.  This is extra
overhead we don't need if the shader only uses gl_InstanceID.

In particular, our clear shaders currently use gl_InstanceID for doing
layered clears, but don't need gl_VertexID.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "10.3" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Tested-by: Ian Romanick <ian.d.romanick@intel.com>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/gen8_draw_upload.c

index 39cb8566b635d23131a2c7f23321316cc9106f90..5830aa993d57e8cdaf1beff89258378cd42682ea 100644 (file)
@@ -553,6 +553,7 @@ struct brw_vs_prog_data {
    GLbitfield64 inputs_read;
 
    bool uses_vertexid;
+   bool uses_instanceid;
 };
 
 
index d59ca8bf3e8bb555d84d4d2958399391f2b0de16..2162624cede8d66dc02722f02a102cb1f7138a55 100644 (file)
@@ -671,14 +671,16 @@ emit_vertex_buffer_state(struct brw_context *brw,
 
 static void brw_emit_vertices(struct brw_context *brw)
 {
-   GLuint i, nr_elements;
+   GLuint i;
 
    brw_prepare_vertices(brw);
    brw_prepare_shader_draw_parameters(brw);
 
    brw_emit_query_begin(brw);
 
-   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   unsigned nr_elements = brw->vb.nr_enabled;
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
+      ++nr_elements;
 
    /* If the VS doesn't read any inputs (calculating vertex position from
     * a state variable for some reason, for example), emit a single pad
@@ -824,13 +826,26 @@ static void brw_emit_vertices(struct brw_context *brw)
                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
    }
 
-   if (brw->vs.prog_data->uses_vertexid) {
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
       uint32_t dw0 = 0, dw1 = 0;
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
 
-      dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_1_SHIFT) |
-            (BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_2_SHIFT) |
-            (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_3_SHIFT);
+      if (brw->vs.prog_data->uses_vertexid) {
+         comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+         comp2 = BRW_VE1_COMPONENT_STORE_VID;
+      }
+
+      if (brw->vs.prog_data->uses_instanceid) {
+         comp3 = BRW_VE1_COMPONENT_STORE_IID;
+      }
+
+      dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+            (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+            (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+            (comp3 << BRW_VE1_COMPONENT_3_SHIFT);
 
       if (brw->gen >= 6) {
          dw0 |= GEN6_VE0_VALID |
index cda097f10c39da0a03ac9708dc8c298cbb05f160..0f13c0d19f0c45082f873873106ae6cefd3a0c83 100644 (file)
@@ -1528,7 +1528,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
     * don't represent it with a flag in inputs_read, so we call it
     * VERT_ATTRIB_MAX.
     */
-   if (vs_prog_data->uses_vertexid) {
+   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
       attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
       nr_attributes++;
    }
index 667ed681b51eb44fbfa6b159f096265f33d2bc4e..72b6ef03b425efb8f978e2ad4d9ad72c31e93a57 100644 (file)
@@ -151,18 +151,20 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
     * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
     */
    dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
-   vs_prog_data->uses_vertexid = true;
 
    switch (ir->data.location) {
    case SYSTEM_VALUE_BASE_VERTEX:
       reg->writemask = WRITEMASK_X;
+      vs_prog_data->uses_vertexid = true;
       break;
    case SYSTEM_VALUE_VERTEX_ID:
    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
       reg->writemask = WRITEMASK_Z;
+      vs_prog_data->uses_vertexid = true;
       break;
    case SYSTEM_VALUE_INSTANCE_ID:
       reg->writemask = WRITEMASK_W;
+      vs_prog_data->uses_instanceid = true;
       break;
    default:
       unreachable("not reached");
index 7e4c1eb3b632b6044d9b16f853d86e7ae5262baa..8f0e51555564ed7e808aa29d68dce152d9657e87 100644 (file)
@@ -43,7 +43,7 @@ gen8_emit_vertices(struct brw_context *brw)
    brw_prepare_vertices(brw);
    brw_prepare_shader_draw_parameters(brw);
 
-   if (brw->vs.prog_data->uses_vertexid) {
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
       unsigned vue = brw->vb.nr_enabled;
 
       WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
@@ -53,14 +53,22 @@ gen8_emit_vertices(struct brw_context *brw)
                 "Trying to insert VID/IID past 33rd vertex element, "
                 "need to reorder the vertex attrbutes.");
 
+      unsigned dw1 = 0;
+      if (brw->vs.prog_data->uses_vertexid) {
+         dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID |
+                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |  /* .z channel */
+                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT);
+      }
+
+      if (brw->vs.prog_data->uses_instanceid) {
+         dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID |
+                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
+                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT);
+      }
+
       BEGIN_BATCH(2);
       OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
-      OUT_BATCH(GEN8_SGVS_ENABLE_VERTEX_ID |
-                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |   /* .z channel */
-                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT) |
-                GEN8_SGVS_ENABLE_INSTANCE_ID |
-                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
-                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT));
+      OUT_BATCH(dw1);
       ADVANCE_BATCH();
 
       BEGIN_BATCH(3);