i965: Expose gl_BaseVertex via a vertex attribute.
authorKenneth Graunke <kenneth@whitecape.org>
Fri, 8 Aug 2014 03:59:56 +0000 (20:59 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Wed, 10 Sep 2014 18:05:08 +0000 (11:05 -0700)
Now that we have the data available, we need to expose it to the
shaders.  We can reuse the same vertex element that we use for
gl_VertexID, but we need to back it by an actual vertex buffer.

A hardware restriction requires that vertex attributes coming from a
buffer (STORE_SRC) must come before any other types (i.e. STORE_0).
So, we have to make gl_BaseVertex be the .x component of the vertex
attribute.  This means moving gl_VertexID to a different component.

I chose to move gl_VertexID and gl_InstanceID to the .z and .w
components, respectively, to make room for gl_BaseInstance in the .y
component (which would also come from a buffer, and therefore be
STORE_SRC).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/gen8_draw_upload.c

index 7c01d79c5e730098ce00e2a8593c4373d6a83013..d59ca8bf3e8bb555d84d4d2958399391f2b0de16 100644 (file)
@@ -712,15 +712,18 @@ static void brw_emit_vertices(struct brw_context *brw)
    /* Now emit VB and VEP state packets.
     */
 
-   if (brw->vb.nr_buffers) {
+   unsigned nr_buffers =
+      brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
+
+   if (nr_buffers) {
       if (brw->gen >= 6) {
-        assert(brw->vb.nr_buffers <= 33);
+        assert(nr_buffers <= 33);
       } else {
-        assert(brw->vb.nr_buffers <= 17);
+        assert(nr_buffers <= 17);
       }
 
-      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
-      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
+      BEGIN_BATCH(1 + 4 * nr_buffers);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
       for (i = 0; i < brw->vb.nr_buffers; i++) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
          emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1,
@@ -728,6 +731,15 @@ static void brw_emit_vertices(struct brw_context *brw)
                                   buffer->step_rate);
 
       }
+
+      if (brw->vs.prog_data->uses_vertexid) {
+         emit_vertex_buffer_state(brw, brw->vb.nr_buffers,
+                                  brw->draw.draw_params_bo,
+                                  brw->draw.draw_params_bo->size - 1,
+                                  brw->draw.draw_params_offset,
+                                  0,  /* stride */
+                                  0); /* step rate */
+      }
       ADVANCE_BATCH();
    }
 
@@ -815,15 +827,19 @@ static void brw_emit_vertices(struct brw_context *brw)
    if (brw->vs.prog_data->uses_vertexid) {
       uint32_t dw0 = 0, dw1 = 0;
 
-      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
-            (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
-            (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-            (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_1_SHIFT) |
+            (BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_2_SHIFT) |
+            (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_3_SHIFT);
 
       if (brw->gen >= 6) {
-        dw0 |= GEN6_VE0_VALID;
+         dw0 |= GEN6_VE0_VALID |
+                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
       } else {
-        dw0 |= BRW_VE0_VALID;
+         dw0 |= BRW_VE0_VALID |
+                brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
+                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
         dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
       }
 
index 6a1c049e1f91fe2cacf3ba3ed835589b6b62498a..667ed681b51eb44fbfa6b159f096265f33d2bc4e 100644 (file)
@@ -154,12 +154,15 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
    vs_prog_data->uses_vertexid = true;
 
    switch (ir->data.location) {
+   case SYSTEM_VALUE_BASE_VERTEX:
+      reg->writemask = WRITEMASK_X;
+      break;
    case SYSTEM_VALUE_VERTEX_ID:
    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
-      reg->writemask = WRITEMASK_X;
+      reg->writemask = WRITEMASK_Z;
       break;
    case SYSTEM_VALUE_INSTANCE_ID:
-      reg->writemask = WRITEMASK_Y;
+      reg->writemask = WRITEMASK_W;
       break;
    default:
       unreachable("not reached");
index 8e4fe5d3bb07187d49f8f81f91d66a5f794c6d78..7e4c1eb3b632b6044d9b16f853d86e7ae5262baa 100644 (file)
@@ -56,12 +56,18 @@ gen8_emit_vertices(struct brw_context *brw)
       BEGIN_BATCH(2);
       OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
       OUT_BATCH(GEN8_SGVS_ENABLE_VERTEX_ID |
-                (0 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |   /* .x channel */
+                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |   /* .z channel */
                 (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT) |
                 GEN8_SGVS_ENABLE_INSTANCE_ID |
-                (1 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .y channel */
+                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
                 (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT));
       ADVANCE_BATCH();
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
+      OUT_BATCH(brw->vb.nr_buffers | GEN8_VF_INSTANCING_ENABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
    } else {
       BEGIN_BATCH(2);
       OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
@@ -92,11 +98,12 @@ gen8_emit_vertices(struct brw_context *brw)
    }
 
    /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
-   if (brw->vb.nr_buffers) {
-      assert(brw->vb.nr_buffers <= 33);
+   unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
+   if (nr_buffers) {
+      assert(nr_buffers <= 33);
 
-      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
-      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
+      BEGIN_BATCH(1 + 4 * nr_buffers);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
       for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
          struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
          uint32_t dw0 = 0;
@@ -110,10 +117,19 @@ gen8_emit_vertices(struct brw_context *brw)
          OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
          OUT_BATCH(buffer->bo->size);
       }
+
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
+                   GEN7_VB0_ADDRESS_MODIFYENABLE |
+                   BDW_MOCS_WB << 16);
+         OUT_RELOC64(brw->draw.draw_params_bo, I915_GEM_DOMAIN_VERTEX, 0,
+                     brw->draw.draw_params_offset);
+         OUT_BATCH(brw->draw.draw_params_bo->size);
+      }
       ADVANCE_BATCH();
    }
 
-   unsigned nr_elements = brw->vb.nr_enabled;
+   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
 
    /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
     * presumably for VertexID/InstanceID.
@@ -181,6 +197,16 @@ gen8_emit_vertices(struct brw_context *brw)
                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
    }
+
+   if (brw->vs.prog_data->uses_vertexid) {
+      OUT_BATCH(GEN6_VE0_VALID |
+                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
    ADVANCE_BATCH();
 
    for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {