i965: Add support for gl_DrawIDARB and enable extension
author Kristian Høgsberg Kristensen <krh@bitplanet.net>
Thu, 10 Dec 2015 20:27:38 +0000 (12:27 -0800)
committer Kristian Høgsberg Kristensen <krh@bitplanet.net>
Tue, 29 Dec 2015 18:39:25 +0000 (10:39 -0800)
We have to break open a new vec4 for gl_DrawIDARB. We've used up all
space in the vec4 we use for SGVS and gl_DrawIDARB has to come from its
own separate vertex buffer anyway.  This is because we point the vb for
base vertex and base instance into the draw parameter BO for indirect
draw calls, but the draw id is generated by mesa in a different buffer.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
12 files changed:
src/mesa/drivers/dri/i965/brw_compiler.h
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/gen8_draw_upload.c
src/mesa/drivers/dri/i965/intel_extensions.c

index 9b3bb9fe30b988cad54b5680f86ae874262d08de..224ddb14ed1b0580abfd7508f54a9b3ad4527463 100644 (file)
@@ -597,6 +597,7 @@ struct brw_vs_prog_data {
    bool uses_instanceid;
    bool uses_basevertex;
    bool uses_baseinstance;
+   bool uses_drawid;
 };
 
 struct brw_tcs_prog_data
index 4cbe585cd56933411b7e10ba3429c3e29dbaa8cf..7b0340fc2abcd81224b4d2d3cc16a4f3636dd584 100644 (file)
@@ -923,6 +923,15 @@ struct brw_context
        */
       drm_intel_bo *draw_params_bo;
       uint32_t draw_params_offset;
+
+      /**
+       * The value of gl_DrawID for the current _mesa_prim. This always comes
+       * in from its own vertex buffer since it's not part of the indirect
+       * draw parameters.
+       */
+      int gl_drawid;
+      drm_intel_bo *draw_id_bo;
+      uint32_t draw_id_offset;
    } draw;
 
    struct {
index e0665d3c85262cf51ec6cd4526786fb563f83aa6..b0a162aa0b68f7ed008e5319ae06daf4d2198913 100644 (file)
@@ -511,6 +511,18 @@ brw_try_draw_prims(struct gl_context *ctx,
          brw->draw.draw_params_offset = 0;
       }
 
+      /* gl_DrawID always needs its own vertex buffer since it's not part of
+       * the indirect parameter buffer. If the program uses gl_DrawID we need
+       * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
+       * valid brw->vs.prog_data, but we always flag BRW_NEW_VERTICES before
+       * the loop.
+       */
+      brw->draw.gl_drawid = prims[i].draw_id;
+      drm_intel_bo_unreference(brw->draw.draw_id_bo);
+      brw->draw.draw_id_bo = NULL;
+      if (i > 0 && brw->vs.prog_data->uses_drawid)
+         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
+
       if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
       else
index ccf963cbd78c64febc0ba41bad424141348965ab..f781d8ba72db8acd9bcaa808ab574039233c7ddc 100644 (file)
@@ -599,6 +599,12 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
                        &brw->draw.draw_params_bo,
                         &brw->draw.draw_params_offset);
    }
+
+   if (brw->vs.prog_data->uses_drawid) {
+      intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
+                        &brw->draw.draw_id_bo,
+                        &brw->draw.draw_id_offset);
+   }
 }
 
 /**
@@ -663,6 +669,8 @@ brw_emit_vertices(struct brw_context *brw)
    if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
        brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
       ++nr_elements;
+   if (brw->vs.prog_data->uses_drawid)
+      nr_elements++;
 
    /* If the VS doesn't read any inputs (calculating vertex position from
     * a state variable for some reason, for example), emit a single pad
@@ -699,7 +707,8 @@ brw_emit_vertices(struct brw_context *brw)
    const bool uses_draw_params =
       brw->vs.prog_data->uses_basevertex ||
       brw->vs.prog_data->uses_baseinstance;
-   const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
+   const unsigned nr_buffers = brw->vb.nr_buffers +
+      uses_draw_params + brw->vs.prog_data->uses_drawid;
 
    if (nr_buffers) {
       if (brw->gen >= 6) {
@@ -726,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw)
                                   0,  /* stride */
                                   0); /* step rate */
       }
+
+      if (brw->vs.prog_data->uses_drawid) {
+         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
+                                  brw->draw.draw_id_bo,
+                                  brw->draw.draw_id_bo->size - 1,
+                                  brw->draw.draw_id_offset,
+                                  0,  /* stride */
+                                  0); /* step rate */
+      }
+
       ADVANCE_BATCH();
    }
 
@@ -839,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw)
       OUT_BATCH(dw1);
    }
 
+   if (brw->vs.prog_data->uses_drawid) {
+      uint32_t dw0 = 0, dw1 = 0;
+
+      dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_1_SHIFT) |
+            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_2_SHIFT) |
+            (BRW_VE1_COMPONENT_STORE_0   << BRW_VE1_COMPONENT_3_SHIFT);
+
+      if (brw->gen >= 6) {
+         dw0 |= GEN6_VE0_VALID |
+                ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
+                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+      } else {
+         dw0 |= BRW_VE0_VALID |
+                ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
+                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+
+        dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
+      }
+
+      OUT_BATCH(dw0);
+      OUT_BATCH(dw1);
+   }
+
    if (brw->gen >= 6 && gen6_edgeflag_input) {
       uint32_t format =
          brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
index 8235ce76abd6be62547c5c6595206645023120c0..286ee0ed4e7e1e115f84cbeb59a23ec9d30f3834 100644 (file)
@@ -1674,6 +1674,8 @@ fs_visitor::assign_vs_urb_setup()
    if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
        vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
       count++;
+   if (vs_prog_data->uses_drawid)
+      count++;
 
    /* Each attribute is 4 regs. */
    this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
index 5b901a082c666fdf6f410f71d4f6993951f83947..827dbeeb7b6c4310063f9041a6a3208b49eed5ef 100644 (file)
@@ -229,6 +229,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
             *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
          break;
 
+      case nir_intrinsic_load_draw_id:
+         assert(v->stage == MESA_SHADER_VERTEX);
+         reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
+         break;
+
       case nir_intrinsic_load_invocation_id:
          assert(v->stage == MESA_SHADER_GEOMETRY);
          reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
@@ -1755,7 +1762,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_load_vertex_id_zero_base:
    case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_instance_id:
-   case nir_intrinsic_load_base_instance: {
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id: {
       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
       fs_reg val = nir_system_values[sv];
       assert(val.file != BAD_FILE);
index d6941fa1daf11a5359e1ba071c25fd4d08459eb3..25240ad65fae5fa59dfe7f7f1a1542e99dbb43d0 100644 (file)
@@ -59,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location)
       reg->reg_offset = 3;
       vs_prog_data->uses_instanceid = true;
       break;
+   case SYSTEM_VALUE_DRAW_ID:
+      if (nir->info.system_values_read &
+          (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+           BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
+           BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
+           BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)))
+         reg->nr += 4;
+      reg->reg_offset = 0;
+      vs_prog_data->uses_drawid = true;
+      break;
    default:
       unreachable("not reached");
    }
index b2a27d873e71545374f2beed2a38564276f972a6..dd223985d1cb8e0c3b11cfb532f878bf59de1e7f 100644 (file)
@@ -1566,7 +1566,7 @@ int
 vec4_vs_visitor::setup_attributes(int payload_reg)
 {
    int nr_attributes;
-   int attribute_map[VERT_ATTRIB_MAX + 1];
+   int attribute_map[VERT_ATTRIB_MAX + 2];
    memset(attribute_map, 0, sizeof(attribute_map));
 
    nr_attributes = 0;
@@ -1577,6 +1577,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
       }
    }
 
+   if (vs_prog_data->uses_drawid) {
+      attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
+      nr_attributes++;
+   }
+
    /* VertexID is stored by the VF as the last vertex element, but we
     * don't represent it with a flag in inputs_read, so we call it
     * VERT_ATTRIB_MAX.
@@ -1584,6 +1589,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
    if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
        vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
       attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
+      nr_attributes++;
    }
 
    lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
@@ -1990,6 +1996,11 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
       nr_attributes++;
    }
 
+   /* gl_DrawID has its very own vec4 */
+   if (shader->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
+      nr_attributes++;
+   }
+
    /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
     * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically, in
     * vec4 mode, the hardware appears to wedge unless we read something.
index c20da9ba859a5d959ed0dd633cef8e6a68f64933..a3bdbc35b495b37cb7af89552981752426637834 100644 (file)
@@ -85,6 +85,13 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
                                            glsl_type::int_type);
       break;
 
+   case nir_intrinsic_load_draw_id:
+      reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID];
+      if (reg->file == BAD_FILE)
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID,
+                                           glsl_type::int_type);
+      break;
+
    default:
       break;
    }
@@ -677,6 +684,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_instance_id:
    case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
    case nir_intrinsic_load_invocation_id:
    case nir_intrinsic_load_tess_level_inner:
    case nir_intrinsic_load_tess_level_outer: {
index bd6a9a4ef7b8f6dc664af063a369a829da5b1a20..1d6914902b391f21143cb3ff86ba90fc83a9b9f1 100644 (file)
@@ -170,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
       reg->writemask = WRITEMASK_W;
       vs_prog_data->uses_instanceid = true;
       break;
+   case SYSTEM_VALUE_DRAW_ID:
+      reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1);
+      reg->writemask = WRITEMASK_X;
+      vs_prog_data->uses_drawid = true;
+      break;
    default:
       unreachable("not reached");
    }
index 451cf0bd287f98375c2ebc0cd89830c04431eb20..ff89e5f240d3fe6215f4f2ca80dab34b4a35bf90 100644 (file)
@@ -118,7 +118,8 @@ gen8_emit_vertices(struct brw_context *brw)
    const bool uses_draw_params =
       brw->vs.prog_data->uses_basevertex ||
       brw->vs.prog_data->uses_baseinstance;
-   const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
+   const unsigned nr_buffers = brw->vb.nr_buffers +
+      uses_draw_params + brw->vs.prog_data->uses_drawid;
 
    if (nr_buffers) {
       assert(nr_buffers <= 33);
@@ -147,6 +148,15 @@ gen8_emit_vertices(struct brw_context *brw)
                      brw->draw.draw_params_offset);
          OUT_BATCH(brw->draw.draw_params_bo->size);
       }
+
+      if (brw->vs.prog_data->uses_drawid) {
+         OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT |
+                   GEN7_VB0_ADDRESS_MODIFYENABLE |
+                   mocs_wb << 16);
+         OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0,
+                     brw->draw.draw_id_offset);
+         OUT_BATCH(brw->draw.draw_id_bo->size);
+      }
       ADVANCE_BATCH();
    }
 
@@ -163,7 +173,8 @@ gen8_emit_vertices(struct brw_context *brw)
                                     ((brw->vs.prog_data->uses_instanceid ||
                                       brw->vs.prog_data->uses_vertexid) &&
                                      uses_edge_flag));
-   const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
+   const unsigned nr_elements =
+      brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;
 
    /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
     * presumably for VertexID/InstanceID.
@@ -236,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw)
       }
    }
 
+   if (brw->vs.prog_data->uses_drawid) {
+      OUT_BATCH(GEN6_VE0_VALID |
+                ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
+                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
+
    if (gen6_edgeflag_input) {
       uint32_t format =
          brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@@ -273,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw)
       OUT_BATCH(buffer->step_rate);
       ADVANCE_BATCH();
    }
+
+   if (brw->vs.prog_data->uses_drawid) {
+      const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
+      OUT_BATCH(element);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
 }
 
 const struct brw_tracked_state gen8_vertices = {
index de16ebb0a498b33f2d9976c180a990a243486254..e1338e92e15a0dae6a2d5d5f43dc8cd1a9ce1b41 100644 (file)
@@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx)
    ctx->Extensions.ARB_point_sprite = true;
    ctx->Extensions.ARB_seamless_cube_map = true;
    ctx->Extensions.ARB_shader_bit_encoding = true;
+   ctx->Extensions.ARB_shader_draw_parameters = true;
    ctx->Extensions.ARB_shader_texture_lod = true;
    ctx->Extensions.ARB_shadow = true;
    ctx->Extensions.ARB_sync = true;