i965/cs: Setup surface binding for gl_NumWorkGroups
authorJordan Justen <jordan.l.justen@intel.com>
Thu, 24 Sep 2015 08:29:32 +0000 (01:29 -0700)
committerJordan Justen <jordan.l.justen@intel.com>
Tue, 29 Sep 2015 15:23:47 +0000 (08:23 -0700)
This will only be setup when the prog_data uses_num_work_groups
boolean is set.

At this point nothing will set uses_num_work_groups, but soon code
will set it when emitting code for the intrinsic that loads
gl_NumWorkGroups.

We can't emit this surface information earlier at the start of the
DispatchCompute* call because we may not have generated the program
yet. Until we generate the program, we don't know if the
gl_NumWorkGroups variable is accessed.

We also can't emit the surface as part of the brw_cs_state atom,
because we might not need the surface if gl_NumWorkGroups is not used
by the program.

Lastly, we cannot emit the surface later (after state upload) in the
DispatchCompute* call, because it needs to be run before the
brw_cs_state atom is emitted, since it changes the surface state.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/brw_compute.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c

index 505023d285196cdd1cb74858cbc20914166f6e44..fe991a4615331d4e3fc402c4619a6a99dcff3d62 100644 (file)
@@ -184,6 +184,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
 
    brw->compute.num_work_groups_bo = NULL;
    brw->compute.num_work_groups = num_groups;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
 
    brw_dispatch_compute_common(ctx);
 }
@@ -202,6 +203,7 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
    brw->compute.num_work_groups_bo = bo;
    brw->compute.num_work_groups_offset = indirect;
    brw->compute.num_work_groups = indirect_group_counts;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
 
    brw_dispatch_compute_common(ctx);
 }
index e407dec8eab2a7de3144e363b0a1848fc288ad0c..8b790fe0bca18fa76c7366d70bf65d455116e3d2 100644 (file)
@@ -213,6 +213,7 @@ enum brw_state_id {
    BRW_STATE_SAMPLER_STATE_TABLE,
    BRW_STATE_VS_ATTRIB_WORKAROUNDS,
    BRW_STATE_COMPUTE_PROGRAM,
+   BRW_STATE_CS_WORK_GROUPS,
    BRW_NUM_STATE_BITS
 };
 
@@ -294,6 +295,7 @@ enum brw_state_id {
 #define BRW_NEW_SAMPLER_STATE_TABLE     (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
 #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
 #define BRW_NEW_COMPUTE_PROGRAM         (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS          (1ull << BRW_STATE_CS_WORK_GROUPS)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -497,6 +499,7 @@ struct brw_cs_prog_data {
    unsigned local_size[3];
    unsigned simd_size;
    bool uses_barrier;
+   bool uses_num_work_groups;
 
    struct {
       /** @{
@@ -1545,7 +1548,7 @@ struct brw_context
 
    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[60];
-   const struct brw_tracked_state compute_atoms[6];
+   const struct brw_tracked_state compute_atoms[7];
 
    /* If (INTEL_DEBUG & DEBUG_BATCH) */
    struct {
index afce8ad3b2738c4a51235c94282a8c2e9de97f8f..3b7a4330c7aea86a8f7b39ae995fe09900a4e7ea 100644 (file)
@@ -158,6 +158,7 @@ extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
 extern const struct brw_tracked_state gen8_vs_state;
+extern const struct brw_tracked_state brw_cs_work_groups_surface;
 
 static inline bool
 brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
index 2e8a0b3de9b4dabee01db2a181fc48d81395f2cb..46687e342d3fbbb8fc86bb4dbe4bdf5dad87baba 100644 (file)
@@ -261,6 +261,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
    &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
    &brw_texture_surfaces,
+   &brw_cs_work_groups_surface,
    &brw_cs_state,
 };
 
@@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
    &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
    &brw_texture_surfaces,
+   &brw_cs_work_groups_surface,
    &brw_cs_state,
 };
 
@@ -613,6 +615,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
    DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
+   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
    {0, 0, 0}
 };
 
index f2aaa0b178ed7e99c94c03a36fa4a75cd9adc1d7..c9316963840f9675f097becda12dba3f77d6b8ad 100644 (file)
@@ -1336,3 +1336,46 @@ gen4_init_vtable_surface_functions(struct brw_context *brw)
    brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 }
+
+static void
+brw_upload_cs_work_groups_surface(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* _NEW_PROGRAM */
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (prog && brw->cs.prog_data->uses_num_work_groups) {
+      const unsigned surf_idx =
+         brw->cs.prog_data->binding_table.work_groups_start;
+      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
+      drm_intel_bo *bo;
+      uint32_t bo_offset;
+
+      if (brw->compute.num_work_groups_bo == NULL) {
+         bo = NULL;
+         intel_upload_data(brw,
+                           (void *)brw->compute.num_work_groups,
+                           3 * sizeof(GLuint),
+                           sizeof(GLuint),
+                           &bo,
+                           &bo_offset);
+      } else {
+         bo = brw->compute.num_work_groups_bo;
+         bo_offset = brw->compute.num_work_groups_offset;
+      }
+
+      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
+                                          bo, bo_offset,
+                                          BRW_SURFACEFORMAT_RAW,
+                                          3 * sizeof(GLuint), 1, true);
+      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+   }
+}
+
+const struct brw_tracked_state brw_cs_work_groups_surface = {
+   .dirty = {
+      .brw = BRW_NEW_CS_WORK_GROUPS
+   },
+   .emit = brw_upload_cs_work_groups_surface,
+};