glsl: keep track of intra-stage indices for atomics
author Timothy Arceri <timothy.arceri@collabora.com>
Mon, 26 Oct 2015 19:58:15 +0000 (06:58 +1100)
committer Timothy Arceri <timothy.arceri@collabora.com>
Mon, 26 Oct 2015 20:03:05 +0000 (07:03 +1100)
This is more optimal as it means we no longer have to upload the same set
of ABO surfaces to all stages in the program.

This also fixes a bug where, since commit c0cd5b, var->data.binding was
being used as a replacement for the atomic buffer index. However, they don't
have to be the same value; they just happened to end up the same when the
binding is 0.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: Alejandro Piñeiro <apinheiro@igalia.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175

src/glsl/link_atomics.cpp
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir.h
src/glsl/nir/nir_lower_atomics.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_gs_surface_state.c
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vs_surface_state.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/main/mtypes.h

index 70ef0e1c891602e819101a400226c240cc41b2f6..cdcc06d53e232461b7af9722f540b85d131bcbf7 100644 (file)
@@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
                                      struct gl_shader_program *prog)
 {
    unsigned num_buffers;
+   unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
    active_atomic_buffer *abs =
       find_active_atomic_counters(ctx, prog, &num_buffers);
 
@@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
       }
 
       /* Assign stage-specific fields. */
-      for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j)
-         mab.StageReferences[j] =
-            (ab.stage_references[j] ? GL_TRUE : GL_FALSE);
+      for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+         if (ab.stage_references[j]) {
+            mab.StageReferences[j] = GL_TRUE;
+            num_atomic_buffers[j]++;
+         } else {
+            mab.StageReferences[j] = GL_FALSE;
+         }
+      }
 
       i++;
    }
 
+   /* Store a list of pointers to atomic buffers per stage and store the index
+    * to the intra-stage buffer list in uniform storage.
+    */
+   for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+      if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
+         prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
+         prog->_LinkedShaders[j]->AtomicBuffers =
+            rzalloc_array(prog, gl_active_atomic_buffer *,
+                          num_atomic_buffers[j]);
+
+         unsigned intra_stage_idx = 0;
+         for (unsigned i = 0; i < num_buffers; i++) {
+            struct gl_active_atomic_buffer *atomic_buffer =
+               &prog->AtomicBuffers[i];
+            if (atomic_buffer->StageReferences[j]) {
+               prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
+                  atomic_buffer;
+
+               for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
+                  prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
+                     intra_stage_idx;
+                  prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
+                     true;
+               }
+
+               intra_stage_idx++;
+            }
+         }
+      }
+   }
+
    delete [] abs;
    assert(i == num_buffers);
 }
index 9b50a93e7f6e4a58e12b40b35c7b0ae820ad334d..01f16d70eb13aaf138a6cef253185d3f315dc0ae 100644 (file)
@@ -392,8 +392,6 @@ nir_visitor::visit(ir_variable *ir)
 
    var->data.index = ir->data.index;
    var->data.binding = ir->data.binding;
-   /* XXX Get rid of buffer_index */
-   var->data.atomic.buffer_index = ir->data.binding;
    var->data.atomic.offset = ir->data.atomic.offset;
    var->data.image.read_only = ir->data.image_read_only;
    var->data.image.write_only = ir->data.image_write_only;
index e3777f926e2aa238813e586b5c054d05b440188a..04a21a7ead6562f0a8e6a68bc8c6d8564b378517 100644 (file)
@@ -308,7 +308,6 @@ typedef struct {
        * Location an atomic counter is stored at.
        */
       struct {
-         unsigned buffer_index;
          unsigned offset;
       } atomic;
 
@@ -1978,7 +1977,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
 
 void nir_lower_two_sided_color(nir_shader *shader);
 
-void nir_lower_atomics(nir_shader *shader);
+void nir_lower_atomics(nir_shader *shader,
+                       const struct gl_shader_program *shader_program);
 void nir_lower_to_source_mods(nir_shader *shader);
 
 bool nir_lower_gs_intrinsics(nir_shader *shader);
index 46e137652a19acd358c07b4429bd895e4d836841..40ca3de96cfdb7054ee81353f54a890a1671e9b2 100644 (file)
  *
  */
 
+#include "ir_uniform.h"
 #include "nir.h"
 #include "main/config.h"
 #include <assert.h>
 
+typedef struct {
+   const struct gl_shader_program *shader_program;
+   nir_shader   *shader;
+} lower_atomic_state;
+
 /*
  * replace atomic counter intrinsics that use a variable with intrinsics
  * that directly store the buffer index and byte offset
  */
 
 static void
-lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
+lower_instr(nir_intrinsic_instr *instr,
+            lower_atomic_state *state)
 {
    nir_intrinsic_op op;
    switch (instr->intrinsic) {
@@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
       return; /* atomics passed as function arguments can't be lowered */
 
    void *mem_ctx = ralloc_parent(instr);
+   unsigned uniform_loc = instr->variables[0]->var->data.location;
 
    nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
    new_instr->const_index[0] =
-      (int) instr->variables[0]->var->data.atomic.buffer_index;
+      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
 
    nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
    offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
@@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state)
 {
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_intrinsic)
-         lower_instr(nir_instr_as_intrinsic(instr), state);
+         lower_instr(nir_instr_as_intrinsic(instr),
+                     (lower_atomic_state *) state);
    }
 
    return true;
 }
 
 void
-nir_lower_atomics(nir_shader *shader)
+nir_lower_atomics(nir_shader *shader,
+                  const struct gl_shader_program *shader_program)
 {
+   lower_atomic_state state = {
+      .shader = shader,
+      .shader_program = shader_program,
+   };
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl) {
-         nir_foreach_block(overload->impl, lower_block, overload->impl);
+         nir_foreach_block(overload->impl, lower_block, (void *) &state);
          nir_metadata_preserve(overload->impl, nir_metadata_block_index |
                                                nir_metadata_dominance);
       }
index 0fdc83ef7e1a438ebb4fa919a50dd35e58b55060..18c361ea8cd60f7936d9436bee0fc745895b1a37 100644 (file)
@@ -1463,7 +1463,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
                              struct brw_stage_prog_data *prog_data,
                              bool dword_pitch);
 void brw_upload_abo_surfaces(struct brw_context *brw,
-                             struct gl_shader_program *prog,
+                             struct gl_shader *shader,
                              struct brw_stage_state *stage_state,
                              struct brw_stage_prog_data *prog_data);
 void brw_upload_image_surfaces(struct brw_context *brw,
index 00125c0f4055a96eb49d8cab14e54340adb0296c..76ed237d88a3c5a484f1da43e37f2ccb7ee0c282 100644 (file)
@@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
 
    if (prog) {
       /* BRW_NEW_GS_PROG_DATA */
-      brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
-                              &brw->gs.prog_data->base.base);
+      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+                              &brw->gs.base, &brw->gs.prog_data->base.base);
    }
 }
 
index 1b4dace84fbdf5f25eeca99bce9c54f71d1ad310..11f111382f45c846e81ae1ba3ddebf0244a1bcbd 100644 (file)
@@ -249,8 +249,10 @@ brw_create_nir(struct brw_context *brw,
    nir_lower_system_values(nir);
    nir_validate_shader(nir);
 
-   nir_lower_atomics(nir);
-   nir_validate_shader(nir);
+   if (shader_prog) {
+      nir_lower_atomics(nir, shader_prog);
+      nir_validate_shader(nir);
+   }
 
    nir_optimize(nir, is_scalar);
 
index 204935641f33a90850455487c4973f39b67756ee..4ea297ade4ca3c879c2fde44906b487797a6bf6d 100644 (file)
@@ -1191,9 +1191,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
       stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
    }
 
-   if (shader_prog && shader_prog->NumAtomicBuffers) {
+   if (shader && shader->NumAtomicBuffers) {
       stage_prog_data->binding_table.abo_start = next_binding_table_offset;
-      next_binding_table_offset += shader_prog->NumAtomicBuffers;
+      next_binding_table_offset += shader->NumAtomicBuffers;
    } else {
       stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
    }
index f65258a52a5d8662e9aa90c0eed4606608b7d491..d7473845c7286e897636b6a66044d11526f30f81 100644 (file)
@@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
 
    if (prog) {
       /* BRW_NEW_VS_PROG_DATA */
-      brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
-                              &brw->vs.prog_data->base.base);
+      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+                              &brw->vs.base, &brw->vs.prog_data->base.base);
    }
 }
 
index 6ebe6481c32cf723e7a0387788403f319cc16eb9..f88f8d591966151cde5f5833dfefd883ca72dcf5 100644 (file)
@@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
 
 void
 brw_upload_abo_surfaces(struct brw_context *brw,
-                       struct gl_shader_program *prog,
+                        struct gl_shader *shader,
                         struct brw_stage_state *stage_state,
                         struct brw_stage_prog_data *prog_data)
 {
@@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
    uint32_t *surf_offsets =
       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 
-   for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
-      struct gl_atomic_buffer_binding *binding =
-         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
-      struct intel_buffer_object *intel_bo =
-         intel_buffer_object(binding->BufferObject);
-      drm_intel_bo *bo = intel_bufferobj_buffer(
-         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
-
-      brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
-                                          binding->Offset, BRW_SURFACEFORMAT_RAW,
-                                          bo->size - binding->Offset, 1, true);
-   }
+   if (shader && shader->NumAtomicBuffers) {
+      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
+         struct gl_atomic_buffer_binding *binding =
+            &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
+         struct intel_buffer_object *intel_bo =
+            intel_buffer_object(binding->BufferObject);
+         drm_intel_bo *bo = intel_bufferobj_buffer(
+            brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
+
+         brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
+                                             binding->Offset, BRW_SURFACEFORMAT_RAW,
+                                             bo->size - binding->Offset, 1, true);
+      }
 
-   if (prog->NumAtomicBuffers)
       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+   }
 }
 
 static void
@@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
 
    if (prog) {
       /* BRW_NEW_FS_PROG_DATA */
-      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
-                              &brw->wm.prog_data->base);
+      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+                              &brw->wm.base, &brw->wm.prog_data->base);
    }
 }
 
@@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
 
    if (prog) {
       /* BRW_NEW_CS_PROG_DATA */
-      brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
-                              &brw->cs.prog_data->base);
+      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+                              &brw->cs.base, &brw->cs.prog_data->base);
    }
 }
 
index 20dd70ef73476b225034957bb78dc0349fa66615..34120cf5777a0761263006b1607ad5fa7373a474 100644 (file)
@@ -2389,6 +2389,9 @@ struct gl_shader
     */
    GLuint NumImages;
 
+   struct gl_active_atomic_buffer **AtomicBuffers;
+   unsigned NumAtomicBuffers;
+
    /**
     * Whether early fragment tests are enabled as defined by
     * ARB_shader_image_load_store.
@@ -4496,7 +4499,7 @@ static inline bool
 _mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
 {
    return ctx->Shader._CurrentFragmentProgram != NULL &&
-      ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
+      ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0;
 }
 
 #ifdef __cplusplus