gallium: Pack the atomic counters just above the SSBOs.
author Eric Anholt <eric@anholt.net>
Fri, 20 Dec 2019 21:30:04 +0000 (13:30 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 21 Jan 2020 18:06:23 +0000 (10:06 -0800)
We carve out half the SSBO space for atomics, and we were just binding
them way up there (at MaxShaderStorageBlocks + binding, regardless of how
many SSBOs the program uses).  freedreno was then using a remapping table to map the
sparse buffer index back down, since space in the descriptor array is a
shared resource that may limit parallelism.  That remapping table
generated inside of the ir3 compiler is getting thoroughly in the way of
implementing vulkan descriptor sets.

We will be able to get rid of freedreno's remapping table, and
hopefully save shared resources on other hardware, by packing the atomics
tightly above the SSBOs (like i965 does).  We already rebind the shader
buffers on program change if either the old or new program has SSBOs or
ABOs, so this doesn't necessarily increase the program state change cost
(the only cost increase I can come up with is if you're using the same
atomic counter without rebinding it across changes of programs with
varying SSBO counts, meaning it would now bounce around index space).

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3240>

src/mesa/state_tracker/st_atom_atomicbuf.c
src/mesa/state_tracker/st_atom_list.h
src/mesa/state_tracker/st_atom_storagebuf.c
src/mesa/state_tracker/st_context.h
src/mesa/state_tracker/st_glsl_to_nir.cpp
src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index dad2b65b4c8bd53f8a5a76e8c6635d48c23e01f7..1855c771d0c3385c4732ae197ca8df0752153c3a 100644 (file)
@@ -75,10 +75,10 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog,
       return;
 
    /* For !has_hw_atomics, the atomic counters have been rewritten to be above
-    * the SSBO range.
+    * the SSBOs used by the program.
     */
-   unsigned buffer_base = st->ctx->Const.Program[stage].MaxShaderStorageBlocks;
-
+   unsigned buffer_base = prog->info.num_ssbos;
+   unsigned used_bindings = 0;
    for (i = 0; i < prog->sh.data->NumAtomicBuffers; i++) {
       struct gl_active_atomic_buffer *atomic =
          &prog->sh.data->AtomicBuffers[i];
@@ -88,7 +88,9 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog,
 
       st->pipe->set_shader_buffers(st->pipe, shader_type,
                                    buffer_base + atomic->Binding, 1, &sb, 0x1);
+      used_bindings = MAX2(atomic->Binding + 1, used_bindings);
    }
+   st->last_used_atomic_bindings[shader_type] = used_bindings;
 }
 
 void
index e1aebc91e78ea30383e9cee84588e892bd42629d..a4d55837034745ab04637f7aad463e2c21528602 100644 (file)
@@ -57,6 +57,9 @@ ST_STATE(ST_NEW_TES_ATOMICS, st_bind_tes_atomics)
 ST_STATE(ST_NEW_FS_ATOMICS, st_bind_fs_atomics)
 ST_STATE(ST_NEW_GS_ATOMICS, st_bind_gs_atomics)
 
+/* SSBOs depend on the _atomics having been updated first in the
+ * !has_hw_atomics case.
+ */
 ST_STATE(ST_NEW_VS_SSBOS, st_bind_vs_ssbos)
 ST_STATE(ST_NEW_TCS_SSBOS, st_bind_tcs_ssbos)
 ST_STATE(ST_NEW_TES_SSBOS, st_bind_tes_ssbos)
index 5ffafaa611bb7560e9a0232c59d483d7fd9592f2..0355c988529000739a3c95b68fabcba98e8e8202 100644 (file)
@@ -46,12 +46,9 @@ st_bind_ssbos(struct st_context *st, struct gl_program *prog,
 {
    unsigned i;
    struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS];
-   struct gl_program_constants *c;
    if (!prog || !st->pipe->set_shader_buffers)
       return;
 
-   c = &st->ctx->Const.Program[prog->info.stage];
-
    for (i = 0; i < prog->info.num_ssbos; i++) {
       struct gl_buffer_binding *binding;
       struct st_buffer_object *st_obj;
@@ -81,13 +78,19 @@ st_bind_ssbos(struct st_context *st, struct gl_program *prog,
    st->pipe->set_shader_buffers(st->pipe, shader_type, 0,
                                 prog->info.num_ssbos, buffers,
                                 prog->sh.ShaderStorageBlocksWriteAccess);
-   /* clear out any stale shader buffers */
-   if (prog->info.num_ssbos < c->MaxShaderStorageBlocks)
+
+   /* Clear out any stale shader buffers (or lowered atomic counters). */
+   int num_ssbos = prog->info.num_ssbos;
+   if (!st->has_hw_atomics)
+      num_ssbos += st->last_used_atomic_bindings[shader_type];
+   if (st->last_num_ssbos[shader_type] > num_ssbos) {
       st->pipe->set_shader_buffers(
             st->pipe, shader_type,
-            prog->info.num_ssbos,
-            c->MaxShaderStorageBlocks - prog->info.num_ssbos,
+            num_ssbos,
+            st->last_num_ssbos[shader_type] - num_ssbos,
             NULL, 0);
+      st->last_num_ssbos[shader_type] = num_ssbos;
+   }
 }
 
 void st_bind_vs_ssbos(struct st_context *st)
index 9a6a1f80d33989de8bfaef591436a32a5955592b..68708818fdf0e3c5b3046aeec96eef55d2e4bb96 100644 (file)
@@ -337,6 +337,9 @@ struct st_context
    /* The number of vertex buffers from the last call of validate_arrays. */
    unsigned last_num_vbuffers;
 
+   unsigned last_used_atomic_bindings[PIPE_SHADER_TYPES];
+   unsigned last_num_ssbos[PIPE_SHADER_TYPES];
+
    int32_t draw_stamp;
    int32_t read_stamp;
 
index d19398bd4b983149fbc47dd3ebb3064d67deb714..fadd1b4ba81ee2a639ce12002dd2ab99b63b6663 100644 (file)
@@ -504,8 +504,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
    nir_remove_dead_variables(nir, mask);
 
    if (!st->has_hw_atomics)
-      NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
-                 st->ctx->Const.Program[nir->info.stage].MaxShaderStorageBlocks);
+      NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, prog->info.num_ssbos);
 
    st_finalize_nir_before_variants(nir);
 
index aec59e75e71dedaa79ca99044ded59bd11619ca1..c9f8d13c764164620f69e86eae4269c78611f6cb 100644 (file)
@@ -3448,7 +3448,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
       resource = buffer;
    } else {
       st_src_reg buffer(PROGRAM_BUFFER,
-                        ctx->Const.Program[shader->Stage].MaxShaderStorageBlocks +
+                        prog->info.num_ssbos +
                         location->data.binding,
                         GLSL_TYPE_ATOMIC_UINT);
 
@@ -7051,7 +7051,7 @@ st_translate_program(
 
       if (!st_context(ctx)->has_hw_atomics) {
          for (i = 0; i < prog->info.num_abos; i++) {
-            unsigned index = (frag_const->MaxShaderStorageBlocks +
+            unsigned index = (prog->info.num_ssbos +
                               prog->sh.AtomicBuffers[i]->Binding);
             assert(prog->sh.AtomicBuffers[i]->Binding <
                    frag_const->MaxAtomicBuffers);