freedreno: Track the set of UBOs to be uploaded in UBO analysis.
author Eric Anholt <eric@anholt.net>
Mon, 9 Dec 2019 19:55:21 +0000 (11:55 -0800)
committer Eric Anholt <eric@anholt.net>
Mon, 9 Dec 2019 22:13:50 +0000 (14:13 -0800)
We were iterating over the entire 32-entry range array on every call,
when a bitset recording which UBOs actually have a range to upload lets
us visit only those entries (normally just the first one).

Knocks ir3_emit_user_consts down from ~.5% of CPU to .1% on WebGL
fishtank.

Reviewed-by: Rob Clark <robdclark@chromium.org>
src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/ir3/ir3_gallium.c

index 720fd06a1d70f8538adcedce849a17358a9d9ee7..136ec9a65e40e28ed27f4d1672c5645dc71827b9 100644 (file)
@@ -246,6 +246,9 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
                        state->range[i].end = state->range[i].start + range_size;
                }
                offset += range_size;
+
+               if (state->range[i].start < state->range[i].end)
+                       state->enabled |= 1 << i;
        }
        state->size = offset;
 
index c829422ca4d2bb4e92626eb70ca71224fcf28395..3a0447a13345d7b9f0991cfe6d90e981ddb01e3e 100644 (file)
@@ -615,6 +615,7 @@ struct ir3_ubo_range {
 
 struct ir3_ubo_analysis_state {
        struct ir3_ubo_range range[IR3_MAX_CONSTANT_BUFFERS];
+       uint32_t enabled;
        uint32_t size;
        uint32_t lower_count;
        uint32_t cmdstream_size; /* for per-gen backend to stash required cmdstream size */
index 6ba01c62a537d0e078a9dcbd6f4e4d7e7f85a5ec..e5c44521af67bccc99546a78247f67fd84d4a3a4 100644 (file)
@@ -235,6 +235,11 @@ ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
        }
 }
 
+/**
+ * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
+ * outside of these ranges will be done using full UBO accesses in the
+ * shader).
+ */
 void
 ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
@@ -242,31 +247,28 @@ ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *
        struct ir3_ubo_analysis_state *state;
        state = &v->shader->ubo_state;
 
-       for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
+       uint32_t i;
+       foreach_bit(i, state->enabled & constbuf->enabled_mask) {
                struct pipe_constant_buffer *cb = &constbuf->cb[i];
 
-               if (state->range[i].start < state->range[i].end &&
-                       constbuf->enabled_mask & (1 << i)) {
-
-                       uint32_t size = state->range[i].end - state->range[i].start;
-                       uint32_t offset = cb->buffer_offset + state->range[i].start;
+               uint32_t size = state->range[i].end - state->range[i].start;
+               uint32_t offset = cb->buffer_offset + state->range[i].start;
 
-                       /* and even if the start of the const buffer is before
-                        * first_immediate, the end may not be:
-                        */
-                       size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
+               /* and even if the start of the const buffer is before
+                * first_immediate, the end may not be:
+                */
+               size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
 
-                       if (size == 0)
-                               continue;
+               if (size == 0)
+                       continue;
 
-                       /* things should be aligned to vec4: */
-                       debug_assert((state->range[i].offset % 16) == 0);
-                       debug_assert((size % 16) == 0);
-                       debug_assert((offset % 16) == 0);
+               /* things should be aligned to vec4: */
+               debug_assert((state->range[i].offset % 16) == 0);
+               debug_assert((size % 16) == 0);
+               debug_assert((offset % 16) == 0);
 
-                       emit_const(screen, ring, v, state->range[i].offset / 4,
-                                                       offset, size / 4, cb->user_buffer, cb->buffer);
-               }
+               emit_const(screen, ring, v, state->range[i].offset / 4,
+                               offset, size / 4, cb->user_buffer, cb->buffer);
        }
 }