From 0470a03769715a3258c04e6b18848cef321bd213 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 9 Dec 2019 11:55:21 -0800
Subject: [PATCH] freedreno: Track the set of UBOs to be uploaded in UBO analysis.

We were iterating over the entire 32-entry array each time, when we can
just use a bitset to know that we're only uploading from the first entry
normally.  Knocks ir3_emit_user_consts down from ~.5% of CPU to .1% on
WebGL fishtank.

Reviewed-by: Rob Clark
---
Review notes (not part of the commit message):
 - The enabled bit is set with "1u << i" rather than "1 << i": per the
   commit message the range array has 32 entries, and shifting a signed 1
   into bit 31 is undefined behaviour in C.
 - NOTE(review): this relies on state->enabled being zero before the loop
   in ir3_nir_analyze_ubo_ranges() runs; that initialisation is outside
   the hunks shown here -- confirm.

 .../ir3/ir3_nir_analyze_ubo_ranges.c          |  3 ++
 src/freedreno/ir3/ir3_shader.h                |  1 +
 .../drivers/freedreno/ir3/ir3_gallium.c       | 40 ++++++++++---------
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 720fd06a1d7..136ec9a65e4 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -246,6 +246,9 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 			state->range[i].end = state->range[i].start + range_size;
 		}
 		offset += range_size;
+
+		if (state->range[i].start < state->range[i].end)
+			state->enabled |= 1u << i;
 	}
 	state->size = offset;
 
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index c829422ca4d..3a0447a1334 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -615,6 +615,7 @@ struct ir3_ubo_range {
 
 struct ir3_ubo_analysis_state {
 	struct ir3_ubo_range range[IR3_MAX_CONSTANT_BUFFERS];
+	uint32_t enabled;
 	uint32_t size;
 	uint32_t lower_count;
 	uint32_t cmdstream_size; /* for per-gen backend to stash required cmdstream size */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 6ba01c62a53..e5c44521af6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -235,6 +235,11 @@ ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
 	}
 }
 
+/**
+ * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
+ * outside of these ranges will be done using full UBO accesses in the
+ * shader).
+ */
 void
 ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
 		struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
@@ -242,31 +247,28 @@ ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *
 	struct ir3_ubo_analysis_state *state;
 	state = &v->shader->ubo_state;
 
-	for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
+	uint32_t i;
+	foreach_bit(i, state->enabled & constbuf->enabled_mask) {
 		struct pipe_constant_buffer *cb = &constbuf->cb[i];
 
-		if (state->range[i].start < state->range[i].end &&
-			constbuf->enabled_mask & (1 << i)) {
-
-			uint32_t size = state->range[i].end - state->range[i].start;
-			uint32_t offset = cb->buffer_offset + state->range[i].start;
+		uint32_t size = state->range[i].end - state->range[i].start;
+		uint32_t offset = cb->buffer_offset + state->range[i].start;
 
-			/* and even if the start of the const buffer is before
-			 * first_immediate, the end may not be:
-			 */
-			size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
+		/* and even if the start of the const buffer is before
+		 * first_immediate, the end may not be:
+		 */
+		size = MIN2(size, (16 * v->constlen) - state->range[i].offset);
 
-			if (size == 0)
-				continue;
+		if (size == 0)
+			continue;
 
-			/* things should be aligned to vec4: */
-			debug_assert((state->range[i].offset % 16) == 0);
-			debug_assert((size % 16) == 0);
-			debug_assert((offset % 16) == 0);
+		/* things should be aligned to vec4: */
+		debug_assert((state->range[i].offset % 16) == 0);
+		debug_assert((size % 16) == 0);
+		debug_assert((offset % 16) == 0);
 
-			emit_const(screen, ring, v, state->range[i].offset / 4,
-					offset, size / 4, cb->user_buffer, cb->buffer);
-		}
+		emit_const(screen, ring, v, state->range[i].offset / 4,
+				offset, size / 4, cb->user_buffer, cb->buffer);
 	}
 }
 
-- 
2.30.2