From 5a05f9714bb5c78c01de39d2e6dc1ac5b351f8f1 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 29 May 2020 11:59:50 +0200 Subject: [PATCH] radeonsi: bump SI_NUM_SHADER_BUFFERS to 32 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Some apps use more than 8 SSBOs (https://gitlab.freedesktop.org/mesa/mesa/-/issues/2946), so increase SI_NUM_SHADER_BUFFERS to 32 (which allows 16 SSBOs). Since we're now using a 64-bit number to track buffers, we could bump SI_NUM_SHADER_BUFFERS to 48, but that would conflict with Mesa's MAX_COMBINED_ATOMIC_BUFFERS limit (= 90). Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2122 Reviewed-by: Marek Olšák Part-of: --- src/gallium/drivers/radeonsi/si_debug.c | 9 +++- src/gallium/drivers/radeonsi/si_descriptors.c | 48 +++++++++---------- src/gallium/drivers/radeonsi/si_shader.h | 2 +- src/gallium/drivers/radeonsi/si_state.h | 12 ++--- .../drivers/radeonsi/si_state_shaders.c | 4 +- 5 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index f596d51e0f6..4b22f4a7aed 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -796,8 +796,13 @@ static void si_dump_descriptors(struct si_context *sctx, enum pipe_shader_type p enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS; enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask & - u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); - enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >> (32 - SI_NUM_SHADER_BUFFERS); + u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS); + enabled_shaderbuf = 0; + for (int i = 0; i < SI_NUM_SHADER_BUFFERS; i++) { + enabled_shaderbuf |= + (sctx->const_and_shader_buffers[processor].enabled_mask & + 1llu << (SI_NUM_SHADER_BUFFERS - i - 1)) << i; + } enabled_samplers = 
sctx->samplers[processor].enabled_mask; enabled_images = sctx->images[processor].enabled_mask; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 865f0b1b771..33d5d5fe063 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -897,12 +897,12 @@ void si_update_ps_colorbuf0_slot(struct si_context *sctx) pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_RW_IMAGE); - buffers->enabled_mask |= 1u << slot; + buffers->enabled_mask |= 1llu << slot; } else { /* Clear the descriptor. */ memset(descs->list + slot * 4, 0, 8 * 4); pipe_resource_reference(&buffers->buffers[slot], NULL); - buffers->enabled_mask &= ~(1u << slot); + buffers->enabled_mask &= ~(1llu << slot); } sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; @@ -985,15 +985,15 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers, static void si_buffer_resources_begin_new_cs(struct si_context *sctx, struct si_buffer_resources *buffers) { - unsigned mask = buffers->enabled_mask; + uint64_t mask = buffers->enabled_mask; /* Add buffers to the CS. */ while (mask) { - int i = u_bit_scan(&mask); + int i = u_bit_scan64(&mask); radeon_add_to_buffer_list( sctx, sctx->gfx_cs, si_resource(buffers->buffers[i]), - buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, + buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, i < SI_NUM_SHADER_BUFFERS ? 
buffers->priority : buffers->priority_constbuf); } } @@ -1001,13 +1001,13 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, static bool si_buffer_resources_check_encrypted(struct si_context *sctx, struct si_buffer_resources *buffers) { - unsigned mask = buffers->enabled_mask; + uint64_t mask = buffers->enabled_mask; while (mask) { - int i = u_bit_scan(&mask); + int i = u_bit_scan64(&mask); /* only check for reads */ - if ((buffers->writable_mask & (1u << i)) == 0 && + if ((buffers->writable_mask & (1llu << i)) == 0 && (si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED)) return true; } @@ -1238,11 +1238,11 @@ static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_res buffers->offsets[slot] = buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ, buffers->priority_constbuf, true); - buffers->enabled_mask |= 1u << slot; + buffers->enabled_mask |= 1llu << slot; } else { /* Clear the descriptor. */ memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4); - buffers->enabled_mask &= ~(1u << slot); + buffers->enabled_mask &= ~(1llu << slot); } sctx->descriptors_dirty |= 1u << descriptors_idx; @@ -1292,8 +1292,8 @@ static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resou if (!sbuffer || !sbuffer->buffer) { pipe_resource_reference(&buffers->buffers[slot], NULL); memset(desc, 0, sizeof(uint32_t) * 4); - buffers->enabled_mask &= ~(1u << slot); - buffers->writable_mask &= ~(1u << slot); + buffers->enabled_mask &= ~(1llu << slot); + buffers->writable_mask &= ~(1llu << slot); sctx->descriptors_dirty |= 1u << descriptors_idx; return; } @@ -1320,12 +1320,12 @@ static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resou radeon_add_to_gfx_buffer_list_check_mem( sctx, buf, writable ? 
RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true); if (writable) - buffers->writable_mask |= 1u << slot; + buffers->writable_mask |= 1llu << slot; else - buffers->writable_mask &= ~(1u << slot); + buffers->writable_mask &= ~(1llu << slot); - buffers->enabled_mask |= 1u << slot; - sctx->descriptors_dirty |= 1u << descriptors_idx; + buffers->enabled_mask |= 1llu << slot; + sctx->descriptors_dirty |= 1lu << descriptors_idx; util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset, sbuffer->buffer_offset + sbuffer->buffer_size); @@ -1469,11 +1469,11 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource pipe_resource_reference(&buffers->buffers[slot], buffer); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE, buffers->priority); - buffers->enabled_mask |= 1u << slot; + buffers->enabled_mask |= 1llu << slot; } else { /* Clear the descriptor. */ memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4); - buffers->enabled_mask &= ~(1u << slot); + buffers->enabled_mask &= ~(1llu << slot); } sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; @@ -1557,14 +1557,14 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) * If buf == NULL, reset all descriptors. 
*/ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, - unsigned descriptors_idx, unsigned slot_mask, + unsigned descriptors_idx, uint64_t slot_mask, struct pipe_resource *buf, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; - unsigned mask = buffers->enabled_mask & slot_mask; + uint64_t mask = buffers->enabled_mask & slot_mask; while (mask) { - unsigned i = u_bit_scan(&mask); + unsigned i = u_bit_scan64(&mask); struct pipe_resource *buffer = buffers->buffers[i]; if (buffer && (!buf || buffer == buf)) { @@ -1573,7 +1573,7 @@ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_ radeon_add_to_gfx_buffer_list_check_mem( sctx, si_resource(buffer), - buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, + buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true); } } @@ -1646,7 +1646,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), - u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), + u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), buf, sctx->const_and_shader_buffers[shader].priority_constbuf); } @@ -1654,7 +1654,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), - u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), buf, + u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, sctx->const_and_shader_buffers[shader].priority); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 
756bcd5a90e..b46a48380e0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -482,7 +482,7 @@ struct si_shader_selector { uint64_t inputs_read; /* "get_unique_index" bits */ /* bitmasks of used descriptor slots */ - uint32_t active_const_and_shader_buffers; + uint64_t active_const_and_shader_buffers; uint64_t active_samplers_and_images; }; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 7e1c81d1dad..a45b18c827c 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -37,7 +37,7 @@ #define SI_NUM_CONST_BUFFERS 16 #define SI_NUM_IMAGES 16 #define SI_NUM_IMAGE_SLOTS (SI_NUM_IMAGES * 2) /* the second half are FMASK slots */ -#define SI_NUM_SHADER_BUFFERS 16 +#define SI_NUM_SHADER_BUFFERS 32 struct si_screen; struct si_shader; @@ -457,8 +457,8 @@ struct si_buffer_resources { enum radeon_bo_priority priority_constbuf : 6; /* The i-th bit is set if that element is enabled (non-NULL resource). 
*/ - unsigned enabled_mask; - unsigned writable_mask; + uint64_t enabled_mask; + uint64_t writable_mask; }; #define si_pm4_state_changed(sctx, member) \ @@ -571,7 +571,7 @@ void si_schedule_initial_compile(struct si_context *sctx, unsigned processor, struct util_queue_fence *ready_fence, struct si_compiler_ctx_state *compiler_ctx_state, void *job, util_queue_execute_func execute); -void si_get_active_slot_masks(const struct si_shader_info *info, uint32_t *const_and_shader_buffers, +void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers, uint64_t *samplers_and_images); int si_shader_select_with_key(struct si_screen *sscreen, struct si_shader_ctx_state *state, struct si_compiler_ctx_state *compiler_state, @@ -602,13 +602,13 @@ void si_init_streamout_functions(struct si_context *sctx); static inline unsigned si_get_constbuf_slot(unsigned slot) { - /* Constant buffers are in slots [16..31], ascending */ + /* Constant buffers are in slots [32..47], ascending */ return SI_NUM_SHADER_BUFFERS + slot; } static inline unsigned si_get_shaderbuf_slot(unsigned slot) { - /* shader buffers are in slots [15..0], descending */ + /* shader buffers are in slots [31..0], descending */ return SI_NUM_SHADER_BUFFERS - 1 - slot; } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index cd14fdc741e..5945a47b167 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2542,7 +2542,7 @@ void si_schedule_initial_compile(struct si_context *sctx, unsigned processor, } /* Return descriptor slot usage masks from the given shader info. 
*/ -void si_get_active_slot_masks(const struct si_shader_info *info, uint32_t *const_and_shader_buffers, +void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const_and_shader_buffers, uint64_t *samplers_and_images) { unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers; @@ -2556,7 +2556,7 @@ void si_get_active_slot_masks(const struct si_shader_info *info, uint32_t *const /* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */ start = si_get_shaderbuf_slot(num_shaderbufs - 1); - *const_and_shader_buffers = u_bit_consecutive(start, num_shaderbufs + num_constbufs); + *const_and_shader_buffers = u_bit_consecutive64(start, num_shaderbufs + num_constbufs); /* The layout is: * - fmask[last] ... fmask[0] go to [15-last .. 15] -- 2.30.2