From 42f63e6334c925f0eb11805f8b9279e4f449d976 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 28 Feb 2019 13:02:13 -0500 Subject: [PATCH] radeonsi: set exact shader buffer read/write usage in CS Reviewed-by: Timothy Arceri --- .../drivers/radeonsi/si_compute_blit.c | 10 ++++- src/gallium/drivers/radeonsi/si_descriptors.c | 40 +++++++++---------- src/gallium/drivers/radeonsi/si_query.c | 3 +- src/gallium/drivers/radeonsi/si_query.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 8 ++++ src/gallium/drivers/radeonsi/si_state.h | 3 +- 6 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index d24c2f3493f..1abeac6adb0 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -102,6 +102,13 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_shader_buffer saved_sb[2] = {}; si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb); + unsigned saved_writable_mask = 0; + for (unsigned i = 0; i < (src ? 2 : 1); i++) { + if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask & + (1u << si_get_shaderbuf_slot(i))) + saved_writable_mask |= 1 << i; + } + /* The memory accesses are coalesced, meaning that the 1st instruction writes * the 1st contiguous block of data for the whole wave, the 2nd instruction * writes the 2nd contiguous block of data, etc. @@ -172,7 +179,8 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, /* Restore states. */ ctx->bind_compute_state(ctx, saved_cs); - ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb, ~0); + ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb, + saved_writable_mask); si_compute_internal_end(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index c9e7a3dc61d..244ba5a7bec 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -993,13 +993,9 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers, struct si_descriptors *descs, unsigned num_buffers, short shader_userdata_rel_index, - enum radeon_bo_usage shader_usage, - enum radeon_bo_usage shader_usage_constbuf, enum radeon_bo_priority priority, enum radeon_bo_priority priority_constbuf) { - buffers->shader_usage = shader_usage; - buffers->shader_usage_constbuf = shader_usage_constbuf; buffers->priority = priority; buffers->priority_constbuf = priority_constbuf; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); @@ -1030,8 +1026,8 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffers->buffers[i]), - i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage : - buffers->shader_usage_constbuf, + buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : + RADEON_USAGE_READ, i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf); } @@ -1258,7 +1254,7 @@ static void si_set_constant_buffer(struct si_context *sctx, buffers->buffers[slot] = buffer; radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), - buffers->shader_usage_constbuf, + RADEON_USAGE_READ, buffers->priority_constbuf, true); buffers->enabled_mask |= 1u << slot; } else { @@ -1311,7 +1307,7 @@ static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, uint slot, const struct pipe_shader_buffer *sbuffer, - enum radeon_bo_priority priority) + bool writable, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; uint32_t *desc = descs->list + slot * 4; @@ -1320,6 +1316,7 @@ static void si_set_shader_buffer(struct si_context *sctx, pipe_resource_reference(&buffers->buffers[slot], NULL); memset(desc, 0, sizeof(uint32_t) * 4); buffers->enabled_mask &= ~(1u << slot); + buffers->writable_mask &= ~(1u << slot); sctx->descriptors_dirty |= 1u << descriptors_idx; return; } @@ -1340,8 +1337,13 @@ static void si_set_shader_buffer(struct si_context *sctx, pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, - buffers->shader_usage, + writable ? RADEON_USAGE_READWRITE : + RADEON_USAGE_READ, priority, true); + if (writable) + buffers->writable_mask |= 1u << slot; + else + buffers->writable_mask &= ~(1u << slot); buffers->enabled_mask |= 1u << slot; sctx->descriptors_dirty |= 1u << descriptors_idx; @@ -1371,6 +1373,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx, si_resource(sbuffer->buffer)->bind_history |= PIPE_BIND_SHADER_BUFFER; si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer, + !!(writable_bitmask & (1u << i)), buffers->priority); } } @@ -1405,7 +1408,7 @@ void si_set_rw_shader_buffer(struct si_context *sctx, uint slot, const struct pipe_shader_buffer *sbuffer) { si_set_shader_buffer(sctx, &sctx->rw_buffers, SI_DESCS_RW_BUFFERS, - slot, sbuffer, RADEON_PRIO_SHADER_RW_BUFFER); + slot, sbuffer, true, RADEON_PRIO_SHADER_RW_BUFFER); } void si_set_ring_buffer(struct si_context *sctx, uint slot, @@ -1491,7 +1494,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, pipe_resource_reference(&buffers->buffers[slot], buffer); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffer), - buffers->shader_usage, buffers->priority); + RADEON_USAGE_READWRITE, buffers->priority); buffers->enabled_mask |= 1u << slot; } else { /* Clear the descriptor. */ @@ -1601,7 +1604,6 @@ static void si_reset_buffer_resources(struct si_context *sctx, unsigned slot_mask, struct pipe_resource *buf, uint64_t old_va, - enum radeon_bo_usage usage, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; @@ -1616,7 +1618,10 @@ static void si_reset_buffer_resources(struct si_context *sctx, radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buf), - usage, priority, true); + buffers->writable_mask & (1u << i) ? + RADEON_USAGE_READWRITE : + RADEON_USAGE_READ, + priority, true); } } } @@ -1670,7 +1675,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, buffers->shader_usage, + buffer, RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER, true); @@ -1690,7 +1695,6 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), buf, old_va, - sctx->const_and_shader_buffers[shader].shader_usage_constbuf, sctx->const_and_shader_buffers[shader].priority_constbuf); } @@ -1700,7 +1704,6 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), buf, old_va, - sctx->const_and_shader_buffers[shader].shader_usage, sctx->const_and_shader_buffers[shader].priority); } @@ -2677,8 +2680,6 @@ void si_init_all_descriptors(struct si_context *sctx) desc = si_const_and_shader_buffer_descriptors(sctx, i); si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc, num_buffer_slots, rel_dw_offset, - RADEON_USAGE_READWRITE, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_RW_BUFFER, RADEON_PRIO_CONST_BUFFER); desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); @@ -2708,9 +2709,8 @@ void si_init_all_descriptors(struct si_context *sctx) si_init_buffer_resources(&sctx->rw_buffers, &sctx->descriptors[SI_DESCS_RW_BUFFERS], SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, - /* The second set of usage/priority is used by + /* The second priority is used by * const buffers in RW buffer slots. */ - RADEON_USAGE_READWRITE, RADEON_USAGE_READ, RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER); sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS; diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 5b2c4ae6e18..d98bea2eeb3 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1439,7 +1439,8 @@ static void si_restore_qbo_state(struct si_context *sctx, sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); pipe_resource_reference(&st->saved_const0.buffer, NULL); - sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo, ~0); + sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo, + st->saved_ssbo_writable_mask); for (unsigned i = 0; i < 3; ++i) pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h index c61af51d57c..6ff0a239cb3 100644 --- a/src/gallium/drivers/radeonsi/si_query.h +++ b/src/gallium/drivers/radeonsi/si_query.h @@ -253,6 +253,7 @@ struct si_qbo_state { void *saved_compute; struct pipe_constant_buffer saved_const0; struct pipe_shader_buffer saved_ssbo[3]; + unsigned saved_ssbo_writable_mask; }; #endif /* SI_QUERY_H */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e39387a6080..757c17f7df8 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1356,6 +1356,14 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); + + st->saved_ssbo_writable_mask = 0; + + for (unsigned i = 0; i < 3; i++) { + if (sctx->const_and_shader_buffers[PIPE_SHADER_COMPUTE].writable_mask & + (1u << si_get_shaderbuf_slot(i))) + st->saved_ssbo_writable_mask |= 1 << i; + } } static void si_emit_db_render_state(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6faa4c511b1..311e1a428ae 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -409,13 +409,12 @@ struct si_descriptors { struct si_buffer_resources { struct pipe_resource **buffers; /* this has num_buffers elements */ - enum radeon_bo_usage shader_usage:4; /* READ, WRITE, or READWRITE */ - enum radeon_bo_usage shader_usage_constbuf:4; enum radeon_bo_priority priority:6; enum radeon_bo_priority priority_constbuf:6; /* The i-th bit is set if that element is enabled (non-NULL resource). */ unsigned enabled_mask; + unsigned writable_mask; }; #define si_pm4_state_changed(sctx, member) \ -- 2.30.2