From cf248929bfdffaf53cb4865640929a6d0accb0c3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 17 Jan 2017 21:30:23 +0100 Subject: [PATCH] radeonsi: use a global dirty mask for shader pointers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Only vertex buffers use a separate bool flag. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_descriptors.c | 86 +++++++++++-------- src/gallium/drivers/radeonsi/si_pipe.h | 2 + src/gallium/drivers/radeonsi/si_state.h | 2 - src/gallium/drivers/radeonsi/si_state_draw.c | 2 +- 4 files changed, 51 insertions(+), 41 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index a535fa0e965..0a49c877b12 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -246,7 +246,6 @@ static bool si_upload_descriptors(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } - desc->pointer_dirty = true; desc->dirty_mask = 0; if (atom) @@ -1035,9 +1034,9 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) * on performance (confirmed by testing). New descriptors are always * uploaded to a fresh new buffer, so I don't think flushing the const * cache is needed. */ - desc->pointer_dirty = true; si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); sctx->vertex_buffers_dirty = false; + sctx->vertex_buffer_pointer_dirty = true; return true; } @@ -1735,26 +1734,21 @@ void si_update_all_texture_descriptors(struct si_context *sctx) static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader) { - struct si_descriptors *descs = - &sctx->descriptors[SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS]; - - for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs) - descs->pointer_dirty = true; + sctx->shader_pointers_dirty |= + u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, + SI_NUM_SHADER_DESCS); if (shader == PIPE_SHADER_VERTEX) - sctx->vertex_buffers.pointer_dirty = true; + sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); } static void si_shader_userdata_begin_new_cs(struct si_context *sctx) { - int i; - - for (i = 0; i < SI_NUM_SHADERS; i++) { - si_mark_shader_pointers_dirty(sctx, i); - } - sctx->descriptors[SI_DESCS_RW_BUFFERS].pointer_dirty = true; + sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); + sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL; + si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); } /* Set a base register address for user data constants in the given shader. @@ -1807,13 +1801,12 @@ void si_shader_change_notify(struct si_context *sctx) static void si_emit_shader_pointer(struct si_context *sctx, struct si_descriptors *desc, - unsigned sh_base, bool keep_dirty) + unsigned sh_base) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; uint64_t va; - if (!desc->pointer_dirty || !desc->buffer) - return; + assert(desc->buffer); va = desc->buffer->gpu_address + desc->buffer_offset; @@ -1822,55 +1815,66 @@ static void si_emit_shader_pointer(struct si_context *sctx, radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, va); radeon_emit(cs, va >> 32); - - desc->pointer_dirty = keep_dirty; } void si_emit_graphics_shader_userdata(struct si_context *sctx, struct r600_atom *atom) { - unsigned shader; + unsigned mask; uint32_t *sh_base = sctx->shader_userdata.sh_base; struct si_descriptors *descs; descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; - if (descs->pointer_dirty) { + if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) { si_emit_shader_pointer(sctx, descs, - R_00B030_SPI_SHADER_USER_DATA_PS_0, true); + R_00B030_SPI_SHADER_USER_DATA_PS_0); si_emit_shader_pointer(sctx, descs, - R_00B130_SPI_SHADER_USER_DATA_VS_0, true); + R_00B130_SPI_SHADER_USER_DATA_VS_0); si_emit_shader_pointer(sctx, descs, - R_00B230_SPI_SHADER_USER_DATA_GS_0, true); + R_00B230_SPI_SHADER_USER_DATA_GS_0); si_emit_shader_pointer(sctx, descs, - R_00B330_SPI_SHADER_USER_DATA_ES_0, true); + R_00B330_SPI_SHADER_USER_DATA_ES_0); si_emit_shader_pointer(sctx, descs, - R_00B430_SPI_SHADER_USER_DATA_HS_0, true); - descs->pointer_dirty = false; + R_00B430_SPI_SHADER_USER_DATA_HS_0); } - descs = &sctx->descriptors[SI_DESCS_FIRST_SHADER]; + mask = sctx->shader_pointers_dirty & + u_bit_consecutive(SI_DESCS_FIRST_SHADER, + SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER); - for (shader = 0; shader < SI_NUM_GRAPHICS_SHADERS; shader++) { + while (mask) { + unsigned i = u_bit_scan(&mask); + unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS; unsigned base = sh_base[shader]; - unsigned i; - if (!base) - continue; + if (base) + si_emit_shader_pointer(sctx, descs + i, base); + } + sctx->shader_pointers_dirty &= + ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); - for (i = 0; i < SI_NUM_SHADER_DESCS; i++, descs++) - si_emit_shader_pointer(sctx, descs, base, false); + if (sctx->vertex_buffer_pointer_dirty) { + si_emit_shader_pointer(sctx, &sctx->vertex_buffers, + sh_base[PIPE_SHADER_VERTEX]); + sctx->vertex_buffer_pointer_dirty = false; } - si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false); } void si_emit_compute_shader_userdata(struct si_context *sctx) { unsigned base = R_00B900_COMPUTE_USER_DATA_0; - struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_FIRST_COMPUTE]; + struct si_descriptors *descs = sctx->descriptors; + unsigned compute_mask = + u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS); + unsigned mask = sctx->shader_pointers_dirty & compute_mask; - for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs) - si_emit_shader_pointer(sctx, descs, base, false); + while (mask) { + unsigned i = u_bit_scan(&mask); + + si_emit_shader_pointer(sctx, descs + i, base); + } + sctx->shader_pointers_dirty &= ~compute_mask; } /* INIT/DEINIT/UPLOAD */ @@ -1939,6 +1943,9 @@ bool si_upload_graphics_shader_descriptors(struct si_context *sctx) const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE); unsigned dirty = sctx->descriptors_dirty & mask; + /* Assume nothing will go wrong: */ + sctx->shader_pointers_dirty |= dirty; + while (dirty) { unsigned i = u_bit_scan(&dirty); @@ -1960,6 +1967,9 @@ bool si_upload_compute_shader_descriptors(struct si_context *sctx) SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE); unsigned dirty = sctx->descriptors_dirty & mask; + /* Assume nothing will go wrong: */ + sctx->shader_pointers_dirty |= dirty; + while (dirty) { unsigned i = u_bit_scan(&dirty); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e7d071d3dd3..421e2a4cf6a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -268,6 +268,7 @@ struct si_context { struct si_descriptors vertex_buffers; struct si_descriptors descriptors[SI_NUM_DESCS]; unsigned descriptors_dirty; + unsigned shader_pointers_dirty; unsigned compressed_tex_shader_mask; struct si_buffer_resources rw_buffers; struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; @@ -288,6 +289,7 @@ struct si_context { /* Vertex and index buffers. */ bool vertex_buffers_dirty; + bool vertex_buffer_pointer_dirty; struct pipe_index_buffer index_buffer; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index edc5b933781..34a0f578492 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -237,8 +237,6 @@ struct si_descriptors { /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor * array will be stored. */ unsigned shader_userdata_offset; - /* Whether the pointer should be re-emitted. */ - bool pointer_dirty; }; struct si_sampler_views { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 96a0e846e4a..837c0250eda 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1146,7 +1146,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) /* Vertex buffer descriptors are uploaded uncached, so prefetch * them right after the VS binary. */ - if (sctx->vertex_buffers.pointer_dirty) { + if (sctx->vertex_buffer_pointer_dirty) { cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, sctx->vertex_buffers.buffer_offset, sctx->vertex_elements->count * 16); -- 2.30.2