From 008d977d01255cca50fd222caea04f6787cb3b59 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Thu, 10 Mar 2016 21:39:20 +0100 Subject: [PATCH] radeonsi: Use CE for all descriptors. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit v2: Load previous list for new CS instead of re-emitting all descriptors. v3: Do radeon_add_to_buffer_list in si_ce_upload. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_descriptors.c | 74 ++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index a5018db901d..944c498703d 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -60,6 +60,7 @@ #include "si_shader.h" #include "sid.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_suballoc.h" #include "util/u_upload_mgr.h" @@ -151,33 +152,86 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s radeon_emit(sctx->ce_ib, va); radeon_emit(sctx->ce_ib, va >> 32); + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf, + RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); + sctx->ce_need_synchronization = true; return true; } +static void si_reinitialize_ce_ram(struct si_context *sctx, + struct si_descriptors *desc) +{ + if (desc->buffer) { + struct r600_resource *buffer = (struct r600_resource*)desc->buffer; + unsigned list_size = desc->num_elements * desc->element_dw_size * 4; + uint64_t va = buffer->gpu_address + desc->buffer_offset; + struct radeon_winsys_cs *ib = sctx->ce_preamble_ib; + + if (!ib) + ib = sctx->ce_ib; + + list_size = align(list_size, 32); + + radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0)); + radeon_emit(ib, va); + radeon_emit(ib, va >> 32); + radeon_emit(ib, list_size / 4); + radeon_emit(ib, desc->ce_offset); + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); + } + desc->ce_ram_dirty = false; +} static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc) { unsigned list_size = desc->num_elements * desc->element_dw_size * 4; - void *ptr; if (!desc->dirty_mask) return true; - u_upload_alloc(sctx->b.uploader, 0, list_size, 256, - &desc->buffer_offset, - (struct pipe_resource**)&desc->buffer, &ptr); - if (!desc->buffer) - return false; /* skip the draw call */ + if (sctx->ce_ib) { + uint32_t const* list = (uint32_t const*)desc->list; - util_memcpy_cpu_to_le32(ptr, desc->list, list_size); + if (desc->ce_ram_dirty) + si_reinitialize_ce_ram(sctx, desc); - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, - RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); + while(desc->dirty_mask) { + int begin, count; + u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin, + &count); - desc->dirty_mask = 0; + begin *= desc->element_dw_size; + count *= desc->element_dw_size; + + radeon_emit(sctx->ce_ib, + PKT3(PKT3_WRITE_CONST_RAM, count, 0)); + radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4); + radeon_emit_array(sctx->ce_ib, list + begin, count); + } + + if (!si_ce_upload(sctx, desc->ce_offset, list_size, + &desc->buffer_offset, &desc->buffer)) + return false; + } else { + void *ptr; + + u_upload_alloc(sctx->b.uploader, 0, list_size, 256, + &desc->buffer_offset, + (struct pipe_resource**)&desc->buffer, &ptr); + if (!desc->buffer) + return false; /* skip the draw call */ + + util_memcpy_cpu_to_le32(ptr, desc->list, list_size); + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); + } desc->pointer_dirty = true; + desc->dirty_mask = 0; si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); return true; } -- 2.30.2