From: Samuel Pitoiset Date: Tue, 14 Jan 2020 08:14:07 +0000 (+0100) Subject: radv/gfx10: add a separate flag for creating a GDS OA buffer X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3c1f657f35ca9ba0381164e4914a18907ff2cf3b;p=mesa.git radv/gfx10: add a separate flag for creating a GDS OA buffer For implementing NGG GS queries, we decided to use GDS but GDS OA is only required for NGG streamout. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 20ba975052b..f81e5fa91dc 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -340,6 +340,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->gsvs_ring_size_needed = 0; cmd_buffer->tess_rings_needed = false; cmd_buffer->gds_needed = false; + cmd_buffer->gds_oa_needed = false; cmd_buffer->sample_positions_needed = false; if (cmd_buffer->upload.upload_bo) @@ -5919,8 +5920,10 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) (old_hw_enabled_mask != so->hw_enabled_mask))) radv_emit_streamout_enable(cmd_buffer); - if (cmd_buffer->device->physical_device->use_ngg_streamout) + if (cmd_buffer->device->physical_device->use_ngg_streamout) { cmd_buffer->gds_needed = true; + cmd_buffer->gds_oa_needed = true; + } } static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 600a740ae29..ef0e866ef52 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -3635,6 +3635,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds, + bool needs_gds_oa, bool needs_sample_positions, struct radeon_cmdbuf **initial_full_flush_preamble_cs, struct radeon_cmdbuf **initial_preamble_cs, @@ -3649,7 +3650,7 @@ radv_get_preamble_cs(struct radv_queue *queue, struct radeon_winsys_bo *gds_bo = NULL; struct radeon_winsys_bo *gds_oa_bo = NULL; struct radeon_cmdbuf *dest_cs[3] = {0}; - bool add_tess_rings = false, add_gds = false, add_sample_positions = false; + bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false; unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0; unsigned max_offchip_buffers; unsigned hs_offchip_param = 0; @@ -3663,6 +3664,10 @@ radv_get_preamble_cs(struct radv_queue *queue, if (needs_gds) add_gds = true; } + if (!queue->has_gds_oa) { + if (needs_gds_oa) + add_gds_oa = true; + } if (!queue->has_sample_positions) { if (needs_sample_positions) add_sample_positions = true; @@ -3692,14 +3697,14 @@ radv_get_preamble_cs(struct radv_queue *queue, compute_scratch_waves <= queue->compute_scratch_waves && esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size && - !add_tess_rings && !add_gds && !add_sample_positions && + !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions && queue->initial_preamble_cs) { *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs; *initial_preamble_cs = queue->initial_preamble_cs; *continue_preamble_cs = queue->continue_preamble_cs; if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings && - !needs_gds && !needs_sample_positions) + !needs_gds && !needs_gds_oa && !needs_sample_positions) *continue_preamble_cs = NULL; return VK_SUCCESS; } @@ -3787,6 +3792,12 @@ radv_get_preamble_cs(struct radv_queue *queue, RADV_BO_PRIORITY_SCRATCH); if (!gds_bo) goto fail; + } else { + gds_bo = queue->gds_bo; + } + + if (add_gds_oa) { + assert(queue->device->physical_device->rad_info.chip_class >= GFX10); gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws, 4, 1, @@ -3796,7 +3807,6 @@ radv_get_preamble_cs(struct radv_queue *queue, if (!gds_oa_bo) goto fail; } else { - gds_bo = queue->gds_bo; gds_oa_bo = queue->gds_oa_bo; } @@ -3980,8 +3990,10 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->has_gds = true; } - if (gds_oa_bo != queue->gds_oa_bo) + if (gds_oa_bo != queue->gds_oa_bo) { queue->gds_oa_bo = gds_oa_bo; + queue->has_gds_oa = true; + } if (descriptor_bo != queue->descriptor_bo) { if (queue->descriptor_bo) @@ -4252,6 +4264,7 @@ radv_get_preambles(struct radv_queue *queue, uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; bool tess_rings_needed = false; bool gds_needed = false; + bool gds_oa_needed = false; bool sample_positions_needed = false; for (uint32_t j = 0; j < cmd_buffer_count; j++) { @@ -4268,13 +4281,14 @@ radv_get_preambles(struct radv_queue *queue, gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); tess_rings_needed |= cmd_buffer->tess_rings_needed; gds_needed |= cmd_buffer->gds_needed; + gds_oa_needed |= cmd_buffer->gds_oa_needed; sample_positions_needed |= cmd_buffer->sample_positions_needed; } return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted, compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size, gsvs_ring_size, tess_rings_needed, - gds_needed, sample_positions_needed, + gds_needed, gds_oa_needed, sample_positions_needed, initial_full_flush_preamble_cs, initial_preamble_cs, continue_preamble_cs); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 03e91cbb0e7..4494d595074 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -721,6 +721,7 @@ struct radv_queue { uint32_t gsvs_ring_size; bool has_tess_rings; bool has_gds; + bool has_gds_oa; bool has_sample_positions; struct radeon_winsys_bo *scratch_bo; @@ -1333,6 +1334,7 @@ struct radv_cmd_buffer { uint32_t gsvs_ring_size_needed; bool tess_rings_needed; bool gds_needed; /* for GFX10 streamout */ + bool gds_oa_needed; /* for GFX10 streamout */ bool sample_positions_needed; VkResult record_result;