From c0225a728e1f33b2073af7c64d9449680aece536 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Mon, 2 Dec 2019 13:41:32 -0800 Subject: [PATCH] anv: Move gen8+ push constant packet workaround. Store push_ranges in ascending order, and only "shift" them to the end of the array during state packet emission. We don't need this workaround with the new 3DSTATE_CONSTANT_ALL packet. So instead of applying the workaround here just for GEN < 12 (which requires an extra loop through all the ranges to figure out if we should shift them or not), we simply move the whole logic to the state emission code. At that point, in a later commit, we are already looping through all of the ranges anyway to check which packet we will be using, so we might as well implement the workaround there, where it is going to be used. v3: Move gen8+ workaround to the state emission code (Caio). v4: Add explanation of why we moved the workaround (Caio). Reviewed-by: Caio Marcelo de Oliveira Filho --- .../vulkan/anv_nir_compute_push_layout.c | 23 ++++----------- src/intel/vulkan/genX_cmd_buffer.c | 29 ++++++++++++++++--- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index 0b696fbc9e7..3a6e393672d 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -131,20 +131,12 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice, } assert(total_push_regs <= 64); - /* The Skylake PRM contains the following restriction: - * - * "The driver must ensure The following case does not occur - * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with - * buffer 3 read length equal to zero committed followed by a - * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to - * zero committed." - * - * To avoid this, we program the buffers in the highest slots. - * This way, slot 0 is only used if slot 3 is also used. 
- */ - int n = 3; + int n = 0; - for (int i = 3; i >= 0; i--) { + if (push_constant_range.length > 0) + map->push_ranges[n++] = push_constant_range; + + for (int i = 0; i < 4; i++) { const struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i]; if (ubo_range->length == 0) continue; @@ -152,7 +144,7 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice, const struct anv_pipeline_binding *binding = &map->surface_to_descriptor[ubo_range->block]; - map->push_ranges[n--] = (struct anv_push_range) { + map->push_ranges[n++] = (struct anv_push_range) { .set = binding->set, .index = binding->index, .dynamic_offset_index = binding->dynamic_offset_index, @@ -160,9 +152,6 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice, .length = ubo_range->length, }; } - - if (push_constant_range.length > 0) - map->push_ranges[n--] = push_constant_range; } else { /* For Ivy Bridge, the push constants packets have a different * rule that would require us to iterate in the other direction diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 67500f91cdf..bceae33cfbc 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2539,10 +2539,31 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, &pipeline->shaders[stage]->bind_map; #if GEN_GEN >= 8 || GEN_IS_HASWELL + unsigned buffer_count = 0; for (unsigned i = 0; i < 4; i++) { const struct anv_push_range *range = &bind_map->push_ranges[i]; - if (range->length == 0) - continue; + if (range->length > 0) + buffer_count++; + } + + /* The Skylake PRM contains the following restriction: + * + * "The driver must ensure The following case does not occur + * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with + * buffer 3 read length equal to zero committed followed by a + * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to + * zero committed." 
+ * + * To avoid this, we program the buffers in the highest slots. + * This way, slot 0 is only used if slot 3 is also used. + */ + assert(buffer_count <= 4); + const unsigned shift = 4 - buffer_count; + for (unsigned i = 0; i < buffer_count; i++) { + const struct anv_push_range *range = &bind_map->push_ranges[i]; + + /* At this point we only have non-empty ranges */ + assert(range->length > 0); struct anv_address addr; switch (range->set) { @@ -2588,8 +2609,8 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, } } - c.ConstantBody.ReadLength[i] = range->length; - c.ConstantBody.Buffer[i] = + c.ConstantBody.ReadLength[i + shift] = range->length; + c.ConstantBody.Buffer[i + shift] = anv_address_add(addr, range->start * 32); } #else -- 2.30.2