From 06438ea7fa137db821b3c7d256008c26e23012a7 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Mon, 23 Sep 2019 13:25:01 -0700 Subject: [PATCH] iris: Use 3DSTATE_CONSTANT_ALL when possible. Use this new instruction introduced in Gen12. The instruction itself is smaller, and it also allows us to emit a single instruction to all stages that have the same push constant buffers (e.g. when they don't have constant buffers). There's one restriction on using this instruction, though: the length field is only 5 bits long, so we need to check whether we can use it, and fall back to the old 3DSTATE_CONSTANT_XS if that field is >= 32. v2 (Suggestions from Caio): - use max_length instead of large_buffers. - remove UNUSED and use #if GEN_GEN >= 12 instead. - inline "buffers" and drop BITSET_RANGE() usage. - add assert(n <= max_pointers) - move emit to outside of the loop. Reviewed-by: Caio Marcelo de Oliveira Filho --- src/gallium/drivers/iris/iris_genx_macros.h | 7 +- src/gallium/drivers/iris/iris_state.c | 71 +++++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/iris/iris_genx_macros.h b/src/gallium/drivers/iris/iris_genx_macros.h index 58680341a50..74fe9139add 100644 --- a/src/gallium/drivers/iris/iris_genx_macros.h +++ b/src/gallium/drivers/iris/iris_genx_macros.h @@ -88,12 +88,15 @@ __gen_combine_address(struct iris_batch *batch, void *location, #define iris_pack_command(cmd, dst, name) \ _iris_pack_command(NULL, cmd, dst, name) -#define iris_pack_state(cmd, dst, name) \ +#define _iris_pack_state(batch, cmd, dst, name) \ for (struct cmd name = {}, \ *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \ - __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), \ + __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name), \ _dst = NULL) +#define iris_pack_state(cmd, dst, name) \ + _iris_pack_state(NULL, cmd, dst, name) + #define iris_emit_cmd(batch, cmd, name) \ _iris_pack_command(batch, cmd, __gen_get_batch_dwords(batch, 
__genxml_cmd_length(cmd)), name) diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 1eea996ab37..8ea79f722a3 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -5095,6 +5095,7 @@ struct push_bos { uint32_t length; } buffers[4]; int buffer_count; + uint32_t max_length; }; static void @@ -5114,6 +5115,9 @@ setup_constant_buffers(struct iris_context *ice, if (range->length == 0) continue; + if (range->length > push_bos->max_length) + push_bos->max_length = range->length; + /* Range block is a binding table index, map back to UBO index. */ unsigned block_index = iris_bti_to_group_index( &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block); @@ -5169,6 +5173,45 @@ emit_push_constant_packets(struct iris_context *ice, } } +#if GEN_GEN >= 12 +static void +emit_push_constant_packet_all(struct iris_context *ice, + struct iris_batch *batch, + uint32_t shader_mask, + const struct push_bos *push_bos) +{ + if (!push_bos) { + iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), pc) { + pc.ShaderUpdateEnable = shader_mask; + } + return; + } + + const uint32_t n = push_bos->buffer_count; + const uint32_t max_pointers = 4; + const uint32_t num_dwords = 2 + 2 * n; + uint32_t const_all[2 + 2 * max_pointers]; + uint32_t *dw = &const_all[0]; + + assert(n <= max_pointers); + iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), dw, all) { + all.DWordLength = num_dwords - 2; + all.ShaderUpdateEnable = shader_mask; + all.PointerBufferMask = (1 << n) - 1; + } + dw += 2; + + for (int i = 0; i < n; i++) { + _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA), + dw + i * 2, data) { + data.PointerToConstantBuffer = push_bos->buffers[i].addr; + data.ConstantBufferReadLength = push_bos->buffers[i].length; + } + } + iris_batch_emit(batch, const_all, sizeof(uint32_t) * num_dwords); +} +#endif + static void iris_upload_dirty_render_state(struct iris_context *ice, struct iris_batch *batch, @@ -5347,6 +5390,10 @@ 
iris_upload_dirty_render_state(struct iris_context *ice, } } +#if GEN_GEN >= 12 + uint32_t nobuffer_stages = 0; +#endif + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) continue; @@ -5362,9 +5409,33 @@ iris_upload_dirty_render_state(struct iris_context *ice, struct push_bos push_bos = {}; setup_constant_buffers(ice, batch, stage, &push_bos); + +#if GEN_GEN >= 12 + /* If this stage doesn't have any push constants, emit it later in a + * single CONSTANT_ALL packet with all the other stages. + */ + if (push_bos.buffer_count == 0) { + nobuffer_stages |= 1 << stage; + continue; + } + + /* The Constant Buffer Read Length field from 3DSTATE_CONSTANT_ALL + * contains only 5 bits, so we can only use it for buffers smaller than + * 32. + */ + if (push_bos.max_length < 32) { + emit_push_constant_packet_all(ice, batch, 1 << stage, &push_bos); + continue; + } +#endif emit_push_constant_packets(ice, batch, stage, &push_bos); } +#if GEN_GEN >= 12 + if (nobuffer_stages) + emit_push_constant_packet_all(ice, batch, nobuffer_stages, NULL); +#endif + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { /* Gen9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted * in order to commit constants. TODO: Investigate "Disable Gather -- 2.30.2