From e1fa740c4c1412458b56732a222c86bef3bd6b31 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 13 Jul 2020 12:22:20 +0200 Subject: [PATCH] freedreno/a6xx: Rename and document HLSQ_UPDATE_CNTL It turns out that this clears CP_LOAD_STATE6 packets, including disabling any pending loads for SS6_INDIRECT/SS6_BINDLESS (these loads don't actually happen until the draw itself, and I'm not sure if they happen if the state is unused by the shader) and marking constants and UBO descriptors loaded with SS6_DIRECT as invalid. It's used very differently from HLSQ_UPDATE_CNTL on a4xx from whence the name came, and unlike on a4xx it's not readable, so this probably doesn't line up with HLSQ_UPDATE_CNTL on a4xx. Part-of: --- src/freedreno/computerator/a6xx.c | 11 ++++++-- src/freedreno/registers/a6xx.xml | 27 +++++++++++++++++-- src/freedreno/vulkan/tu_clear_blit.c | 13 ++++++++- src/freedreno/vulkan/tu_cmd_buffer.c | 22 +++++++++++---- src/freedreno/vulkan/tu_pipeline.c | 22 ++++++++++++--- .../drivers/freedreno/a6xx/fd6_compute.c | 13 +++++++-- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 15 +++++++++-- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 16 +++++++++-- .../drivers/freedreno/a6xx/fd6_program.c | 13 +++++++-- 9 files changed, 130 insertions(+), 22 deletions(-) diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index 72b0f067efe..21d7d1c4894 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -117,8 +117,15 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) const struct ir3_info *i = &v->info; enum a3xx_threadsize thrsz = FOUR_QUADS; - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); + OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1); + OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_HS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_DS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_GS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_FS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_CS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_CS_IBO | + A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO); unsigned constlen = align(v->constlen, 4); OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index 56e1746303b..f821832ba06 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -3408,8 +3408,31 @@ to upconvert to 32b float internally? - - + + + This register clears pending loads queued up by + CP_LOAD_STATE6. Each bit resets a particular kind(s) of + CP_LOAD_STATE6. + + + + + + + + + + + + + + + + + + + + diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 29b952548bf..2be3e38dccc 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -428,7 +428,18 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ .const_state = &dummy_const_state, }; - tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff)); + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f)); tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS])); tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 765732ad9a9..d1145bfb13e 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -719,7 +719,19 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff); + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .cs_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f)); tu_cs_emit_regs(cs, A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass)); @@ -1684,7 +1696,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, } assert(dyn_idx == dynamicOffsetCount); - uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value; + uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value; uint64_t addr[MAX_SETS + 1] = {}; struct tu_cs cs; @@ -1709,7 +1721,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0); hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0); - hlsq_update_value = 0x7c000; + hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f); cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS; } else { @@ -1717,7 +1729,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0); hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0); - hlsq_update_value = 0x3e00; + hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f); cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS; } @@ -1728,7 +1740,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, tu_cs_emit_array(&cs, (const uint32_t*) addr, 10); tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10); tu_cs_emit_array(&cs, (const uint32_t*) addr, 10); - tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value)); + tu_cs_emit_regs(&cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value)); struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs); if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 88cdca7e19c..84cb9c465d9 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -468,8 +468,15 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, const struct ir3_shader_variant *v, uint32_t binary_iova) { - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - tu_cs_emit(cs, 0xff); + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .cs_ibo = true, + .gfx_ibo = true)); tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova); @@ -1355,8 +1362,15 @@ tu6_emit_program(struct tu_cs *cs, STATIC_ASSERT(MESA_SHADER_VERTEX == 0); - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - tu_cs_emit(cs, 0xff); /* XXX */ + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .cs_ibo = true, + .gfx_ibo = true)); /* Don't use the binning pass variant when GS is present because we don't * support compiling correct binning pass variants with GS. diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 438557600f2..75d4b965f6f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -34,6 +34,7 @@ #include "fd6_const.h" #include "fd6_context.h" #include "fd6_emit.h" +#include "fd6_pack.h" struct fd6_compute_stateobj { struct ir3_shader *shader; @@ -78,8 +79,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) const struct ir3_info *i = &v->info; enum a3xx_threadsize thrsz = FOUR_QUADS; - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + )); OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) | diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 4fa32b5d2ac..ab8fdea19de 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -356,8 +356,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_bypass); - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0x7ffff); + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f + )); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index f20666c145b..4740f60ab45 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1130,8 +1130,20 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) fd6_cache_inv(batch, ring); - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xfffff); + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .cs_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f + )); OUT_WFI5(ring); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 72a47c1f571..4ee227b027e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -39,6 +39,7 @@ #include "fd6_emit.h" #include "fd6_texture.h" #include "fd6_format.h" +#include "fd6_pack.h" void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) @@ -225,8 +226,16 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian static void setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state) { - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); /* XXX */ + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + )); debug_assert(state->vs->constlen >= state->bs->constlen); -- 2.30.2