freedreno/a6xx: Rename and document HLSQ_UPDATE_CNTL
author: Connor Abbott <cwabbott0@gmail.com>
Mon, 13 Jul 2020 10:22:20 +0000 (12:22 +0200)
committer: Connor Abbott <cwabbott0@gmail.com>
Tue, 14 Jul 2020 08:23:58 +0000 (10:23 +0200)
It turns out that this register clears CP_LOAD_STATE6 packets, including
disabling any pending loads for SS6_INDIRECT/SS6_BINDLESS (these loads
don't actually happen until the draw itself, and I'm not sure whether they
happen if the state is unused by the shader) and marking constants and
UBO descriptors loaded with SS6_DIRECT as invalid. It's used very
differently from HLSQ_UPDATE_CNTL on a4xx, where the name came from, and
unlike on a4xx it's not readable, so it probably doesn't correspond to
HLSQ_UPDATE_CNTL on a4xx.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5877>

src/freedreno/computerator/a6xx.c
src/freedreno/registers/a6xx.xml
src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_pipeline.c
src/gallium/drivers/freedreno/a6xx/fd6_compute.c
src/gallium/drivers/freedreno/a6xx/fd6_draw.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 72b0f067efe7f7185c8f505540835dccf296c662..21d7d1c4894a9c0b254ca4120bc2bf7aa2815ac5 100644 (file)
@@ -117,8 +117,15 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
        const struct ir3_info *i = &v->info;
        enum a3xx_threadsize thrsz = FOUR_QUADS;
 
-       OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-       OUT_RING(ring, 0xff);
+       OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1);
+       OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_HS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_DS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_GS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_FS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_CS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_CS_IBO |
+                   A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO);
 
        unsigned constlen = align(v->constlen, 4);
        OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
index 56e1746303bee61229ac84ecc9f1d643dc6f6991..f821832ba0697bf1f2ee4a3cdbe2db4c1fa48a8e 100644 (file)
@@ -3408,8 +3408,31 @@ to upconvert to 32b float internally?
                <bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/>
        </reg32>
 
-       <!-- probably: -->
-       <reg32 offset="0xbb08" name="HLSQ_UPDATE_CNTL"/>
+       <reg32 offset="0xbb08" name="HLSQ_INVALIDATE_CMD">
+               <doc>
+                       This register clears pending loads queued up by
+                       CP_LOAD_STATE6. Each bit resets one or more kinds of
+                       CP_LOAD_STATE6 load.
+               </doc>
+
+               <!-- per-stage state: shader, non-bindless UBO, textures, and samplers -->
+               <bitfield name="VS_STATE" pos="0" type="boolean"/>
+               <bitfield name="HS_STATE" pos="1" type="boolean"/>
+               <bitfield name="DS_STATE" pos="2" type="boolean"/>
+               <bitfield name="GS_STATE" pos="3" type="boolean"/>
+               <bitfield name="FS_STATE" pos="4" type="boolean"/>
+               <bitfield name="CS_STATE" pos="5" type="boolean"/>
+
+               <bitfield name="CS_IBO" pos="6" type="boolean"/>
+               <bitfield name="GFX_IBO" pos="7" type="boolean"/>
+
+               <bitfield name="CS_SHARED_CONST" pos="19" type="boolean"/>
+               <bitfield name="GFX_SHARED_CONST" pos="8" type="boolean"/>
+
+               <!-- SS6_BINDLESS: one bit per bindless base -->
+               <bitfield name="CS_BINDLESS" low="9" high="13" type="hex"/>
+               <bitfield name="GFX_BINDLESS" low="14" high="18" type="hex"/>
+       </reg32>
 
        <reg32 offset="0xbb10" name="HLSQ_FS_CNTL" type="a6xx_hlsq_xs_cntl"/>
 
index 29b952548bfbdd1356afdd4c7ceca7c79582cbd5..2be3e38dccc3efb75b05d5fbf86bab52d94ba185 100644 (file)
@@ -428,7 +428,18 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
       .const_state = &dummy_const_state,
    };
 
-   tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .gfx_ibo = true,
+         .cs_ibo = true,
+         .gfx_shared_const = true,
+         .gfx_bindless = 0x1f,
+         .cs_bindless = 0x1f));
 
    tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS]));
    tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
index 765732ad9a99ef97f7ddf74098c2638d051a9730..d1145bfb13e1953dcbc33122b17f60559d609e68 100644 (file)
@@ -719,7 +719,19 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
 
-   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .gfx_ibo = true,
+         .cs_ibo = true,
+         .gfx_shared_const = true,
+         .cs_shared_const = true,
+         .gfx_bindless = 0x1f,
+         .cs_bindless = 0x1f));
 
    tu_cs_emit_regs(cs,
                    A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
@@ -1684,7 +1696,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    }
    assert(dyn_idx == dynamicOffsetCount);
 
-   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value;
+   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
    uint64_t addr[MAX_SETS + 1] = {};
    struct tu_cs cs;
 
@@ -1709,7 +1721,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
       sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
-      hlsq_update_value = 0x7c000;
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
 
       cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
    } else {
@@ -1717,7 +1729,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
 
       sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
-      hlsq_update_value = 0x3e00;
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
 
       cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
    }
@@ -1728,7 +1740,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
    tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10);
    tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
-   tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value));
+   tu_cs_emit_regs(&cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
 
    struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
index 88cdca7e19ca632da84c8b7d0dfff5809de9d43d..84cb9c465d92edaff37e0246eb72b096173dc082 100644 (file)
@@ -468,8 +468,15 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
                    const struct ir3_shader_variant *v,
                    uint32_t binary_iova)
 {
-   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-   tu_cs_emit(cs, 0xff);
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .cs_ibo = true,
+         .gfx_ibo = true));
 
    tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova);
 
@@ -1355,8 +1362,15 @@ tu6_emit_program(struct tu_cs *cs,
 
    STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-   tu_cs_emit(cs, 0xff); /* XXX */
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .cs_ibo = true,
+         .gfx_ibo = true));
 
   /* Don't use the binning pass variant when GS is present because we don't
    * support compiling correct binning pass variants with GS.
index 438557600f27d93a6eea9216dd452a399b5dd04b..75d4b965f6fa8e770811df6a776c797e8afe56e1 100644 (file)
@@ -34,6 +34,7 @@
 #include "fd6_const.h"
 #include "fd6_context.h"
 #include "fd6_emit.h"
+#include "fd6_pack.h"
 
 struct fd6_compute_stateobj {
        struct ir3_shader *shader;
@@ -78,8 +79,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
        const struct ir3_info *i = &v->info;
        enum a3xx_threadsize thrsz = FOUR_QUADS;
 
-       OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-       OUT_RING(ring, 0xff);
+       OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+                       .vs_state = true,
+                       .hs_state = true,
+                       .ds_state = true,
+                       .gs_state = true,
+                       .fs_state = true,
+                       .cs_state = true,
+                       .gfx_ibo = true,
+                       .cs_ibo = true,
+               ));
 
        OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
        OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
index 4fa32b5d2aced80abe1093c32741cefcc1830f11..ab8fdea19de7b6afb6559565eab0971a1559cd10 100644 (file)
@@ -356,8 +356,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
        OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
        OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_bypass);
 
-       OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-       OUT_RING(ring, 0x7ffff);
+       OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+                       .vs_state = true,
+                       .hs_state = true,
+                       .ds_state = true,
+                       .gs_state = true,
+                       .fs_state = true,
+                       .cs_state = true,
+                       .gfx_ibo = true,
+                       .cs_ibo = true,
+                       .gfx_shared_const = true,
+                       .gfx_bindless = 0x1f,
+                       .cs_bindless = 0x1f
+               ));
 
        emit_marker6(ring, 7);
        OUT_PKT7(ring, CP_SET_MARKER, 1);
index f20666c145b9afc20d2e9b9b244b6e37f32aa70b..4740f60ab45a409d010103eae811cccfb58a329a 100644 (file)
@@ -1130,8 +1130,20 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 
        fd6_cache_inv(batch, ring);
 
-       OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-       OUT_RING(ring, 0xfffff);
+       OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+                       .vs_state = true,
+                       .hs_state = true,
+                       .ds_state = true,
+                       .gs_state = true,
+                       .fs_state = true,
+                       .cs_state = true,
+                       .gfx_ibo = true,
+                       .cs_ibo = true,
+                       .gfx_shared_const = true,
+                       .cs_shared_const = true,
+                       .gfx_bindless = 0x1f,
+                       .cs_bindless = 0x1f
+               ));
 
        OUT_WFI5(ring);
 
index 72a47c1f5717b8d40e41ccc90ed2b3ff80cdd136..4ee227b027e5ec4791642f13031396324dcd5c93 100644 (file)
@@ -39,6 +39,7 @@
 #include "fd6_emit.h"
 #include "fd6_texture.h"
 #include "fd6_format.h"
+#include "fd6_pack.h"
 
 void
 fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
@@ -225,8 +226,16 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian
 static void
 setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state)
 {
-       OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-       OUT_RING(ring, 0xff);        /* XXX */
+       OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+                       .vs_state = true,
+                       .hs_state = true,
+                       .ds_state = true,
+                       .gs_state = true,
+                       .fs_state = true,
+                       .cs_state = true,
+                       .gfx_ibo = true,
+                       .cs_ibo = true,
+               ));
 
        debug_assert(state->vs->constlen >= state->bs->constlen);