From 373e9ab27c767b13846c81d1c20102bc583415e4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 29 Apr 2020 10:09:28 -0700 Subject: [PATCH] freedreno/a6xx: convert const emit to OUT_PKT() This is another hot packet. This splits out each of the four cases (geom vs frag, and indirect vs inline) intentionally, to avoid some parity bit calc. Signed-off-by: Rob Clark Part-of: --- .../drivers/freedreno/a6xx/fd6_const.c | 93 ++++++++++++------- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 14 ++- 2 files changed, 71 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index 0c519c2577e..81fbf67c2df 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -23,6 +23,7 @@ */ #include "fd6_const.h" +#include "fd6_pack.h" #include "ir3_const.h" @@ -35,43 +36,71 @@ fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { - uint32_t i, sz, align_sz; - enum a6xx_state_src src; - - debug_assert((regid % 4) == 0); - - if (prsc) { - sz = 0; - src = SS6_INDIRECT; - } else { - sz = sizedwords; - src = SS6_DIRECT; - } - - align_sz = align(sz, 4); - - OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(src) | - CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) | - CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; - OUT_RELOC(ring, bo, offset, 0, 0); + + if (fd6_geom_stage(type)) { + OUT_PKT(ring, CP_LOAD_STATE6_GEOM, + CP_LOAD_STATE6_0( + .dst_off = regid/4, + .state_type = ST6_CONSTANTS, + .state_src = SS6_INDIRECT, + .state_block = fd6_stage2shadersb(type), + .num_unit = DIV_ROUND_UP(sizedwords, 4) + ), + CP_LOAD_STATE6_EXT_SRC_ADDR( + .bo = bo, + .bo_offset = offset + ) + ); + } else { + OUT_PKT(ring, CP_LOAD_STATE6_FRAG, + CP_LOAD_STATE6_0( + .dst_off = regid/4, + .state_type = ST6_CONSTANTS, + .state_src = SS6_INDIRECT, + .state_block = fd6_stage2shadersb(type), + .num_unit = DIV_ROUND_UP(sizedwords, 4) + ), + CP_LOAD_STATE6_EXT_SRC_ADDR( + .bo = bo, + .bo_offset = offset + ) + ); + } } else { - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + /* NOTE we cheat a bit here, since we know mesa is aligning + * the size of the user buffer to 16 bytes. And we want to + * cut cycles in a hot path. + */ + uint32_t align_sz = align(sizedwords, 4); dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; - } - for (i = 0; i < sz; i++) { - OUT_RING(ring, dwords[i]); - } - - /* Zero-pad to multiple of 4 dwords */ - for (i = sz; i < align_sz; i++) { - OUT_RING(ring, 0); + if (fd6_geom_stage(type)) { + OUT_PKTBUF(ring, CP_LOAD_STATE6_GEOM, dwords, align_sz, + CP_LOAD_STATE6_0( + .dst_off = regid/4, + .state_type = ST6_CONSTANTS, + .state_src = SS6_DIRECT, + .state_block = fd6_stage2shadersb(type), + .num_unit = DIV_ROUND_UP(sizedwords, 4) + ), + CP_LOAD_STATE6_1(), + CP_LOAD_STATE6_2() + ); + } else { + OUT_PKTBUF(ring, CP_LOAD_STATE6_FRAG, dwords, align_sz, + CP_LOAD_STATE6_0( + .dst_off = regid/4, + .state_type = ST6_CONSTANTS, + .state_src = SS6_DIRECT, + .state_block = fd6_stage2shadersb(type), + .num_unit = DIV_ROUND_UP(sizedwords, 4) + ), + CP_LOAD_STATE6_1(), + CP_LOAD_STATE6_2() + ); + } } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 32968c819d5..2979e750908 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -210,24 +210,30 @@ fd6_emit_lrz_flush(struct fd_ringbuffer *ring) OUT_RING(ring, LRZ_FLUSH); } -static inline uint32_t -fd6_stage2opcode(gl_shader_stage type) +static inline bool +fd6_geom_stage(gl_shader_stage type) { switch (type) { case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: - return CP_LOAD_STATE6_GEOM; + return true; case MESA_SHADER_FRAGMENT: case MESA_SHADER_COMPUTE: case MESA_SHADER_KERNEL: - return CP_LOAD_STATE6_FRAG; + return false; default: unreachable("bad shader type"); } } +static inline uint32_t +fd6_stage2opcode(gl_shader_stage type) +{ + return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG; +} + static inline enum a6xx_state_block fd6_stage2shadersb(gl_shader_stage type) { -- 2.30.2