From: Rob Clark Date: Fri, 6 Sep 2013 22:21:25 +0000 (-0400) Subject: freedreno/a3xx: use INDIRECT state load for shaders X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1a42d4ee34d73cbc3e5bff3dcce5a913cd58aaba;p=mesa.git freedreno/a3xx: use INDIRECT state load for shaders With a debug option to force DIRECT (mainly to make it easier for capturing cmdstream dumps). Using INDIRECT for large shaders at least makes a noticeable reduction in CPU load, which helps for CPU-limited games. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index d84bbe9c36f..b0eec6e66d3 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -186,7 +186,8 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so) { struct ir3_shader_info *si = &so->info; enum adreno_state_block sb; - uint32_t i, *bin; + enum adreno_state_src src; + uint32_t i, sz, *bin; if (so->type == SHADER_VERTEX) { sb = SB_VERT_SHADER; @@ -194,17 +195,31 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so) sb = SB_FRAG_SHADER; } - // XXX use SS_INDIRECT - bin = fd_bo_map(so->bo); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords); + if (fd_mesa_debug & FD_DBG_DIRECT) { + sz = si->sizedwords; + src = SS_DIRECT; + bin = fd_bo_map(so->bo); + } else { + sz = 0; + src = SS_INDIRECT; + bin = NULL; + } + + OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - for (i = 0; i < si->sizedwords; i++) + if (bin) { + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); + } else { + OUT_RELOC(ring, so->bo, 0, + 
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); + } + for (i = 0; i < sz; i++) { OUT_RING(ring, bin[i]); + } } void @@ -223,6 +238,10 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe + * flush some caches? I think we only need to set those + * bits if we have updated const or shader.. + */ A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 7412e3dca96..eada1af9892 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -61,6 +61,7 @@ static const struct debug_named_value debug_options[] = { {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, {"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"}, {"dscis", FD_DBG_DSCIS, "Disable scissor optimization"}, + {"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"}, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index f8672339cff..4c7c78b955d 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -57,6 +57,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_DCLEAR 0x04 #define FD_DBG_DGMEM 0x08 #define FD_DBG_DSCIS 0x10 +#define FD_DBG_DIRECT 0x20 extern int fd_mesa_debug; #define DBG(fmt, ...) \