From 70b2f872ea6ae651b832a2b3dd975efd78289fad Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 11 Apr 2015 12:15:17 -0400 Subject: [PATCH] freedreno/a4xx: sysvals and UBOs Basically just sync up the cmdstream emit parts to match the changes already done on a3xx. Also, fix scheduling for mem instructions. This is needed on a4xx, and I am a bit surprised it isn't needed for a3xx. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 75 +++++++++++++------ .../drivers/freedreno/freedreno_screen.c | 2 +- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 3 +- 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index bf51847706e..c315a47bb16 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -87,11 +87,12 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader) + struct ir3_shader_variant *shader, + bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t first_immediate; - uint32_t base = 0; + uint32_t max_const; + int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. @@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring, * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ - first_immediate = MIN2(shader->first_immediate, shader->constlen); + max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ - while (enabled_mask) { - unsigned index = ffs(enabled_mask) - 1; + if (enabled_mask & 1) { + const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); - /* gallium could leave const buffers bound above what the - * current shader uses.. don't let that confuse us. + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: */ - if (base >= (4 * first_immediate)) - break; - - if (constbuf->dirty_mask & (1 << index)) { - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, (4 * first_immediate) - base); - fd4_emit_constant(ring, sb, base, + size = MIN2(size, 4 * max_const); + + if (size && (constbuf->dirty_mask & (1 << index))) { + fd4_emit_constant(ring, sb, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } - base += size; enabled_mask &= ~(1 << index); } + /* emit ubos: */ + if (shader->constlen > shader->first_driver_param) { + uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(params)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 1; i <= params * 4; i++) { + struct pipe_constant_buffer *cb = &constbuf->cb[i]; + assert(!cb->user_buffer); + if ((enabled_mask & (1 << i)) && cb->buffer) + OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); + else + OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); + } + } + /* emit shader immediates: */ - if (shader) { + if (shader && emit_immediates) { int size = shader->immediates_count; - base = shader->first_immediate; + uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: @@ -499,11 +515,26 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], - (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL); + vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); if (!emit->key.binning_pass) { emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], - (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL); + fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); + } + } + + /* emit driver params every time */ + if (emit->info && emit->prog == &ctx->prog) { + uint32_t vertex_params[4] = { + emit->info->indexed ? emit->info->index_bias : emit->info->start, + 0, + 0, + 0 + }; + if (vp->constlen >= vp->first_driver_param + 4) { + fd4_emit_constant(ring, SB_VERT_SHADER, + (vp->first_driver_param + 4) * 4, + 0, 4, vertex_params, NULL); } } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1b89387678d..fda60eda6fd 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -363,7 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, */ return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return is_a3xx(screen) ? 16 : 1; + return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* nothing uses this */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 9e1f45dabaf..b899c66b37e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -71,7 +71,8 @@ int ir3_delayslots(struct ir3_instruction *assigner, return 0; /* assigner must be alu: */ - if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { + if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || + is_mem(consumer)) { return 6; } else if ((consumer->category == 3) && (is_mad(consumer->opc) || is_madsh(consumer->opc)) && -- 2.30.2