From: Rob Clark Date: Fri, 24 Jul 2015 17:07:33 +0000 (-0400) Subject: freedreno/ir3: move emit_const to ir3 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=56462a30080c1f25a81ae566d59a25d2ad6bb809;p=mesa.git freedreno/ir3: move emit_const to ir3 Details of the cmdstream packets are different between a3xx and a4xx, but the logic about the layout of const registers is the same, as that is dictated by the ir3 shader compiler. So rather than duplicating logic that is tightly coupled to ir3 between a3xx and a4xx, move this into ir3 and use per-generation callbacks to build the cmdstream packets. This should make it easier to pass additional const regs (such as for transform feedback). And it also keeps the layout internal to ir3 in case we want to make the layout more dynamic some day. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index 8441898382b..dc33783e398 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv) fd3_gmem_init(pctx); fd3_texture_init(pctx); fd3_prog_init(pctx); + fd3_emit_init(pctx); pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv); if (!pctx) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 070ed43a279..fc30d4842ba 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -345,7 +345,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, fd3_emit_vertex_bufs(ring, &emit); - fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); + fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 07cc2266d08..9032366b748 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -43,19 +43,26 @@ #include "fd3_format.h" #include "fd3_zsa.h" +static const enum adreno_state_block sb[] = { + [SHADER_VERTEX] = SB_VERT_SHADER, + [SHADER_FRAGMENT] = SB_FRAG_SHADER, +}; + /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void -fd3_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; + debug_assert((regid % 4) == 0); + debug_assert((sizedwords % 4) == 0); + if (prsc) { sz = 0; src = SS_INDIRECT; @@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring, OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; @@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring, } static void -emit_constants(struct fd_ringbuffer *ring, - enum adreno_state_block sb, - struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader, - bool emit_immediates) +fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) { - uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t max_const; - int i; - - // XXX TODO only emit dirty consts.. but we need to keep track if - // they are clobbered by a clear, gmem2mem, or mem2gmem.. 
- constbuf->dirty_mask = enabled_mask; - - /* in particular, with binning shader we may end up with unused - * consts, ie. we could end up w/ constlen that is smaller - * than first_immediate. In that case truncate the user consts - * early to avoid HLSQ lockup caused by writing too many consts - */ - max_const = MIN2(shader->first_driver_param, shader->constlen); - - /* emit user constants: */ - if (enabled_mask & 1) { - const unsigned index = 0; - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ - - // I expect that size should be a multiple of vec4's: - assert(size == align(size, 4)); - - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, 4 * max_const); - - if (size && constbuf->dirty_mask & (1 << index)) { - fd3_emit_constant(ring, sb, 0, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); - constbuf->dirty_mask &= ~(1 << index); - } - - enabled_mask &= ~(1 << index); - } - - if (shader->constlen > shader->first_driver_param) { - uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); - /* emit ubos: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(params * 2)); - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); - - for (i = 1; i <= params * 4; i++) { - struct pipe_constant_buffer *cb = &constbuf->cb[i]; - assert(!cb->user_buffer); - if ((enabled_mask & (1 << i)) && cb->buffer) - OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); - else - OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); - } - } + uint32_t i; - /* emit shader immediates: */ - if (shader && emit_immediates) { - int size = shader->immediates_count; - uint32_t base = 
shader->first_immediate; + debug_assert((regid % 4) == 0); + debug_assert((num % 4) == 0); - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, shader->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - fd3_emit_constant(ring, sb, base, - 0, size, shader->immediates[0].val, NULL); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | + CP_LOAD_STATE_0_NUM_UNIT(num/2)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 0; i < num; i++) { + if (bos[i]) { + if (write) { + OUT_RELOCW(ring, bos[i], offsets[i], 0, 0); + } else { + OUT_RELOC(ring, bos[i], offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); } } } @@ -669,33 +618,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT3(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, HLSQ_FLUSH); - if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && - /* evil hack to deal sanely with clear path: */ - (emit->prog == &ctx->prog)) { - fd_wfi(ctx, ring); - emit_constants(ring, SB_VERT_SHADER, - &ctx->constbuf[PIPE_SHADER_VERTEX], - vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); - if (!emit->key.binning_pass) { - emit_constants(ring, SB_FRAG_SHADER, - &ctx->constbuf[PIPE_SHADER_FRAGMENT], - fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); - } - } - - /* emit driver params every time */ - if (emit->info && emit->prog == &ctx->prog) { - uint32_t vertex_params[4] = { - emit->info->indexed ? 
emit->info->index_bias : emit->info->start, - 0, - 0, - 0 - }; - if (vp->constlen >= vp->first_driver_param + 4) { - fd3_emit_constant(ring, SB_VERT_SHADER, - (vp->first_driver_param + 4) * 4, - 0, 4, vertex_params, NULL); - } + if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + ir3_emit_consts(vp, ring, emit->info, dirty); + if (!emit->key.binning_pass) + ir3_emit_consts(fp, ring, emit->info, dirty); + /* mark clean after emitting consts: */ + ctx->prog.dirty = 0; } if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) { @@ -930,3 +858,11 @@ fd3_emit_restore(struct fd_context *ctx) ctx->needs_rb_fbd = true; } + +void +fd3_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd3_emit_const; + ctx->emit_const_bo = fd3_emit_const_bo; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index 8f21919c9a7..795654706a7 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -37,10 +37,8 @@ #include "ir3_shader.h" struct fd_ringbuffer; -enum adreno_state_block; -void fd3_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc); @@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd3_emit_restore(struct fd_context *ctx); +void fd3_emit_init(struct pipe_context *pctx); + #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index 6e109b6205a..e172d350517 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv) 
fd4_gmem_init(pctx); fd4_texture_init(pctx); fd4_prog_init(pctx); + fd4_emit_init(pctx); pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv); if (!pctx) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index d070f5fd6b7..ff1dfdc392f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -295,7 +295,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */ /* until fastclear works: */ - fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); + fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index f3e1ccebccc..4462a82777f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -43,19 +43,26 @@ #include "fd4_format.h" #include "fd4_zsa.h" +static const enum adreno_state_block sb[] = { + [SHADER_VERTEX] = SB_VERT_SHADER, + [SHADER_FRAGMENT] = SB_FRAG_SHADER, +}; + /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void -fd4_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; + debug_assert((regid % 4) == 0); + debug_assert((sizedwords % 4) == 0); + if (prsc) { sz = 0; src = 0x2; // TODO ?? 
@@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring, OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; @@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring, } static void -emit_constants(struct fd_ringbuffer *ring, - enum adreno_state_block sb, - struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader, - bool emit_immediates) +fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) { - uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t max_const; - int i; - - // XXX TODO only emit dirty consts.. but we need to keep track if - // they are clobbered by a clear, gmem2mem, or mem2gmem.. - constbuf->dirty_mask = enabled_mask; - - /* in particular, with binning shader we may end up with unused - * consts, ie. we could end up w/ constlen that is smaller - * than first_immediate. 
In that case truncate the user consts - * early to avoid HLSQ lockup caused by writing too many consts - */ - max_const = MIN2(shader->first_driver_param, shader->constlen); - - /* emit user constants: */ - if (enabled_mask & 1) { - const unsigned index = 0; - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ - - // I expect that size should be a multiple of vec4's: - assert(size == align(size, 4)); - - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, 4 * max_const); - - if (size && (constbuf->dirty_mask & (1 << index))) { - fd4_emit_constant(ring, sb, 0, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); - constbuf->dirty_mask &= ~(1 << index); - } + uint32_t i; - enabled_mask &= ~(1 << index); - } - - /* emit ubos: */ - if (shader->constlen > shader->first_driver_param) { - uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(params)); - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); - - for (i = 1; i <= params * 4; i++) { - struct pipe_constant_buffer *cb = &constbuf->cb[i]; - assert(!cb->user_buffer); - if ((enabled_mask & (1 << i)) && cb->buffer) - OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); - else - OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); - } - } + debug_assert((regid % 4) == 0); + debug_assert((num % 4) == 0); - /* emit shader immediates: */ - if (shader && emit_immediates) { - int size = shader->immediates_count; - uint32_t base = shader->first_immediate; - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, shader->constlen) - 
base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - fd4_emit_constant(ring, sb, base, - 0, size, shader->immediates[0].val, NULL); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | + CP_LOAD_STATE_0_NUM_UNIT(num/4)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 0; i < num; i++) { + if (bos[i]) { + if (write) { + OUT_RELOCW(ring, bos[i], offsets[i], 0, 0); + } else { + OUT_RELOC(ring, bos[i], offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); } } } @@ -520,33 +469,12 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & FD_DIRTY_PROG) fd4_program_emit(ring, emit); - if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && - /* evil hack to deal sanely with clear path: */ - (emit->prog == &ctx->prog)) { - fd_wfi(ctx, ring); - emit_constants(ring, SB_VERT_SHADER, - &ctx->constbuf[PIPE_SHADER_VERTEX], - vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); - if (!emit->key.binning_pass) { - emit_constants(ring, SB_FRAG_SHADER, - &ctx->constbuf[PIPE_SHADER_FRAGMENT], - fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); - } - } - - /* emit driver params every time */ - if (emit->info && emit->prog == &ctx->prog) { - uint32_t vertex_params[4] = { - emit->info->indexed ? 
emit->info->index_bias : emit->info->start, - 0, - 0, - 0 - }; - if (vp->constlen >= vp->first_driver_param + 4) { - fd4_emit_constant(ring, SB_VERT_SHADER, - (vp->first_driver_param + 4) * 4, - 0, 4, vertex_params, NULL); - } + if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + ir3_emit_consts(vp, ring, emit->info, dirty); + if (!emit->key.binning_pass) + ir3_emit_consts(fp, ring, emit->info, dirty); + /* mark clean after emitting consts: */ + ctx->prog.dirty = 0; } if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { @@ -767,3 +695,11 @@ fd4_emit_restore(struct fd_context *ctx) ctx->needs_rb_fbd = true; } + +void +fd4_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd4_emit_const; + ctx->emit_const_bo = fd4_emit_const_bo; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 7d059f8e532..7debee59471 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -37,10 +37,8 @@ #include "ir3_shader.h" struct fd_ringbuffer; -enum adreno_state_block; -void fd4_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc); @@ -96,4 +94,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd4_emit_restore(struct fd_context *ctx); +void fd4_emit_init(struct pipe_context *pctx); + #endif /* FD4_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index c2d98345349..bc5267aa96e 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -351,9 +351,16 @@ struct fd_context { void (*emit_sysmem_prep)(struct fd_context *ctx); /* draw: */ - 
void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info); + void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info); void (*clear)(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil); + + /* constant emit: (note currently not used/needed for a2xx) */ + void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc); + void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets); }; static inline struct fd_context * diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 1b78763c58e..6aec2585ceb 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -40,6 +40,7 @@ #include "util/u_dynarray.h" #include "util/u_pack_color.h" +#include "disasm.h" #include "adreno_common.xml.h" #include "adreno_pm4.xml.h" diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index d4027729a22..75425e91378 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -412,3 +412,151 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin) debug_printf("\n"); } + +/* This has to reach into the fd_context a bit more than the rest of + * ir3, but it needs to be aligned with the compiler, so both agree + * on which const regs hold what. 
And the logic is identical between + * a3xx/a4xx, the only difference is small details in the actual + * CP_LOAD_STATE packets (which is handled inside the generation + * specific ctx->emit_const(_bo)() fxns) + */ + +#include "freedreno_resource.h" + +static void +emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_constbuf_stateobj *constbuf) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + const unsigned index = 0; /* user consts are index 0 */ + /* TODO save/restore dirty_mask for binning pass instead: */ + uint32_t dirty_mask = constbuf->enabled_mask; + + if (dirty_mask & (1 << index)) { + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ + + /* in particular, with binning shader we may end up with + * unused consts, ie. we could end up w/ constlen that is + * smaller than first_driver_param. In that case truncate + * the user consts early to avoid HLSQ lockup caused by + * writing too many consts + */ + uint32_t max_const = MIN2(v->first_driver_param, v->constlen); + + // I expect that size should be a multiple of vec4's: + assert(size == align(size, 4)); + + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: + */ + size = MIN2(size, 4 * max_const); + + if (size > 0) { + fd_wfi(ctx, ring); + ctx->emit_const(ring, v->type, 0, + cb->buffer_offset, size, + cb->user_buffer, cb->buffer); + constbuf->dirty_mask &= ~(1 << index); + } + } +} + +static void +emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_constbuf_stateobj *constbuf) +{ + if (v->constlen > v->first_driver_param) { + struct fd_context *ctx = fd_context(v->shader->pctx); + uint32_t offset = v->first_driver_param; /* UBOs after user consts */ + uint32_t params = MIN2(4, v->constlen - v->first_driver_param) * 4; + uint32_t offsets[params]; + struct fd_bo *bos[params]; + + for (uint32_t i = 0; i < params; i++) 
{ + const uint32_t index = i + 1; /* UBOs start at index 1 */ + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + assert(!cb->user_buffer); + + if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) { + offsets[i] = cb->buffer_offset; + bos[i] = fd_resource(cb->buffer)->bo; + } else { + offsets[i] = 0; + bos[i] = NULL; + } + } + + fd_wfi(ctx, ring); + ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets); + } +} + +static void +emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + int size = v->immediates_count; + uint32_t base = v->first_immediate; + + /* truncate size to avoid writing constants that shader + * does not use: + */ + size = MIN2(size + base, v->constlen) - base; + + /* convert out of vec4: */ + base *= 4; + size *= 4; + + if (size > 0) { + fd_wfi(ctx, ring); + ctx->emit_const(ring, v->type, base, + 0, size, v->immediates[0].val, NULL); + } +} + +void +ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + const struct pipe_draw_info *info, uint32_t dirty) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) { + struct fd_constbuf_stateobj *constbuf; + bool shader_dirty; + + if (v->type == SHADER_VERTEX) { + constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX]; + shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP); + } else if (v->type == SHADER_FRAGMENT) { + constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT]; + shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP); + } else { + unreachable("bad shader type"); + return; + } + + emit_user_consts(v, ring, constbuf); + emit_ubos(v, ring, constbuf); + if (shader_dirty) + emit_immediates(v, ring); + } + + /* emit driver params every time: */ + /* TODO skip emit if shader doesn't use driver params to avoid WFI.. 
*/ + if (info && (v->type == SHADER_VERTEX)) { + uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */ + if (v->constlen >= offset) { + uint32_t vertex_params[4] = { + info->indexed ? info->index_bias : info->start, + 0, + 0, + 0 + }; + + fd_wfi(ctx, ring); + ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, + ARRAY_SIZE(vertex_params), vertex_params, NULL); + } + } +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 5365d5687f1..ef16d7b2f6e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -224,6 +224,10 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key); void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin); +struct fd_ringbuffer; +void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + const struct pipe_draw_info *info, uint32_t dirty); + static inline const char * ir3_shader_stage(struct ir3_shader *shader) {