freedreno/ir3: move emit_const to ir3
author Rob Clark <robclark@freedesktop.org>
Fri, 24 Jul 2015 17:07:33 +0000 (13:07 -0400)
committer Rob Clark <robclark@freedesktop.org>
Mon, 27 Jul 2015 17:51:05 +0000 (13:51 -0400)
Details of the cmdstream packets are different between a3xx and a4xx,
but the logic about the layout of const registers is the same, as that
is dictated by the ir3 shader compiler.  So rather than duplicating
logic that is tightly coupled to ir3 between a3xx and a4xx, move this
into ir3 and use per-generation callbacks to build the cmdstream
packets.

This should make it easier to pass additional const regs (such as for
transform feedback).  And it also keeps the layout internal to ir3 in
case we want to make the layout more dynamic some day.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
12 files changed:
src/gallium/drivers/freedreno/a3xx/fd3_context.c
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.h
src/gallium/drivers/freedreno/a4xx/fd4_context.c
src/gallium/drivers/freedreno/a4xx/fd4_draw.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.h
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_util.h
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index 8441898382b16a9c9336ae124a961637a13cc96b..dc33783e3980918c3b3115d7c71c06cedf5c3311 100644 (file)
@@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
        fd3_gmem_init(pctx);
        fd3_texture_init(pctx);
        fd3_prog_init(pctx);
+       fd3_emit_init(pctx);
 
        pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
        if (!pctx)
index 070ed43a279d6c06ac8c9cd2d94d6ac3c3dc5c69..fc30d4842ba1ad94c8429aa77ff34da3411811ba 100644 (file)
@@ -345,7 +345,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 
        fd3_emit_vertex_bufs(ring, &emit);
 
-       fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+       fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
        OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
        OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
index 07cc2266d088da3f2e23d4348c4cbef4f3ae9623..9032366b7481e6d2865d4b716cf1c680a0eb7cae 100644 (file)
 #include "fd3_format.h"
 #include "fd3_zsa.h"
 
+static const enum adreno_state_block sb[] = {
+       [SHADER_VERTEX]   = SB_VERT_SHADER,
+       [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
 /* regid:          base const register
  * prsc or dwords: buffer containing constant values
  * sizedwords:     size of const value buffer
  */
 void
-fd3_emit_constant(struct fd_ringbuffer *ring,
-               enum adreno_state_block sb,
+fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                const uint32_t *dwords, struct pipe_resource *prsc)
 {
        uint32_t i, sz;
        enum adreno_state_src src;
 
+       debug_assert((regid % 4) == 0);
+       debug_assert((sizedwords % 4) == 0);
+
        if (prsc) {
                sz = 0;
                src = SS_INDIRECT;
@@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
        OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
                        CP_LOAD_STATE_0_STATE_SRC(src) |
-                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
                        CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
        if (prsc) {
                struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
 }
 
 static void
-emit_constants(struct fd_ringbuffer *ring,
-               enum adreno_state_block sb,
-               struct fd_constbuf_stateobj *constbuf,
-               struct ir3_shader_variant *shader,
-               bool emit_immediates)
+fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+               uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
 {
-       uint32_t enabled_mask = constbuf->enabled_mask;
-       uint32_t max_const;
-       int i;
-
-       // XXX TODO only emit dirty consts.. but we need to keep track if
-       // they are clobbered by a clear, gmem2mem, or mem2gmem..
-       constbuf->dirty_mask = enabled_mask;
-
-       /* in particular, with binning shader we may end up with unused
-        * consts, ie. we could end up w/ constlen that is smaller
-        * than first_immediate.  In that case truncate the user consts
-        * early to avoid HLSQ lockup caused by writing too many consts
-        */
-       max_const = MIN2(shader->first_driver_param, shader->constlen);
-
-       /* emit user constants: */
-       if (enabled_mask & 1) {
-               const unsigned index = 0;
-               struct pipe_constant_buffer *cb = &constbuf->cb[index];
-               unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
-               // I expect that size should be a multiple of vec4's:
-               assert(size == align(size, 4));
-
-               /* and even if the start of the const buffer is before
-                * first_immediate, the end may not be:
-                */
-               size = MIN2(size, 4 * max_const);
-
-               if (size && constbuf->dirty_mask & (1 << index)) {
-                       fd3_emit_constant(ring, sb, 0,
-                                                         cb->buffer_offset, size,
-                                                         cb->user_buffer, cb->buffer);
-                       constbuf->dirty_mask &= ~(1 << index);
-               }
-
-               enabled_mask &= ~(1 << index);
-       }
-
-       if (shader->constlen > shader->first_driver_param) {
-               uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
-               /* emit ubos: */
-               OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
-               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
-                                CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
-                                CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-                                CP_LOAD_STATE_0_NUM_UNIT(params * 2));
-               OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
-                                CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
-               for (i = 1; i <= params * 4; i++) {
-                       struct pipe_constant_buffer *cb = &constbuf->cb[i];
-                       assert(!cb->user_buffer);
-                       if ((enabled_mask & (1 << i)) && cb->buffer)
-                               OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
-                       else
-                               OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
-               }
-       }
+       uint32_t i;
 
-       /* emit shader immediates: */
-       if (shader && emit_immediates) {
-               int size = shader->immediates_count;
-               uint32_t base = shader->first_immediate;
+       debug_assert((regid % 4) == 0);
+       debug_assert((num % 4) == 0);
 
-               /* truncate size to avoid writing constants that shader
-                * does not use:
-                */
-               size = MIN2(size + base, shader->constlen) - base;
-
-               /* convert out of vec4: */
-               base *= 4;
-               size *= 4;
-
-               if (size > 0) {
-                       fd3_emit_constant(ring, sb, base,
-                               0, size, shader->immediates[0].val, NULL);
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+                       CP_LOAD_STATE_0_NUM_UNIT(num/2));
+       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                       CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+       for (i = 0; i < num; i++) {
+               if (bos[i]) {
+                       if (write) {
+                               OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+                       } else {
+                               OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+                       }
+               } else {
+                       OUT_RING(ring, 0xbad00000 | (i << 16));
                }
        }
 }
@@ -669,33 +618,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);
 
-       if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
-                       /* evil hack to deal sanely with clear path: */
-                       (emit->prog == &ctx->prog)) {
-               fd_wfi(ctx, ring);
-               emit_constants(ring,  SB_VERT_SHADER,
-                               &ctx->constbuf[PIPE_SHADER_VERTEX],
-                               vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
-               if (!emit->key.binning_pass) {
-                       emit_constants(ring, SB_FRAG_SHADER,
-                                       &ctx->constbuf[PIPE_SHADER_FRAGMENT],
-                                       fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
-               }
-       }
-
-       /* emit driver params every time */
-       if (emit->info && emit->prog == &ctx->prog) {
-               uint32_t vertex_params[4] = {
-                       emit->info->indexed ? emit->info->index_bias : emit->info->start,
-                       0,
-                       0,
-                       0
-               };
-               if (vp->constlen >= vp->first_driver_param + 4) {
-                       fd3_emit_constant(ring, SB_VERT_SHADER,
-                                                         (vp->first_driver_param + 4) * 4,
-                                                         0, 4, vertex_params, NULL);
-               }
+       if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+               ir3_emit_consts(vp, ring, emit->info, dirty);
+               if (!emit->key.binning_pass)
+                       ir3_emit_consts(fp, ring, emit->info, dirty);
+               /* mark clean after emitting consts: */
+               ctx->prog.dirty = 0;
        }
 
        if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
@@ -930,3 +858,11 @@ fd3_emit_restore(struct fd_context *ctx)
 
        ctx->needs_rb_fbd = true;
 }
+
+void
+fd3_emit_init(struct pipe_context *pctx)
+{      /* Install the fd3_* const-emit callbacks on the fd_context that
+        * wraps pctx.
+        */
+       struct fd_context *ctx = fd_context(pctx);
+       ctx->emit_const = fd3_emit_const;
+       ctx->emit_const_bo = fd3_emit_const_bo;
+}
index 8f21919c9a7693c548946bbb3c66428c25963383..795654706a75da64046758591b59bde4c22a8918 100644 (file)
 #include "ir3_shader.h"
 
 struct fd_ringbuffer;
-enum adreno_state_block;
 
-void fd3_emit_constant(struct fd_ringbuffer *ring,
-               enum adreno_state_block sb,
+void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                const uint32_t *dwords, struct pipe_resource *prsc);
 
@@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 void fd3_emit_restore(struct fd_context *ctx);
 
+void fd3_emit_init(struct pipe_context *pctx);
+
 #endif /* FD3_EMIT_H */
index 6e109b6205a0e06f2724f3d87192dd8816fe0890..e172d3505179c0ab4cbb4ad5b90b5908bf599c36 100644 (file)
@@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv)
        fd4_gmem_init(pctx);
        fd4_texture_init(pctx);
        fd4_prog_init(pctx);
+       fd4_emit_init(pctx);
 
        pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
        if (!pctx)
index d070f5fd6b7c8ff50c9ffa5c6c582f5305e98c20..ff1dfdc392fb27cbf4aabfe7b4d1f4c09abd9679 100644 (file)
@@ -295,7 +295,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
        OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */
 
        /* until fastclear works: */
-       fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+       fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
        OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
        OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
index f3e1ccebccc7218479dcd2088434ff153497749e..4462a82777fd4c5c1b46ea405b5c42c4c5a44a18 100644 (file)
 #include "fd4_format.h"
 #include "fd4_zsa.h"
 
+static const enum adreno_state_block sb[] = {
+       [SHADER_VERTEX]   = SB_VERT_SHADER,
+       [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
 /* regid:          base const register
  * prsc or dwords: buffer containing constant values
  * sizedwords:     size of const value buffer
  */
 void
-fd4_emit_constant(struct fd_ringbuffer *ring,
-               enum adreno_state_block sb,
+fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                const uint32_t *dwords, struct pipe_resource *prsc)
 {
        uint32_t i, sz;
        enum adreno_state_src src;
 
+       debug_assert((regid % 4) == 0);
+       debug_assert((sizedwords % 4) == 0);
+
        if (prsc) {
                sz = 0;
                src = 0x2;  // TODO ??
@@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
        OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
                        CP_LOAD_STATE_0_STATE_SRC(src) |
-                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
                        CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
        if (prsc) {
                struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
 }
 
 static void
-emit_constants(struct fd_ringbuffer *ring,
-               enum adreno_state_block sb,
-               struct fd_constbuf_stateobj *constbuf,
-               struct ir3_shader_variant *shader,
-               bool emit_immediates)
+fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+               uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
 {
-       uint32_t enabled_mask = constbuf->enabled_mask;
-       uint32_t max_const;
-       int i;
-
-       // XXX TODO only emit dirty consts.. but we need to keep track if
-       // they are clobbered by a clear, gmem2mem, or mem2gmem..
-       constbuf->dirty_mask = enabled_mask;
-
-       /* in particular, with binning shader we may end up with unused
-        * consts, ie. we could end up w/ constlen that is smaller
-        * than first_immediate.  In that case truncate the user consts
-        * early to avoid HLSQ lockup caused by writing too many consts
-        */
-       max_const = MIN2(shader->first_driver_param, shader->constlen);
-
-       /* emit user constants: */
-       if (enabled_mask & 1) {
-               const unsigned index = 0;
-               struct pipe_constant_buffer *cb = &constbuf->cb[index];
-               unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
-               // I expect that size should be a multiple of vec4's:
-               assert(size == align(size, 4));
-
-               /* and even if the start of the const buffer is before
-                * first_immediate, the end may not be:
-                */
-               size = MIN2(size, 4 * max_const);
-
-               if (size && (constbuf->dirty_mask & (1 << index))) {
-                       fd4_emit_constant(ring, sb, 0,
-                                       cb->buffer_offset, size,
-                                       cb->user_buffer, cb->buffer);
-                       constbuf->dirty_mask &= ~(1 << index);
-               }
+       uint32_t i;
 
-               enabled_mask &= ~(1 << index);
-       }
-
-       /* emit ubos: */
-       if (shader->constlen > shader->first_driver_param) {
-               uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
-               OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
-               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
-                               CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
-                               CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-                               CP_LOAD_STATE_0_NUM_UNIT(params));
-               OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
-                               CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
-               for (i = 1; i <= params * 4; i++) {
-                       struct pipe_constant_buffer *cb = &constbuf->cb[i];
-                       assert(!cb->user_buffer);
-                       if ((enabled_mask & (1 << i)) && cb->buffer)
-                               OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
-                       else
-                               OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
-               }
-       }
+       debug_assert((regid % 4) == 0);
+       debug_assert((num % 4) == 0);
 
-       /* emit shader immediates: */
-       if (shader && emit_immediates) {
-               int size = shader->immediates_count;
-               uint32_t base = shader->first_immediate;
-
-               /* truncate size to avoid writing constants that shader
-                * does not use:
-                */
-               size = MIN2(size + base, shader->constlen) - base;
-
-               /* convert out of vec4: */
-               base *= 4;
-               size *= 4;
-
-               if (size > 0) {
-                       fd4_emit_constant(ring, sb, base,
-                               0, size, shader->immediates[0].val, NULL);
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+                       CP_LOAD_STATE_0_NUM_UNIT(num/4));
+       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                       CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+       for (i = 0; i < num; i++) {
+               if (bos[i]) {
+                       if (write) {
+                               OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+                       } else {
+                               OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+                       }
+               } else {
+                       OUT_RING(ring, 0xbad00000 | (i << 16));
                }
        }
 }
@@ -520,33 +469,12 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        if (dirty & FD_DIRTY_PROG)
                fd4_program_emit(ring, emit);
 
-       if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
-                       /* evil hack to deal sanely with clear path: */
-                       (emit->prog == &ctx->prog)) {
-               fd_wfi(ctx, ring);
-               emit_constants(ring,  SB_VERT_SHADER,
-                               &ctx->constbuf[PIPE_SHADER_VERTEX],
-                               vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
-               if (!emit->key.binning_pass) {
-                       emit_constants(ring, SB_FRAG_SHADER,
-                                       &ctx->constbuf[PIPE_SHADER_FRAGMENT],
-                                       fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
-               }
-       }
-
-       /* emit driver params every time */
-       if (emit->info && emit->prog == &ctx->prog) {
-               uint32_t vertex_params[4] = {
-                       emit->info->indexed ? emit->info->index_bias : emit->info->start,
-                       0,
-                       0,
-                       0
-               };
-               if (vp->constlen >= vp->first_driver_param + 4) {
-                       fd4_emit_constant(ring, SB_VERT_SHADER,
-                                                         (vp->first_driver_param + 4) * 4,
-                                                         0, 4, vertex_params, NULL);
-               }
+       if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+               ir3_emit_consts(vp, ring, emit->info, dirty);
+               if (!emit->key.binning_pass)
+                       ir3_emit_consts(fp, ring, emit->info, dirty);
+               /* mark clean after emitting consts: */
+               ctx->prog.dirty = 0;
        }
 
        if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
@@ -767,3 +695,11 @@ fd4_emit_restore(struct fd_context *ctx)
 
        ctx->needs_rb_fbd = true;
 }
+
+void
+fd4_emit_init(struct pipe_context *pctx)
+{      /* Install the fd4_* const-emit callbacks on the fd_context that
+        * wraps pctx.
+        */
+       struct fd_context *ctx = fd_context(pctx);
+       ctx->emit_const = fd4_emit_const;
+       ctx->emit_const_bo = fd4_emit_const_bo;
+}
index 7d059f8e5322b520d6263fa4bddb5542b3350017..7debee594712e99384f77311c4f8ade5d6f5c541 100644 (file)
 #include "ir3_shader.h"
 
 struct fd_ringbuffer;
-enum adreno_state_block;
 
-void fd4_emit_constant(struct fd_ringbuffer *ring,
-               enum adreno_state_block sb,
+void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                const uint32_t *dwords, struct pipe_resource *prsc);
 
@@ -96,4 +94,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 void fd4_emit_restore(struct fd_context *ctx);
 
+void fd4_emit_init(struct pipe_context *pctx);
+
 #endif /* FD4_EMIT_H */
index c2d98345349f2e5df752db40b2fb3a511b55d72d..bc5267aa96e5fb77d2f895d5ea098eddb8095dc4 100644 (file)
@@ -351,9 +351,16 @@ struct fd_context {
        void (*emit_sysmem_prep)(struct fd_context *ctx);
 
        /* draw: */
-       void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
+       void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
        void (*clear)(struct fd_context *ctx, unsigned buffers,
                        const union pipe_color_union *color, double depth, unsigned stencil);
+
+       /* constant emit:  (note currently not used/needed for a2xx) */
+       void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
+                       uint32_t regid, uint32_t offset, uint32_t sizedwords,
+                       const uint32_t *dwords, struct pipe_resource *prsc);
+       void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+                       uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
 };
 
 static inline struct fd_context *
index 1b78763c58e274f2a34f75d70cfda341589301fc..6aec2585ceba20defc03df406f2db3af82f2144d 100644 (file)
@@ -40,6 +40,7 @@
 #include "util/u_dynarray.h"
 #include "util/u_pack_color.h"
 
+#include "disasm.h"
 #include "adreno_common.xml.h"
 #include "adreno_pm4.xml.h"
 
index d4027729a22e461ee31714e92974d8ab3a0a069c..75425e91378c6b2755997bdaa3376f0723eecb53 100644 (file)
@@ -412,3 +412,151 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 
        debug_printf("\n");
 }
+
+/* This has to reach into the fd_context a bit more than the rest of
+ * ir3, but it needs to be aligned with the compiler, so both agree
+ * on which const regs hold what.  And the logic is identical between
+ * a3xx/a4xx, the only difference is small details in the actual
+ * CP_LOAD_STATE packets (which is handled inside the generation
+ * specific ctx->emit_const(_bo)() fxns)
+ */
+
+#include "freedreno_resource.h"
+
+static void
+emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+               struct fd_constbuf_stateobj *constbuf)
+{
+       struct fd_context *ctx = fd_context(v->shader->pctx);
+       const unsigned index = 0;     /* user consts are index 0 */
+       /* The local dirty_mask is seeded from enabled_mask, so the user
+        * consts are emitted whenever constbuf 0 is enabled, regardless
+        * of what constbuf->dirty_mask currently holds.
+        * TODO save/restore dirty_mask for binning pass instead:
+        */
+       uint32_t dirty_mask = constbuf->enabled_mask;
+
+       if (dirty_mask & (1 << index)) {
+               struct pipe_constant_buffer *cb = &constbuf->cb[index];
+               unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+               /* in particular, with binning shader we may end up with
+                * unused consts, ie. we could end up w/ constlen that is
+                * smaller than first_driver_param.  In that case truncate
+                * the user consts early to avoid HLSQ lockup caused by
+                * writing too many consts
+                */
+               uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+
+               // I expect that size should be a multiple of vec4's:
+               assert(size == align(size, 4));
+
+               /* and even if the start of the const buffer is before
+                * max_const (the smaller of first_driver_param and
+                * constlen), the end may not be.  The limit is scaled
+                * by 4 so it can be compared against size, which is in
+                * dwords:
+                */
+               size = MIN2(size, 4 * max_const);
+
+               if (size > 0) {
+                       fd_wfi(ctx, ring);
+                       ctx->emit_const(ring, v->type, 0,
+                                       cb->buffer_offset, size,
+                                       cb->user_buffer, cb->buffer);
+                       constbuf->dirty_mask &= ~(1 << index);  /* clears the stateobj's bit, not the local copy */
+               }
+       }
+}
+
+static void
+emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+               struct fd_constbuf_stateobj *constbuf)
+{      /* Gather the bo and offset of each UBO (constbuf index 1 and up)
+        * and pass them to the ctx->emit_const_bo() callback.  Nothing is
+        * emitted unless constlen extends past first_driver_param.
+        */
+       if (v->constlen > v->first_driver_param) {
+               struct fd_context *ctx = fd_context(v->shader->pctx);
+               uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+               uint32_t params = MIN2(4, v->constlen - v->first_driver_param) * 4;  /* entry count: a multiple of 4, from 4 up to 16 */
+               uint32_t offsets[params];
+               struct fd_bo *bos[params];
+
+               for (uint32_t i = 0; i < params; i++) {
+                       const uint32_t index = i + 1;   /* UBOs start at index 1 */
+                       struct pipe_constant_buffer *cb = &constbuf->cb[index];
+                       assert(!cb->user_buffer);
+
+                       if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+                               offsets[i] = cb->buffer_offset;
+                               bos[i] = fd_resource(cb->buffer)->bo;
+                       } else {  /* slot disabled or has no buffer bound: pass a NULL bo */
+                               offsets[i] = 0;
+                               bos[i] = NULL;
+                       }
+               }
+
+               fd_wfi(ctx, ring);
+               ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets);  /* write=false; regid is offset * 4 */
+       }
+}
+
+static void
+emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{      /* Emit the variant's immediates, starting at first_immediate,
+        * through the ctx->emit_const() callback.
+        */
+       struct fd_context *ctx = fd_context(v->shader->pctx);
+       int size = v->immediates_count;
+       uint32_t base = v->first_immediate;
+
+       /* truncate size to avoid writing constants that shader
+        * does not use:
+        *
+        * NOTE(review): presumably the result is <= 0 once stored back
+        * into the signed size when constlen is not above
+        * first_immediate, and the size > 0 test below then skips the
+        * emit -- confirm the mixed signed/unsigned arithmetic.
+        */
+       size = MIN2(size + base, v->constlen) - base;
+
+       /* convert out of vec4: */
+       base *= 4;
+       size *= 4;
+
+       if (size > 0) {
+               fd_wfi(ctx, ring);
+               ctx->emit_const(ring, v->type, base,
+                       0, size, v->immediates[0].val, NULL);
+       }
+}
+
+void
+ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+               const struct pipe_draw_info *info, uint32_t dirty)
+{      /* Emit the const state for shader variant v into ring:
+        *  - user consts and UBO entries, when dirty has FD_DIRTY_PROG or
+        *    FD_DIRTY_CONSTBUF set
+        *  - immediates, additionally only when this stage's bit is set
+        *    in ctx->prog.dirty
+        *  - vertex driver params, whenever info is non-NULL and v is a
+        *    vertex shader
+        * info may be NULL, in which case no driver params are emitted.
+        */
+       struct fd_context *ctx = fd_context(v->shader->pctx);
+
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+               struct fd_constbuf_stateobj *constbuf;
+               bool shader_dirty;
+
+               if (v->type == SHADER_VERTEX) {
+                       constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+                       shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP);
+               } else if (v->type == SHADER_FRAGMENT) {
+                       constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+                       shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP);
+               } else {
+                       unreachable("bad shader type");
+                       return;
+               }
+
+               emit_user_consts(v, ring, constbuf);
+               emit_ubos(v, ring, constbuf);
+               if (shader_dirty)
+                       emit_immediates(v, ring);
+       }
+
+       /* emit driver params every time: */
+       /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
+       if (info && (v->type == SHADER_VERTEX)) {
+               uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+               if (v->constlen >= offset) {  /* NOTE(review): emit_ubos tests constlen > start before writing at start; confirm >= is intended here */
+                       uint32_t vertex_params[4] = {
+                               info->indexed ? info->index_bias : info->start,  /* index_bias for indexed draws, otherwise start */
+                               0,
+                               0,
+                               0
+                       };
+
+                       fd_wfi(ctx, ring);
+                       ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
+                                       ARRAY_SIZE(vertex_params), vertex_params, NULL);
+               }
+       }
+}
index 5365d5687f19d7aa30d9398bef38e25a151b633c..ef16d7b2f6e66618c44dba241b219c34ae253d10 100644 (file)
@@ -224,6 +224,10 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
                struct ir3_shader_key key);
 void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
 
+struct fd_ringbuffer;
+void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+               const struct pipe_draw_info *info, uint32_t dirty);
+
 static inline const char *
 ir3_shader_stage(struct ir3_shader *shader)
 {