freedreno/a4xx: sysvals and UBOs
author Rob Clark <robclark@freedesktop.org>
Sat, 11 Apr 2015 16:15:17 +0000 (12:15 -0400)
committer Rob Clark <robclark@freedesktop.org>
Fri, 17 Apr 2015 14:40:18 +0000 (10:40 -0400)
Basically just sync up the cmdstream emit parts to match the changes
already done on a3xx.

Also, fix scheduling for mem instructions.  This is needed on a4xx, and
I am a bit surprised it isn't needed for a3xx.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/ir3/ir3_depth.c

index bf51847706e82f8f05cf4c9f1cf0e7d5b44366a8..c315a47bb16adc9f0a10046af494470f43fa86a4 100644 (file)
@@ -87,11 +87,12 @@ static void
 emit_constants(struct fd_ringbuffer *ring,
                enum adreno_state_block sb,
                struct fd_constbuf_stateobj *constbuf,
-               struct ir3_shader_variant *shader)
+               struct ir3_shader_variant *shader,
+               bool emit_immediates)
 {
        uint32_t enabled_mask = constbuf->enabled_mask;
-       uint32_t first_immediate;
-       uint32_t base = 0;
+       uint32_t max_const;
+       int i;
 
        // XXX TODO only emit dirty consts.. but we need to keep track if
        // they are clobbered by a clear, gmem2mem, or mem2gmem..
@@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring,
         * than first_immediate.  In that case truncate the user consts
         * early to avoid HLSQ lockup caused by writing too many consts
         */
-       first_immediate = MIN2(shader->first_immediate, shader->constlen);
+       max_const = MIN2(shader->first_driver_param, shader->constlen);
 
        /* emit user constants: */
-       while (enabled_mask) {
-               unsigned index = ffs(enabled_mask) - 1;
+       if (enabled_mask & 1) {
+               const unsigned index = 0;
                struct pipe_constant_buffer *cb = &constbuf->cb[index];
                unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
 
                // I expect that size should be a multiple of vec4's:
                assert(size == align(size, 4));
 
-               /* gallium could leave const buffers bound above what the
-                * current shader uses.. don't let that confuse us.
+               /* and even if the start of the const buffer is before
+                * first_immediate, the end may not be:
                 */
-               if (base >= (4 * first_immediate))
-                       break;
-
-               if (constbuf->dirty_mask & (1 << index)) {
-                       /* and even if the start of the const buffer is before
-                        * first_immediate, the end may not be:
-                        */
-                       size = MIN2(size, (4 * first_immediate) - base);
-                       fd4_emit_constant(ring, sb, base,
+               size = MIN2(size, 4 * max_const);
+
+               if (size && (constbuf->dirty_mask & (1 << index))) {
+                       fd4_emit_constant(ring, sb, 0,
                                        cb->buffer_offset, size,
                                        cb->user_buffer, cb->buffer);
                        constbuf->dirty_mask &= ~(1 << index);
                }
 
-               base += size;
                enabled_mask &= ~(1 << index);
        }
 
+       /* emit ubos: */
+       if (shader->constlen > shader->first_driver_param) {
+               uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
+               OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
+               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
+                               CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                               CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE_0_NUM_UNIT(params));
+               OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+               for (i = 1; i <= params * 4; i++) {
+                       struct pipe_constant_buffer *cb = &constbuf->cb[i];
+                       assert(!cb->user_buffer);
+                       if ((enabled_mask & (1 << i)) && cb->buffer)
+                               OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
+                       else
+                               OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
+               }
+       }
+
        /* emit shader immediates: */
-       if (shader) {
+       if (shader && emit_immediates) {
                int size = shader->immediates_count;
-               base = shader->first_immediate;
+               uint32_t base = shader->first_immediate;
 
                /* truncate size to avoid writing constants that shader
                 * does not use:
@@ -499,11 +515,26 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                fd_wfi(ctx, ring);
                emit_constants(ring,  SB_VERT_SHADER,
                                &ctx->constbuf[PIPE_SHADER_VERTEX],
-                               (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+                               vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
                if (!emit->key.binning_pass) {
                        emit_constants(ring, SB_FRAG_SHADER,
                                        &ctx->constbuf[PIPE_SHADER_FRAGMENT],
-                                       (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+                                       fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
+               }
+       }
+
+       /* emit driver params every time */
+       if (emit->info && emit->prog == &ctx->prog) {
+               uint32_t vertex_params[4] = {
+                       emit->info->indexed ? emit->info->index_bias : emit->info->start,
+                       0,
+                       0,
+                       0
+               };
+               if (vp->constlen >= vp->first_driver_param + 4) {
+                       fd4_emit_constant(ring, SB_VERT_SHADER,
+                                                         (vp->first_driver_param + 4) * 4,
+                                                         0, 4, vertex_params, NULL);
                }
        }
 
index 1b89387678d1105465f731d45bd719545c3d53de..fda60eda6fd420eaa58cac5667c0ae969989194d 100644 (file)
@@ -363,7 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
                 */
                return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-               return is_a3xx(screen) ? 16 : 1;
+               return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1;
        case PIPE_SHADER_CAP_MAX_PREDS:
                return 0; /* nothing uses this */
        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
index 9e1f45dabaf3462c65af03a224bbdccb345bacb2..b899c66b37e36fa04cca70e2b101b2d7babec158 100644 (file)
@@ -71,7 +71,8 @@ int ir3_delayslots(struct ir3_instruction *assigner,
                return 0;
 
        /* assigner must be alu: */
-       if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) {
+       if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
+                       is_mem(consumer)) {
                return 6;
        } else if ((consumer->category == 3) &&
                        (is_mad(consumer->opc) || is_madsh(consumer->opc)) &&