freedreno/a6xx: pre-calculate userconst stateobj size
author Rob Clark <robdclark@chromium.org>
Wed, 11 Sep 2019 17:02:37 +0000 (10:02 -0700)
committer Rob Clark <robdclark@chromium.org>
Fri, 13 Sep 2019 01:07:20 +0000 (18:07 -0700)
The AnTuTu "garden" benchmark overflows the fixed-size constbuffer
stateobject, so let's be more clever and calculate the actual size
(potentially a slightly pessimistic one).

Signed-off-by: Rob Clark <robdclark@chromium.org>
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c
src/gallium/drivers/freedreno/ir3/ir3_gallium.c
src/gallium/drivers/freedreno/ir3/ir3_gallium.h

index f6896c3526ba7efbea615bb0e3e3089e6cb31970..b2d08fe95f3b8a0d2fc5e220a7eec8bcb8e3306e 100644 (file)
@@ -527,6 +527,7 @@ struct ir3_ubo_analysis_state
        struct ir3_ubo_range range[IR3_MAX_CONSTANT_BUFFERS];
        uint32_t size;
        uint32_t lower_count;
+       uint32_t cmdstream_size; /* for per-gen backend to stash required cmdstream size */
 };
 
 
index 4319b43978f8a8e04756c5defabc77acc4baa338..3e8d41c4b24bee7da5f33c4650b2da0065aadf8b 100644 (file)
@@ -928,7 +928,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
 
        if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) {
                struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer(
-                               ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+                               ctx->batch->submit, vp->shader->ubo_state.cmdstream_size,
+                               FD_RINGBUFFER_STREAMING);
 
                ir3_emit_user_consts(ctx->screen, vp, vsconstobj,
                                &ctx->constbuf[PIPE_SHADER_VERTEX]);
@@ -940,7 +941,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
 
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) {
                struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer(
-                               ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
+                               ctx->batch->submit, fp->shader->ubo_state.cmdstream_size,
+                               FD_RINGBUFFER_STREAMING);
 
                ir3_emit_user_consts(ctx->screen, fp, fsconstobj,
                                &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
index a2acaa7b5c28344c7128a036b1a638d506970efe..8931d40a8e2e4cabe7c4678bda8bc93e9d674cd2 100644 (file)
@@ -45,7 +45,21 @@ create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state
 {
        struct fd_context *ctx = fd_context(pctx);
        struct ir3_compiler *compiler = ctx->screen->compiler;
-       return ir3_shader_create(compiler, cso, type, &ctx->debug, pctx->screen);
+       struct ir3_shader *shader =
+               ir3_shader_create(compiler, cso, type, &ctx->debug, pctx->screen);
+       unsigned packets, size;
+
+       /* pre-calculate size required for userconst stateobj: */
+       ir3_user_consts_size(&shader->ubo_state, &packets, &size);
+
+       /* also account for UBO addresses: */
+       packets += 1;
+       size += 2 * shader->const_state.num_ubos;
+
+       unsigned sizedwords = (4 * packets) + size;
+       shader->ubo_state.cmdstream_size = sizedwords * 4;
+
+       return shader;
 }
 
 static void *
index b72e6c754a6ff7ce7321fccf60ab7eda8ef0b153..7456a82ed7b2e07f0b9ec8d2748d1cd8b194520e 100644 (file)
@@ -211,6 +211,29 @@ emit_const(struct fd_screen *screen, struct fd_ringbuffer *ring,
                        offset, size, user_buffer, buffer);
 }
 
+/**
+ * Calculates size of cmdstream needed for ir3_emit_user_consts().
+ * Outputs via *packets the count of non-empty ranges (start < end) and via
+ * *size the sum of their (end - start) extents; both are zeroed first.
+ *
+ * The value can be a worst-case, ie. some shader variants may not read all
+ * consts, etc.
+ * NOTE(review): size is documented as dwords -- confirm the range units.
+ */
+void
+ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
+               unsigned *packets, unsigned *size)
+{
+       *packets = *size = 0;
+
+       for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
+               if (state->range[i].start < state->range[i].end) {
+                       *size += state->range[i].end - state->range[i].start;
+                       (*packets)++;
+               }
+       }
+}
+
 void
 ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
index cc8ae4599c951a4246ba0e6cfc3b97bd1fdac7f7..b0cf7ed9805441da066fcf5341bb42cefd3d9c64 100644 (file)
@@ -51,6 +51,8 @@ struct fd_constbuf_stateobj;
 struct fd_shaderbuf_stateobj;
 struct fd_shaderimg_stateobj;
 
+void ir3_user_consts_size(struct ir3_ubo_analysis_state *state,
+               unsigned *packets, unsigned *size);
 void ir3_emit_user_consts(struct fd_screen *screen, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf);
 void ir3_emit_ubos(struct fd_screen *screen, const struct ir3_shader_variant *v,