freedreno/ir3: track max flow control depth for a5xx/a6xx
author: Rob Clark <robdclark@gmail.com>
Wed, 5 Dec 2018 15:51:16 +0000 (10:51 -0500)
committer: Rob Clark <robdclark@gmail.com>
Fri, 7 Dec 2018 18:49:21 +0000 (13:49 -0500)
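Track the maximum flow-control nesting depth while compiling, and use it to program BRANCHSTACK rather than just hard-coding the BRANCHSTACK size.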

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_context.h
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/a5xx/fd5_program.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 6b33c1f89811a6f5192c5334822de23110a204ba..f8155747c529150b898f6956a4d0f29c86ecf2f1 100644 (file)
@@ -2340,6 +2340,20 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop)
        emit_cf_list(ctx, &nloop->body);
 }
 
+static void
+stack_push(struct ir3_context *ctx)   /* enter one more level of nested flow control */
+{
+       ctx->stack++;   /* current nesting depth */
+       ctx->max_stack = MAX2(ctx->max_stack, ctx->stack);   /* remember the deepest nesting seen so far */
+}
+
+static void
+stack_pop(struct ir3_context *ctx)   /* leave one level of nested flow control */
+{
+       compile_assert(ctx, ctx->stack > 0);   /* a pop without a matching push would wrap the unsigned counter */
+       ctx->stack--;   /* max_stack is intentionally left untouched */
+}
+
 static void
 emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
 {
@@ -2349,10 +2363,14 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
                        emit_block(ctx, nir_cf_node_as_block(node));
                        break;
                case nir_cf_node_if:
+                       stack_push(ctx);
                        emit_if(ctx, nir_cf_node_as_if(node));
+                       stack_pop(ctx);
                        break;
                case nir_cf_node_loop:
+                       stack_push(ctx);
                        emit_loop(ctx, nir_cf_node_as_loop(node));
+                       stack_pop(ctx);
                        break;
                case nir_cf_node_function:
                        ir3_context_error(ctx, "TODO\n");
@@ -2479,9 +2497,13 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl)
 {
        nir_metadata_require(impl, nir_metadata_block_index);
 
+       compile_assert(ctx, ctx->stack == 0);
+
        emit_cf_list(ctx, &impl->body);
        emit_block(ctx, impl->end_block);
 
+       compile_assert(ctx, ctx->stack == 0);
+
        /* at this point, we should have a single empty block,
         * into which we emit the 'end' instruction.
         */
@@ -3079,6 +3101,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                ir3_print(ir);
        }
 
+       so->branchstack = ctx->max_stack;
+
        /* Note that actual_in counts inputs that are not bary.f'd for FS: */
        if (so->type == MESA_SHADER_VERTEX)
                so->total_in = actual_in;
index 63c5d8baaf95cc46677069c025b4d75cf4e83464..99f43cb5ab66766b2fc1860a37ffb087270cfaae 100644 (file)
@@ -86,6 +86,11 @@ struct ir3_context {
 
        unsigned num_arrays;
 
+       /* Tracking for max level of flowcontrol (branchstack) needed
+        * by a5xx+:
+        */
+       unsigned stack, max_stack;
+
        /* a common pattern for indirect addressing is to request the
         * same address register multiple times.  To avoid generating
         * duplicate instruction sequences (which our backend does not
index bc47160d6eadabca3ae5326f1b79dc761ee45153..418c77ae8b08ba2fa452db5936d30dd223aebe06 100644 (file)
@@ -295,6 +295,10 @@ struct ir3_shader_variant {
        struct ir3_info info;
        struct ir3 *ir;
 
+       /* Levels of nesting of flow control:
+        */
+       unsigned branchstack;
+
        /* the instructions length is in units of instruction groups
         * (4 instructions for a3xx, 16 instructions for a4xx.. each
         * instruction is 2 dwords):
index 97a84b01c0a4c049f6a3fe31b3f83a6a2925770a..9c54244457ff8c60805275d6e9ad532b8a57b5d4 100644 (file)
@@ -443,7 +443,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
        OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
                        A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
                        0x6 | /* XXX seems to be always set? */
-                       A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+                       A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
                        COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
        struct ir3_shader_linkage l = {0};
@@ -567,7 +567,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
                        A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
                        A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
-                       A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+                       A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
                        COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
        OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
index 71dadef97e25fa67f721516325953da1fe0be475..add2d28b8662329f840da69afd51f9b9ac5f3d14 100644 (file)
@@ -402,7 +402,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
        OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) |
                        A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
                        A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
-                       A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+                       A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
                        COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
 
        struct ir3_shader_linkage l = {0};
@@ -524,7 +524,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
                        0x1000000 |
                        A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
                        A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
-                       A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+                       A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
                        COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
 
        OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);