From 5c2c1f0a2d5cec771b6cbfadf43f44a632ff57fc Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 5 Dec 2018 10:51:16 -0500 Subject: [PATCH] freedreno/ir3: track max flow control depth for a5xx/a6xx Track the maximum nesting depth of if/loop flow control while emitting the shader, and program the a5xx/a6xx BRANCHSTACK field from it, rather than hard-coding the BRANCHSTACK size to 0x3. Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_compiler_nir.c | 24 +++++++++++++++++++ src/freedreno/ir3/ir3_context.h | 5 ++++ src/freedreno/ir3/ir3_shader.h | 4 ++++ .../drivers/freedreno/a5xx/fd5_program.c | 4 ++-- .../drivers/freedreno/a6xx/fd6_program.c | 4 ++-- 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 6b33c1f8981..f8155747c52 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2340,6 +2340,20 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop) emit_cf_list(ctx, &nloop->body); } +static void +stack_push(struct ir3_context *ctx) +{ + ctx->stack++; + ctx->max_stack = MAX2(ctx->max_stack, ctx->stack); +} + +static void +stack_pop(struct ir3_context *ctx) +{ + compile_assert(ctx, ctx->stack > 0); + ctx->stack--; +} + static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list) { @@ -2349,10 +2363,14 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list) emit_block(ctx, nir_cf_node_as_block(node)); break; case nir_cf_node_if: + stack_push(ctx); emit_if(ctx, nir_cf_node_as_if(node)); + stack_pop(ctx); break; case nir_cf_node_loop: + stack_push(ctx); emit_loop(ctx, nir_cf_node_as_loop(node)); + stack_pop(ctx); break; case nir_cf_node_function: ir3_context_error(ctx, "TODO\n"); @@ -2479,9 +2497,13 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl) { nir_metadata_require(impl, nir_metadata_block_index); + compile_assert(ctx, ctx->stack == 0); + emit_cf_list(ctx, &impl->body); emit_block(ctx, impl->end_block); + compile_assert(ctx, ctx->stack == 0); + /* at this point, we should have a single empty block, * into which we emit the 'end' 
instruction. */ @@ -3079,6 +3101,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ir3_print(ir); } + so->branchstack = ctx->max_stack; + /* Note that actual_in counts inputs that are not bary.f'd for FS: */ if (so->type == MESA_SHADER_VERTEX) so->total_in = actual_in; diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 63c5d8baaf9..99f43cb5ab6 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -86,6 +86,11 @@ struct ir3_context { unsigned num_arrays; + /* Tracking for max level of flowcontrol (branchstack) needed + * by a5xx+: + */ + unsigned stack, max_stack; + /* a common pattern for indirect addressing is to request the * same address register multiple times. To avoid generating * duplicate instruction sequences (which our backend does not diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index bc47160d6ea..418c77ae8b0 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -295,6 +295,10 @@ struct ir3_shader_variant { struct ir3_info info; struct ir3 *ir; + /* Levels of nesting of flow control: + */ + unsigned branchstack; + /* the instructions length is in units of instruction groups * (4 instructions for a3xx, 16 instructions for a4xx.. each * instruction is 2 dwords): diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 97a84b01c0a..9c54244457f 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -443,7 +443,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 0x6 | /* XXX seems to be always set? */ - A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
+ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; @@ -567,7 +567,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | - A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 71dadef97e2..add2d28b866 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -402,7 +402,7 @@ setup_stateobj(struct fd_ringbuffer *ring, OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) | A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | A6XX_SP_VS_CTRL_REG0_MERGEDREGS | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; @@ -524,7 +524,7 @@ setup_stateobj(struct fd_ringbuffer *ring, 0x1000000 | A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A6XX_SP_FS_CTRL_REG0_MERGEDREGS | - A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1); -- 2.30.2