From 8b1a3b5dde6405b4193eb0118e044a88b9b3accf Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 2 Oct 2018 16:04:39 -0400 Subject: [PATCH] freedreno/ir3: track # of samplers used by shader This is useful for a6xx to keep program state from depending on bound tex/samp state. Signed-off-by: Rob Clark --- .../drivers/freedreno/a3xx/fd3_program.c | 2 +- .../drivers/freedreno/a4xx/fd4_program.c | 4 ++-- .../drivers/freedreno/a5xx/fd5_program.c | 4 ++-- .../drivers/freedreno/a6xx/fd6_program.c | 18 ++++++------------ src/gallium/drivers/freedreno/ir3/ir3.h | 2 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 2 +- .../drivers/freedreno/ir3/ir3_legalize.c | 8 ++++---- src/gallium/drivers/freedreno/ir3/ir3_shader.h | 4 ++-- 8 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index ac2f76b823a..9d5c7b661fd 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -334,7 +334,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | - COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | + COND(fp->num_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz)); OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index dcdee3b90db..860f615e31d 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -318,7 +318,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | 
A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | - COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + COND(s[VS].v->num_samp > 0, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) | A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | @@ -393,7 +393,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | - COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + COND(s[FS].v->num_samp > 0, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000 | /* XXX */ COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index ba3339fb1d2..2a6e3334aed 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -442,7 +442,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 0x6 | /* XXX seems to be always set? */ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. - COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; ir3_link_shaders(&l, s[VS].v, s[FS].v); @@ -566,7 +566,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
- COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x020fffff); /* XXX */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index cbb561a3b8d..c6d062a3a9a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -334,14 +334,10 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ - OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1); - OUT_RING(ring, 0); - - struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_VERTEX]; OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 2); OUT_RING(ring, COND(s[VS].v, A6XX_SP_VS_CONFIG_ENABLED) | - A6XX_SP_VS_CONFIG_NTEX(tex->num_textures) | - A6XX_SP_VS_CONFIG_NSAMP(tex->num_samplers)); /* SP_VS_CONFIG */ + A6XX_SP_VS_CONFIG_NTEX(s[VS].v->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(s[VS].v->num_samp)); /* SP_VS_CONFIG */ OUT_RING(ring, s[VS].instrlen); /* SP_VS_INSTRLEN */ OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); @@ -371,12 +367,10 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_AB00, 1); OUT_RING(ring, 0x5); - tex = &ctx->tex[PIPE_SHADER_FRAGMENT]; OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 2); OUT_RING(ring, COND(s[FS].v, A6XX_SP_FS_CONFIG_ENABLED) | - A6XX_SP_FS_CONFIG_NTEX(tex->num_textures) | - A6XX_SP_FS_CONFIG_NSAMP(tex->num_samplers)); - /* SP_FS_CONFIG */ + A6XX_SP_FS_CONFIG_NTEX(s[FS].v->num_samp) | + A6XX_SP_FS_CONFIG_NSAMP(s[FS].v->num_samp)); /* SP_FS_CONFIG */ OUT_RING(ring, s[FS].instrlen); /* SP_FS_INSTRLEN */ OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4); @@ -393,7 +387,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | A6XX_SP_VS_CTRL_REG0_MERGEDREGS | 
A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. - COND(s[VS].v->has_samp, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; ir3_link_shaders(&l, s[VS].v, s[FS].v); @@ -517,7 +511,7 @@ fd6_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A6XX_SP_FS_CTRL_REG0_MERGEDREGS | A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. - COND(s[FS].v->has_samp, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1); OUT_RING(ring, 0); /* XXX */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 63215cefc96..3055c10f1dd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -1005,7 +1005,7 @@ int ir3_ra(struct ir3 *ir3, enum shader_t type, bool frag_coord, bool frag_face); /* legalize: */ -void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary); +void ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary); /* ************************************************************************* */ /* instruction helpers */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 14dcb3502e8..9f6a06cf333 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -3760,7 +3760,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, /* We need to do legalize after (for frag shader's) the "bary.f" * offsets (inloc) have been assigned. 
*/ - ir3_legalize(ir, &so->has_samp, &so->has_ssbo, &max_bary); + ir3_legalize(ir, &so->num_samp, &so->has_ssbo, &max_bary); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER LEGALIZE:\n"); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index 8c012106bf8..18bdc6ceb1f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -40,7 +40,7 @@ */ struct ir3_legalize_ctx { - bool has_samp; + int num_samp; bool has_ssbo; int max_bary; }; @@ -214,7 +214,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) * the samp instruction(s) could get eliminated if the * result is not used. */ - ctx->has_samp = true; + ctx->num_samp = MAX2(ctx->num_samp, n->cat5.samp + 1); regmask_set(&state->needs_sy, n->regs[0]); } else if (n->opc == OPC_RESINFO) { regmask_set(&state->needs_ss, n->regs[0]); @@ -463,7 +463,7 @@ mark_convergence_points(struct ir3 *ir) } void -ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary) +ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary) { struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); bool progress; @@ -483,7 +483,7 @@ ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary) } } while (progress); - *has_samp = ctx->has_samp; + *num_samp = ctx->num_samp; *has_ssbo = ctx->has_ssbo; *max_bary = ctx->max_bary; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index b237f8a08fe..6bc24f47d75 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -298,8 +298,8 @@ struct ir3_shader_variant { */ unsigned varying_in; - /* do we have one or more texture sample instructions: */ - bool has_samp; + /* number of samplers/textures (which are currently 1:1): */ + int num_samp; /* do we have one or more SSBO instructions: 
*/ bool has_ssbo; -- 2.30.2