From b327501dbf946279c8dff55566af73821d0d576e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 21 Jan 2020 12:24:49 -0800 Subject: [PATCH] turnip: Add support for fine derivatives. This does appear to be the required instruction sequence (dsxpp_1 dst src; dsxpp_1.p dst src) as dropping either instruction fails the testsuite. Fixes dEQP-VK.glsl.derivate.* Reviewed-by: Jonathan Marek Reviewed-by: Rob Clark Tested-by: Marge Bot Part-of: --- src/freedreno/ir3/ir3.h | 2 ++ src/freedreno/ir3/ir3_compiler_nir.c | 8 ++++++++ src/freedreno/ir3/ir3_legalize.c | 7 +++++++ src/freedreno/ir3/ir3_shader.h | 2 ++ src/freedreno/vulkan/tu_pipeline.c | 7 ++++++- 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index b5135fa4017..461ff633205 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1437,7 +1437,9 @@ INSTR1(SQRT) /* cat5 instructions: */ INSTR1(DSX) +INSTR1(DSXPP_1) INSTR1(DSY) +INSTR1(DSYPP_1) INSTR1F(3D, DSX) INSTR1F(3D, DSY) INSTR1(RGETPOS) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 602b3612165..51715025561 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -467,12 +467,20 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) dst[0] = ir3_DSX(b, src[0], 0); dst[0]->cat5.type = TYPE_F32; break; + case nir_op_fddx_fine: + dst[0] = ir3_DSXPP_1(b, src[0], 0); + dst[0]->cat5.type = TYPE_F32; + break; case nir_op_fddy: case nir_op_fddy_coarse: dst[0] = ir3_DSY(b, src[0], 0); dst[0]->cat5.type = TYPE_F32; break; break; + case nir_op_fddy_fine: + dst[0] = ir3_DSYPP_1(b, src[0], 0); + dst[0]->cat5.type = TYPE_F32; + break; case nir_op_flt16: case nir_op_flt32: dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 7894b75c0e8..db21507181c 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -246,6 +246,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) list_addtail(&n->node, &block->instr_list); } + if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) { + struct ir3_instruction *op_p = ir3_instr_clone(n); + op_p->flags = IR3_INSTR_P; + + ctx->so->need_fine_derivatives = true; + } + if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index f056a3e5cd6..e6765985676 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -564,6 +564,8 @@ struct ir3_shader_variant { /* do we need derivatives: */ bool need_pixlod; + bool need_fine_derivatives; + /* do we have kill, image write, etc (which prevents early-z): */ bool no_earlyz; diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 7593efb7bbd..c6624dd4932 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -378,6 +378,8 @@ tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader, A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack); if (vs->need_pixlod) sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE; + if (vs->need_fine_derivatives) + sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_DIFF_FINE; uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc); @@ -463,6 +465,8 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader, sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_VARYING; if (fs->need_pixlod) sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE; + if (fs->need_fine_derivatives) + sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_DIFF_FINE; uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) | A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) | @@ -515,7 +519,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(v->info.max_reg + 1) | A6XX_SP_CS_CTRL_REG0_MERGEDREGS | A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) | - COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE)); + COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE) | + COND(v->need_fine_derivatives, A6XX_SP_CS_CTRL_REG0_DIFF_FINE)); tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); tu_cs_emit(cs, 0x41); -- 2.30.2