turnip: Add support for fine derivatives.
author: Eric Anholt <eric@anholt.net>
Tue, 21 Jan 2020 20:24:49 +0000 (12:24 -0800)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 23 Jan 2020 17:38:29 +0000 (17:38 +0000)
This does appear to be the required instruction sequence (dsxpp_1 dst src;
dsxpp_1.p dst src), as dropping either instruction fails the test suite.

Fixes dEQP-VK.glsl.derivate.*

Reviewed-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3494>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3494>

src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_legalize.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/vulkan/tu_pipeline.c

index b5135fa40175c725659124c65fda6aa028b16849..461ff6332056ae78afcc50bd4b9928485de78429 100644 (file)
@@ -1437,7 +1437,9 @@ INSTR1(SQRT)
 
 /* cat5 instructions: */
 INSTR1(DSX)
+INSTR1(DSXPP_1)
 INSTR1(DSY)
+INSTR1(DSYPP_1)
 INSTR1F(3D, DSX)
 INSTR1F(3D, DSY)
 INSTR1(RGETPOS)
index 602b36121655bf6c77c74b42ac267df1c6104533..517150255615325e714780d3652e184f65548faa 100644 (file)
@@ -467,12 +467,20 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                dst[0] = ir3_DSX(b, src[0], 0);
                dst[0]->cat5.type = TYPE_F32;
                break;
+       case nir_op_fddx_fine:
+               dst[0] = ir3_DSXPP_1(b, src[0], 0);
+               dst[0]->cat5.type = TYPE_F32;
+               break;
        case nir_op_fddy:
        case nir_op_fddy_coarse:
                dst[0] = ir3_DSY(b, src[0], 0);
                dst[0]->cat5.type = TYPE_F32;
                break;
                break;
+       case nir_op_fddy_fine:
+               dst[0] = ir3_DSYPP_1(b, src[0], 0);
+               dst[0]->cat5.type = TYPE_F32;
+               break;
        case nir_op_flt16:
        case nir_op_flt32:
                dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
index 7894b75c0e862b14ca2d5e143b741b6f263047a5..db21507181c2f9c9aa0f52a5ff98ebf89ee1abc2 100644 (file)
@@ -246,6 +246,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                        list_addtail(&n->node, &block->instr_list);
                }
 
+               if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) {
+                       struct ir3_instruction *op_p = ir3_instr_clone(n);
+                       op_p->flags = IR3_INSTR_P;
+
+                       ctx->so->need_fine_derivatives = true;
+               }
+
                if (is_sfu(n))
                        regmask_set(&state->needs_ss, n->regs[0]);
 
index f056a3e5cd644a306744249c81fe3b1cb1b04909..e67659856761732b07bb47993972910d721ed325 100644 (file)
@@ -564,6 +564,8 @@ struct ir3_shader_variant {
        /* do we need derivatives: */
        bool need_pixlod;
 
+       bool need_fine_derivatives;
+
        /* do we have kill, image write, etc (which prevents early-z): */
        bool no_earlyz;
 
index 7593efb7bbd8faf611a322baaf470ee0064bdc5c..c6624dd493209ebaa4f068a3d94a154ce1a8c0d3 100644 (file)
@@ -378,6 +378,8 @@ tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
       A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack);
    if (vs->need_pixlod)
       sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE;
+   if (vs->need_fine_derivatives)
+      sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_DIFF_FINE;
 
    uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) |
                            A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc);
@@ -463,6 +465,8 @@ tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_VARYING;
    if (fs->need_pixlod)
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
+   if (fs->need_fine_derivatives)
+      sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_DIFF_FINE;
 
    uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) |
                            A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) |
@@ -515,7 +519,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
               A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(v->info.max_reg + 1) |
               A6XX_SP_CS_CTRL_REG0_MERGEDREGS |
               A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
-              COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+              COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE) |
+              COND(v->need_fine_derivatives, A6XX_SP_CS_CTRL_REG0_DIFF_FINE));
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
    tu_cs_emit(cs, 0x41);