From 3d7d5d220b5e5c2b48fa522b16ced5be0edad94f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 30 Jun 2020 12:17:10 -0700 Subject: [PATCH] freedreno/ir3: Fix duplicated fine derivatives instructions. legalize_block() can get run multiple times, which I didn't notice when adding fine derivs support. Other instruction clones change things such that the legalization won't trigger again, but that didn't apply to the DS.PP legalization. To keep someone else from tripping over this, split the one-shot legalization out of the iterative sync flag application. Fixes failures in dEQP-VK.glsl.derivate.dfdxfine.* Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3198 Part-of: --- .gitlab-ci/deqp-freedreno-a630-fails.txt | 1 - src/freedreno/ir3/disasm-a3xx.c | 3 ++ src/freedreno/ir3/instr-a3xx.h | 3 ++ src/freedreno/ir3/ir3.h | 4 +- src/freedreno/ir3/ir3_compiler_nir.c | 4 +- src/freedreno/ir3/ir3_legalize.c | 48 ++++++++++++++++++++---- 6 files changed, 51 insertions(+), 12 deletions(-) diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt index daf0960d63b..274cfd6239a 100644 --- a/.gitlab-ci/deqp-freedreno-a630-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt @@ -5,7 +5,6 @@ dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3 -dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump dEQP-VK.glsl.linkage.varying.struct.mat3x2 dEQP-VK.graphicsfuzz.mat-array-deep-control-flow dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4 diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index de8387f3bca..6cd2789fcd1 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -1263,6 +1263,9 @@ static const struct opc_info { OPC(5, OPC_DSYPP_1, dsypp.1), OPC(5, OPC_RGETPOS, rgetpos), OPC(5, OPC_RGETINFO, rgetinfo), + /* macros are needed here for ir3_print */ + OPC(5, OPC_DSXPP_MACRO, dsxpp.macro), + OPC(5, OPC_DSYPP_MACRO, dsypp.macro), /* category 6: */ diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 822d9dd7bc1..d2715080060 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -185,6 +185,9 @@ typedef enum { OPC_DSYPP_1 = _OPC(5, 25), OPC_RGETPOS = _OPC(5, 26), OPC_RGETINFO = _OPC(5, 27), + /* cat5 meta instructions, placed above the cat5 opc field's size */ + OPC_DSXPP_MACRO = _OPC(5, 32), + OPC_DSYPP_MACRO = _OPC(5, 33), /* category 6: */ OPC_LDG = _OPC(6, 0), /* load-global */ diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 634089c590b..a198e4cb927 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1663,9 +1663,9 @@ INSTR1(SQRT) /* cat5 instructions: */ INSTR1(DSX) -INSTR1(DSXPP_1) +INSTR1(DSXPP_MACRO) INSTR1(DSY) -INSTR1(DSYPP_1) +INSTR1(DSYPP_MACRO) INSTR1F(3D, DSX) INSTR1F(3D, DSY) INSTR1(RGETPOS) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index acfe4341ccd..10a51eb55e7 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -478,7 +478,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) dst[0]->cat5.type = TYPE_F32; break; case nir_op_fddx_fine: - dst[0] = ir3_DSXPP_1(b, src[0], 0); + dst[0] = ir3_DSXPP_MACRO(b, src[0], 0); dst[0]->cat5.type = TYPE_F32; break; case nir_op_fddy: @@ -488,7 +488,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) break; break; case nir_op_fddy_fine: - dst[0] = ir3_DSYPP_1(b, src[0], 0); + dst[0] = ir3_DSYPP_MACRO(b, src[0], 0); dst[0]->cat5.type = TYPE_F32; break; case nir_op_flt: diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index c4d90c15ba9..69efdbabc3f 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -233,13 +233,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) list_addtail(&n->node, &block->instr_list); } - if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) { - struct ir3_instruction *op_p = ir3_instr_clone(n); - op_p->flags = IR3_INSTR_P; - - ctx->so->need_fine_derivatives = true; - } - if (is_sfu(n)) regmask_set(&state->needs_ss, n->regs[0]); @@ -360,6 +353,42 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) return true; } +/* Expands dsxpp and dsypp macros to: + * + * dsxpp.1 dst, src + * dsxpp.1.p dst, src + * + * We apply this after flags syncing, as we don't want to sync in between the + * two (which might happen if dst == src). We do it before nop scheduling + * because that needs to count actual instructions. + */ +static bool +apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block) +{ + struct list_head instr_list; + + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); + + foreach_instr_safe (n, &instr_list) { + list_addtail(&n->node, &block->instr_list); + + if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) { + n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1; + + struct ir3_instruction *op_p = ir3_instr_clone(n); + op_p->flags = IR3_INSTR_P; + + ctx->so->need_fine_derivatives = true; + } + } + + return true; +} + /* NOTE: branch instructions are always the last instruction(s) * in the block. We take advantage of this as we resolve the * branches, since "if (foo) break;" constructs turn into @@ -752,6 +781,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) block_sched(ir); if (so->type == MESA_SHADER_FRAGMENT) kill_sched(ir, so); + + foreach_block (block, &ir->block_list) { + progress |= apply_fine_deriv_macro(ctx, block); + } + nop_sched(ir); do { -- 2.30.2