From 0cf6320eb5eca1ea20906624ad5a46ca386e0aa6 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 16 Aug 2017 00:34:43 -0400 Subject: [PATCH] nvc0/ir: change textureGrad to always use lane 0 as the tex origin Thanks to Karol Herbst for the debugging / tracing work that led to this change. Move to using lane 0 as the "work" lane for the texture. It is unclear why this helps, as that computation should be identical to doing it in the "correct" lane with the properly adjusted quadops. In order to be able to use the lane 0 result, we also have to ensure that lane 0 contains the proper array/indirect/shadow values. This applies to Fermi and Kepler. Maxwell+ may or may not need fixing, but that lowering logic is separate. Fixes KHR-GL45.texture_cube_map_array.sampling Signed-off-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 60 ++++++++++++++----- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 6b51b7607cb..51f6fae2c1f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1081,15 +1081,20 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) bool NVC0LoweringPass::handleManualTXD(TexInstruction *i) { - static const uint8_t qOps[4][2] = - { - { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 - { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 - { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 - { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 - }; + // Always done from the l0 perspective. This is the way that NVIDIA's + // driver does it, and doing it from the "current" lane's perspective + // doesn't seem to always work for reasons that aren't altogether clear, + // even in frag shaders. 
+ // + // Note that we must move not only the coordinates into lane0, but also all + // ancillary arguments, like array indices and depth compare as they may + // differ between lanes. Offsets for TXD are supposed to be uniform, so we + // leave them alone. + static const uint8_t qOps[2] = + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }; + Value *def[4][4]; - Value *crd[3]; + Value *crd[3], *arr[2], *shadow; Instruction *tex; Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; @@ -1100,7 +1105,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) // indirect are both in the leading arg, while for Kepler, array and // indirect are separate (and both precede the coordinates). Maxwell is // handled in a separate function. - unsigned array; + int array; if (targ->getChipset() < NVISA_GK104_CHIPSET) array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0; else @@ -1110,19 +1115,34 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) for (c = 0; c < dim; ++c) crd[c] = bld.getScratch(); + for (c = 0; c < array; ++c) + arr[c] = bld.getScratch(); + shadow = bld.getScratch(); - bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (l = 0; l < 4; ++l) { Value *src[3], *val; - // mov coordinates from lane l to all lanes + + bld.mkOp(OP_QUADON, TYPE_NONE, NULL); + // we're using the texture result from lane 0 in all cases, so make sure + // that lane 0 is pointing at the proper array index, indirect value, + // and depth compare. 
+ if (l != 0) { + for (c = 0; c < array; ++c) + bld.mkQuadop(0x00, arr[c], l, i->getSrc(c), zero); + if (i->tex.target.isShadow()) { + // The next argument after coords is the depth compare + bld.mkQuadop(0x00, shadow, l, i->getSrc(array + dim), zero); + } + } + // mov position coordinates from lane l to all lanes for (c = 0; c < dim; ++c) bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); // add dPdx from lane l to lanes dx for (c = 0; c < dim; ++c) - bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); + bld.mkQuadop(qOps[0], crd[c], l, i->dPdx[c].get(), crd[c]); // add dPdy from lane l to lanes dy for (c = 0; c < dim; ++c) - bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); + bld.mkQuadop(qOps[1], crd[c], l, i->dPdy[c].get(), crd[c]); // normalize cube coordinates if (i->tex.target.isCube()) { for (c = 0; c < 3; ++c) @@ -1139,8 +1159,21 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) } // texture bld.insert(tex = cloneForward(func, i)); + if (l != 0) { + for (c = 0; c < array; ++c) + tex->setSrc(c, arr[c]); + if (i->tex.target.isShadow()) + tex->setSrc(array + dim, shadow); + } for (c = 0; c < dim; ++c) tex->setSrc(c + array, src[c]); + // broadcast results from lane 0 to all lanes so that the moves *into* + // the target lane pick up the proper value. + if (l != 0) + for (c = 0; i->defExists(c); ++c) + bld.mkQuadop(0x00, tex->getDef(c), 0, tex->getDef(c), zero); + bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); + // save results for (c = 0; i->defExists(c); ++c) { Instruction *mov; @@ -1150,7 +1183,6 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) mov->lanes = 1 << l; } } - bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); for (c = 0; i->defExists(c); ++c) { Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); -- 2.30.2