void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
- Value *val;
Value *arg[4], *src[8];
Value *lod = NULL, *shd = NULL;
unsigned int s, c, d;
shd = src[n - 1];
}
- if (tgt.isCube()) {
- for (c = 0; c < 3; ++c)
- src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
- val = getScratch();
- mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
- mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
- mkOp1(OP_RCP, TYPE_F32, val, val);
- for (c = 0; c < 3; ++c)
- src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
- }
-
for (c = 0, d = 0; c < 4; ++c) {
if (dst[c]) {
texi->setDef(d++, dst[c]);
tmp = bld.getScratch();
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (c = 0; c < dim; ++c) {
add->lanes = 1; /* abused for .ndv */
}
+ // normalize cube coordinates if necessary
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
+
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c + array, crd[c]);
+ tex->setSrc(c + array, src[c]);
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
// save results
const int dref = arg;
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
+ /* Only normalize in the non-explicit derivatives case.
+ */
+ if (i->tex.target.isCube() && i->op != OP_TXD) {
+ Value *src[3], *val;
+ int c;
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c) {
+ i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
+ i->getSrc(c), val));
+ }
+ }
+
// handle MS, which means looking up the MS params for this texture, and
// adjusting the input coordinates to point at the right sample.
if (i->tex.target.isMS()) {
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // normalize cube coordinates if necessary
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c, crd[c]);
+ tex->setSrc(c, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
const int chipset = prog->getTarget()->getChipset();
+ /* Only normalize in the non-explicit derivatives case. For explicit
+ * derivatives, this is handled in handleManualTXD.
+ */
+ if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) {
+ Value *src[3], *val;
+ int c;
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c) {
+ i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
+ i->getSrc(c), val));
+ }
+ }
+
// Arguments to the TEX instruction are a little insane. Even though the
// encoding is identical between SM20 and SM30, the arguments mean
// different things between Fermi and Kepler+. A lot of arguments are
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // normalize cube coordinates
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c + array, crd[c]);
+ tex->setSrc(c + array, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;