nv50/ir: always return 0 when trying to read thread id along unit dim
author Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 26 Jan 2017 03:16:56 +0000 (22:16 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 9 Feb 2017 20:15:36 +0000 (15:15 -0500)
A great many compute shaders define only a 1- or 2-dimensional block,
but still use system values that take all three dimensions into
account (such as gl_LocalInvocationIndex). In the special case where a
block dimension is exactly 1, the thread id along that axis is always
0, so return zero directly and let constant folding simplify the
computations that depend on it.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Pierre Moreau <pierre.morrow@free.fr>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_target.h

index 186c9fdbedf8a7057ae9d7e77b1334177d0c4c49..b67a1ddbd598eafff16e0f1d17bd5d68f1bcdcd8 100644 (file)
@@ -1179,7 +1179,11 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
       info->prop.gp.instanceCount = 1;
       info->prop.gp.maxVertices = 1;
    }
-   info->prop.cp.numThreads = 1;
+   if (info->type == PIPE_SHADER_COMPUTE) {
+      info->prop.cp.numThreads[0] =
+      info->prop.cp.numThreads[1] =
+      info->prop.cp.numThreads[2] = 1;
+   }
    info->io.pointSize = 0xff;
    info->io.instanceId = 0xff;
    info->io.vertexId = 0xff;
index 65d0904d0f1030494eabda0f73ea6dcfadc26d07..e7d840df00a1d8ec028314088e4b3644d2ea7235 100644 (file)
@@ -152,7 +152,7 @@ struct nv50_ir_prog_info
          uint32_t inputOffset; /* base address for user args */
          uint32_t sharedOffset; /* reserved space in s[] */
          uint32_t gridInfoBase;  /* base address for NTID,NCTAID */
-         uint32_t numThreads; /* max number of threads */
+         uint16_t numThreads[3]; /* max number of threads */
       } cp;
    } prop;
 
index 80cc7fa01ac97c0b2bdf45fe8cd047a005d06657..a376b1dcc7d1724df5b0751c7de08982c2fd2ac9 100644 (file)
@@ -1140,7 +1140,6 @@ bool Source::scanSource()
    }
 
    info->io.viewportId = -1;
-   info->prop.cp.numThreads = 1;
 
    info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
    info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
@@ -1243,9 +1242,13 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
          info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
       break;
    case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
+      info->prop.cp.numThreads[0] = prop->u[0].Data;
+      break;
    case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
+      info->prop.cp.numThreads[1] = prop->u[0].Data;
+      break;
    case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
-      info->prop.cp.numThreads *= prop->u[0].Data;
+      info->prop.cp.numThreads[2] = prop->u[0].Data;
       break;
    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
       info->io.clipDistances = prop->u[0].Data;
@@ -2034,6 +2037,9 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
       return ld->getDef(0);
    case TGSI_FILE_SYSTEM_VALUE:
       assert(!ptr);
+      if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
+          info->prop.cp.numThreads[swz] == 1)
+         return zero;
       ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
       ld->perPatch = info->sv[idx].patch;
       return ld->getDef(0);
index eaf50cc19be02889107721b17d7a1feefafa7800..e9d10574835f7c65355335e79e450aee17603fb7 100644 (file)
@@ -174,7 +174,9 @@ public:
    virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
 
    virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
-      threads = info->prop.cp.numThreads;
+      threads = info->prop.cp.numThreads[0] *
+         info->prop.cp.numThreads[1] *
+         info->prop.cp.numThreads[2];
       if (threads == 0)
          threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
    }