struct {
uint32_t inputOffset; /* base address for user args */
uint32_t sharedOffset; /* reserved space in s[] */
+ uint32_t gridInfoBase; /* base address for NTID,NCTAID,GRIDID (added to their SV address) */
} cp;
} prop;
NVC0LoweringPass::handleRDSV(Instruction *i)
{
Symbol *sym = i->getSrc(0)->asSym();
+ const SVSemantic sv = sym->reg.data.sv.sv; // shared by the $sreg early-out and the switch below
Value *vtx = NULL;
Instruction *ld;
uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
- if (addr >= 0x400) // mov $sreg
+ if (addr >= 0x400) {
+ // mov $sreg
+ if (sym->reg.data.sv.index == 3) {
+ // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID
+ i->op = OP_MOV;
+ i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); // NTID/NCTAID read as 1, any other value as 0
+ }
return true;
+ }
- switch (i->getSrc(0)->reg.data.sv.sv) {
+ switch (sv) {
case SV_POSITION:
assert(prog->getType() == Program::TYPE_FRAGMENT);
bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
break;
+ case SV_NTID:
+ case SV_NCTAID:
+ case SV_GRIDID:
+ assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise
+ if (sym->reg.data.sv.index == 3) {
+ i->op = OP_MOV;
+ i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); // 4th component: GRIDID reads as 0, NTID/NCTAID as 1
+ return true;
+ }
+ addr += prog->driver->prop.cp.gridInfoBase; // SV address is relative to the driver-provided grid info base
+ bld.mkLoad(TYPE_U32, i->getDef(0), // result goes straight into the RDSV's destination
+ bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); // NOTE(review): presumably c0[addr] - confirm mkSymbol's 2nd argument is the buffer index
+ break;
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
const SVSemantic sv = sym->reg.data.sv.sv;
const bool isInput = shaderFile == FILE_SHADER_INPUT;
+ const bool kepler = getChipset() >= NVISA_GK104_CHIPSET; // only GK104+ gets addresses for the grid info values below
switch (sv) {
case SV_POSITION: return 0x070 + idx * 4;
case SV_FACE: return 0x3fc;
case SV_TESS_FACTOR: return 0x000 + idx * 4;
case SV_TESS_COORD: return 0x2f0 + idx * 4;
+ case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0; // Kepler: dword idx at offset 0x00; otherwise ~0 (no address)
+ case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0; // Kepler: dword idx at offset 0x0c; otherwise ~0 (no address)
+ case SV_GRIDID: return kepler ? 0x18 : ~0; // Kepler: single dword at offset 0x18; otherwise ~0 (no address)
default:
return 0xffffffff;
}
info->io.resInfoCBSlot = 0;
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
+ info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); /* start of the NTID/NCTAID/GRIDID area in the compute input space */
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
static void
-nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
+ const uint *block_layout, /* 3 dwords, uploaded at NVE4_CP_INPUT_GRID_INFO(0) */
+ const uint *grid_layout) /* 3 dwords, uploaded right after block_layout */
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *cp = nvc0->compprog;
- if (!cp->parm_size)
- return;
-
+ if (cp->parm_size) { /* user parameters are optional; the grid info upload below always happens */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
+ PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
+ PUSH_DATAp(push, input, cp->parm_size / 4); /* parm_size is in bytes, PUSH_DATAp count is in dwords */
+ }
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->parm->offset);
- PUSH_DATA (push, screen->parm->offset);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); /* second upload targets the grid info area */
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
- PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 7 * 4); /* 7 dwords: 3 block + 3 grid + 1 zero */
PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7); /* 1 control dword + the 7 data dwords */
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
- PUSH_DATAp(push, input, cp->parm_size / 4);
+ PUSH_DATAp(push, block_layout, 3); /* lands at NVE4_CP_INPUT_NTID(0..2) */
+ PUSH_DATAp(push, grid_layout, 3); /* lands at NVE4_CP_INPUT_NCTAID(0..2) */
+ PUSH_DATA (push, 0); /* lands at NVE4_CP_INPUT_GRIDID */
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
nve4_compute_dump_launch_desc(desc);
- nve4_compute_upload_input(nvc0, input);
+ nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); /* also uploads block/grid layout into the input space */
/* upload descriptor and flush */
#if 0
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
*/
-#define NVE4_CP_INPUT_USER 0x0000
-#define NVE4_CP_INPUT_USER_LIMIT 0x1000
-#define NVE4_CP_INPUT_TEX(i) (0x1020 + (i) * 4)
-#define NVE4_CP_INPUT_TEX_STRIDE 4
-#define NVE4_CP_INPUT_TEX_MAX 32
-#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
-#define NVE4_CP_INPUT_SUF_STRIDE 64
-#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
-#define NVE4_CP_INPUT_SUF_MAX 32
-#define NVE4_CP_INPUT_SIZE 0x1900
-#define NVE4_CP_PARAM_SIZE 0x2000
+#define NVE4_CP_INPUT_USER 0x0000
+#define NVE4_CP_INPUT_USER_LIMIT 0x1000
+#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4) /* dword i of the grid info area (NTID, NCTAID, GRIDID) */
+#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4) /* 3 dwords: 0x1000..0x100b */
+#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4) /* 3 dwords: 0x100c..0x1017 */
+#define NVE4_CP_INPUT_GRIDID 0x1018 /* 1 dword, follows NCTAID */
+#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4) /* 32 entries of 4 bytes: 0x1040..0x10bf, ends where MS_OFFSETS begins */
+#define NVE4_CP_INPUT_TEX_STRIDE 4
+#define NVE4_CP_INPUT_TEX_MAX 32
+#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
+#define NVE4_CP_INPUT_SUF_STRIDE 64
+#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
+#define NVE4_CP_INPUT_SUF_MAX 32
+#define NVE4_CP_INPUT_SIZE 0x1900
+#define NVE4_CP_PARAM_SIZE 0x2000
struct nve4_cp_launch_desc
{