/*
 * Assign output slots for the results of a fragment program.
 *
 * Each output's component c is written to slot si * 4 + c. A running slot
 * counter starts at numColourResults * 4, i.e. just past the colour
 * results. If io.sampleMask names a valid output (index below
 * PIPE_MAX_SHADER_OUTPUTS), that output's slot[0] receives the counter,
 * which is then advanced. If io.fragDepth names a valid output, its slot[2]
 * receives the counter's current value.
 *
 * info: program info whose out[] slot assignments are filled in.
 * Returns 0 on every path.
 *
 * NOTE(review): this text is in patch form -- lines starting with '-' are
 * the old statements and the '+' lines that follow are their replacements
 * (the counter is renamed from "last" to "count", and an else branch is
 * added).
 */
static int
nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
{
- unsigned last = info->prop.fp.numColourResults * 4;
+ unsigned count = info->prop.fp.numColourResults * 4;
unsigned i, c;
/* NOTE(review): nothing visible here assigns c before it is used as an
 * index below -- presumably an inner loop over components sits between
 * these two lines in the full file; confirm against the complete source.
 */
for (i = 0; i < info->numOutputs; ++i)
info->out[i].slot[c] = info->out[i].si * 4 + c;
/* The sample mask, when present, takes the first slot after the colours. */
if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
- info->out[info->io.sampleMask].slot[0] = last++;
+ info->out[info->io.sampleMask].slot[0] = count++;
+ else
+ if (info->target >= 0xe0)
+ count++; /* on Kepler, depth is always last colour reg + 2 */
/* Depth, when present, is recorded in component 2 at the counter's value,
 * which has been advanced by the sample-mask branch or by the
 * target >= 0xe0 branch above, if either ran.
 */
if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
- info->out[info->io.fragDepth].slot[2] = last;
+ info->out[info->io.fragDepth].slot[2] = count;
return 0;
}
vp->vp.clip_mode |= 1 << (i * 4);
if (info->io.genUserClip < 0)
- vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */
+ vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
return 0;
}
{
unsigned i, c, a, m;
+ /* just 00062 on Kepler */
fp->hdr[0] = 0x20062 | (5 << 10);
fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
}
fp->fp.early_z = info->prop.fp.earlyFragTests;
- if (fp->fp.early_z == FALSE && fp->code_size >= 0x400)
- fp->fp.early_z = !(info->prop.fp.writesDepth ||
- info->prop.fp.usesDiscard ||
- (info->io.globalAccess & 2));
return 0;
}
#endif
boolean
-nvc0_program_translate(struct nvc0_program *prog)
+nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
int ret;
return FALSE;
info->type = prog->type;
- info->target = 0xc0;
+ info->target = chipset;
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
info->io.genUserClip = prog->vp.num_ucps;
+ info->io.ucpBase = 256;
+ info->io.ucpBinding = 15;
info->assignSlots = nvc0_program_assign_varying_slots;
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
+ FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
assert(info->bin.tlsSpace < (1 << 24));
prog->hdr[0] |= 1 << 26;
prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
+ prog->need_tls = TRUE;
+ }
+ /* TODO: factor 2 only needed where joinat/precont is used,
+ * and we only have to count non-uniform branches
+ */
+ /*
+ if ((info->maxCFDepth * 2) > 16) {
+ prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
+ prog->need_tls = TRUE;
}
+ */
if (info->io.globalAccess)
prog->hdr[0] |= 1 << 16;
size = align(size, 0x40);
size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
}
- size = align(size, 0x40); /* required by SP_START_ID */
-
- ret = nouveau_resource_alloc(screen->text_heap, size, prog, &prog->res);
+ /* On Fermi, SP_START_ID must be aligned to 0x40.
+ * On Kepler, the first instruction must be aligned to 0x80 because
+ * latency information is expected only at certain positions.
+ */
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ size = size + 0x70;
+ size = align(size, 0x40);
+
+ ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("out of code space\n");
return FALSE;
}
- prog->code_base = prog->res->start;
- prog->immd_base = align(prog->res->start + prog->immd_base, 0x100);
+ prog->code_base = prog->mem->start;
+ prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100);
assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
- prog->res->start + prog->res->size));
+ prog->mem->start + prog->mem->size));
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ switch (prog->mem->start & 0xff) {
+ case 0x40: prog->code_base += 0x70; break;
+ case 0x80: prog->code_base += 0x30; break;
+ case 0xc0: prog->code_base += 0x70; break;
+ default:
+ prog->code_base += 0x30;
+ assert((prog->mem->start & 0xff) == 0x00);
+ break;
+ }
+ }
code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
if (prog->relocs)
nvc0_program_dump(prog);
#endif
- nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base,
- NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
- nvc0_m2mf_push_linear(&nvc0->base, screen->text,
- prog->code_base + NVC0_SHADER_HEADER_SIZE,
- NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0->base.push_data(&nvc0->base, screen->text,
+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
if (prog->immd_size)
- nvc0_m2mf_push_linear(&nvc0->base,
- screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
- prog->immd_size, prog->immd_data);
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ prog->immd_size, prog->immd_data);
- BEGIN_RING(screen->base.channel, RING_3D(MEM_BARRIER), 1);
- OUT_RING (screen->base.channel, 0x1111);
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0x1011);
return TRUE;
}
if (!size)
return;
- ret = nouveau_resource_alloc(screen->text_heap, align(size, 0x100), NULL,
- &screen->lib_code);
+ ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL,
+ &screen->lib_code);
if (ret)
return;
- nvc0_m2mf_push_linear(&nvc0->base,
- screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
- size, code);
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ size, code);
/* no need for a memory barrier, will be emitted with first program */
}
const struct pipe_shader_state pipe = prog->pipe;
const ubyte type = prog->type;
- if (prog->res)
- nouveau_resource_free(&prog->res);
+ if (prog->mem)
+ nouveau_heap_free(&prog->mem);
- if (prog->code)
- FREE(prog->code);
- if (prog->immd_data)
- FREE(prog->immd_data);
- if (prog->relocs)
- FREE(prog->relocs);
+ FREE(prog->code);
+ FREE(prog->immd_data);
+ FREE(prog->relocs);
if (prog->tfb) {
if (nvc0->state.tfb == prog->tfb)
nvc0->state.tfb = NULL;