switch (sn) {
case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case TGSI_SEMANTIC_LAYER: return 0x064;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
case TGSI_SEMANTIC_VERTEXID: return 0x2fc;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
case TGSI_SEMANTIC_FACE: return 0x3fc;
- case NV50_SEMANTIC_INVOCATIONID: return ~0;
default:
assert(!"invalid TGSI input semantic");
return ~0;
case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
case TGSI_SEMANTIC_PRIMID: return 0x060;
case TGSI_SEMANTIC_LAYER: return 0x064;
- case NV50_SEMANTIC_VIEWPORTINDEX: return 0x068;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
{
unsigned count = info->prop.fp.numColourResults * 4;
- unsigned i, c, ci;
+ unsigned i, c;
- for (i = 0, ci = 0; i < info->numOutputs; ++i) {
- if (info->out[i].sn == TGSI_SEMANTIC_COLOR) {
+ for (i = 0; i < info->numOutputs; ++i)
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
for (c = 0; c < 4; ++c)
- info->out[i].slot[c] = ci * 4 + c;
- ci++;
- }
- }
-
- assert(ci == info->prop.fp.numColourResults);
+ info->out[i].slot[c] = info->out[i].si * 4 + c;
if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
info->out[info->io.sampleMask].slot[0] = count++;
break;
}
- gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+ gp->hdr[4] = MIN2(info->prop.gp.maxVertices, 1024);
return nvc0_vtgp_gen_header(gp, info);
}
info->out[pso->output[i].register_index].slot[s + c];
tfb->varying_count[b] = MAX2(tfb->varying_count[b], p);
+ tfb->stream[b] = pso->output[i].stream;
}
for (b = 0; b < 4; ++b) // zero unused indices (looks nicer)
for (c = tfb->varying_count[b]; c & 3; ++c)
if (prog->type != PIPE_SHADER_COMPUTE) {
for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
- debug_printf("HDR[%02lx] = 0x%08x\n",
+ debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
}
debug_printf("shader binary code (0x%x bytes):", prog->code_size);
}
#endif
-boolean
+bool
nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
- return FALSE;
+ return false;
info->type = prog->type;
info->target = chipset;
if (info->bin.tlsSpace) {
assert(info->bin.tlsSpace < (1 << 24));
prog->hdr[0] |= 1 << 26;
- prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
- prog->need_tls = TRUE;
+ prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
+ prog->need_tls = true;
}
/* TODO: factor 2 only needed where joinat/precont is used,
* and we only have to count non-uniform branches
*/
/*
if ((info->maxCFDepth * 2) > 16) {
prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
- prog->need_tls = TRUE;
+ prog->need_tls = true;
}
*/
if (info->io.globalAccess)
prog->hdr[0] |= 1 << 16;
+ if (info->io.fp64)
+ prog->hdr[0] |= 1 << 27;
if (prog->pipe.stream_output.num_outputs)
prog->tfb = nvc0_program_create_tfb_state(info,
return !ret;
}
-boolean
+bool
nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
- const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
+ const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
uint32_t lib_pos = screen->lib_code->start;
ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
if (ret) {
struct nouveau_heap *heap = screen->text_heap;
- struct nouveau_heap *iter;
- for (iter = heap; iter && iter->next != heap; iter = iter->next) {
- struct nvc0_program *evict = iter->priv;
- if (evict)
- nouveau_heap_free(&evict->mem);
+ /* Note that the code library, which is allocated before anything else,
+ * does not have a priv pointer. We can stop once we hit it.
+ */
+ while (heap->next && heap->next->priv) {
+ struct nvc0_program *evict = heap->next->priv;
+ nouveau_heap_free(&evict->mem);
}
debug_printf("WARNING: out of code space, evicting all shaders.\n");
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
- return FALSE;
+ return false;
}
IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
}
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
#ifdef DEBUG
- if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ if (debug_get_bool_option("NV50_PROG_DEBUG", false))
nvc0_program_dump(prog);
#endif
if (!is_cp)
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
- NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ NV_VRAM_DOMAIN(&screen->base), NVC0_SHADER_HEADER_SIZE, prog->hdr);
nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
- NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ NV_VRAM_DOMAIN(&screen->base), prog->code_size, prog->code);
if (prog->immd_size)
nvc0->base.push_data(&nvc0->base,
- screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ screen->text, prog->immd_base, NV_VRAM_DOMAIN(&screen->base),
prog->immd_size, prog->immd_data);
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
PUSH_DATA (nvc0->base.pushbuf, 0x1011);
- return TRUE;
+ return true;
}
/* Upload code for builtin functions like integer division emulation. */
return;
nvc0->base.push_data(&nvc0->base,
- screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ screen->text, screen->lib_code->start, NV_VRAM_DOMAIN(&screen->base),
size, code);
/* no need for a memory barrier, will be emitted with first program */
}
if (prog->mem)
nouveau_heap_free(&prog->mem);
- if (prog->code)
- FREE(prog->code); /* may be 0 for hardcoded shaders */
+ FREE(prog->code); /* may be 0 for hardcoded shaders */
FREE(prog->immd_data);
FREE(prog->relocs);
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)