#include "codegen/nv50_ir_driver.h"
-static INLINE unsigned
+static inline unsigned
bitcount4(const uint32_t val)
{
static const uint8_t cnt[16]
for (c = 0; c < 4; ++c)
if (info->in[i].mask & (1 << c))
info->in[i].slot[c] = n++;
+
+ if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
}
prog->in_nr = info->numInputs;
continue;
case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
- prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
continue;
default:
break;
case TGSI_SEMANTIC_BCOLOR:
prog->vp.bfc[info->out[i].si] = i;
break;
+ case TGSI_SEMANTIC_LAYER:
+ prog->gp.has_layer = true;
+ prog->gp.layerid = n;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ prog->gp.has_viewport = true;
+ prog->gp.viewportid = n;
+ break;
default:
break;
}
}
prog->out_nr = info->numOutputs;
prog->max_out = n;
+ if (!prog->max_out)
+ prog->max_out = 1;
if (prog->vp.psiz < info->numOutputs)
prog->vp.psiz = prog->out[prog->vp.psiz].hw;
for (m = 0, i = 0; i < info->numInputs; ++i) {
switch (info->in[i].sn) {
case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_FACE:
continue;
default:
m += info->in[i].flat ? 0 : 1;
for (c = 0; c < 4; ++c)
if (info->in[i].mask & (1 << c))
info->in[i].slot[c] = nintp++;
- } else
- if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
- info->in[i].slot[0] = 255;
} else {
unsigned j = info->in[i].flat ? m++ : n++;
if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
prog->vp.bfc[info->in[i].si] = j;
+ else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
prog->in[j].id = i;
prog->in[j].mask = info->in[i].mask;
prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
}
- if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
info->out[info->io.sampleMask].slot[0] = prog->max_out++;
+ prog->fp.has_samplemask = 1;
+ }
if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
info->out[info->io.fragDepth].slot[2] = prog->max_out++;
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_FRAGMENT:
return nv50_fragprog_assign_slots(info);
+ case PIPE_SHADER_COMPUTE:
+ return 0;
default:
return -1;
}
const unsigned r = pso->output[i].register_index;
b = pso->output[i].output_buffer;
+ if (r >= info->numOutputs)
+ continue;
+
for (c = 0; c < pso->output[i].num_components; ++c)
so->map[base[b] + p + c] = info->out[r].slot[s + c];
}
return so;
}
-boolean
-nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
+bool /* true when ret == 0 (see "return !ret" at the end) */
+nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
+ struct pipe_debug_callback *debug) /* debug: passed to pipe_debug_message() below */
{
struct nv50_ir_prog_info *info;
- int ret;
+ int i, ret; /* i: index for the cull-distance clip_mode loop */
const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
- return FALSE;
+ return false;
info->type = prog->type;
info->target = chipset;
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
- info->io.ucpCBSlot = 15;
- info->io.ucpBase = 0;
+ info->io.auxCBSlot = 15; /* replaces io.ucpCBSlot; the slot number (15) is unchanged */
+ info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; /* was a literal 0 before this change */
info->io.genUserClip = prog->vp.clpd_nr;
+ if (prog->fp.alphatest) /* alphaRefBase is assigned only when alphatest is nonzero */
+ info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;
+
+ info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
+ info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
+ info->io.msInfoCBSlot = 15; /* same slot number as auxCBSlot above */
+ info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
info->assignSlots = nv50_program_assign_varying_slots;
prog->vp.clpd[0] = map_undef;
prog->vp.clpd[1] = map_undef;
prog->vp.psiz = map_undef;
- prog->gp.primid = 0x80;
+ prog->gp.has_layer = 0; /* cleared here; set to true by the TGSI_SEMANTIC_LAYER output case */
+ prog->gp.has_viewport = 0; /* cleared here; set to true by the TGSI_SEMANTIC_VIEWPORT_INDEX output case */
+
+ if (prog->type == PIPE_SHADER_COMPUTE)
+ info->prop.cp.inputOffset = 0x10; /* NOTE(review): meaning of 0x10 is not visible here - confirm */
info->driverPriv = prog;
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
- FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->fixups = info->bin.relocData;
+ prog->interps = info->bin.fixupData; /* later handed to nv50_ir_apply_fixups() in nv50_program_upload_code */
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
+ prog->mul_zero_wins = info->io.mul_zero_wins;
+ prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
+ prog->vp.clip_enable = (1 << info->io.clipDistances) - 1; /* the low clipDistances bits set */
+ prog->vp.cull_enable =
+ ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; /* cullDistances bits, directly above the clip bits */
+ prog->vp.clip_mode = 0;
+ for (i = 0; i < info->io.cullDistances; ++i)
+ prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); /* one bit per cull distance, at a 4-bit stride */
if (prog->type == PIPE_SHADER_FRAGMENT) {
if (info->prop.fp.writesDepth) {
}
if (info->prop.fp.usesDiscard)
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
+ } else
+ if (prog->type == PIPE_SHADER_GEOMETRY) {
+ switch (info->prop.gp.outputPrim) {
+ case PIPE_PRIM_LINE_STRIP:
+ prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+ break;
+ case PIPE_PRIM_POINTS:
+ default:
+ assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS); /* any other value still falls through to POINTS; the assert flags it */
+ prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
+ break;
+ }
+ prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024); /* bounds passed to CLAMP are 1 and 1024 */
+ }
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ prog->cp.syms = info->bin.syms; /* compute keeps the symbol array on prog->cp instead of freeing it */
+ prog->cp.num_syms = info->bin.numSyms;
+ } else {
+ FREE(info->bin.syms); /* every other shader type frees the symbols here */
}
if (prog->pipe.stream_output.num_outputs)
prog->so = nv50_program_create_strmout_state(info,
&prog->pipe.stream_output);
+ pipe_debug_message(debug, SHADER_INFO,
+ "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
+ prog->type, info->bin.tlsSpace, prog->max_gpr,
+ info->bin.instructions, info->bin.codeSize); /* the visible failure path (goto out) skips this message */
+
out:
FREE(info);
return !ret;
}
-boolean
+bool /* false on invalid program type, code-heap allocation failure, or nv50_tls_realloc() < 0 */
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
struct nouveau_heap *heap;
int ret;
uint32_t size = align(prog->code_size, 0x40);
+ uint8_t prog_type; /* shifted by NV50_CODE_BO_SIZE_LOG2 to form the upload offset below */
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
- case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
- case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
+ case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; /* the removed lines had the gp/fp heaps swapped */
+ case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+ case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; /* compute allocates from the fragment code heap */
default:
assert(!"invalid program type");
- return FALSE;
+ return false;
}
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
- return FALSE;
+ return false;
}
}
- prog->code_base = prog->mem->start;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ /* CP code must be uploaded in FP code segment. */
+ prog_type = 1; /* NOTE(review): 1 presumably equals PIPE_SHADER_FRAGMENT - confirm */
+ } else {
+ prog->code_base = prog->mem->start; /* NOTE(review): code_base is not assigned on the compute path - confirm it is set elsewhere */
+ prog_type = prog->type;
+ }
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
- if (ret < 0)
- return FALSE;
+ if (ret < 0) {
+ nouveau_heap_free(&prog->mem); /* release the code-heap allocation made above before failing */
+ return false;
+ }
if (ret > 0)
- nv50->state.new_tls_space = TRUE;
+ nv50->state.new_tls_space = true;
if (prog->fixups)
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
+ if (prog->interps) /* interps is filled from info->bin.fixupData in nv50_program_translate */
+ nv50_ir_apply_fixups(prog->interps, prog->code,
+ prog->fp.force_persample_interp,
+ false /* flatshade */,
+ prog->fp.alphatest - 1);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
- (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, /* prog_type, not prog->type, so compute uses the value chosen above */
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
- return TRUE;
+ return true;
}
void
FREE(p->code);
FREE(p->fixups);
-
+ FREE(p->interps);
FREE(p->so);
+ if (type == PIPE_SHADER_COMPUTE)
+ FREE(p->cp.syms);
+
memset(p, 0, sizeof(*p));
p->pipe = pipe;