#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
#include "nv40_context.h"
#include "nv40_state.h"
/* TODO (at least...):
* 1. Indexed consts + ARL
- * 2. Arb. swz/negation
* 3. NV_vp11, NV_vp2, NV_vp3 features
* - extra arith opcodes
* - branching
#define neg(s) nv40_sr_neg((s))
#define abs(s) nv40_sr_abs((s))
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
struct nv40_vpc {
struct nv40_vertex_program *vp;
struct nv40_vertex_program_exec *vpi;
- unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
-
- int high_temp;
- int temp_temp_count;
+ unsigned r_temps;
+ unsigned r_temps_discard;
+ struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+ struct nv40_sreg *r_address;
+ struct nv40_sreg *r_temp;
struct nv40_sreg *imm;
unsigned nr_imm;
+
+ unsigned hpos_idx;
};
static struct nv40_sreg
temp(struct nv40_vpc *vpc)
{
- int idx;
+ int idx = ffs(~vpc->r_temps) - 1;
- idx = vpc->temp_temp_count++;
- idx += vpc->high_temp + 1;
+ if (idx < 0) {
+ NOUVEAU_ERR("out of temps!!\n");
+ assert(0);
+ return nv40_sr(NV40SR_TEMP, 0);
+ }
+
+ vpc->r_temps |= (1 << idx);
+ vpc->r_temps_discard |= (1 << idx);
return nv40_sr(NV40SR_TEMP, idx);
}
+static INLINE void
+release_temps(struct nv40_vpc *vpc)
+{
+ vpc->r_temps &= ~vpc->r_temps_discard;
+ vpc->r_temps_discard = 0;
+}
+
static struct nv40_sreg
constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
{
case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ case NV40_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
default:
break;
}
src = vpc->imm[fsrc->SrcRegister.Index];
break;
case TGSI_FILE_TEMPORARY:
- if (vpc->high_temp < fsrc->SrcRegister.Index)
- vpc->high_temp = fsrc->SrcRegister.Index;
- src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index);
+ src = vpc->r_temp[fsrc->SrcRegister.Index];
break;
default:
NOUVEAU_ERR("bad src file\n");
switch (fdst->DstRegister.File) {
case TGSI_FILE_OUTPUT:
- dst = nv40_sr(NV40SR_OUTPUT,
- vpc->output_map[fdst->DstRegister.Index]);
-
+ dst = vpc->r_result[fdst->DstRegister.Index];
break;
case TGSI_FILE_TEMPORARY:
- dst = nv40_sr(NV40SR_TEMP, fdst->DstRegister.Index);
- if (vpc->high_temp < dst.index)
- vpc->high_temp = dst.index;
+ dst = vpc->r_temp[fdst->DstRegister.Index];
+ break;
+ case TGSI_FILE_ADDRESS:
+ dst = vpc->r_address[fdst->DstRegister.Index];
break;
default:
NOUVEAU_ERR("bad dst file\n");
return mask;
}
+static boolean
+src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
+ struct nv40_sreg *src)
+{
+ const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+ uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+ uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+ fsrc->SrcRegisterExtSwz.NegateY,
+ fsrc->SrcRegisterExtSwz.NegateZ,
+ fsrc->SrcRegisterExtSwz.NegateW };
+ uint c;
+
+ for (c = 0; c < 4; c++) {
+ switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ mask |= tgsi_mask(1 << c);
+ break;
+ case TGSI_EXTSWIZZLE_ZERO:
+ zero_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ case TGSI_EXTSWIZZLE_ONE:
+ one_mask |= tgsi_mask(1 << c);
+ tgsi.swz[c] = SWZ_X;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (!tgsi.negate && neg[c])
+ neg_mask |= tgsi_mask(1 << c);
+ }
+
+ if (mask == MASK_ALL && !neg_mask)
+ return TRUE;
+
+ *src = temp(vpc);
+
+ if (mask)
+ arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+
+ if (zero_mask)
+ arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
+
+ if (one_mask)
+ arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
+
+ if (neg_mask) {
+ struct nv40_sreg one = temp(vpc);
+ arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
+ arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
+ }
+
+ return FALSE;
+}
+
static boolean
nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
const struct tgsi_full_instruction *finst)
struct nv40_sreg src[3], dst, tmp;
struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
int mask;
- int ai = -1, ci = -1;
+ int ai = -1, ci = -1, ii = -1;
int i;
if (finst->Instruction.Opcode == TGSI_OPCODE_END)
return TRUE;
- vpc->temp_temp_count = 0;
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
const struct tgsi_full_src_register *fsrc;
fsrc = &finst->FullSrcRegisters[i];
+
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ if (!src_native_swz(vpc, fsrc, &src[i]))
+ continue;
+ break;
+ default:
+ break;
+ }
+
switch (fsrc->SrcRegister.File) {
case TGSI_FILE_INPUT:
if (ai == -1 || ai == fsrc->SrcRegister.Index) {
tgsi_src(vpc, fsrc), none, none);
}
break;
- /*XXX: index comparison is broken now that consts come from
- * two different register files.
- */
case TGSI_FILE_CONSTANT:
- case TGSI_FILE_IMMEDIATE:
- if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ if ((ci == -1 && ii == -1) ||
+ ci == fsrc->SrcRegister.Index) {
ci = fsrc->SrcRegister.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
tgsi_src(vpc, fsrc), none, none);
}
break;
+ case TGSI_FILE_IMMEDIATE:
+ if ((ci == -1 && ii == -1) ||
+ ii == fsrc->SrcRegister.Index) {
+ ii = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
case TGSI_FILE_TEMPORARY:
/* handled above */
break;
case TGSI_OPCODE_RET:
break;
case TGSI_OPCODE_RSQ:
- arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
break;
case TGSI_OPCODE_SGE:
arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
return FALSE;
}
+ release_temps(vpc);
return TRUE;
}
nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
const struct tgsi_full_declaration *fdec)
{
+ unsigned idx = fdec->DeclarationRange.First;
int hw;
switch (fdec->Semantic.SemanticName) {
case TGSI_SEMANTIC_POSITION:
hw = NV40_VP_INST_DEST_POS;
+ vpc->hpos_idx = idx;
break;
case TGSI_SEMANTIC_COLOR:
if (fdec->Semantic.SemanticIndex == 0) {
return FALSE;
}
- vpc->output_map[fdec->u.DeclarationRange.First] = hw;
+ vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
return TRUE;
}
nv40_vertprog_prepare(struct nv40_vpc *vpc)
{
struct tgsi_parse_context p;
- int nr_imm = 0;
+ int high_temp = -1, high_addr = -1, nr_imm = 0, i;
tgsi_parse_init(&p, vpc->vp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
nr_imm++;
break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *fdec;
+
+ fdec = &p.FullToken.FullDeclaration;
+ switch (fdec->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (fdec->DeclarationRange.Last > high_temp) {
+ high_temp =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+ case TGSI_FILE_ADDRESS:
+ if (fdec->DeclarationRange.Last > high_addr) {
+ high_addr =
+ fdec->DeclarationRange.Last;
+ }
+ break;
+#endif
+ case TGSI_FILE_OUTPUT:
+ if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ const struct tgsi_full_instruction *finst;
+ const struct tgsi_full_dst_register *fdst;
+
+ finst = &p.FullToken.FullInstruction;
+ fdst = &finst->FullDstRegisters[0];
+
+ if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
+ if (fdst->DstRegister.Index > high_addr)
+ high_addr = fdst->DstRegister.Index;
+ }
+
+ }
+ break;
+#endif
default:
break;
}
assert(vpc->imm);
}
+ if (++high_temp) {
+ vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_temp; i++)
+ vpc->r_temp[i] = temp(vpc);
+ }
+
+ if (++high_addr) {
+ vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+ for (i = 0; i < high_addr; i++)
+ vpc->r_address[i] = temp(vpc);
+ }
+
+ vpc->r_temps_discard = 0;
return TRUE;
}
{
struct tgsi_parse_context parse;
struct nv40_vpc *vpc = NULL;
+ struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+ int i;
vpc = CALLOC(1, sizeof(struct nv40_vpc));
if (!vpc)
return;
vpc->vp = vp;
- vpc->high_temp = -1;
if (!nv40_vertprog_prepare(vpc)) {
- free(vpc);
+ FREE(vpc);
return;
}
+ /* Redirect post-transform vertex position to a temp if user clip
+ * planes are enabled. We need to append code the the vtxprog
+ * to handle clip planes later.
+ */
+ if (vp->ucp.nr) {
+ vpc->r_result[vpc->hpos_idx] = temp(vpc);
+ vpc->r_temps_discard = 0;
+ }
+
tgsi_parse_init(&parse, vp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- {
- const struct tgsi_full_declaration *fdec;
- fdec = &parse.FullToken.FullDeclaration;
- switch (fdec->Declaration.File) {
- case TGSI_FILE_OUTPUT:
- if (!nv40_vertprog_parse_decl_output(vpc, fdec))
- goto out_err;
- break;
- default:
- break;
- }
- }
- break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
const struct tgsi_full_immediate *imm;
imm = &parse.FullToken.FullImmediate;
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
-// assert(imm->Immediate.Size == 4);
+ assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
imm->u.ImmediateFloat32[0].Float,
}
}
+ /* Write out HPOS if it was redirected to a temp earlier */
+ if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
+ struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+ NV40_VP_INST_DEST_POS);
+ struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+ arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+ }
+
+ /* Insert code to handle user clip planes */
+ for (i = 0; i < vp->ucp.nr; i++) {
+ struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+ NV40_VP_INST_DEST_CLIP(i));
+ struct nv40_sreg ceqn = constant(vpc, -1,
+ nv40->clip.ucp[i][0],
+ nv40->clip.ucp[i][1],
+ nv40->clip.ucp[i][2],
+ nv40->clip.ucp[i][3]);
+ struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+ unsigned mask;
+
+ switch (i) {
+ case 0: case 3: mask = MASK_Y; break;
+ case 1: case 4: mask = MASK_Z; break;
+ case 2: case 5: mask = MASK_W; break;
+ default:
+ NOUVEAU_ERR("invalid clip dist #%d\n", i);
+ goto out_err;
+ }
+
+ arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+ }
+
vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
vp->translated = TRUE;
out_err:
tgsi_parse_free(&parse);
- free(vpc);
+ if (vpc->r_temp)
+ FREE(vpc->r_temp);
+ if (vpc->r_address)
+ FREE(vpc->r_address);
+ if (vpc->imm)
+ FREE(vpc->imm);
+ FREE(vpc);
}
static boolean
nv40_vertprog_validate(struct nv40_context *nv40)
{
- struct nouveau_winsys *nvws = nv40->nvws;
struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
struct nv40_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
if (nv40->render_mode == HW) {
vp = nv40->vertprog;
constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+
+ if ((nv40->dirty & NV40_NEW_UCP) ||
+ memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) {
+ nv40_vertprog_destroy(nv40, vp);
+ memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp));
+ }
} else {
vp = nv40->swtnl.vertprog;
constbuf = NULL;
struct nouveau_stateobj *so;
uint vplen = vp->nr_insns;
- if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+ if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
while (heap->next && heap->size < vplen) {
struct nv40_vertex_program *evict;
evict = heap->next->priv;
- nvws->res_free(&evict->exec);
+ nouveau_resource_free(&evict->exec);
}
- if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+ if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
assert(0);
}
- so = so_new(5, 0);
- so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1);
+ so = so_new(7, 0);
+ so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
- so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2);
+ so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
so_data (so, vp->ir);
so_data (so, vp->or);
+ so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1);
+ so_data (so, vp->clip_ctrl);
so_ref(so, &vp->so);
+ so_ref(NULL, &so);
upload_code = TRUE;
}
if (vp->nr_consts && !vp->data) {
struct nouveau_resource *heap = nv40->screen->vp_data_heap;
- if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+ if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
while (heap->next && heap->size < vp->nr_consts) {
struct nv40_vertex_program *evict;
evict = heap->next->priv;
- nvws->res_free(&evict->data);
+ nouveau_resource_free(&evict->data);
}
- if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+ if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
assert(0);
}
void
nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
{
- if (vp->nr_consts)
- free(vp->consts);
- if (vp->nr_insns)
- free(vp->insns);
+ vp->translated = FALSE;
+
+ if (vp->nr_insns) {
+ FREE(vp->insns);
+ vp->insns = NULL;
+ vp->nr_insns = 0;
+ }
+
+ if (vp->nr_consts) {
+ FREE(vp->consts);
+ vp->consts = NULL;
+ vp->nr_consts = 0;
+ }
+
+ nouveau_resource_free(&vp->exec);
+ vp->exec_start = 0;
+ nouveau_resource_free(&vp->data);
+ vp->data_start = 0;
+ vp->data_start_min = 0;
+
+ vp->ir = vp->or = vp->clip_ctrl = 0;
+ so_ref(NULL, &vp->so);
}
struct nv40_state_entry nv40_state_vertprog = {
.validate = nv40_vertprog_validate,
.dirty = {
- .pipe = NV40_NEW_VERTPROG,
+ .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP,
.hw = NV40_STATE_VERTPROG,
}
};