much..
nouveau_screen.c \
nouveau_span.c \
nouveau_state.c \
+ nouveau_shader.c \
+ nouveau_shader_0_arb.c \
+ nouveau_shader_1.c \
+ nouveau_shader_2.c \
nouveau_tex.c \
nouveau_swtcl.c \
nv10_swtcl.c \
nv10_state.c \
nv20_state.c \
nv30_state.c \
- nouveau_state_cache.c
+ nouveau_state_cache.c \
+ nv20_vertprog.c \
+ nv30_fragprog.c \
+ nv30_vertprog.c \
+ nv40_fragprog.c \
+ nv40_vertprog.c
C_SOURCES = \
$(COMMON_SOURCES) \
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
+#include "tnl/t_vp_build.h"
#include "drivers/common/driverfuncs.h"
nmesa->current_primitive = -1;
+ nouveauShaderInitFuncs(ctx);
+ /* Install Mesa's fixed-function shader support */
+ if (nmesa->screen->card->type >= NV_40) {
+ ctx->_MaintainTnlProgram = GL_TRUE;
+ ctx->_MaintainTexEnvProgram = GL_TRUE;
+ }
+
/* Initialize the swrast */
_swrast_CreateContext( ctx );
_ac_CreateContext( ctx );
#include "nouveau_screen.h"
#include "nouveau_state_cache.h"
+#include "nouveau_shader.h"
#include "xmlconfig.h"
GLenum current_primitive; /* the current primitive enum */
DECLARE_RENDERINPUTS(render_inputs_bitset); /* the current render inputs */
+ /* Shader state */
+ nvsFunc VPfunc;
+ nvsFunc FPfunc;
+ nouveauShader *current_fragprog;
+ nouveauShader *current_vertprog;
+
nouveauScreenRec *screen;
drm_nouveau_sarea_t *sarea;
#include "nouveau_ctrlreg.h"
//#define NOUVEAU_RING_DEBUG
+//#define NOUVEAU_STATE_CACHE_DISABLE
#define NV_READ(reg) *(volatile u_int32_t *)(nmesa->mmio + (reg))
}while(0)
#define OUT_RING(n) do { \
- printf("OUT_RINGn: 0x%08x\n", n); \
+ printf("OUT_RINGn: 0x%08x (%s)\n", n, __func__); \
}while(0)
#define OUT_RINGf(n) do { \
- printf("OUT_RINGf: 0x%08x\n", n); \
+ printf("OUT_RINGf: %.04f (%s)\n", n, __func__); \
}while(0)
#else
#endif
+#define BEGIN_RING_SIZE(subchannel,tag,size) do { \
+ nouveau_state_cache_flush(nmesa); \
+ if (nmesa->fifo.free <= (size)) \
+ WAIT_RING(nmesa,(size)); \
+ OUT_RING( ((size)<<18) | ((subchannel) << 13) | (tag)); \
+ nmesa->fifo.free -= ((size) + 1); \
+}while(0)
+
extern void WAIT_RING(nouveauContextPtr nmesa,u_int32_t size);
extern void nouveau_state_cache_flush(nouveauContextPtr nmesa);
extern void nouveau_state_cache_init(nouveauContextPtr nmesa);
+#ifdef NOUVEAU_STATE_CACHE_DISABLE
+#define BEGIN_RING_CACHE(subc,tag,size) BEGIN_RING_SIZE((subc), (tag), (size))
+#define OUT_RING_CACHE(n) OUT_RING((n))
+#define OUT_RING_CACHEf(n) OUT_RINGf((n))
+#define OUT_RING_CACHEp(ptr, sz) OUT_RINGp((ptr), (sz))
+#else
#define BEGIN_RING_CACHE(subchannel,tag,size) do { \
nmesa->state_cache.dirty=1; \
nmesa->state_cache.current_pos=((tag)/4); \
uint32_t* p=(uint32_t*)(ptr); \
int i; for(i=0;i<sz;i++) OUT_RING_CACHE(*(p+i)); \
}while(0)
-
-#define BEGIN_RING_SIZE(subchannel,tag,size) do { \
- nouveau_state_cache_flush(nmesa); \
- if (nmesa->fifo.free <= (size)) \
- WAIT_RING(nmesa,(size)); \
- OUT_RING( ((size)<<18) | ((subchannel) << 13) | (tag)); \
- nmesa->fifo.free -= ((size) + 1); \
-}while(0)
+#endif
#define RING_AVAILABLE() (nmesa->fifo.free-1)
nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0);
nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D);
/* We need to know vram size.. */
-#if 0
+#if 0
nouveauCreateDmaObject( nmesa, NvDmaFB,
0, (256*1024*1024),
0 /*NV_DMA_TARGET_FB*/, 0 /*NV_DMA_ACCESS_RW*/);
--- /dev/null
+/*
+ * Copyright (C) 2006 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "extensions.h"
+
+#include "program.h"
+#include "tnl/tnl.h"
+
+#include "nouveau_context.h"
+#include "nouveau_shader.h"
+
+/*****************************************************************************
+ * Mesa entry points
+ */
+static void
+nouveauBindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
+{
+}
+
+static struct gl_program *
+nouveauNewProgram(GLcontext *ctx, GLenum target, GLuint id)
+{
+ nouveauShader *nvs;
+
+ nvs = CALLOC_STRUCT(_nouveauShader);
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ return _mesa_init_vertex_program(ctx, &nvs->mesa.vp, target, id);
+ case GL_FRAGMENT_PROGRAM_ARB:
+ return _mesa_init_fragment_program(ctx, &nvs->mesa.fp, target, id);
+ default:
+ _mesa_problem(ctx, "Unsupported shader target");
+ break;
+ }
+
+ FREE(nvs);
+ return NULL;
+}
+
+static void
+nouveauDeleteProgram(GLcontext *ctx, struct gl_program *prog)
+{
+ nouveauShader *nvs = (nouveauShader *)prog;
+
+ if (nvs->translated)
+ FREE(nvs->program);
+ _mesa_delete_program(ctx, prog);
+}
+
+static void
+nouveauProgramStringNotify(GLcontext *ctx, GLenum target,
+ struct gl_program *prog)
+{
+ nouveauShader *nvs = (nouveauShader *)prog;
+
+ if (nvs->translated)
+ FREE(nvs->program);
+ nvs->translated = 0;
+
+ _tnl_program_string(ctx, target, prog);
+}
+
+static GLboolean
+nouveauIsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+ nouveauShader *nvs = (nouveauShader *)prog;
+
+ return nvs->translated;
+}
+
+GLboolean
+nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+ struct gl_program_parameter_list *plist;
+ int i;
+
+ /* Translate to HW format now if necessary */
+ if (!nvs->translated) {
+ /* Mesa ASM shader -> nouveauShader */
+ if (!nouveau_shader_pass0_arb(ctx, nvs))
+ return GL_FALSE;
+ /* Basic dead code elimination + register usage info */
+ if (!nouveau_shader_pass1(nvs))
+ return GL_FALSE;
+ /* nouveauShader -> HW bytecode, HW register alloc */
+ if (!nouveau_shader_pass2(nvs))
+ return GL_FALSE;
+ assert(nvs->translated);
+ assert(nvs->program);
+ }
+
+ /* Update state parameters */
+ plist = nvs->mesa.vp.Base.Parameters;
+ _mesa_load_state_parameters(ctx, plist);
+ for (i=0; i<plist->NumParameters; i++) {
+ if (!nvs->on_hardware) {
+ /* if we've been kicked off the hardware there's no guarantee our
+ * consts are still there.. reupload them all
+ */
+ nvs->func->UpdateConst(ctx, nvs, i);
+ } else if (plist->Parameters[i].Type == PROGRAM_STATE_VAR) {
+ /* update any changed state parameters */
+ if (!TEST_EQ_4V(nvs->params[i].val, nvs->params[i].source_val))
+ nvs->func->UpdateConst(ctx, nvs, i);
+ }
+ }
+
+ /* Upload program to hardware, this must come after state param update
+ * as >=NV30 fragprogs inline consts into the bytecode.
+ */
+ if (!nvs->on_hardware) {
+ nouveauShader **current;
+
+ if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB)
+ current = &nmesa->current_vertprog;
+ else
+ current = &nmesa->current_fragprog;
+ if (*current) (*current)->on_hardware = 0;
+
+ nvs->func->UploadToHW(ctx, nvs);
+ nvs->on_hardware = 1;
+
+ *current = nvs;
+ }
+
+ return GL_TRUE;
+}
+
+void
+nouveauShaderInitFuncs(GLcontext * ctx)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+
+ switch (nmesa->screen->card->type) {
+ case NV_20:
+ NV20VPInitShaderFuncs(&nmesa->VPfunc);
+ break;
+ case NV_30:
+ NV30VPInitShaderFuncs(&nmesa->VPfunc);
+ NV30FPInitShaderFuncs(&nmesa->FPfunc);
+ break;
+ case NV_40:
+ case G_70:
+ NV40VPInitShaderFuncs(&nmesa->VPfunc);
+ NV40FPInitShaderFuncs(&nmesa->FPfunc);
+ break;
+ default:
+ return;
+ }
+
+ _mesa_enable_extension(ctx, "GL_ARB_vertex_program");
+ ctx->Const.VertexProgram.MaxNativeInstructions = nmesa->VPfunc.MaxInst;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = nmesa->VPfunc.MaxInst;
+ ctx->Const.VertexProgram.MaxNativeTexInstructions = nmesa->VPfunc.MaxInst;
+ ctx->Const.VertexProgram.MaxNativeTexIndirections =
+ ctx->Const.VertexProgram.MaxNativeTexInstructions;
+ ctx->Const.VertexProgram.MaxNativeAttribs = nmesa->VPfunc.MaxAttrib;
+ ctx->Const.VertexProgram.MaxNativeTemps = nmesa->VPfunc.MaxTemp;
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = nmesa->VPfunc.MaxAddress;
+ ctx->Const.VertexProgram.MaxNativeParameters = nmesa->VPfunc.MaxConst;
+
+ if (nmesa->screen->card->type >= NV_30) {
+ _mesa_enable_extension(ctx, "GL_ARB_fragment_program");
+
+ ctx->Const.FragmentProgram.MaxNativeInstructions = nmesa->FPfunc.MaxInst;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = nmesa->FPfunc.MaxInst;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = nmesa->FPfunc.MaxInst;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections =
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = nmesa->FPfunc.MaxAttrib;
+ ctx->Const.FragmentProgram.MaxNativeTemps = nmesa->FPfunc.MaxTemp;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = nmesa->FPfunc.MaxAddress;
+ ctx->Const.FragmentProgram.MaxNativeParameters = nmesa->FPfunc.MaxConst;
+ }
+
+ ctx->Driver.NewProgram = nouveauNewProgram;
+ ctx->Driver.BindProgram = nouveauBindProgram;
+ ctx->Driver.DeleteProgram = nouveauDeleteProgram;
+ ctx->Driver.ProgramStringNotify = nouveauProgramStringNotify;
+ ctx->Driver.IsProgramNative = nouveauIsProgramNative;
+}
+
+
+/*****************************************************************************
+ * Disassembly support structs
+ */
+#define CHECK_RANGE(idx, arr) ((idx)<sizeof(_##arr)/sizeof(const char *)) \
+ ? _##arr[(idx)] : #arr"_OOB"
+
+#define NODS (1<<0)
+#define BRANCH_TR (1<<1)
+#define BRANCH_EL (1<<2)
+#define BRANCH_EN (1<<3)
+#define BRANCH_RE (1<<4)
+#define BRANCH_ALL (BRANCH_TR|BRANCH_EL|BRANCH_EN)
+#define COUNT_INC (1<<4)
+#define COUNT_IND (1<<5)
+#define COUNT_NUM (1<<6)
+#define COUNT_ALL (COUNT_INC|COUNT_IND|COUNT_NUM)
+#define TI_UNIT (1<<7)
+struct _opcode_info
+{
+ const char *name;
+ int numsrc;
+ int flags;
+};
+
+static struct _opcode_info ops[] = {
+ [NVS_OP_ABS] = {"ABS", 1, 0},
+ [NVS_OP_ADD] = {"ADD", 2, 0},
+ [NVS_OP_ARA] = {"ARA", 1, 0},
+ [NVS_OP_ARL] = {"ARL", 1, 0},
+ [NVS_OP_ARR] = {"ARR", 1, 0},
+ [NVS_OP_BRA] = {"BRA", 0, NODS | BRANCH_TR},
+ [NVS_OP_BRK] = {"BRK", 0, NODS},
+ [NVS_OP_CAL] = {"CAL", 0, NODS | BRANCH_TR},
+ [NVS_OP_CMP] = {"CMP", 2, 0},
+ [NVS_OP_COS] = {"COS", 1, 0},
+ [NVS_OP_DIV] = {"DIV", 2, 0},
+ [NVS_OP_DDX] = {"DDX", 1, 0},
+ [NVS_OP_DDY] = {"DDY", 1, 0},
+ [NVS_OP_DP2] = {"DP2", 2, 0},
+ [NVS_OP_DP2A] = {"DP2A", 3, 0},
+ [NVS_OP_DP3] = {"DP3", 2, 0},
+ [NVS_OP_DP4] = {"DP4", 2, 0},
+ [NVS_OP_DPH] = {"DPH", 2, 0},
+ [NVS_OP_DST] = {"DST", 2, 0},
+ [NVS_OP_EX2] = {"EX2", 1, 0},
+ [NVS_OP_EXP] = {"EXP", 1, 0},
+ [NVS_OP_FLR] = {"FLR", 1, 0},
+ [NVS_OP_FRC] = {"FRC", 1, 0},
+ [NVS_OP_IF] = {"IF", 0, NODS | BRANCH_EL | BRANCH_EN},
+ [NVS_OP_KIL] = {"KIL", 1, 0},
+ [NVS_OP_LG2] = {"LG2", 1, 0},
+ [NVS_OP_LIT] = {"LIT", 1, 0},
+ [NVS_OP_LOG] = {"LOG", 1, 0},
+ [NVS_OP_LOOP] = {"LOOP", 0, NODS | COUNT_ALL | BRANCH_EN},
+ [NVS_OP_LRP] = {"LRP", 3, 0},
+ [NVS_OP_MAD] = {"MAD", 3, 0},
+ [NVS_OP_MAX] = {"MAX", 2, 0},
+ [NVS_OP_MIN] = {"MIN", 2, 0},
+ [NVS_OP_MOV] = {"MOV", 1, 0},
+ [NVS_OP_MUL] = {"MUL", 2, 0},
+ [NVS_OP_NRM] = {"NRM", 1, 0},
+ [NVS_OP_PK2H] = {"PK2H", 1, 0},
+ [NVS_OP_PK2US] = {"PK2US", 1, 0},
+ [NVS_OP_PK4B] = {"PK4B", 1, 0},
+ [NVS_OP_PK4UB] = {"PK4UB", 1, 0},
+ [NVS_OP_POW] = {"POW", 2, 0},
+ [NVS_OP_POPA] = {"POPA", 0, 0},
+ [NVS_OP_PUSHA] = {"PUSHA", 1, NODS},
+ [NVS_OP_RCC] = {"RCC", 1, 0},
+ [NVS_OP_RCP] = {"RCP", 1, 0},
+ [NVS_OP_REP] = {"REP", 0, NODS | BRANCH_EN | COUNT_NUM},
+ [NVS_OP_RET] = {"RET", 0, NODS},
+ [NVS_OP_RFL] = {"RFL", 1, 0},
+ [NVS_OP_RSQ] = {"RSQ", 1, 0},
+ [NVS_OP_SCS] = {"SCS", 1, 0},
+ [NVS_OP_SEQ] = {"SEQ", 2, 0},
+ [NVS_OP_SFL] = {"SFL", 2, 0},
+ [NVS_OP_SGE] = {"SGE", 2, 0},
+ [NVS_OP_SGT] = {"SGT", 2, 0},
+ [NVS_OP_SIN] = {"SIN", 1, 0},
+ [NVS_OP_SLE] = {"SLE", 2, 0},
+ [NVS_OP_SLT] = {"SLT", 2, 0},
+ [NVS_OP_SNE] = {"SNE", 2, 0},
+ [NVS_OP_SSG] = {"SSG", 1, 0},
+ [NVS_OP_STR] = {"STR", 2, 0},
+ [NVS_OP_SUB] = {"SUB", 2, 0},
+ [NVS_OP_TEX] = {"TEX", 1, TI_UNIT},
+ [NVS_OP_TXB] = {"TXB", 1, TI_UNIT},
+ [NVS_OP_TXD] = {"TXD", 3, TI_UNIT},
+ [NVS_OP_TXL] = {"TXL", 1, TI_UNIT},
+ [NVS_OP_TXP] = {"TXP", 1, TI_UNIT},
+ [NVS_OP_UP2H] = {"UP2H", 1, 0},
+ [NVS_OP_UP2US] = {"UP2US", 1, 0},
+ [NVS_OP_UP4B] = {"UP4B", 1, 0},
+ [NVS_OP_UP4UB] = {"UP4UB", 1, 0},
+ [NVS_OP_X2D] = {"X2D", 3, 0},
+ [NVS_OP_XPD] = {"XPD", 2, 0},
+ [NVS_OP_NOP] = {"NOP", 0, NODS},
+};
+
+static struct _opcode_info *
+_get_op_info(int op)
+{
+ if (op >= (sizeof(ops) / sizeof(struct _opcode_info)))
+ return NULL;
+ if (ops[op].name == NULL)
+ return NULL;
+ return &ops[op];
+}
+
+static const char *_SFR_STRING[] = {
+ [NVS_FR_POSITION] = "position",
+ [NVS_FR_WEIGHT] = "weight",
+ [NVS_FR_NORMAL] = "normal",
+ [NVS_FR_COL0] = "color",
+ [NVS_FR_COL1] = "color.secondary",
+ [NVS_FR_BFC0] = "bfc",
+ [NVS_FR_BFC1] = "bfc.secondary",
+ [NVS_FR_FOGCOORD] = "fogcoord",
+ [NVS_FR_POINTSZ] = "pointsize",
+ [NVS_FR_TEXCOORD0] = "texcoord[0]",
+ [NVS_FR_TEXCOORD1] = "texcoord[1]",
+ [NVS_FR_TEXCOORD2] = "texcoord[2]",
+ [NVS_FR_TEXCOORD3] = "texcoord[3]",
+ [NVS_FR_TEXCOORD4] = "texcoord[4]",
+ [NVS_FR_TEXCOORD5] = "texcoord[5]",
+ [NVS_FR_TEXCOORD6] = "texcoord[6]",
+ [NVS_FR_TEXCOORD7] = "texcoord[7]",
+ [NVS_FR_FRAGDATA0] = "data[0]",
+ [NVS_FR_FRAGDATA1] = "data[1]",
+ [NVS_FR_FRAGDATA2] = "data[2]",
+ [NVS_FR_FRAGDATA3] = "data[3]",
+ [NVS_FR_CLIP0] = "clip_plane[0]",
+ [NVS_FR_CLIP1] = "clip_plane[1]",
+ [NVS_FR_CLIP2] = "clip_plane[2]",
+ [NVS_FR_CLIP3] = "clip_plane[3]",
+ [NVS_FR_CLIP4] = "clip_plane[4]",
+ [NVS_FR_CLIP5] = "clip_plane[5]",
+ [NVS_FR_CLIP6] = "clip_plane[6]",
+ [NVS_FR_FACING] = "facing",
+};
+
+#define SFR_STRING(idx) CHECK_RANGE((idx), SFR_STRING)
+
+static const char *_SWZ_STRING[] = {
+ [NVS_SWZ_X] = "x",
+ [NVS_SWZ_Y] = "y",
+ [NVS_SWZ_Z] = "z",
+ [NVS_SWZ_W] = "w"
+};
+
+#define SWZ_STRING(idx) CHECK_RANGE((idx), SWZ_STRING)
+
+static const char *_NVS_PREC_STRING[] = {
+ [NVS_PREC_FLOAT32] = "R",
+ [NVS_PREC_FLOAT16] = "H",
+ [NVS_PREC_FIXED12] = "X",
+ [NVS_PREC_UNKNOWN] = "?"
+};
+
+#define NVS_PREC_STRING(idx) CHECK_RANGE((idx), NVS_PREC_STRING)
+
+static const char *_NVS_COND_STRING[] = {
+ [NVS_COND_FL] = "FL",
+ [NVS_COND_LT] = "LT",
+ [NVS_COND_EQ] = "EQ",
+ [NVS_COND_LE] = "LE",
+ [NVS_COND_GT] = "GT",
+ [NVS_COND_NE] = "NE",
+ [NVS_COND_GE] = "GE",
+ [NVS_COND_TR] = "TR",
+ [NVS_COND_UNKNOWN] = "??"
+};
+
+#define NVS_COND_STRING(idx) CHECK_RANGE((idx), NVS_COND_STRING)
+
+/*****************************************************************************
+ * ShaderFragment dumping
+ */
+static void
+nvsDumpIndent(int lvl)
+{
+ while (lvl--)
+ printf(" ");
+}
+
+static void
+nvsDumpSwizzle(nvsSwzComp *swz)
+{
+ printf(".%s%s%s%s",
+ SWZ_STRING(swz[0]),
+ SWZ_STRING(swz[1]), SWZ_STRING(swz[2]), SWZ_STRING(swz[3])
+ );
+}
+
+static void
+nvsDumpReg(nvsInstruction * inst, nvsRegister * reg)
+{
+ if (reg->negate)
+ printf("-");
+ if (reg->abs)
+ printf("abs(");
+
+ switch (reg->file) {
+ case NVS_FILE_TEMP:
+ printf("R%d", reg->index);
+ nvsDumpSwizzle(reg->swizzle);
+ break;
+ case NVS_FILE_ATTRIB:
+ printf("attrib.%s", SFR_STRING(reg->index));
+ nvsDumpSwizzle(reg->swizzle);
+ break;
+ case NVS_FILE_ADDRESS:
+ printf("A%d", reg->index);
+ break;
+ case NVS_FILE_CONST:
+ if (reg->indexed)
+ printf("const[A%d.%s + %d]",
+ reg->addr_reg, SWZ_STRING(reg->addr_comp), reg->index);
+ else
+ printf("const[%d]", reg->index);
+ nvsDumpSwizzle(reg->swizzle);
+ break;
+ default:
+ printf("UNKNOWN_FILE");
+ break;
+ }
+
+ if (reg->abs)
+ printf(")");
+}
+
+void
+nvsDumpInstruction(nvsInstruction * inst, int slot, int lvl)
+{
+ struct _opcode_info *opr = &ops[inst->op];
+ int i;
+
+ nvsDumpIndent(lvl);
+ printf("%s ", opr->name);
+
+ if (!opr->flags & NODS) {
+ switch (inst->dest.file) {
+ case NVS_FILE_RESULT:
+ printf("result.%s", SFR_STRING(inst->dest.index));
+ break;
+ case NVS_FILE_TEMP:
+ printf("R%d", inst->dest.index);
+ break;
+ case NVS_FILE_ADDRESS:
+ printf("A%d", inst->dest.index);
+ break;
+ default:
+ printf("UNKNOWN_DST_FILE");
+ break;
+ }
+
+ if (inst->mask != SMASK_ALL) {
+ printf(".");
+ if (inst->mask & SMASK_X)
+ printf("x");
+ if (inst->mask & SMASK_Y)
+ printf("y");
+ if (inst->mask & SMASK_Z)
+ printf("z");
+ if (inst->mask & SMASK_W)
+ printf("w");
+ }
+
+ if (opr->numsrc)
+ printf(", ");
+ }
+
+ for (i = 0; i < opr->numsrc; i++) {
+ nvsDumpReg(inst, &inst->src[i]);
+ if (i != opr->numsrc - 1)
+ printf(", ");
+ }
+ if (opr->flags & TI_UNIT)
+ printf(", texture[%d]", inst->tex_unit);
+
+ printf("\n");
+}
+
+void
+nvsDumpFragmentList(nvsFragmentList *f, int lvl)
+{
+ while (f) {
+ switch (f->fragment->type) {
+ case NVS_INSTRUCTION:
+ nvsDumpInstruction((nvsInstruction*)f->fragment, 0, lvl);
+ break;
+ default:
+ fprintf(stderr, "%s: Only NVS_INSTRUCTION fragments can be in"
+ "nvsFragmentList!\n", __func__);
+ return;
+ }
+ f = f->next;
+ }
+}
+
+/*****************************************************************************
+ * HW shader disassembly
+ */
+static void
+nvsDisasmHWShaderOp(nvsFunc * shader, int merged)
+{
+ struct _opcode_info *opi;
+ nvsOpcode op;
+ nvsRegFile file;
+ nvsSwzComp swz[4];
+ int i;
+
+ op = shader->GetOpcode(shader, merged);
+ opi = _get_op_info(op);
+ if (!opi) {
+ printf("NO OPINFO!");
+ return;
+ }
+
+ printf("%s", opi->name);
+ if (shader->GetPrecision &&
+ (!(opi->flags & BRANCH_ALL)) && (!(opi->flags * NODS)) &&
+ (op != NVS_OP_NOP))
+ printf("%s", NVS_PREC_STRING(shader->GetPrecision(shader)));
+ if (shader->SupportsConditional && shader->SupportsConditional(shader)) {
+ if (shader->GetConditionUpdate(shader)) {
+ printf("C%d", shader->GetCondRegID(shader));
+ }
+ }
+ if (shader->GetSaturate && shader->GetSaturate(shader))
+ printf("_SAT");
+
+ if (!(opi->flags & NODS)) {
+ int mask = shader->GetDestMask(shader, merged);
+
+ switch (shader->GetDestFile(shader, merged)) {
+ case NVS_FILE_ADDRESS:
+ printf(" A%d", shader->GetDestID(shader, merged));
+ break;
+ case NVS_FILE_TEMP:
+ printf(" R%d", shader->GetDestID(shader, merged));
+ break;
+ case NVS_FILE_RESULT:
+ printf(" result.%s", (SFR_STRING(shader->GetDestID(shader, merged))));
+ break;
+ default:
+ printf(" BAD_RESULT_FILE");
+ break;
+ }
+
+ if (mask != SMASK_ALL) {
+ printf(".");
+ if (mask & SMASK_X) printf("x");
+ if (mask & SMASK_Y) printf("y");
+ if (mask & SMASK_Z) printf("z");
+ if (mask & SMASK_W) printf("w");
+ }
+ }
+
+ if (shader->SupportsConditional && shader->SupportsConditional(shader) &&
+ shader->GetConditionTest(shader)) {
+ shader->GetCondRegSwizzle(shader, swz);
+
+ printf(" (%s%d.%s%s%s%s)",
+ NVS_COND_STRING(shader->GetCondition(shader)),
+ shader->GetCondRegID(shader),
+ SWZ_STRING(swz[NVS_SWZ_X]),
+ SWZ_STRING(swz[NVS_SWZ_Y]),
+ SWZ_STRING(swz[NVS_SWZ_Z]),
+ SWZ_STRING(swz[NVS_SWZ_W])
+ );
+ }
+
+ /* looping */
+ if (opi->flags & COUNT_ALL) {
+ printf(" { ");
+ if (opi->flags & COUNT_NUM) {
+ printf("%d", shader->GetLoopCount(shader));
+ }
+ if (opi->flags & COUNT_IND) {
+ printf(", %d", shader->GetLoopInitial(shader));
+ }
+ if (opi->flags & COUNT_INC) {
+ printf(", %d", shader->GetLoopIncrement(shader));
+ }
+ printf(" }");
+ }
+
+ /* branching */
+ if (opi->flags & BRANCH_TR)
+ printf(" %d", shader->GetBranch(shader));
+ if (opi->flags & BRANCH_EL)
+ printf(" ELSE %d", shader->GetBranchElse(shader));
+ if (opi->flags & BRANCH_EN)
+ printf(" END %d", shader->GetBranchEnd(shader));
+
+ if (!(opi->flags & NODS) && opi->numsrc)
+ printf(",");
+ printf(" ");
+
+ for (i = 0; i < opi->numsrc; i++) {
+ if (shader->GetSourceAbs(shader, merged, i))
+ printf("abs(");
+ if (shader->GetSourceNegate(shader, merged, i))
+ printf("-");
+
+ file = shader->GetSourceFile(shader, merged, i);
+ switch (file) {
+ case NVS_FILE_TEMP:
+ printf("R%d", shader->GetSourceID(shader, merged, i));
+ break;
+ case NVS_FILE_CONST:
+ if (shader->GetSourceIndexed(shader, merged, i)) {
+ printf("c[A%d.%s + 0x%x]",
+ shader->GetRelAddressRegID(shader),
+ SWZ_STRING(shader->GetRelAddressSwizzle(shader)),
+ shader->GetSourceID(shader, merged, i)
+ );
+ } else {
+ float val[4];
+
+ if (shader->GetSourceConstVal) {
+ shader->GetSourceConstVal(shader, merged, i, val);
+ printf("{ %.02f, %.02f, %.02f, %.02f }",
+ val[0], val[1], val[2], val[3]);
+ } else {
+ printf("c[0x%x]", shader->GetSourceID(shader, merged, i));
+ }
+ }
+ break;
+ case NVS_FILE_ATTRIB:
+ if (shader->GetSourceIndexed(shader, merged, i)) {
+ printf("attrib[A%d.%s + %d]",
+ shader->GetRelAddressRegID(shader),
+ SWZ_STRING(shader->GetRelAddressSwizzle(shader)),
+ shader->GetSourceID(shader, merged, i)
+ );
+ }
+ else {
+ printf("attrib.%s",
+ SFR_STRING(shader->GetSourceID(shader, merged, i))
+ );
+ }
+ break;
+ case NVS_FILE_ADDRESS:
+ printf("A%d", shader->GetRelAddressRegID(shader));
+ break;
+ default:
+ printf("UNKNOWN_SRC_FILE");
+ break;
+ }
+
+ shader->GetSourceSwizzle(shader, merged, i, swz);
+ if (file != NVS_FILE_ADDRESS &&
+ (swz[NVS_SWZ_X] != NVS_SWZ_X || swz[NVS_SWZ_Y] != NVS_SWZ_Y ||
+ swz[NVS_SWZ_Z] != NVS_SWZ_Z || swz[NVS_SWZ_W] != NVS_SWZ_W)) {
+ printf(".%s%s%s%s", SWZ_STRING(swz[NVS_SWZ_X]),
+ SWZ_STRING(swz[NVS_SWZ_Y]),
+ SWZ_STRING(swz[NVS_SWZ_Z]),
+ SWZ_STRING(swz[NVS_SWZ_W]));
+ }
+
+ if (shader->GetSourceAbs(shader, merged, i))
+ printf(")");
+ if (shader->GetSourceScale) {
+ int scale = shader->GetSourceScale(shader, merged, i);
+ if (scale > 1)
+ printf("{scaled %dx}", scale);
+ }
+ if (i < (opi->numsrc - 1))
+ printf(", ");
+ }
+
+ if (shader->IsLastInst(shader))
+ printf(" + END");
+}
+
+void
+nvsDisasmHWShader(nvsPtr nvs)
+{
+ nvsFunc *shader = nvs->func;
+ unsigned int iaddr = 0;
+
+ if (!nvs->program) {
+ fprintf(stderr, "No HW program present");
+ return;
+ }
+
+ shader->inst = nvs->program;
+ while (1) {
+ if (shader->inst >= (nvs->program + nvs->program_size)) {
+ fprintf(stderr, "Reached end of program, but HW inst has no END");
+ break;
+ }
+
+ printf("\t0x%08x:\n", shader->inst[0]);
+ printf("\t0x%08x:\n", shader->inst[1]);
+ printf("\t0x%08x:\n", shader->inst[2]);
+ printf("\t0x%08x:", shader->inst[3]);
+
+ printf("\n\t\tINST %d.0: ", iaddr);
+ nvsDisasmHWShaderOp(shader, 0);
+ if (shader->HasMergedInst(shader)) {
+ printf("\n\t\tINST %d.1: ", iaddr);
+ nvsDisasmHWShaderOp(shader, 1);
+ }
+ printf("\n");
+
+ if (shader->IsLastInst(shader))
+ break;
+
+ shader->inst += shader->GetOffsetNext(shader);
+ iaddr++;
+ }
+
+ printf("\n");
+}
--- /dev/null
+#ifndef __SHADER_COMMON_H__
+#define __SHADER_COMMON_H__
+
+#include "mtypes.h"
+
+typedef struct _nvsFunc nvsFunc;
+
+#define NVS_MAX_TEMPS 32
+#define NVS_MAX_ATTRIBS 16
+#define NVS_MAX_CONSTS 256
+#define NVS_MAX_ADDRESS 2
+#define NVS_MAX_INSNS 4096
+
+typedef struct {
+ enum {
+ NVS_INSTRUCTION,
+ } type;
+ int position;
+} nvsFragmentHeader;
+
+typedef struct _nvs_fragment_list {
+ struct _nvs_fragment_list *prev;
+ struct _nvs_fragment_list *next;
+ nvsFragmentHeader *fragment;
+} nvsFragmentList;
+
+typedef struct _nouveauShader {
+ union {
+ struct gl_vertex_program vp;
+ struct gl_fragment_program fp;
+ } mesa;
+ GLcontext *ctx;
+ nvsFunc *func;
+
+ /* State of the final program */
+ GLboolean translated;
+ GLboolean on_hardware;
+ unsigned int *program;
+ unsigned int program_size;
+ unsigned int program_alloc_size;
+ unsigned int program_start_id;
+ unsigned int program_current;
+ unsigned int inputs_read;
+ unsigned int outputs_written;
+ int inst_count;
+
+ struct {
+ GLfloat *source_val; /* NULL if invariant */
+ float val[4];
+ int hw_index; /* hw-specific value */
+ } params[NVS_MAX_CONSTS];
+
+ struct {
+ int last_use;
+ } temps[NVS_MAX_TEMPS];
+
+ /* Pass-private data */
+ void *pass_rec;
+
+ nvsFragmentList *list_head;
+ nvsFragmentList *list_tail;
+} nouveauShader, *nvsPtr;
+
+typedef enum {
+ NVS_FILE_NONE,
+ NVS_FILE_TEMP,
+ NVS_FILE_ATTRIB,
+ NVS_FILE_CONST,
+ NVS_FILE_RESULT,
+ NVS_FILE_ADDRESS,
+ NVS_FILE_UNKNOWN
+} nvsRegFile;
+
+typedef enum {
+ NVS_OP_UNKNOWN = 0,
+ NVS_OP_NOP,
+ NVS_OP_ABS, NVS_OP_ADD, NVS_OP_ARA, NVS_OP_ARL, NVS_OP_ARR,
+ NVS_OP_BRA, NVS_OP_BRK,
+ NVS_OP_CAL, NVS_OP_CMP, NVS_OP_COS,
+ NVS_OP_DDX, NVS_OP_DDY, NVS_OP_DIV, NVS_OP_DP2, NVS_OP_DP2A, NVS_OP_DP3,
+ NVS_OP_DP4, NVS_OP_DPH, NVS_OP_DST,
+ NVS_OP_EX2, NVS_OP_EXP,
+ NVS_OP_FLR, NVS_OP_FRC,
+ NVS_OP_IF,
+ NVS_OP_KIL,
+ NVS_OP_LG2, NVS_OP_LIT, NVS_OP_LOG, NVS_OP_LOOP, NVS_OP_LRP,
+ NVS_OP_MAD, NVS_OP_MAX, NVS_OP_MIN, NVS_OP_MOV, NVS_OP_MUL,
+ NVS_OP_NRM,
+ NVS_OP_PK2H, NVS_OP_PK2US, NVS_OP_PK4B, NVS_OP_PK4UB, NVS_OP_POW,
+ NVS_OP_POPA, NVS_OP_PUSHA,
+ NVS_OP_RCC, NVS_OP_RCP, NVS_OP_REP, NVS_OP_RET, NVS_OP_RFL, NVS_OP_RSQ,
+ NVS_OP_SCS, NVS_OP_SEQ, NVS_OP_SFL, NVS_OP_SGE, NVS_OP_SGT, NVS_OP_SIN,
+ NVS_OP_SLE, NVS_OP_SLT, NVS_OP_SNE, NVS_OP_SSG, NVS_OP_STR, NVS_OP_SUB,
+ NVS_OP_SWZ,
+ NVS_OP_TEX, NVS_OP_TXB, NVS_OP_TXD, NVS_OP_TXL, NVS_OP_TXP,
+ NVS_OP_UP2H, NVS_OP_UP2US, NVS_OP_UP4B, NVS_OP_UP4UB,
+ NVS_OP_X2D, NVS_OP_XPD,
+ NVS_OP_EMUL
+} nvsOpcode;
+
+typedef enum {
+ NVS_PREC_FLOAT32,
+ NVS_PREC_FLOAT16,
+ NVS_PREC_FIXED12,
+ NVS_PREC_UNKNOWN
+} nvsPrecision;
+
+typedef enum {
+ NVS_SWZ_X = 0,
+ NVS_SWZ_Y = 1,
+ NVS_SWZ_Z = 2,
+ NVS_SWZ_W = 3
+} nvsSwzComp;
+
+typedef enum {
+ NVS_FR_POSITION,
+ NVS_FR_WEIGHT,
+ NVS_FR_NORMAL,
+ NVS_FR_COL0,
+ NVS_FR_COL1,
+ NVS_FR_BFC0,
+ NVS_FR_BFC1,
+ NVS_FR_FOGCOORD,
+ NVS_FR_POINTSZ,
+ NVS_FR_TEXCOORD0,
+ NVS_FR_TEXCOORD1,
+ NVS_FR_TEXCOORD2,
+ NVS_FR_TEXCOORD3,
+ NVS_FR_TEXCOORD4,
+ NVS_FR_TEXCOORD5,
+ NVS_FR_TEXCOORD6,
+ NVS_FR_TEXCOORD7,
+ NVS_FR_FRAGDATA0,
+ NVS_FR_FRAGDATA1,
+ NVS_FR_FRAGDATA2,
+ NVS_FR_FRAGDATA3,
+ NVS_FR_CLIP0,
+ NVS_FR_CLIP1,
+ NVS_FR_CLIP2,
+ NVS_FR_CLIP3,
+ NVS_FR_CLIP4,
+ NVS_FR_CLIP5,
+ NVS_FR_CLIP6,
+ NVS_FR_FACING,
+ NVS_FR_UNKNOWN
+} nvsFixedReg;
+
+typedef enum {
+ NVS_COND_FL, NVS_COND_LT, NVS_COND_EQ, NVS_COND_LE, NVS_COND_GT,
+ NVS_COND_NE, NVS_COND_GE, NVS_COND_TR, NVS_COND_UN,
+ NVS_COND_UNKNOWN
+} nvsCond;
+
+typedef struct {
+ nvsRegFile file;
+ unsigned int index;
+
+ unsigned int indexed;
+ unsigned int addr_reg;
+ nvsSwzComp addr_comp;
+
+ nvsSwzComp swizzle[4];
+ int negate;
+ int abs;
+} nvsRegister;
+
+static const nvsRegister nvr_unused = {
+ .file = NVS_FILE_ATTRIB,
+ .index = 0,
+ .indexed = 0,
+ .addr_reg = 0,
+ .addr_comp = NVS_SWZ_X,
+ .swizzle = {NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W},
+ .negate = 0,
+ .abs = 0,
+};
+
+typedef enum {
+ NVS_TEX_TARGET_1D,
+ NVS_TEX_TARGET_2D,
+ NVS_TEX_TARGET_3D,
+ NVS_TEX_TARGET_CUBE,
+ NVS_TEX_TARGET_RECT,
+ NVS_TEX_TARGET_UNKNOWN = 0
+} nvsTexTarget;
+
+typedef struct {
+ nvsFragmentHeader header;
+
+ nvsOpcode op;
+ unsigned int saturate;
+
+ nvsRegister dest;
+ unsigned int mask;
+
+ nvsRegister src[3];
+
+ unsigned int tex_unit;
+ nvsTexTarget tex_target;
+
+ nvsCond cond;
+ nvsSwzComp cond_swizzle[4];
+ int cond_reg;
+ int cond_test;
+ int cond_update;
+} nvsInstruction;
+
+#define SMASK_X (1<<0)
+#define SMASK_Y (1<<1)
+#define SMASK_Z (1<<2)
+#define SMASK_W (1<<3)
+#define SMASK_ALL (SMASK_X|SMASK_Y|SMASK_Z|SMASK_W)
+
+#define SPOS_ADDRESS 3
+struct _op_xlat {
+ unsigned int NV;
+ nvsOpcode SOP;
+ int srcpos[3];
+};
+#define MOD_OPCODE(t,hw,sop,s0,s1,s2) do { \
+ t[hw].NV = hw; \
+ t[hw].SOP = sop; \
+ t[hw].srcpos[0] = s0; \
+ t[hw].srcpos[1] = s1; \
+ t[hw].srcpos[2] = s2; \
+} while(0)
+
+extern unsigned int NVVP_TX_VOP_COUNT;
+extern unsigned int NVVP_TX_NVS_OP_COUNT;
+extern struct _op_xlat NVVP_TX_VOP[];
+extern struct _op_xlat NVVP_TX_SOP[];
+
+extern unsigned int NVFP_TX_AOP_COUNT;
+extern unsigned int NVFP_TX_BOP_COUNT;
+extern struct _op_xlat NVFP_TX_AOP[];
+extern struct _op_xlat NVFP_TX_BOP[];
+
+#define SCAP_SRC_ABS (1<<0)
+
+struct _nvsFunc {
+ unsigned int MaxInst;
+ unsigned int MaxAttrib;
+ unsigned int MaxTemp;
+ unsigned int MaxAddress;
+ unsigned int MaxConst;
+ unsigned int caps;
+
+ unsigned int *inst;
+ void (*UploadToHW) (GLcontext *, nouveauShader *);
+ void (*UpdateConst) (GLcontext *, nouveauShader *, int);
+
+ struct _op_xlat*(*GetOPTXRec) (nvsFunc *, int merged);
+ struct _op_xlat*(*GetOPTXFromSOP) (nvsOpcode, int *id);
+
+ int (*SupportsOpcode) (nvsFunc *, nvsOpcode);
+ void (*SetOpcode) (nvsFunc *, unsigned int opcode,
+ int slot);
+ void (*SetCCUpdate) (nvsFunc *);
+ void (*SetCondition) (nvsFunc *, int on, nvsCond, int reg,
+ nvsSwzComp *swizzle);
+ void (*SetResult) (nvsFunc *, nvsRegister *,
+ unsigned int mask, int slot);
+ void (*SetSource) (nvsFunc *, nvsRegister *, int pos);
+ void (*SetUnusedSource) (nvsFunc *, int pos);
+ void (*SetTexImageUnit) (nvsFunc *, int unit);
+ void (*SetSaturate) (nvsFunc *);
+ void (*SetLastInst) (nvsFunc *);
+
+ int (*HasMergedInst) (nvsFunc *);
+ int (*IsLastInst) (nvsFunc *);
+ int (*GetOffsetNext) (nvsFunc *);
+
+ int (*GetOpcodeSlot) (nvsFunc *, int merged);
+ unsigned int (*GetOpcodeHW) (nvsFunc *, int slot);
+ nvsOpcode (*GetOpcode) (nvsFunc *, int merged);
+
+ nvsPrecision (*GetPrecision) (nvsFunc *);
+ int (*GetSaturate) (nvsFunc *);
+
+ nvsRegFile (*GetDestFile) (nvsFunc *, int merged);
+ unsigned int (*GetDestID) (nvsFunc *, int merged);
+ unsigned int (*GetDestMask) (nvsFunc *, int merged);
+
+ unsigned int (*GetSourceHW) (nvsFunc *, int merged, int pos);
+ nvsRegFile (*GetSourceFile) (nvsFunc *, int merged, int pos);
+ int (*GetSourceID) (nvsFunc *, int merged, int pos);
+ int (*GetTexImageUnit) (nvsFunc *);
+ int (*GetSourceNegate) (nvsFunc *, int merged, int pos);
+ int (*GetSourceAbs) (nvsFunc *, int merged, int pos);
+ void (*GetSourceSwizzle) (nvsFunc *, int merged, int pos,
+ nvsSwzComp *swz);
+ int (*GetSourceIndexed) (nvsFunc *, int merged, int pos);
+ void (*GetSourceConstVal) (nvsFunc *, int merged, int pos,
+ float *val);
+ int (*GetSourceScale) (nvsFunc *, int merged, int pos);
+
+ int (*GetRelAddressRegID) (nvsFunc *);
+ nvsSwzComp (*GetRelAddressSwizzle) (nvsFunc *);
+
+ int (*SupportsConditional) (nvsFunc *);
+ int (*GetConditionUpdate) (nvsFunc *);
+ int (*GetConditionTest) (nvsFunc *);
+ nvsCond (*GetCondition) (nvsFunc *);
+ void (*GetCondRegSwizzle) (nvsFunc *, nvsSwzComp *swz);
+ int (*GetCondRegID) (nvsFunc *);
+ int (*GetBranch) (nvsFunc *);
+ int (*GetBranchElse) (nvsFunc *);
+ int (*GetBranchEnd) (nvsFunc *);
+
+ int (*GetLoopCount) (nvsFunc *);
+ int (*GetLoopInitial) (nvsFunc *);
+ int (*GetLoopIncrement) (nvsFunc *);
+};
+
+static inline nvsRegister
+nvsNegate(nvsRegister reg)
+{
+ reg.negate = !reg.negate;
+ return reg;
+}
+
+static inline nvsRegister
+nvsAbs(nvsRegister reg)
+{
+ reg.abs = 1;
+ return reg;
+}
+
+static inline nvsRegister
+nvsSwizzle(nvsRegister reg, nvsSwzComp x, nvsSwzComp y,
+ nvsSwzComp z, nvsSwzComp w)
+{
+ nvsSwzComp sc[4] = { x, y, z, w };
+ nvsSwzComp oc[4];
+ int i;
+
+ for (i=0;i<4;i++)
+ oc[i] = reg.swizzle[i];
+ for (i=0;i<4;i++)
+ reg.swizzle[i] = oc[sc[i]];
+ return reg;
+}
+
+extern GLboolean nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs);
+extern void nvsDisasmHWShader(nvsPtr);
+
+extern void NV20VPInitShaderFuncs(nvsFunc *);
+extern void NV30VPInitShaderFuncs(nvsFunc *);
+extern void NV40VPInitShaderFuncs(nvsFunc *);
+
+extern void NV30FPInitShaderFuncs(nvsFunc *);
+extern void NV40FPInitShaderFuncs(nvsFunc *);
+
+extern void nouveauShaderInitFuncs(GLcontext *ctx);
+
+extern GLboolean nouveau_shader_pass0_arb(GLcontext *ctx, nouveauShader *nvs);
+extern GLboolean nouveau_shader_pass0_slang(GLcontext *ctx, nouveauShader *nvs);
+extern GLboolean nouveau_shader_pass1(nvsPtr nvs);
+extern GLboolean nouveau_shader_pass2(nvsPtr nvs);
+
+#endif
+
--- /dev/null
+/*
+ * Copyright (C) 2006 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "program.h"
+#include "programopt.h"
+#include "program_instruction.h"
+
+#include "nouveau_context.h"
+#include "nouveau_shader.h"
+
+static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = {
+ NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
+ NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
+ NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7,
+ NVS_FR_POINTSZ, NVS_FR_BFC0, NVS_FR_BFC1, NVS_FR_UNKNOWN /* EDGE */
+};
+
+static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = {
+ NVS_FR_FRAGDATA0 /* COLR */, NVS_FR_FRAGDATA0 /* COLH */,
+ NVS_FR_UNKNOWN /* DEPR */
+};
+
+static nvsFixedReg _tx_mesa_vp_src_reg[VERT_ATTRIB_MAX] = {
+ NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1,
+ NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN,
+ NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
+ NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7,
+/* Generic attribs 0-15, aliased to the above */
+ NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1,
+ NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN,
+ NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
+ NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7
+};
+
+static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = {
+ NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
+ NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
+ NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7
+};
+
+static nvsSwzComp _tx_mesa_swizzle[4] = {
+ NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W
+};
+
+static nvsOpcode _tx_mesa_opcode[] = {
+ [OPCODE_ABS] = NVS_OP_ABS, [OPCODE_ADD] = NVS_OP_ADD,
+ [OPCODE_ARA] = NVS_OP_ARA, [OPCODE_ARL] = NVS_OP_ARL,
+ [OPCODE_ARL_NV] = NVS_OP_ARL, [OPCODE_ARR] = NVS_OP_ARR,
+ [OPCODE_CMP] = NVS_OP_CMP, [OPCODE_COS] = NVS_OP_COS,
+ [OPCODE_DDX] = NVS_OP_DDX, [OPCODE_DDY] = NVS_OP_DDY,
+ [OPCODE_DP3] = NVS_OP_DP3, [OPCODE_DP4] = NVS_OP_DP4,
+ [OPCODE_DPH] = NVS_OP_DPH, [OPCODE_DST] = NVS_OP_DST,
+ [OPCODE_EX2] = NVS_OP_EX2, [OPCODE_EXP] = NVS_OP_EXP,
+ [OPCODE_FLR] = NVS_OP_FLR, [OPCODE_FRC] = NVS_OP_FRC,
+ [OPCODE_KIL] = NVS_OP_EMUL, [OPCODE_KIL_NV] = NVS_OP_KIL,
+ [OPCODE_LG2] = NVS_OP_LG2, [OPCODE_LIT] = NVS_OP_LIT,
+ [OPCODE_LOG] = NVS_OP_LOG,
+ [OPCODE_LRP] = NVS_OP_LRP,
+ [OPCODE_MAD] = NVS_OP_MAD, [OPCODE_MAX] = NVS_OP_MAX,
+ [OPCODE_MIN] = NVS_OP_MIN, [OPCODE_MOV] = NVS_OP_MOV,
+ [OPCODE_MUL] = NVS_OP_MUL,
+ [OPCODE_PK2H] = NVS_OP_PK2H, [OPCODE_PK2US] = NVS_OP_PK2US,
+ [OPCODE_PK4B] = NVS_OP_PK4B, [OPCODE_PK4UB] = NVS_OP_PK4UB,
+ [OPCODE_POW] = NVS_OP_POW, [OPCODE_POPA] = NVS_OP_POPA,
+ [OPCODE_PUSHA] = NVS_OP_PUSHA,
+ [OPCODE_RCC] = NVS_OP_RCC, [OPCODE_RCP] = NVS_OP_RCP,
+ [OPCODE_RFL] = NVS_OP_RFL, [OPCODE_RSQ] = NVS_OP_RSQ,
+ [OPCODE_SCS] = NVS_OP_SCS, [OPCODE_SEQ] = NVS_OP_SEQ,
+ [OPCODE_SFL] = NVS_OP_SFL, [OPCODE_SGE] = NVS_OP_SGE,
+ [OPCODE_SGT] = NVS_OP_SGT, [OPCODE_SIN] = NVS_OP_SIN,
+ [OPCODE_SLE] = NVS_OP_SLE, [OPCODE_SLT] = NVS_OP_SLT,
+ [OPCODE_SNE] = NVS_OP_SNE, [OPCODE_SSG] = NVS_OP_SSG,
+ [OPCODE_STR] = NVS_OP_STR, [OPCODE_SUB] = NVS_OP_SUB,
+ [OPCODE_SWZ] = NVS_OP_MOV,
+ [OPCODE_TEX] = NVS_OP_TEX, [OPCODE_TXB] = NVS_OP_TXB,
+ [OPCODE_TXD] = NVS_OP_TXD,
+ [OPCODE_TXL] = NVS_OP_TXL, [OPCODE_TXP] = NVS_OP_TXP,
+ [OPCODE_TXP_NV] = NVS_OP_TXP,
+ [OPCODE_UP2H] = NVS_OP_UP2H, [OPCODE_UP2US] = NVS_OP_UP2US,
+ [OPCODE_UP4B] = NVS_OP_UP4B, [OPCODE_UP4UB] = NVS_OP_UP4UB,
+ [OPCODE_X2D] = NVS_OP_X2D,
+ [OPCODE_XPD] = NVS_OP_XPD
+};
+
+static nvsCond _tx_mesa_condmask[] = {
+ NVS_COND_UNKNOWN, NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE,
+ NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL
+};
+
+struct pass0_rec {
+ int nvs_ipos;
+ int next_temp;
+ int swzconst_done;
+ int swzconst_id;
+};
+
+#define X NVS_SWZ_X
+#define Y NVS_SWZ_Y
+#define Z NVS_SWZ_Z
+#define W NVS_SWZ_W
+
+static void
+pass0_append_fragment(nouveauShader *nvs, nvsFragmentHeader *fragment)
+{
+ nvsFragmentList *list = calloc(1, sizeof(nvsFragmentList));
+ if (!list)
+ return;
+
+ list->fragment = fragment;
+ list->prev = nvs->list_tail;
+ if ( nvs->list_tail)
+ nvs->list_tail->next = list;
+ if (!nvs->list_head)
+ nvs->list_head = list;
+ nvs->list_tail = list;
+
+ nvs->inst_count++;
+}
+
+static void
+pass0_make_reg(nouveauShader *nvs, nvsRegister *reg,
+ nvsRegFile file, unsigned int index)
+{
+ struct pass0_rec *rec = nvs->pass_rec;
+
+ /* defaults */
+ *reg = nvr_unused;
+ /* -1 == quick-and-dirty temp alloc */
+ if (file == NVS_FILE_TEMP && index == -1) {
+ index = rec->next_temp++;
+ assert(index < NVS_MAX_TEMPS);
+ }
+ reg->file = file;
+ reg->index = index;
+}
+
+static void
+pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa)
+{
+ int i;
+
+ for (i=0;i<4;i++)
+ swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)];
+}
+
+static nvsOpcode
+pass0_make_opcode(enum prog_opcode op)
+{
+ if (op > MAX_OPCODE)
+ return NVS_OP_UNKNOWN;
+ return _tx_mesa_opcode[op];
+}
+
+static nvsCond
+pass0_make_condmask(GLuint mesa)
+{
+ if (mesa > COND_FL)
+ return NVS_COND_UNKNOWN;
+ return _tx_mesa_condmask[mesa];
+}
+
+static unsigned int
+pass0_make_mask(GLuint mesa_mask)
+{
+ unsigned int mask = 0;
+
+ if (mesa_mask & WRITEMASK_X) mask |= SMASK_X;
+ if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y;
+ if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z;
+ if (mesa_mask & WRITEMASK_W) mask |= SMASK_W;
+
+ return mask;
+}
+
+static nvsTexTarget
+pass0_make_tex_target(GLuint mesa)
+{
+ switch (mesa) {
+ case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D;
+ case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D;
+ case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D;
+ case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE;
+ case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT;
+ default:
+ return NVS_TEX_TARGET_UNKNOWN;
+ }
+}
+
+static void
+pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg,
+ struct prog_dst_register *dst)
+{
+ struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp;
+ nvsFixedReg sfr;
+
+ switch (dst->File) {
+ case PROGRAM_OUTPUT:
+ if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
+ sfr = (dst->Index < VERT_RESULT_MAX) ?
+ _tx_mesa_vp_dst_reg[dst->Index] : NVS_FR_UNKNOWN;
+ } else {
+ sfr = (dst->Index < FRAG_RESULT_MAX) ?
+ _tx_mesa_fp_dst_reg[dst->Index] : NVS_FR_UNKNOWN;
+ }
+ pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr);
+ break;
+ case PROGRAM_TEMPORARY:
+ pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index);
+ break;
+ case PROGRAM_ADDRESS:
+ pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index);
+ break;
+ default:
+ fprintf(stderr, "Unknown dest file %d\n", dst->File);
+ assert(0);
+ }
+}
+
+static void
+pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src)
+{
+ struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base;
+ struct gl_program_parameter_list *p = mesa->Parameters;
+
+ *reg = nvr_unused;
+
+ switch (src->File) {
+ case PROGRAM_INPUT:
+ reg->file = NVS_FILE_ATTRIB;
+ if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
+ reg->index = (src->Index < VERT_ATTRIB_MAX) ?
+ _tx_mesa_vp_src_reg[src->Index] : NVS_FR_UNKNOWN;
+ } else {
+ reg->index = (src->Index < FRAG_ATTRIB_MAX) ?
+ _tx_mesa_fp_src_reg[src->Index] : NVS_FR_UNKNOWN;
+ }
+ break;
+ /* All const types seem to get shoved into here, not really sure why */
+ case PROGRAM_STATE_VAR:
+ switch (p->Parameters[src->Index].Type) {
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ nvs->params[src->Index].source_val = NULL;
+ COPY_4V(nvs->params[src->Index].val, p->ParameterValues[src->Index]);
+ break;
+ case PROGRAM_STATE_VAR:
+ nvs->params[src->Index].source_val = p->ParameterValues[src->Index];
+ break;
+ default:
+ fprintf(stderr, "Unknown parameter type %d\n",
+ p->Parameters[src->Index].Type);
+ assert(0);
+ break;
+ }
+
+ if (src->RelAddr) {
+ reg->indexed = 1;
+ reg->addr_reg = 0;
+ reg->addr_comp = NVS_SWZ_X;
+ } else
+ reg->indexed = 0;
+ reg->file = NVS_FILE_CONST;
+ reg->index = src->Index;
+ break;
+ case PROGRAM_TEMPORARY:
+ reg->file = NVS_FILE_TEMP;
+ reg->index = src->Index;
+ break;
+ default:
+ fprintf(stderr, "Unknown source type %d\n", src->File);
+ assert(0);
+ }
+
+ /* per-component negate handled elsewhere */
+ reg->negate = src->NegateBase != 0;
+ reg->abs = src->Abs;
+ pass0_make_swizzle(reg->swizzle, src->Swizzle);
+}
+
+static nvsInstruction *
+pass0_emit(nouveauShader *nvs, nvsOpcode op, nvsRegister dst,
+ unsigned int mask, int saturate,
+ nvsRegister src0, nvsRegister src1, nvsRegister src2)
+{
+ struct pass0_rec *rec = nvs->pass_rec;
+ nvsInstruction *sif = NULL;
+
+ /* Seems mesa doesn't explicitly 0 this.. */
+ if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB)
+ saturate = 0;
+
+ sif = calloc(1, sizeof(nvsInstruction));
+ if (sif) {
+ sif->header.type = NVS_INSTRUCTION;
+ sif->header.position = rec->nvs_ipos++;
+ sif->op = op;
+ sif->saturate = saturate;
+ sif->dest = dst;
+ sif->mask = mask;
+ sif->src[0] = src0;
+ sif->src[1] = src1;
+ sif->src[2] = src2;
+ sif->cond = COND_TR;
+ sif->cond_reg = 0;
+ sif->cond_test = 0;
+ sif->cond_update = 0;
+ pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP);
+ pass0_append_fragment(nvs, (nvsFragmentHeader *)sif);
+ }
+
+ return sif;
+}
+
+static void
+pass0_fixup_swizzle(nvsPtr nvs,
+ struct prog_src_register *src,
+ unsigned int sm1,
+ unsigned int sm2)
+{
+ static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 };
+ struct pass0_rec *rec = nvs->pass_rec;
+ int fixup_1, fixup_2;
+ nvsRegister sr, dr = nvr_unused;
+ nvsRegister sm1const, sm2const;
+
+ if (!rec->swzconst_done) {
+ struct gl_program *prog = &nvs->mesa.vp.Base;
+ rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters, sc, 4);
+ rec->swzconst_done = 1;
+ COPY_4V(nvs->params[rec->swzconst_id].val, sc);
+ }
+
+ fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) && sm2 != MAKE_SWIZZLE4(2,2,2,2));
+ fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2));
+
+ if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) {
+ /* We can't use more than one const in an instruction, so move the const
+ * into a temp, and swizzle from there.
+ *TODO: should just emit the swizzled const, instead of swizzling it
+ * in the shader.. would need to reswizzle any state params when they
+ * change however..
+ */
+ pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
+ pass0_make_src_reg(nvs, &sr, src);
+ pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused);
+ pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index);
+ } else {
+ if (fixup_1)
+ src->NegateBase = 0;
+ pass0_make_src_reg(nvs, &sr, src);
+ pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
+ }
+
+ pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id);
+ pass0_make_swizzle(sm1const.swizzle, sm1);
+ if (fixup_1 && fixup_2) {
+ /* Any combination with SWIZZLE_ONE */
+ pass0_make_reg(nvs, &sm2const, NVS_FILE_CONST, rec->swzconst_id);
+ pass0_make_swizzle(sm2const.swizzle, sm2);
+ pass0_emit(nvs, NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const);
+ } else {
+ /* SWIZZLE_ZERO || arbitrary negate */
+ pass0_emit(nvs, NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused);
+ }
+
+ src->File = PROGRAM_TEMPORARY;
+ src->Index = dr.index;
+ src->Swizzle = SWIZZLE_NOOP;
+}
+
+#define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3))
+static void
+pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst)
+{
+ unsigned int insrc = -1, constsrc = -1;
+ int i;
+
+ for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) {
+ struct prog_src_register *src = &inst->SrcReg[i];
+ unsigned int sm_1 = 0, sm_2 = 0;
+ nvsRegister sr, dr;
+ int do_mov = 0, c;
+
+ /* Build up swizzle masks as if we were going to use
+ * "MAD new, src, const1, const2" to support arbitrary negation
+ * and SWIZZLE_ZERO/SWIZZLE_ONE.
+ */
+ for (c=0;c<4;c++) {
+ if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) {
+ SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */
+ SET_SWZ(sm_2, c, SWIZZLE_Y);
+ SET_SWZ(src->Swizzle, c, SWIZZLE_X);
+ } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) {
+ SET_SWZ(sm_1, c, SWIZZLE_Y);
+ if (src->NegateBase & (1<<c))
+ SET_SWZ(sm_2, c, SWIZZLE_Z); /* -1.0 */
+ else
+ SET_SWZ(sm_2, c, SWIZZLE_X); /* 1.0 */
+ SET_SWZ(src->Swizzle, c, SWIZZLE_X);
+ } else {
+ if (src->NegateBase & (1<<c))
+ SET_SWZ(sm_1, c, SWIZZLE_Z); /* -[xyzw] */
+ else
+ SET_SWZ(sm_1, c, SWIZZLE_X); /* [xyzw] */
+ SET_SWZ(sm_2, c, SWIZZLE_Y);
+ }
+ }
+ /* Unless we're multiplying by 1.0 or -1.0 on all components, and we're
+ * adding nothing to any component we have to emulate the swizzle.
+ */
+ if ((sm_1 != MAKE_SWIZZLE4(0,0,0,0) && sm_1 != MAKE_SWIZZLE4(2,2,2,2)) ||
+ sm_2 != MAKE_SWIZZLE4(1,1,1,1)) {
+ pass0_fixup_swizzle(nvs, src, sm_1, sm_2);
+ /* The source is definitely in a temp now, so don't bother checking
+ * for multiple ATTRIB/CONST regs.
+ */
+ continue;
+ }
+
+ /* HW can't use more than one ATTRIB or PARAM in a single instruction */
+ switch (src->File) {
+ case PROGRAM_INPUT:
+ if (insrc != -1 && insrc != src->Index)
+ do_mov = 1;
+ else insrc = src->Index;
+ break;
+ case PROGRAM_STATE_VAR:
+ if (constsrc != -1 && constsrc != src->Index)
+ do_mov = 1;
+ else constsrc = src->Index;
+ break;
+ default:
+ break;
+ }
+
+ /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa instruction
+ * to point at the temp.
+ */
+ if (do_mov) {
+ pass0_make_src_reg(nvs, &sr, src);
+ pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
+ pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0,
+ sr, nvr_unused, nvr_unused);
+
+ src->File = PROGRAM_TEMPORARY;
+ src->Index = dr.index;
+ src->Swizzle= SWIZZLE_NOOP;
+ }
+ }
+}
+
+static GLboolean
+pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst)
+{
+ nvsFunc *shader = nvs->func;
+ nvsRegister src[3], dest, temp;
+ nvsInstruction *nvsinst;
+ unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask);
+ int i, sat;
+
+ sat = (inst->SaturateMode == SATURATE_ZERO_ONE);
+
+ /* Build all the "real" regs for the instruction */
+ for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
+ pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
+ if (inst->Opcode != OPCODE_KIL)
+ pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
+
+ switch (inst->Opcode) {
+ case OPCODE_ABS:
+ if (shader->caps & SCAP_SRC_ABS)
+ pass0_emit(nvs, NVS_OP_MOV, dest, mask, sat,
+ nvsAbs(src[0]), nvr_unused, nvr_unused);
+ else
+ pass0_emit(nvs, NVS_OP_MAX, dest, mask, sat,
+ src[0], nvsNegate(src[0]), nvr_unused);
+ break;
+ case OPCODE_KIL:
+ /* This is only in ARB shaders, so we don't have to worry
+ * about clobbering a CC reg as they aren't supported anyway.
+ */
+ /* MOVC0 temp, src */
+ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
+ nvsinst = pass0_emit(nvs, NVS_OP_MOV, temp, SMASK_ALL, 0,
+ src[0], nvr_unused, nvr_unused);
+ nvsinst->cond_update = 1;
+ nvsinst->cond_reg = 0;
+ /* KIL_NV (LT0.xyzw) temp */
+ nvsinst = pass0_emit(nvs, NVS_OP_KIL, nvr_unused, 0, 0,
+ nvr_unused, nvr_unused, nvr_unused);
+ nvsinst->cond = COND_LT;
+ nvsinst->cond_reg = 0;
+ nvsinst->cond_test = 1;
+ pass0_make_swizzle(nvsinst->cond_swizzle, MAKE_SWIZZLE4(0,1,2,3));
+ break;
+ case OPCODE_LIT:
+ break;
+ case OPCODE_LRP:
+ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
+ pass0_emit(nvs, NVS_OP_MAD, temp, mask, 0,
+ nvsNegate(src[0]), src[2], src[2]);
+ pass0_emit(nvs, NVS_OP_MAD, dest, mask, sat,
+ src[0], src[1], temp);
+ break;
+ case OPCODE_POW:
+ if (shader->SupportsOpcode(shader, NVS_OP_LG2) &&
+ shader->SupportsOpcode(shader, NVS_OP_EX2)) {
+ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
+ /* LG2 temp.x, src0.c */
+ pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0,
+ nvsSwizzle(src[0], X, X, X, X),
+ nvr_unused,
+ nvr_unused);
+ /* MUL temp.x, temp.x, src1.c */
+ pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_X, 0,
+ nvsSwizzle(temp, X, X, X, X),
+ nvsSwizzle(src[1], X, X, X, X),
+ nvr_unused);
+ /* EX2 dest, temp.x */
+ pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat,
+ nvsSwizzle(temp, X, X, X, X),
+ nvr_unused,
+ nvr_unused);
+ } else {
+ /* can we use EXP/LOG instead of EX2/LG2?? */
+ fprintf(stderr, "Implement POW for NV20 vtxprog!\n");
+ return GL_FALSE;
+ }
+ break;
+ case OPCODE_RSQ:
+ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
+ pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0,
+ nvsAbs(nvsSwizzle(src[0], X, X, X, X)), nvr_unused, nvr_unused);
+ pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat,
+ nvsSwizzle(temp, X, X, X, X), nvr_unused, nvr_unused);
+ break;
+ case OPCODE_SCS:
+ if (mask & SMASK_X)
+ pass0_emit(nvs, NVS_OP_COS, dest, SMASK_X, sat,
+ nvsSwizzle(src[0], X, X, X, X),
+ nvr_unused,
+ nvr_unused);
+ if (mask & SMASK_Y)
+ pass0_emit(nvs, NVS_OP_SIN, dest, SMASK_Y, sat,
+ nvsSwizzle(src[0], X, X, X, X),
+ nvr_unused,
+ nvr_unused);
+ break;
+ case OPCODE_SUB:
+ pass0_emit(nvs, NVS_OP_ADD, dest, mask, sat,
+ src[0], nvsNegate(src[1]), nvr_unused);
+ break;
+ case OPCODE_XPD:
+ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
+ pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_ALL, 0,
+ nvsSwizzle(src[0], Z, X, Y, Y),
+ nvsSwizzle(src[1], Y, Z, X, X),
+ nvr_unused);
+ pass0_emit(nvs, NVS_OP_MAD, dest, (mask & ~SMASK_W), sat,
+ nvsSwizzle(src[0], Y, Z, X, X),
+ nvsSwizzle(src[1], Z, X, Y, Y),
+ nvsNegate(temp));
+ break;
+ default:
+ fprintf(stderr, "hw doesn't support opcode \"%s\", and no emulation found\n",
+ _mesa_opcode_string(inst->Opcode));
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+static GLboolean
+pass0_translate_instructions(nouveauShader *nvs)
+{
+ struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp;
+ struct pass0_rec *rec = nvs->pass_rec;
+ nvsFunc *shader = nvs->func;
+ int ipos;
+
+ for (ipos=0; ipos<prog->NumInstructions; ipos++) {
+ struct prog_instruction *inst = &prog->Instructions[ipos];
+
+ if (inst->Opcode == OPCODE_END)
+ break;
+
+ /* Deal with multiple ATTRIB/PARAM in a single instruction */
+ pass0_check_sources(nvs, inst);
+
+ /* Now it's safe to do the prog_instruction->nvsInstruction conversion */
+ if (shader->SupportsOpcode(shader, pass0_make_opcode(inst->Opcode))) {
+ nvsInstruction *nvsinst;
+ nvsRegister src[3], dest;
+ int i;
+
+ for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
+ pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
+ pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
+
+ nvsinst = pass0_emit(nvs,
+ pass0_make_opcode(inst->Opcode),
+ dest,
+ pass0_make_mask(inst->DstReg.WriteMask),
+ (inst->SaturateMode != SATURATE_OFF),
+ src[0], src[1], src[2]);
+ nvsinst->tex_unit = inst->TexSrcUnit;
+ nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget);
+ /* TODO when NV_fp/vp is implemented */
+ nvsinst->cond = COND_TR;
+ } else {
+ if (!pass0_emulate_instruction(nvs, inst))
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean
+nouveau_shader_pass0_arb(GLcontext *ctx, nouveauShader *nvs)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+ struct gl_program *prog = (struct gl_program*)nvs;
+ struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
+ struct pass0_rec *rec;
+ int ret;
+
+ switch (prog->Target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ nvs->func = &nmesa->VPfunc;
+ if (vp->IsPositionInvariant)
+ _mesa_insert_mvp_code(ctx, vp);
+#if 0
+ if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED)
+ pass0_insert_ff_clip_planes();
+#endif
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ nvs->func = &nmesa->FPfunc;
+ if (fp->FogOption != GL_NONE)
+ _mesa_append_fog_code(ctx, fp);
+ break;
+ default:
+ fprintf(stderr, "Unknown program type %d", prog->Target);
+ return GL_FALSE;
+ }
+
+ rec = calloc(1, sizeof(struct pass0_rec));
+ rec->next_temp = prog->NumTemporaries;
+ nvs->pass_rec = rec;
+
+ ret = pass0_translate_instructions(nvs);
+ if (!ret) {
+ /* DESTROY list */
+ }
+
+ free(nvs->pass_rec);
+ return ret;
+}
+
--- /dev/null
+/*
+ * Copyright (C) 2006 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "nouveau_shader.h"
+
+#define PASS1_OK 0
+#define PASS1_KILL 1
+#define PASS1_FAIL 2
+
+struct pass1_rec {
+ unsigned int temp[NVS_MAX_TEMPS];
+ unsigned int result[NVS_MAX_ATTRIBS];
+ unsigned int address[NVS_MAX_ADDRESS];
+ unsigned int cc[2];
+};
+
+static void
+pass1_remove_fragment(nvsPtr nvs, nvsFragmentList *item)
+{
+ if (item->prev) item->prev->next = item->next;
+ if (item->next) item->next->prev = item->prev;
+ if (nvs->list_head == item) nvs->list_head = item->next;
+ if (nvs->list_tail == item) nvs->list_tail = item->prev;
+
+ nvs->inst_count--;
+}
+
+static int
+pass1_result_needed(struct pass1_rec *rec, nvsInstruction *inst)
+{
+ if (inst->cond_update && rec->cc[inst->cond_reg])
+ return 1;
+ /* Only write components that are read later */
+ if (inst->dest.file == NVS_FILE_TEMP)
+ return (inst->mask & rec->temp[inst->dest.index]);
+ if (inst->dest.file == NVS_FILE_ADDRESS)
+ return (inst->mask & rec->address[inst->dest.index]);
+ /* No point writing result components that are written later */
+ if (inst->dest.file == NVS_FILE_RESULT)
+ return (inst->mask & ~rec->result[inst->dest.index]);
+ assert(0);
+}
+
+static void
+pass1_track_result(struct pass1_rec *rec, nvsInstruction *inst)
+{
+ if (inst->cond_test)
+ rec->cc[inst->cond_reg] = 1;
+ if (inst->dest.file == NVS_FILE_TEMP) {
+ inst->mask &= rec->temp[inst->dest.index];
+ } else if (inst->dest.file == NVS_FILE_RESULT) {
+ inst->mask &= ~rec->result[inst->dest.index];
+ rec->result[inst->dest.index] |= inst->mask;
+ } else if (inst->dest.file == NVS_FILE_ADDRESS) {
+ inst->mask &= rec->address[inst->dest.index];
+ }
+}
+
+static void
+pass1_track_source(nouveauShader *nvs, nvsInstruction *inst, int pos,
+ unsigned int read)
+{
+ struct pass1_rec *rec = nvs->pass_rec;
+ nvsRegister *src = &inst->src[pos];
+ unsigned int really_read = 0;
+ int i,sc;
+
+ /* Account for swizzling */
+ for (i=0; i<4; i++)
+ if (read & (1<<i)) really_read |= (1 << src->swizzle[i]);
+
+ /* Track register reads */
+ if (src->file == NVS_FILE_TEMP) {
+ if (nvs->temps[src->index].last_use == -1)
+ nvs->temps[src->index].last_use = inst->header.position;
+ rec->temp [src->index] |= really_read;
+ } else if (src->indexed) {
+ rec->address[src->addr_reg] |= (1<<src->addr_comp);
+ }
+
+ /* Modify swizzle to only access read components */
+ /* Find a component that is used.. */
+ for (sc=0;sc<4;sc++)
+ if (really_read & (1<<sc))
+ break;
+ /* Now set all unused components to that value */
+ for (i=0;i<4;i++)
+ if (!(read & (1<<i))) src->swizzle[i] = sc;
+}
+
+static int
+pass1_check_instruction(nouveauShader *nvs, nvsInstruction *inst)
+{
+ struct pass1_rec *rec = nvs->pass_rec;
+ unsigned int read0, read1, read2;
+
+ if (inst->op != NVS_OP_KIL) {
+ if (!pass1_result_needed(rec, inst))
+ return PASS1_KILL;
+ }
+ pass1_track_result(rec, inst);
+
+ read0 = read1 = read2 = 0;
+
+ switch (inst->op) {
+ case NVS_OP_FLR:
+ case NVS_OP_FRC:
+ case NVS_OP_MOV:
+ case NVS_OP_SSG:
+ case NVS_OP_ARL:
+ read0 = inst->mask;
+ break;
+ case NVS_OP_ADD:
+ case NVS_OP_MAX:
+ case NVS_OP_MIN:
+ case NVS_OP_MUL:
+ case NVS_OP_SEQ:
+ case NVS_OP_SFL:
+ case NVS_OP_SGE:
+ case NVS_OP_SGT:
+ case NVS_OP_SLE:
+ case NVS_OP_SLT:
+ case NVS_OP_SNE:
+ case NVS_OP_STR:
+ case NVS_OP_SUB:
+ read0 = inst->mask;
+ read1 = inst->mask;
+ break;
+ case NVS_OP_CMP:
+ case NVS_OP_LRP:
+ case NVS_OP_MAD:
+ read0 = inst->mask;
+ read1 = inst->mask;
+ read2 = inst->mask;
+ break;
+ case NVS_OP_XPD:
+ if (inst->mask & SMASK_X) read0 |= SMASK_Y|SMASK_Z;
+ if (inst->mask & SMASK_Y) read0 |= SMASK_X|SMASK_Z;
+ if (inst->mask & SMASK_Z) read0 |= SMASK_X|SMASK_Y;
+ read1 = read0;
+ break;
+ case NVS_OP_COS:
+ case NVS_OP_EX2:
+ case NVS_OP_EXP:
+ case NVS_OP_LG2:
+ case NVS_OP_LOG:
+ case NVS_OP_RCC:
+ case NVS_OP_RCP:
+ case NVS_OP_RSQ:
+ case NVS_OP_SCS:
+ case NVS_OP_SIN:
+ read0 = SMASK_X;
+ break;
+ case NVS_OP_POW:
+ read0 = SMASK_X;
+ read1 = SMASK_X;
+ break;
+ case NVS_OP_DIV:
+ read0 = inst->mask;
+ read1 = SMASK_X;
+ break;
+ case NVS_OP_DP2:
+ read0 = SMASK_X|SMASK_Y;
+ read1 = SMASK_X|SMASK_Y;
+ break;
+ case NVS_OP_DP3:
+ case NVS_OP_RFL:
+ read0 = SMASK_X|SMASK_Y|SMASK_Z;
+ read1 = SMASK_X|SMASK_Y|SMASK_Z;
+ break;
+ case NVS_OP_DP4:
+ read0 = SMASK_ALL;
+ read1 = SMASK_ALL;
+ break;
+ case NVS_OP_DPH:
+ read0 = SMASK_X|SMASK_Y|SMASK_Z;
+ read1 = SMASK_ALL;
+ break;
+ case NVS_OP_DST:
+ if (inst->mask & SMASK_Y) read0 = read1 = SMASK_Y;
+ if (inst->mask & SMASK_Z) read0 |= SMASK_Z;
+ if (inst->mask & SMASK_W) read1 |= SMASK_W;
+ break;
+ case NVS_OP_NRM:
+ read0 = SMASK_X|SMASK_Y|SMASK_Z;
+ break;
+ case NVS_OP_PK2H:
+ case NVS_OP_PK2US:
+ read0 = SMASK_X|SMASK_Y;
+ break;
+ case NVS_OP_DDX:
+ case NVS_OP_DDY:
+ case NVS_OP_UP2H:
+ case NVS_OP_UP2US:
+ case NVS_OP_PK4B:
+ case NVS_OP_PK4UB:
+ case NVS_OP_UP4B:
+ case NVS_OP_UP4UB:
+ read0 = SMASK_ALL;
+ break;
+ case NVS_OP_X2D:
+ read1 = SMASK_X|SMASK_Y;
+ if (inst->mask & (SMASK_X|SMASK_Z)) {
+ read0 |= SMASK_X;
+ read2 |= SMASK_X|SMASK_Y;
+ }
+ if (inst->mask & (SMASK_Y|SMASK_W)) {
+ read0 |= SMASK_Y;
+ read2 |= SMASK_Z|SMASK_W;
+ }
+ break;
+ case NVS_OP_LIT:
+ read0 |= SMASK_X|SMASK_Y|SMASK_W;
+ break;
+ case NVS_OP_TEX:
+ case NVS_OP_TXP:
+ case NVS_OP_TXL:
+ case NVS_OP_TXB:
+ read0 = SMASK_ALL;
+ break;
+ case NVS_OP_TXD:
+ read0 = SMASK_ALL;
+ read1 = SMASK_ALL;
+ read2 = SMASK_ALL;
+ break;
+ case NVS_OP_KIL:
+ break;
+ default:
+ fprintf(stderr, "Unknown sop=%d", inst->op);
+ return PASS1_FAIL;
+ }
+
+ /* Any values that are written by this inst can't have been read further up */
+ if (inst->dest.file == NVS_FILE_TEMP)
+ rec->temp[inst->dest.index] &= ~inst->mask;
+
+ if (read0) pass1_track_source(nvs, inst, 0, read0);
+ if (read1) pass1_track_source(nvs, inst, 1, read1);
+ if (read2) pass1_track_source(nvs, inst, 2, read2);
+
+ return PASS1_OK;
+}
+
+/* Some basic dead code elimination
+ * - Remove unused instructions
+ * - Don't write unused register components
+ * - Modify swizzles to not reference unneeded components.
+ */
+GLboolean
+nouveau_shader_pass1(nvsPtr nvs)
+{
+ nvsFragmentList *list = nvs->list_tail;
+ int i;
+
+ for (i=0; i<NVS_MAX_TEMPS; i++)
+ nvs->temps[i].last_use = -1;
+
+ nvs->pass_rec = calloc(1, sizeof(struct pass1_rec));
+
+ while (list) {
+ assert(list->fragment->type == NVS_INSTRUCTION);
+
+ switch(pass1_check_instruction(nvs, (nvsInstruction *)list->fragment)) {
+ case PASS1_OK:
+ break;
+ case PASS1_KILL:
+ pass1_remove_fragment(nvs, list);
+ break;
+ case PASS1_FAIL:
+ default:
+ free(nvs->pass_rec);
+ nvs->pass_rec = NULL;
+ return GL_FALSE;
+ }
+
+ list = list->prev;
+ }
+
+ free(nvs->pass_rec);
+ nvs->pass_rec = NULL;
+
+ return GL_TRUE;
+}
+
+
--- /dev/null
+/*
+ * Copyright (C) 2006 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "nouveau_shader.h"
+
+struct pass2_rec {
+ /* Map nvsRegister temp ID onto hw temp ID */
+ unsigned int temps[NVS_MAX_TEMPS];
+ /* Track free hw registers */
+ unsigned int hw_temps[NVS_MAX_TEMPS];
+};
+
+static int
+pass2_alloc_hw_temp(nvsPtr nvs)
+{
+ struct pass2_rec *rec = nvs->pass_rec;
+ int i;
+
+ for (i=0; i<nvs->func->MaxTemp; i++) {
+ /* This is a *horrible* hack.. R0 is both temp0 and result.color
+ * in NV30/40 fragprogs, we can use R0 as a temp before result is
+ * written however..
+ */
+ if (nvs->mesa.vp.Base.Target == GL_FRAGMENT_PROGRAM_ARB && i==0)
+ continue;
+
+ if (rec->hw_temps[i] == 0) {
+ rec->hw_temps[i] = 1;
+ return i;
+ }
+ }
+ return -1;
+}
+
+static void
+pass2_free_hw_temp(nvsPtr nvs, int reg)
+{
+ struct pass2_rec *rec = nvs->pass_rec;
+ rec->hw_temps[reg] = 0;
+}
+
+static nvsRegister
+pass2_mangle_reg(nvsPtr nvs, nvsInstruction *inst, nvsRegister reg)
+{
+ struct pass2_rec *rec = nvs->pass_rec;
+
+ if (reg.file == NVS_FILE_TEMP) {
+ int hwidx;
+
+ if (rec->temps[reg.index] == -1)
+ rec->temps[reg.index] = pass2_alloc_hw_temp(nvs);
+ hwidx = rec->temps[reg.index];
+
+ if (nvs->temps[reg.index].last_use <= inst->header.position)
+ pass2_free_hw_temp(nvs, hwidx);
+
+ reg.index = hwidx;
+ }
+
+ return reg;
+}
+
+static void
+pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst,
+ struct _op_xlat *op, int slot)
+{
+ nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W };
+ nvsFunc *shader = nvs->func;
+ nvsRegister reg;
+ int i, srcpos_used = ~7;
+
+ shader->SetOpcode(shader, op->NV, slot);
+ if (inst->saturate ) shader->SetSaturate(shader);
+ if (inst->cond_update) shader->SetCCUpdate(shader);
+ if (inst->cond_test ) shader->SetCondition(shader, 1, inst->cond,
+ inst->cond_reg,
+ inst->cond_swizzle);
+ else shader->SetCondition(shader, 0, NVS_COND_TR,
+ 0,
+ default_swz);
+ switch (inst->op) {
+ case NVS_OP_TEX:
+ case NVS_OP_TXB:
+ case NVS_OP_TXL:
+ case NVS_OP_TXP:
+ case NVS_OP_TXD:
+ shader->SetTexImageUnit(shader, inst->tex_unit);
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < 3; i++) {
+ if (op->srcpos[i] != -1) {
+ reg = pass2_mangle_reg(nvs, inst, inst->src[i]);
+ if (reg.file == NVS_FILE_ATTRIB)
+ nvs->inputs_read |= (1 << reg.index);
+ shader->SetSource(shader, ®, op->srcpos[i]);
+ srcpos_used |= (1<<op->srcpos[i]);
+ if (reg.file == NVS_FILE_CONST && shader->GetSourceConstVal)
+ nvs->params[reg.index].hw_index = nvs->program_current + 4;
+ }
+ }
+ for (i = 0; i < 3; i++) {
+ if (!(srcpos_used & (1<<i)))
+ shader->SetUnusedSource(shader, i);
+ }
+
+ reg = pass2_mangle_reg(nvs, inst, inst->dest);
+ if (reg.file == NVS_FILE_RESULT)
+ nvs->outputs_written |= (1 << reg.index);
+ shader->SetResult(shader, ®, inst->mask, slot);
+}
+
+static int
+pass2_assemble_instruction(nvsPtr nvs, nvsInstruction *inst, int last)
+{
+ nvsFunc *shader = nvs->func;
+ struct _op_xlat *op, *op2;
+ unsigned int hw_inst[8] = {0,0,0,0,0,0,0,0,0};
+ int slot, slot2;
+ int instsz;
+ int i;
+
+ shader->inst = hw_inst;
+
+ /* Assemble this instruction */
+ if (!(op = shader->GetOPTXFromSOP(inst->op, &slot)))
+ return 0;
+ pass2_add_instruction(nvs, inst, op, slot);
+ if (last)
+ shader->SetLastInst(shader);
+
+ instsz = shader->GetOffsetNext(nvs->func);
+ if (nvs->program_size + instsz >= nvs->program_alloc_size) {
+ nvs->program_alloc_size *= 2;
+ nvs->program = realloc(nvs->program,
+ nvs->program_alloc_size * sizeof(uint32_t));
+ }
+
+ for (i=0; i<instsz; i++)
+ nvs->program[nvs->program_current++] = hw_inst[i];
+ nvs->program_size = nvs->program_current;
+ return 1;
+}
+
+/* Translate program into hardware format */
+GLboolean
+nouveau_shader_pass2(nvsPtr nvs)
+{
+ nvsFragmentList *list = nvs->list_head;
+ struct pass2_rec *rec;
+ int i;
+
+ rec = calloc(1, sizeof(struct pass2_rec));
+ for (i=0; i<NVS_MAX_TEMPS; i++)
+ rec->temps[i] = -1;
+ nvs->pass_rec = rec;
+
+ /* Start off with allocating 4 uint32_t's for each inst, will be grown
+ * if necessary..
+ */
+ nvs->program_alloc_size = nvs->inst_count * 4;
+ nvs->program = calloc(nvs->program_alloc_size, sizeof(uint32_t));
+ nvs->program_size = 0;
+ nvs->program_current = 0;
+
+ while (list) {
+ assert(list->fragment->type == NVS_INSTRUCTION);
+
+ if (!pass2_assemble_instruction(nvs, (nvsInstruction *)list->fragment, list->next ? 0 : 1)) {
+ free(nvs->program);
+ nvs->program = NULL;
+ return GL_FALSE;
+ }
+
+ list = list->next;
+ }
+
+ /* Shrink allocated memory to only what we need */
+ nvs->program = realloc(nvs->program, nvs->program_size * sizeof(uint32_t));
+ nvs->program_alloc_size = nvs->program_size;
+
+ nvs->translated = 1;
+ nvs->on_hardware = 0;
+
+#if 1
+ fflush(stdout); fflush(stderr);
+ fprintf(stderr, "----------------MESA PROGRAM\n");
+ fflush(stdout); fflush(stderr);
+ _mesa_print_program(&nvs->mesa.vp.Base);
+ fflush(stdout); fflush(stderr);
+ fprintf(stderr, "^^^^^^^^^^^^^^^^MESA PROGRAM\n");
+ fflush(stdout); fflush(stderr);
+ fprintf(stderr, "----------------NV40 PROGRAM\n");
+ fflush(stdout); fflush(stderr);
+ nvsDisasmHWShader(nvs);
+ fflush(stdout); fflush(stderr);
+ fprintf(stderr, "^^^^^^^^^^^^^^^^NV40 PROGRAM\n");
+ fflush(stdout); fflush(stderr);
+#endif
+
+ return GL_TRUE;
+}
+
extern void nv30InitStateFuncs(struct dd_function_table *func);
extern void nouveauInitState(GLcontext *ctx);
+
/*
extern void nouveauDDUpdateState(GLcontext *ctx);
extern void nouveauDDUpdateHWState(GLcontext *ctx);
--- /dev/null
+/* NV20_TCL_PRIMITIVE_3D_0x0B00 */
+#define NV20_VP_INST_0B00 0x00000000 /* always 0? */
+#define NV20_VP_INST0_KNOWN 0
+
+/* NV20_TCL_PRIMITIVE_3D_0x0B04 */
+#define NV20_VP_INST_SCA_OPCODE_SHIFT 25
+#define NV20_VP_INST_SCA_OPCODE_MASK (0x0F << 25)
+#define NV20_VP_INST_OPCODE_RCP 0x2
+#define NV20_VP_INST_OPCODE_RCC 0x3
+#define NV20_VP_INST_OPCODE_RSQ 0x4
+#define NV20_VP_INST_OPCODE_EXP 0x5
+#define NV20_VP_INST_OPCODE_LOG 0x6
+#define NV20_VP_INST_OPCODE_LIT 0x7
+#define NV20_VP_INST_VEC_OPCODE_SHIFT 21
+#define NV20_VP_INST_VEC_OPCODE_MASK (0x0F << 21)
+#define NV20_VP_INST_OPCODE_NOP 0x0 /* guess */
+#define NV20_VP_INST_OPCODE_MOV 0x1
+#define NV20_VP_INST_OPCODE_MUL 0x2
+#define NV20_VP_INST_OPCODE_ADD 0x3
+#define NV20_VP_INST_OPCODE_MAD 0x4
+#define NV20_VP_INST_OPCODE_DP3 0x5
+#define NV20_VP_INST_OPCODE_DPH 0x6
+#define NV20_VP_INST_OPCODE_DP4 0x7
+#define NV20_VP_INST_OPCODE_DST 0x8
+#define NV20_VP_INST_OPCODE_MIN 0x9
+#define NV20_VP_INST_OPCODE_MAX 0xA
+#define NV20_VP_INST_OPCODE_SLT 0xB
+#define NV20_VP_INST_OPCODE_SGE 0xC
+#define NV20_VP_INST_OPCODE_ARL 0xD
+#define NV20_VP_INST_CONST_SRC_SHIFT 13
+#define NV20_VP_INST_CONST_SRC_MASK (0xFF << 13)
+#define NV20_VP_INST_INPUT_SRC_SHIFT 9
+#define NV20_VP_INST_INPUT_SRC_MASK (0xF << 9) /* guess */
+#define NV20_VP_INST_INPUT_SRC_POS 0
+#define NV20_VP_INST_INPUT_SRC_COL0 3
+#define NV20_VP_INST_INPUT_SRC_COL1 4
+#define NV20_VP_INST_INPUT_SRC_TC(n) (9+n)
+#define NV20_VP_INST_SRC0H_SHIFT 0
+#define NV20_VP_INST_SRC0H_MASK (0x1FF << 0)
+#define NV20_VP_INST1_KNOWN ( \
+ NV20_VP_INST_OPCODE_MASK | \
+ NV20_VP_INST_CONST_SRC_MASK | \
+ NV20_VP_INST_INPUT_SRC_MASK | \
+ NV20_VP_INST_SRC0H_MASK \
+ )
+
+/* NV20_TCL_PRIMITIVE_3D_0x0B08 */
+#define NV20_VP_INST_SRC0L_SHIFT 26
+#define NV20_VP_INST_SRC0L_MASK (0x3F <<26)
+#define NV20_VP_INST_SRC1_SHIFT 11
+#define NV20_VP_INST_SRC1_MASK (0x7FFF<<11)
+#define NV20_VP_INST_SRC2H_SHIFT 0
+#define NV20_VP_INST_SRC2H_MASK (0x7FF << 0)
+
+/* NV20_TCL_PRIMITIVE_3D_0x0B0C */
+#define NV20_VP_INST_SRC2L_SHIFT 28
+#define NV20_VP_INST_SRC2L_MASK (0x0F <<28)
+#define NV20_VP_INST_VTEMP_WRITEMASK_SHIFT 24
+#define NV20_VP_INST_VTEMP_WRITEMASK_MASK (0x0F <<24)
+# define NV20_VP_INST_TEMP_WRITEMASK_X (1<<27)
+# define NV20_VP_INST_TEMP_WRITEMASK_Y (1<<26)
+# define NV20_VP_INST_TEMP_WRITEMASK_Z (1<<25)
+# define NV20_VP_INST_TEMP_WRITEMASK_W (1<<24)
+#define NV20_VP_INST_DEST_TEMP_ID_SHIFT 20
+#define NV20_VP_INST_DEST_TEMP_ID_MASK (0x0F <<20)
+#define NV20_VP_INST_STEMP_WRITEMASK_SHIFT 16
+#define NV20_VP_INST_STEMP_WRITEMASK_MASK (0x0F <<16)
+# define NV20_VP_INST_STEMP_WRITEMASK_X (1<<19)
+# define NV20_VP_INST_STEMP_WRITEMASK_Y (1<<18)
+# define NV20_VP_INST_STEMP_WRITEMASK_Z (1<<17)
+# define NV20_VP_INST_STEMP_WRITEMASK_W (1<<16)
+#define NV20_VP_INST_DEST_WRITEMASK_SHIFT 12
+#define NV20_VP_INST_DEST_WRITEMASK_MASK (0x0F <<12)
+# define NV20_VP_INST_DEST_WRITEMASK_X (1<<15)
+# define NV20_VP_INST_DEST_WRITEMASK_Y (1<<14)
+# define NV20_VP_INST_DEST_WRITEMASK_Z (1<<13)
+# define NV20_VP_INST_DEST_WRITEMASK_W (1<<12)
+#define NV20_VP_INST_DEST_SHIFT 3
+#define NV20_VP_INST_DEST_MASK (0xF << 3) /* guess */
+#define NV20_VP_INST_DEST_POS 0
+#define NV20_VP_INST_DEST_COL0 3
+#define NV20_VP_INST_DEST_COL1 4
+#define NV20_VP_INST_DEST_TC(n) (9+n)
+#define NV20_VP_INST_INDEX_CONST (1<<1)
+#define NV20_VP_INST3_KNOWN ( \
+ NV20_VP_INST_SRC2L_MASK | \
+ NV20_VP_INST_TEMP_WRITEMASK_MASK | \
+ NV20_VP_INST_DEST_TEMP_ID_MASK | \
+ NV20_VP_INST_STEMP_WRITEMASK_MASK | \
+ NV20_VP_INST_DEST_WRITEMASK_MASK | \
+ NV20_VP_INST_DEST_MASK | \
+ NV20_VP_INST_INDEX_CONST \
+ )
+
+/* Useful to split the source selection regs into their pieces */
+#define NV20_VP_SRC0_HIGH_SHIFT 6
+#define NV20_VP_SRC0_HIGH_MASK 0x00007FC0
+#define NV20_VP_SRC0_LOW_MASK 0x0000003F
+#define NV20_VP_SRC2_HIGH_SHIFT 4
+#define NV20_VP_SRC2_HIGH_MASK 0x00007FF0
+#define NV20_VP_SRC2_LOW_MASK 0x0000000F
+
+#define NV20_VP_SRC_REG_NEGATE (1<<14)
+#define NV20_VP_SRC_REG_SWZ_X_SHIFT 12
+#define NV20_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV20_VP_SRC_REG_SWZ_Y_SHIFT 10
+#define NV20_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV20_VP_SRC_REG_SWZ_Z_SHIFT 8
+#define NV20_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV20_VP_SRC_REG_SWZ_W_SHIFT 6
+#define NV20_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV20_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV20_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV20_VP_SRC_REG_TEMP_ID_SHIFT 2
+#define NV20_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV20_VP_SRC_REG_TYPE_SHIFT 0
+#define NV20_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV20_VP_SRC_REG_TYPE_TEMP 1
+#define NV20_VP_SRC_REG_TYPE_INPUT 2
+#define NV20_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
--- /dev/null
+#include "nouveau_context.h"
+#include "nouveau_object.h"
+#include "nouveau_fifo.h"
+#include "nouveau_reg.h"
+
+#include "nouveau_shader.h"
+#include "nv20_shader.h"
+
+unsigned int NVVP_TX_VOP_COUNT = 16;
+unsigned int NVVP_TX_NVS_OP_COUNT = 16;
+struct _op_xlat NVVP_TX_VOP[32];
+struct _op_xlat NVVP_TX_SOP[32];
+
+nvsSwzComp NV20VP_TX_SWIZZLE[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W };
+
+/*****************************************************************************
+ * Support routines
+ */
+static void
+NV20VPUploadToHW(GLcontext *ctx, nouveauShader *nvs)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+ int i;
+
+ /* XXX: missing a way to say what insn we're uploading from, and possible
+ * the program start position (if NV20 has one) */
+ for (i=0; i<nvs->program_size; i+=4) {
+ BEGIN_RING_SIZE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0, 4);
+ OUT_RING(nvs->program[i + 0]);
+ OUT_RING(nvs->program[i + 1]);
+ OUT_RING(nvs->program[i + 2]);
+ OUT_RING(nvs->program[i + 3]);
+ }
+}
+
+static void
+NV20VPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+
+ /* Worth checking if the value *actually* changed? Mesa doesn't tell us this
+ * as far as I know..
+ */
+ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 1);
+ OUT_RING (id);
+ BEGIN_RING_SIZE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
+ OUT_RINGf(nvs->params[id].source_val[0]);
+ OUT_RINGf(nvs->params[id].source_val[1]);
+ OUT_RINGf(nvs->params[id].source_val[2]);
+ OUT_RINGf(nvs->params[id].source_val[3]);
+}
+
+/*****************************************************************************
+ * Assembly routines
+ */
+
+/*****************************************************************************
+ * Disassembly routines
+ */
+void
+NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz)
+{
+ swz[NVS_SWZ_X] = NV20VP_TX_SWIZZLE[(hwswz & 0xC0) >> 6];
+ swz[NVS_SWZ_Y] = NV20VP_TX_SWIZZLE[(hwswz & 0x30) >> 4];
+ swz[NVS_SWZ_Z] = NV20VP_TX_SWIZZLE[(hwswz & 0x0C) >> 2];
+ swz[NVS_SWZ_W] = NV20VP_TX_SWIZZLE[(hwswz & 0x03) >> 0];
+}
+
+static int
+NV20VPHasMergedInst(nvsFunc * shader)
+{
+ if (shader->GetOpcodeHW(shader, 0) != NV20_VP_INST_OPCODE_NOP &&
+ shader->GetOpcodeHW(shader, 1) != NV20_VP_INST_OPCODE_NOP)
+ printf
+ ("\n\n*****both opcode fields have values - PLEASE REPORT*****\n");
+ return 0;
+}
+
+static int
+NV20VPIsLastInst(nvsFunc * shader)
+{
+ return ((shader->inst[3] & (1 << 0)) ? 1 : 0);
+}
+
+static int
+NV20VPGetOffsetNext(nvsFunc * shader)
+{
+ return 4;
+}
+
+static struct _op_xlat *
+NV20VPGetOPTXRec(nvsFunc * shader, int merged)
+{
+ struct _op_xlat *opr;
+ int op;
+
+ if (shader->GetOpcodeSlot(shader, merged)) {
+ opr = NVVP_TX_SOP;
+ op = shader->GetOpcodeHW(shader, 1);
+ if (op >= NVVP_TX_NVS_OP_COUNT)
+ return NULL;
+ }
+ else {
+ opr = NVVP_TX_VOP;
+ op = shader->GetOpcodeHW(shader, 0);
+ if (op >= NVVP_TX_VOP_COUNT)
+ return NULL;
+ }
+
+ if (opr[op].SOP == NVS_OP_UNKNOWN)
+ return NULL;
+ return &opr[op];
+}
+
+static struct _op_xlat *
+NV20VPGetOPTXFromSOP(nvsOpcode sop, int *id)
+{
+ int i;
+
+ for (i=0;i<NVVP_TX_VOP_COUNT;i++) {
+ if (NVVP_TX_VOP[i].SOP == sop) {
+ if (id) *id = 0;
+ return &NVVP_TX_VOP[i];
+ }
+ }
+
+ for (i=0;i<NVVP_TX_NVS_OP_COUNT;i++) {
+ if (NVVP_TX_SOP[i].SOP == sop) {
+ if (id) *id = 1;
+ return &NVVP_TX_SOP[i];
+ }
+ }
+
+ return NULL;
+}
+
+static int
+NV20VPGetOpcodeSlot(nvsFunc * shader, int merged)
+{
+ if (shader->HasMergedInst(shader))
+ return merged;
+ if (shader->GetOpcodeHW(shader, 0) == NV20_VP_INST_OPCODE_NOP)
+ return 1;
+ return 0;
+}
+
+static nvsOpcode
+NV20VPGetOpcode(nvsFunc * shader, int merged)
+{
+ struct _op_xlat *opr;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr)
+ return NVS_OP_UNKNOWN;
+
+ return opr->SOP;
+}
+
+static nvsOpcode
+NV20VPGetOpcodeHW(nvsFunc * shader, int slot)
+{
+ if (slot)
+ return (shader->inst[1] & NV20_VP_INST_SCA_OPCODE_MASK)
+ >> NV20_VP_INST_SCA_OPCODE_SHIFT;
+ return (shader->inst[1] & NV20_VP_INST_VEC_OPCODE_MASK)
+ >> NV20_VP_INST_VEC_OPCODE_SHIFT;
+}
+
+static nvsRegFile
+NV20VPGetDestFile(nvsFunc * shader, int merged)
+{
+ switch (shader->GetOpcode(shader, merged)) {
+ case NVS_OP_ARL:
+ return NVS_FILE_ADDRESS;
+ default:
+ /*FIXME: This probably isn't correct.. */
+ if ((shader->inst[3] & NV20_VP_INST_DEST_WRITEMASK_MASK) == 0)
+ return NVS_FILE_TEMP;
+ return NVS_FILE_RESULT;
+ }
+}
+
+static unsigned int
+NV20VPGetDestID(nvsFunc * shader, int merged)
+{
+ int id;
+
+ switch (shader->GetDestFile(shader, merged)) {
+ case NVS_FILE_RESULT:
+ id = ((shader->inst[3] & NV20_VP_INST_DEST_MASK)
+ >> NV20_VP_INST_DEST_SHIFT);
+ switch (id) {
+ case NV20_VP_INST_DEST_POS : return NVS_FR_POSITION;
+ case NV20_VP_INST_DEST_COL0 : return NVS_FR_COL0;
+ case NV20_VP_INST_DEST_COL1 : return NVS_FR_COL1;
+ case NV20_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0;
+ case NV20_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1;
+ case NV20_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2;
+ case NV20_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3;
+ default:
+ return -1;
+ }
+ case NVS_FILE_ADDRESS:
+ return 0;
+ case NVS_FILE_TEMP:
+ id = ((shader->inst[3] & NV20_VP_INST_DEST_TEMP_ID_MASK)
+ >> NV20_VP_INST_DEST_TEMP_ID_SHIFT);
+ return id;
+ default:
+ return -1;
+ }
+}
+
+static unsigned int
+NV20VPGetDestMask(nvsFunc * shader, int merged)
+{
+ int hwmask, mask = 0;
+
+ /* Special handling for ARL - hardware only supports a
+ * 1-component address reg
+ */
+ if (shader->GetOpcode(shader, merged) == NVS_OP_ARL)
+ return SMASK_X;
+
+ if (shader->GetDestFile(shader, merged) == NVS_FILE_RESULT)
+ hwmask = (shader->inst[3] & NV20_VP_INST_DEST_WRITEMASK_MASK)
+ >> NV20_VP_INST_DEST_WRITEMASK_SHIFT;
+ else if (shader->GetOpcodeSlot(shader, merged))
+ hwmask = (shader->inst[3] & NV20_VP_INST_STEMP_WRITEMASK_MASK)
+ >> NV20_VP_INST_STEMP_WRITEMASK_SHIFT;
+ else
+ hwmask = (shader->inst[3] & NV20_VP_INST_VTEMP_WRITEMASK_MASK)
+ >> NV20_VP_INST_VTEMP_WRITEMASK_SHIFT;
+
+ if (hwmask & (1 << 3)) mask |= SMASK_X;
+ if (hwmask & (1 << 2)) mask |= SMASK_Y;
+ if (hwmask & (1 << 1)) mask |= SMASK_Z;
+ if (hwmask & (1 << 0)) mask |= SMASK_W;
+
+ return mask;
+}
+
+static unsigned int
+NV20VPGetSourceHW(nvsFunc * shader, int merged, int pos)
+{
+ struct _op_xlat *opr;
+ unsigned int src;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr)
+ return -1;
+
+ switch (opr->srcpos[pos]) {
+ case 0:
+ src = ((shader->inst[1] & NV20_VP_INST_SRC0H_MASK)
+ >> NV20_VP_INST_SRC0H_SHIFT)
+ << NV20_VP_SRC0_HIGH_SHIFT;
+ src |= ((shader->inst[2] & NV20_VP_INST_SRC0L_MASK)
+ >> NV20_VP_INST_SRC0L_SHIFT);
+ break;
+ case 1:
+ src = ((shader->inst[2] & NV20_VP_INST_SRC1_MASK)
+ >> NV20_VP_INST_SRC1_SHIFT);
+ break;
+ case 2:
+ src = ((shader->inst[2] & NV20_VP_INST_SRC2H_MASK)
+ >> NV20_VP_INST_SRC2H_SHIFT)
+ << NV20_VP_SRC2_HIGH_SHIFT;
+ src |= ((shader->inst[3] & NV20_VP_INST_SRC2L_MASK)
+ >> NV20_VP_INST_SRC2L_SHIFT);
+ break;
+ default:
+ src = -1;
+ }
+
+ return src;
+}
+
+static nvsRegFile
+NV20VPGetSourceFile(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+ struct _op_xlat *opr;
+ int file;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr || opr->srcpos[pos] == -1)
+ return -1;
+
+ switch (opr->srcpos[pos]) {
+ case SPOS_ADDRESS:
+ return NVS_FILE_ADDRESS;
+ default:
+ src = NV20VPGetSourceHW(shader, merged, pos);
+ file = (src & NV20_VP_SRC_REG_TYPE_MASK) >> NV20_VP_SRC_REG_TYPE_SHIFT;
+
+ switch (file) {
+ case NV20_VP_SRC_REG_TYPE_TEMP : return NVS_FILE_TEMP;
+ case NV20_VP_SRC_REG_TYPE_INPUT: return NVS_FILE_ATTRIB;
+ case NV20_VP_SRC_REG_TYPE_CONST: return NVS_FILE_CONST;
+ default:
+ return NVS_FILE_UNKNOWN;
+ }
+ }
+}
+
+static int
+NV20VPGetSourceID(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_TEMP:
+ src = shader->GetSourceHW(shader, merged, pos);
+ return ((src & NV20_VP_SRC_REG_TEMP_ID_MASK) >>
+ NV20_VP_SRC_REG_TEMP_ID_SHIFT);
+ case NVS_FILE_CONST:
+ return ((shader->inst[1] & NV20_VP_INST_CONST_SRC_MASK)
+ >> NV20_VP_INST_CONST_SRC_SHIFT);
+ case NVS_FILE_ATTRIB:
+ src = ((shader->inst[1] & NV20_VP_INST_INPUT_SRC_MASK)
+ >> NV20_VP_INST_INPUT_SRC_SHIFT);
+ switch (src) {
+ case NV20_VP_INST_INPUT_SRC_POS : return NVS_FR_POSITION;
+ case NV20_VP_INST_INPUT_SRC_COL0 : return NVS_FR_COL0;
+ case NV20_VP_INST_INPUT_SRC_COL1 : return NVS_FR_COL1;
+ case NV20_VP_INST_INPUT_SRC_TC(0): return NVS_FR_TEXCOORD0;
+ case NV20_VP_INST_INPUT_SRC_TC(1): return NVS_FR_TEXCOORD1;
+ case NV20_VP_INST_INPUT_SRC_TC(2): return NVS_FR_TEXCOORD2;
+ case NV20_VP_INST_INPUT_SRC_TC(3): return NVS_FR_TEXCOORD3;
+ default:
+ return NVS_FR_UNKNOWN;
+ }
+ default:
+ return -1;
+ }
+}
+
+static int
+NV20VPGetSourceNegate(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+
+ return ((src & NV20_VP_SRC_REG_NEGATE) ? 1 : 0);
+}
+
+static int
+NV20VPGetSourceAbs(nvsFunc * shader, int merged, int pos)
+{
+ /* NV20 can't do ABS on sources? Appears to be emulated with
+ * MAX reg, reg, -reg
+ */
+ return 0;
+}
+
+static void
+NV20VPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz)
+{
+ unsigned int src;
+ int swzbits;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+ swzbits =
+ (src & NV20_VP_SRC_REG_SWZ_ALL_MASK) >> NV20_VP_SRC_REG_SWZ_ALL_SHIFT;
+ return NV20VPTXSwizzle(swzbits, swz);
+}
+
+static int
+NV20VPGetSourceIndexed(nvsFunc * shader, int merged, int pos)
+{
+ /* I don't think NV20 can index into attribs, at least no GL
+ * extension is exposed that will allow it.
+ */
+ if (shader->GetSourceFile(shader, merged, pos) != NVS_FILE_CONST)
+ return 0;
+ if (shader->inst[3] & NV20_VP_INST_INDEX_CONST)
+ return 1;
+ return 0;
+}
+
+static int
+NV20VPGetAddressRegID(nvsFunc * shader)
+{
+ /* Only 1 address reg */
+ return 0;
+}
+
+static nvsSwzComp
+NV20VPGetAddressRegSwizzle(nvsFunc * shader)
+{
+ /* Only A0.x available */
+ return NVS_SWZ_X;
+}
+
+void
+NV20VPInitShaderFuncs(nvsFunc * shader)
+{
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MOV, NVS_OP_MOV, 0, -1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MUL, NVS_OP_MUL, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_ADD, NVS_OP_ADD, 0, 2, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MAD, NVS_OP_MAD, 0, 1, 2);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DP3, NVS_OP_DP3, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DPH, NVS_OP_DPH, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DP4, NVS_OP_DP4, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DST, NVS_OP_DST, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MIN, NVS_OP_MIN, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MAX, NVS_OP_MAX, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_SLT, NVS_OP_SLT, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_SGE, NVS_OP_SGE, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_ARL, NVS_OP_ARL, 0, -1, -1);
+
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RCP, NVS_OP_RCP, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RCC, NVS_OP_RCC, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RSQ, NVS_OP_RSQ, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_EXP, NVS_OP_EXP, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_LOG, NVS_OP_LOG, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_LIT, NVS_OP_LIT, 2, -1, -1);
+
+ shader->UploadToHW = NV20VPUploadToHW;
+ shader->UpdateConst = NV20VPUpdateConst;
+
+ shader->GetOPTXRec = NV20VPGetOPTXRec;
+ shader->GetOPTXFromSOP = NV20VPGetOPTXFromSOP;
+
+ shader->HasMergedInst = NV20VPHasMergedInst;
+ shader->IsLastInst = NV20VPIsLastInst;
+ shader->GetOffsetNext = NV20VPGetOffsetNext;
+ shader->GetOpcodeSlot = NV20VPGetOpcodeSlot;
+ shader->GetOpcode = NV20VPGetOpcode;
+ shader->GetOpcodeHW = NV20VPGetOpcodeHW;
+ shader->GetDestFile = NV20VPGetDestFile;
+ shader->GetDestID = NV20VPGetDestID;
+ shader->GetDestMask = NV20VPGetDestMask;
+ shader->GetSourceHW = NV20VPGetSourceHW;
+ shader->GetSourceFile = NV20VPGetSourceFile;
+ shader->GetSourceID = NV20VPGetSourceID;
+ shader->GetSourceNegate = NV20VPGetSourceNegate;
+ shader->GetSourceAbs = NV20VPGetSourceAbs;
+ shader->GetSourceSwizzle = NV20VPGetSourceSwizzle;
+ shader->GetSourceIndexed = NV20VPGetSourceIndexed;
+ shader->GetRelAddressRegID = NV20VPGetAddressRegID;
+ shader->GetRelAddressSwizzle = NV20VPGetAddressRegSwizzle;
+}
--- /dev/null
+#include <stdint.h>
+
+#include "glheader.h"
+#include "macros.h"
+
+#include "nouveau_context.h"
+#include "nouveau_fifo.h"
+#include "nouveau_reg.h"
+#include "nouveau_drm.h"
+#include "nouveau_shader.h"
+#include "nouveau_object.h"
+#include "nouveau_msg.h"
+#include "nv30_shader.h"
+
+unsigned int NVFP_TX_AOP_COUNT = 64;
+struct _op_xlat NVFP_TX_AOP[64];
+
+/*******************************************************************************
+ * Support routines
+ */
+
+/*XXX: bad bad bad bad */
+static uint64_t fragprog_ofs;
+static uint32_t *fragprog_buf = NULL;
+
+static void
+NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+ drm_nouveau_mem_alloc_t mem;
+
+ if (!fragprog_buf) {
+ mem.flags = NOUVEAU_MEM_FB|NOUVEAU_MEM_MAPPED;
+ mem.size = nvs->program_size * sizeof(uint32_t);
+ mem.alignment = 0;
+ mem.region_offset = &fragprog_ofs;
+ if (drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_MEM_ALLOC, &mem,
+ sizeof(mem))) {
+ fprintf(stderr, "MEM_ALLOC fail\n");
+ return;
+ }
+
+ if (drmMap(nmesa->driFd, fragprog_ofs, mem.size, &fragprog_buf)) {
+ fprintf(stderr, "MEM_MAP fail\n");
+ return;
+ }
+ }
+
+ /*XXX: should do a DMA.. and not copy over a possibly in-use program.. */
+ /* not using state cache here, updated programs at the same address
+ * seem to not take effect unless ACTIVE_PROGRAM is called again. hw
+ * caches the program somewhere? so, maybe not so bad to just clobber the
+ * old program in vram..
+ */
+ memcpy(fragprog_buf, nvs->program, nvs->program_size * sizeof(uint32_t));
+ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1);
+ OUT_RING(((uint32_t)fragprog_ofs-0xE0000000)|1);
+}
+
+static void
+NV30FPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id)
+{
+ uint32_t *current = nvs->program + nvs->params[id].hw_index;
+ uint32_t *new = nvs->params[id].source_val ?
+ nvs->params[id].source_val : nvs->params[id].val;
+
+ COPY_4V(current, new);
+ nvs->on_hardware = 0;
+}
+
+/*******************************************************************************
+ * Assembly helpers
+ */
+static struct _op_xlat *
+NV30FPGetOPTXFromSOP(nvsOpcode op, int *id)
+{
+ int i;
+
+ for (i=0; i<NVFP_TX_AOP_COUNT; i++) {
+ if (NVFP_TX_AOP[i].SOP == op) {
+ if (id) *id = 0;
+ return &NVFP_TX_AOP[i];
+ }
+ }
+
+ return NULL;
+}
+
+static int
+NV30FPSupportsOpcode(nvsFunc *shader, nvsOpcode op)
+{
+ if (shader->GetOPTXFromSOP(op, NULL))
+ return 1;
+ return 0;
+}
+
+static void
+NV30FPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot)
+{
+ shader->inst[0] |= (opcode << NV30_FP_OP_OPCODE_SHIFT);
+}
+
+static void
+NV30FPSetCCUpdate(nvsFunc *shader)
+{
+ shader->inst[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
+}
+
+static void
+NV30FPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg,
+ nvsSwzComp *swz)
+{
+ nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W };
+ unsigned int hwcond;
+
+ /* cond masking is always enabled */
+ if (!on) {
+ cond = NVS_COND_TR;
+ reg = 0;
+ swz = default_swz;
+ }
+
+ switch (cond) {
+ case NVS_COND_TR: hwcond = NV30_FP_OP_COND_TR; break;
+ case NVS_COND_FL: hwcond = NV30_FP_OP_COND_FL; break;
+ case NVS_COND_LT: hwcond = NV30_FP_OP_COND_LT; break;
+ case NVS_COND_GT: hwcond = NV30_FP_OP_COND_GT; break;
+ case NVS_COND_LE: hwcond = NV30_FP_OP_COND_LE; break;
+ case NVS_COND_GE: hwcond = NV30_FP_OP_COND_GE; break;
+ case NVS_COND_EQ: hwcond = NV30_FP_OP_COND_EQ; break;
+ case NVS_COND_NE: hwcond = NV30_FP_OP_COND_NE; break;
+ default:
+ WARN_ONCE("unknown fp condmask=%d\n", cond);
+ hwcond = NV30_FP_OP_COND_TR;
+ break;
+ }
+
+ shader->inst[1] |= (hwcond << NV30_FP_OP_COND_SHIFT);
+ shader->inst[1] |= (swz[NVS_SWZ_X] << NV30_FP_OP_COND_SWZ_X_SHIFT);
+ shader->inst[1] |= (swz[NVS_SWZ_Y] << NV30_FP_OP_COND_SWZ_Y_SHIFT);
+ shader->inst[1] |= (swz[NVS_SWZ_Z] << NV30_FP_OP_COND_SWZ_Z_SHIFT);
+ shader->inst[1] |= (swz[NVS_SWZ_W] << NV30_FP_OP_COND_SWZ_W_SHIFT);
+}
+
+static void
+NV30FPSetResult(nvsFunc *shader, nvsRegister *reg, unsigned int mask, int slot)
+{
+ unsigned int hwreg, hwmask = 0;
+
+ if (mask & SMASK_X) shader->inst[0] |= NV30_FP_OP_OUT_X;
+ if (mask & SMASK_Y) shader->inst[0] |= NV30_FP_OP_OUT_Y;
+ if (mask & SMASK_Z) shader->inst[0] |= NV30_FP_OP_OUT_Z;
+ if (mask & SMASK_W) shader->inst[0] |= NV30_FP_OP_OUT_W;
+
+ if (reg->file == NVS_FILE_RESULT) {
+ hwreg = 0; /* FIXME: this is only fragment.color */
+ /* This is *not* correct, I have no idea what it is either */
+ shader->inst[0] |= NV30_FP_OP_UNK0_7;
+ } else
+ hwreg = reg->index;
+ shader->inst[0] |= (hwreg << NV30_FP_OP_OUT_REG_SHIFT);
+}
+
+static void
+NV30FPSetSource(nvsFunc *shader, nvsRegister *reg, int pos)
+{
+ unsigned int hwsrc = 0;
+
+ switch (reg->file) {
+ case NVS_FILE_TEMP:
+ hwsrc |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
+ hwsrc |= (reg->index << NV30_FP_REG_SRC_SHIFT);
+ break;
+ case NVS_FILE_ATTRIB:
+ {
+ unsigned int hwin;
+
+ switch (reg->index) {
+ case NVS_FR_POSITION : hwin = NV30_FP_OP_INPUT_SRC_POSITION; break;
+ case NVS_FR_COL0 : hwin = NV30_FP_OP_INPUT_SRC_COL0; break;
+ case NVS_FR_COL1 : hwin = NV30_FP_OP_INPUT_SRC_COL1; break;
+ case NVS_FR_FOGCOORD : hwin = NV30_FP_OP_INPUT_SRC_FOGC; break;
+ case NVS_FR_TEXCOORD0: hwin = NV30_FP_OP_INPUT_SRC_TC(0); break;
+ case NVS_FR_TEXCOORD1: hwin = NV30_FP_OP_INPUT_SRC_TC(1); break;
+ case NVS_FR_TEXCOORD2: hwin = NV30_FP_OP_INPUT_SRC_TC(2); break;
+ case NVS_FR_TEXCOORD3: hwin = NV30_FP_OP_INPUT_SRC_TC(3); break;
+ case NVS_FR_TEXCOORD4: hwin = NV30_FP_OP_INPUT_SRC_TC(4); break;
+ case NVS_FR_TEXCOORD5: hwin = NV30_FP_OP_INPUT_SRC_TC(5); break;
+ case NVS_FR_TEXCOORD6: hwin = NV30_FP_OP_INPUT_SRC_TC(6); break;
+ case NVS_FR_TEXCOORD7: hwin = NV30_FP_OP_INPUT_SRC_TC(7); break;
+ default:
+ WARN_ONCE("unknown fp input %d\n", reg->index);
+ hwin = NV30_FP_OP_INPUT_SRC_COL0;
+ break;
+ }
+ shader->inst[0] |= (hwin << NV30_FP_OP_INPUT_SRC_SHIFT);
+ hwsrc |= (hwin << NV30_FP_REG_SRC_SHIFT);
+ }
+ hwsrc |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+ break;
+ case NVS_FILE_CONST:
+ /* consts are inlined after the inst */
+ hwsrc |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (reg->negate)
+ hwsrc |= NV30_FP_REG_NEGATE;
+ if (reg->abs)
+ shader->inst[1] |= (1 << (29+pos));
+ hwsrc |= (reg->swizzle[NVS_SWZ_X] << NV30_FP_REG_SWZ_X_SHIFT);
+ hwsrc |= (reg->swizzle[NVS_SWZ_Y] << NV30_FP_REG_SWZ_Y_SHIFT);
+ hwsrc |= (reg->swizzle[NVS_SWZ_Z] << NV30_FP_REG_SWZ_Z_SHIFT);
+ hwsrc |= (reg->swizzle[NVS_SWZ_W] << NV30_FP_REG_SWZ_W_SHIFT);
+
+ shader->inst[pos+1] |= hwsrc;
+}
+
+static void
+NV30FPSetUnusedSource(nvsFunc *shader, int pos)
+{
+ shader->inst[pos+1] |= (
+ (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT) |
+ (NVS_SWZ_X << NV30_FP_REG_SWZ_X_SHIFT) |
+ (NVS_SWZ_Y << NV30_FP_REG_SWZ_Y_SHIFT) |
+ (NVS_SWZ_Z << NV30_FP_REG_SWZ_Z_SHIFT) |
+ (NVS_SWZ_W << NV30_FP_REG_SWZ_W_SHIFT)
+ );
+}
+
+static void
+NV30FPSetTexImageUnit(nvsFunc *shader, int unit)
+{
+ shader->inst[0] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+}
+
+static void
+NV30FPSetSaturate(nvsFunc *shader)
+{
+ shader->inst[0] |= NV30_FP_OP_OUT_SAT;
+}
+
+static void
+NV30FPSetLastInst(nvsFunc *shader)
+{
+ shader->inst[0] |= 1;
+
+}
+
+/*******************************************************************************
+ * Disassembly helpers
+ */
+static struct _op_xlat *
+NV30FPGetOPTXRec(nvsFunc * shader, int merged)
+{
+ int op;
+
+ op = shader->GetOpcodeHW(shader, 0);
+ if (op > NVFP_TX_AOP_COUNT)
+ return NULL;
+ if (NVFP_TX_AOP[op].SOP == NVS_OP_UNKNOWN)
+ return NULL;
+ return &NVFP_TX_AOP[op];
+}
+
+static int
+NV30FPHasMergedInst(nvsFunc * shader)
+{
+ return 0;
+}
+
+static int
+NV30FPIsLastInst(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV30_FP_OP_PROGRAM_END) ? 1 : 0);
+}
+
+static int
+NV30FPGetOffsetNext(nvsFunc * shader)
+{
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (shader->GetSourceFile(shader, 0, i) == NVS_FILE_CONST)
+ return 8;
+ return 4;
+}
+
+static nvsOpcode
+NV30FPGetOpcode(nvsFunc * shader, int merged)
+{
+ struct _op_xlat *opr;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr)
+ return NVS_OP_UNKNOWN;
+
+ return opr->SOP;
+}
+
+static unsigned int
+NV30FPGetOpcodeHW(nvsFunc * shader, int slot)
+{
+ int op;
+
+ op = (shader->inst[0] & NV30_FP_OP_OPCODE_MASK) >> NV30_FP_OP_OPCODE_SHIFT;
+
+ return op;
+}
+
+static nvsRegFile
+NV30FPGetDestFile(nvsFunc * shader, int merged)
+{
+ /* Result regs overlap temporary regs */
+ return NVS_FILE_TEMP;
+}
+
+static unsigned int
+NV30FPGetDestID(nvsFunc * shader, int merged)
+{
+ int id;
+
+ switch (shader->GetDestFile(shader, merged)) {
+ case NVS_FILE_TEMP:
+ id = ((shader->inst[0] & NV30_FP_OP_OUT_REG_MASK)
+ >> NV30_FP_OP_OUT_REG_SHIFT);
+ return id;
+ default:
+ return -1;
+ }
+}
+
+static unsigned int
+NV30FPGetDestMask(nvsFunc * shader, int merged)
+{
+ unsigned int mask = 0;
+
+ if (shader->inst[0] & NV30_FP_OP_OUT_X) mask |= SMASK_X;
+ if (shader->inst[0] & NV30_FP_OP_OUT_Y) mask |= SMASK_Y;
+ if (shader->inst[0] & NV30_FP_OP_OUT_Z) mask |= SMASK_Z;
+ if (shader->inst[0] & NV30_FP_OP_OUT_W) mask |= SMASK_W;
+
+ return mask;
+}
+
+static unsigned int
+NV30FPGetSourceHW(nvsFunc * shader, int merged, int pos)
+{
+ struct _op_xlat *opr;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr || opr->srcpos[pos] == -1)
+ return -1;
+
+ return shader->inst[opr->srcpos[pos] + 1];
+}
+
+static nvsRegFile
+NV30FPGetSourceFile(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+ struct _op_xlat *opr;
+ int file;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr || opr->srcpos[pos] == -1)
+ return NVS_FILE_UNKNOWN;
+
+ switch (opr->srcpos[pos]) {
+ case SPOS_ADDRESS: return NVS_FILE_ADDRESS;
+ default:
+ src = shader->GetSourceHW(shader, merged, pos);
+ file = (src & NV30_FP_REG_TYPE_MASK) >> NV30_FP_REG_TYPE_SHIFT;
+
+ switch (file) {
+ case NV30_FP_REG_TYPE_TEMP : return NVS_FILE_TEMP;
+ case NV30_FP_REG_TYPE_INPUT: return NVS_FILE_ATTRIB;
+ case NV30_FP_REG_TYPE_CONST: return NVS_FILE_CONST;
+ default:
+ return NVS_FILE_UNKNOWN;
+ }
+ }
+}
+
+static int
+NV30FPGetSourceID(nvsFunc * shader, int merged, int pos)
+{
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_ATTRIB:
+ switch ((shader->inst[0] & NV30_FP_OP_INPUT_SRC_MASK)
+ >> NV30_FP_OP_INPUT_SRC_SHIFT) {
+ case NV30_FP_OP_INPUT_SRC_POSITION: return NVS_FR_POSITION;
+ case NV30_FP_OP_INPUT_SRC_COL0 : return NVS_FR_COL0;
+ case NV30_FP_OP_INPUT_SRC_COL1 : return NVS_FR_COL1;
+ case NV30_FP_OP_INPUT_SRC_FOGC : return NVS_FR_FOGCOORD;
+ case NV30_FP_OP_INPUT_SRC_TC(0) : return NVS_FR_TEXCOORD0;
+ case NV30_FP_OP_INPUT_SRC_TC(1) : return NVS_FR_TEXCOORD1;
+ case NV30_FP_OP_INPUT_SRC_TC(2) : return NVS_FR_TEXCOORD2;
+ case NV30_FP_OP_INPUT_SRC_TC(3) : return NVS_FR_TEXCOORD3;
+ case NV30_FP_OP_INPUT_SRC_TC(4) : return NVS_FR_TEXCOORD4;
+ case NV30_FP_OP_INPUT_SRC_TC(5) : return NVS_FR_TEXCOORD5;
+ case NV30_FP_OP_INPUT_SRC_TC(6) : return NVS_FR_TEXCOORD6;
+ case NV30_FP_OP_INPUT_SRC_TC(7) : return NVS_FR_TEXCOORD7;
+ default:
+ return -1;
+ }
+ break;
+ case NVS_FILE_TEMP:
+ {
+ unsigned int src;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+ return ((src & NV30_FP_REG_SRC_MASK) >> NV30_FP_REG_SRC_SHIFT);
+ }
+ case NVS_FILE_CONST: /* inlined into fragprog */
+ default:
+ return -1;
+ }
+}
+
+static int
+NV30FPGetTexImageUnit(nvsFunc *shader)
+{
+ return ((shader->inst[0] & NV30_FP_OP_TEX_UNIT_MASK)
+ >> NV30_FP_OP_TEX_UNIT_SHIFT);
+}
+
+static int
+NV30FPGetSourceNegate(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+
+ if (src == -1)
+ return -1;
+ return ((src & NV30_FP_REG_NEGATE) ? 1 : 0);
+}
+
+static int
+NV30FPGetSourceAbs(nvsFunc * shader, int merged, int pos)
+{
+ struct _op_xlat *opr;
+ static unsigned int abspos[3] = {
+ NV30_FP_OP_OUT_ABS,
+ (1 << 30), /* guess */
+ (1 << 31) /* guess */
+ };
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr || opr->srcpos[pos] == -1)
+ return -1;
+
+ return ((shader->inst[1] & abspos[opr->srcpos[pos]]) ? 1 : 0);
+}
+
+nvsSwzComp NV30FP_TX_SWIZZLE[4] = {NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W };
+
+static void
+NV30FPTXSwizzle(int hwswz, nvsSwzComp *swz)
+{
+ swz[NVS_SWZ_W] = NV30FP_TX_SWIZZLE[(hwswz & 0xC0) >> 6];
+ swz[NVS_SWZ_Z] = NV30FP_TX_SWIZZLE[(hwswz & 0x30) >> 4];
+ swz[NVS_SWZ_Y] = NV30FP_TX_SWIZZLE[(hwswz & 0x0C) >> 2];
+ swz[NVS_SWZ_X] = NV30FP_TX_SWIZZLE[(hwswz & 0x03) >> 0];
+}
+
+static void
+NV30FPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz)
+{
+ unsigned int src;
+ int swzbits;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+ swzbits = (src & NV30_FP_REG_SWZ_ALL_MASK) >> NV30_FP_REG_SWZ_ALL_SHIFT;
+ NV30FPTXSwizzle(swzbits, swz);
+}
+
+static int
+NV30FPGetSourceIndexed(nvsFunc * shader, int merged, int pos)
+{
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_ATTRIB:
+ return ((shader->inst[3] & NV30_FP_OP_INDEX_INPUT) ? 1 : 0);
+ default:
+ return 0;
+ }
+}
+
+static void
+NV30FPGetSourceConstVal(nvsFunc * shader, int merged, int pos, float *val)
+{
+ val[0] = *(float *) &(shader->inst[4]);
+ val[1] = *(float *) &(shader->inst[5]);
+ val[2] = *(float *) &(shader->inst[6]);
+ val[3] = *(float *) &(shader->inst[7]);
+}
+
+static int
+NV30FPGetSourceScale(nvsFunc * shader, int merged, int pos)
+{
+/*FIXME: is this per-source, only for a specific source, or all sources??*/
+ return (1 << ((shader->inst[2] & NV30_FP_OP_SRC_SCALE_MASK)
+ >> NV30_FP_OP_SRC_SCALE_SHIFT));
+}
+
+static int
+NV30FPGetAddressRegID(nvsFunc * shader)
+{
+ return 0;
+}
+
+static nvsSwzComp
+NV30FPGetAddressRegSwizzle(nvsFunc * shader)
+{
+ return NVS_SWZ_X;
+}
+
+static int
+NV30FPSupportsConditional(nvsFunc * shader)
+{
+ /*FIXME: Is this true of all ops? */
+ return 1;
+}
+
+static int
+NV30FPGetConditionUpdate(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV30_FP_OP_COND_WRITE_ENABLE) ? 1 : 0);
+}
+
+static int
+NV30FPGetConditionTest(nvsFunc * shader)
+{
+ /*FIXME: always? */
+ return 1;
+}
+
+static nvsCond
+NV30FPGetCondition(nvsFunc * shader)
+{
+ int cond;
+
+ cond = ((shader->inst[1] & NV30_FP_OP_COND_MASK)
+ >> NV30_FP_OP_COND_SHIFT);
+
+ switch (cond) {
+ case NV30_FP_OP_COND_FL: return NVS_COND_FL;
+ case NV30_FP_OP_COND_LT: return NVS_COND_LT;
+ case NV30_FP_OP_COND_EQ: return NVS_COND_EQ;
+ case NV30_FP_OP_COND_LE: return NVS_COND_LE;
+ case NV30_FP_OP_COND_GT: return NVS_COND_GT;
+ case NV30_FP_OP_COND_NE: return NVS_COND_NE;
+ case NV30_FP_OP_COND_GE: return NVS_COND_GE;
+ case NV30_FP_OP_COND_TR: return NVS_COND_TR;
+ default:
+ return NVS_COND_UNKNOWN;
+ }
+}
+
+static void
+NV30FPGetCondRegSwizzle(nvsFunc * shader, nvsSwzComp *swz)
+{
+ int swzbits;
+
+ swzbits = (shader->inst[1] & NV30_FP_OP_COND_SWZ_ALL_MASK)
+ >> NV30_FP_OP_COND_SWZ_ALL_SHIFT;
+ NV30FPTXSwizzle(swzbits, swz);
+}
+
+static int
+NV30FPGetCondRegID(nvsFunc * shader)
+{
+ return 0;
+}
+
+static nvsPrecision
+NV30FPGetPrecision(nvsFunc * shader)
+{
+ int p;
+
+ p = (shader->inst[0] & NV30_FP_OP_PRECISION_MASK)
+ >> NV30_FP_OP_PRECISION_SHIFT;
+
+ switch (p) {
+ case NV30_FP_PRECISION_FP32: return NVS_PREC_FLOAT32;
+ case NV30_FP_PRECISION_FP16: return NVS_PREC_FLOAT16;
+ case NV30_FP_PRECISION_FX12: return NVS_PREC_FIXED12;
+ default:
+ return NVS_PREC_UNKNOWN;
+ }
+}
+
+static int
+NV30FPGetSaturate(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV30_FP_OP_OUT_SAT) ? 1 : 0);
+}
+
+/*******************************************************************************
+ * Init
+ */
+void
+NV30FPInitShaderFuncs(nvsFunc * shader)
+{
+ /* These are probably bogus, I made them up... */
+ shader->MaxInst = 1024;
+ shader->MaxAttrib = 16;
+ shader->MaxTemp = 32;
+ shader->MaxAddress = 1;
+ shader->MaxConst = 256;
+ shader->caps = SCAP_SRC_ABS;
+
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MOV, NVS_OP_MOV, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MUL, NVS_OP_MUL, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_ADD, NVS_OP_ADD, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MAD, NVS_OP_MAD, 0, 1, 2);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DP3, NVS_OP_DP3, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DP4, NVS_OP_DP4, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DST, NVS_OP_DST, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MIN, NVS_OP_MIN, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MAX, NVS_OP_MAX, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SLT, NVS_OP_SLT, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SGE, NVS_OP_SGE, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_FRC, NVS_OP_FRC, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_FLR, NVS_OP_FLR, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TEX, NVS_OP_TEX, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TXD, NVS_OP_TXD, 0, 1, 2);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TXP, NVS_OP_TXP, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TXB, NVS_OP_TXB, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SEQ, NVS_OP_SEQ, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SGT, NVS_OP_SGT, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SLE, NVS_OP_SLE, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SNE, NVS_OP_SNE, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_RCP, NVS_OP_RCP, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_LG2, NVS_OP_LG2, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_EX2, NVS_OP_EX2, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_COS, NVS_OP_COS, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SIN, NVS_OP_SIN, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DDX, NVS_OP_DDX, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DDY, NVS_OP_DDY, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_KIL, NVS_OP_KIL, -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK4B, NVS_OP_PK4B, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP4B, NVS_OP_UP4B, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK2H, NVS_OP_PK2H, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP2H, NVS_OP_UP2H, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK4UB, NVS_OP_PK4UB, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP4UB, NVS_OP_UP4UB, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK2US, NVS_OP_PK2US, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP2US, NVS_OP_UP2US, 0, -1, -1);
+ /*FIXME: Haven't confirmed the source positions for the below opcodes */
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_LIT, NVS_OP_LIT, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_LRP, NVS_OP_LRP, 0, 1, 2);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_POW, NVS_OP_POW, 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_RSQ, NVS_OP_RSQ, 0, -1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_RFL, NVS_OP_RFL, 0, 1, -1);
+
+ shader->GetOPTXRec = NV30FPGetOPTXRec;
+ shader->GetOPTXFromSOP = NV30FPGetOPTXFromSOP;
+
+ shader->UploadToHW = NV30FPUploadToHW;
+ shader->UpdateConst = NV30FPUpdateConst;
+
+ shader->SupportsOpcode = NV30FPSupportsOpcode;
+ shader->SetOpcode = NV30FPSetOpcode;
+ shader->SetCCUpdate = NV30FPSetCCUpdate;
+ shader->SetCondition = NV30FPSetCondition;
+ shader->SetResult = NV30FPSetResult;
+ shader->SetSource = NV30FPSetSource;
+ shader->SetUnusedSource = NV30FPSetUnusedSource;
+ shader->SetTexImageUnit = NV30FPSetTexImageUnit;
+ shader->SetSaturate = NV30FPSetSaturate;
+ shader->SetLastInst = NV30FPSetLastInst;
+
+ shader->HasMergedInst = NV30FPHasMergedInst;
+ shader->IsLastInst = NV30FPIsLastInst;
+ shader->GetOffsetNext = NV30FPGetOffsetNext;
+ shader->GetOpcode = NV30FPGetOpcode;
+ shader->GetOpcodeHW = NV30FPGetOpcodeHW;
+ shader->GetDestFile = NV30FPGetDestFile;
+ shader->GetDestID = NV30FPGetDestID;
+ shader->GetDestMask = NV30FPGetDestMask;
+ shader->GetSourceHW = NV30FPGetSourceHW;
+ shader->GetSourceFile = NV30FPGetSourceFile;
+ shader->GetSourceID = NV30FPGetSourceID;
+ shader->GetTexImageUnit = NV30FPGetTexImageUnit;
+ shader->GetSourceNegate = NV30FPGetSourceNegate;
+ shader->GetSourceAbs = NV30FPGetSourceAbs;
+ shader->GetSourceSwizzle = NV30FPGetSourceSwizzle;
+ shader->GetSourceIndexed = NV30FPGetSourceIndexed;
+ shader->GetSourceConstVal = NV30FPGetSourceConstVal;
+ shader->GetSourceScale = NV30FPGetSourceScale;
+ shader->GetRelAddressRegID = NV30FPGetAddressRegID;
+ shader->GetRelAddressSwizzle = NV30FPGetAddressRegSwizzle;
+ shader->GetPrecision = NV30FPGetPrecision;
+ shader->GetSaturate = NV30FPGetSaturate;
+ shader->SupportsConditional = NV30FPSupportsConditional;
+ shader->GetConditionUpdate = NV30FPGetConditionUpdate;
+ shader->GetConditionTest = NV30FPGetConditionTest;
+ shader->GetCondition = NV30FPGetCondition;
+ shader->GetCondRegSwizzle = NV30FPGetCondRegSwizzle;
+ shader->GetCondRegID = NV30FPGetCondRegID;
+}
--- /dev/null
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ * ABS - MOV + NV40_VP_INST0_DEST_ABS
+ * POW - EX2 + MUL + LG2
+ * SUB - ADD, second source negated
+ * SWZ - MOV
+ * XPD -
+ *
+ * Register access
+ * - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ * - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ * According to the value returned for MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ * there are only two address registers available. The destination in the ARL
+ * instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ * When using vanilla ARB_v_p, the proprietary driver will squish both the available
+ * ADDRESS regs into the first hardware reg in the X and Y components.
+ *
+ * To use an address reg as an index into consts, the CONST_SRC is set to
+ * (const_base + offset) and INDEX_CONST is set.
+ *
+ * To access the second address reg use ADDR_REG_SELECT_1. A particular component
+ * of the address regs is selected with ADDR_SWZ.
+ *
+ * Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * Conditional execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with COND_{FL,LT,...}.
+ * It is possible to swizzle the values in the condition register, which allows for
+ * testing against an individual component.
+ *
+ * Branching
+ * The BRA/CAL instructions seem to follow a slightly different opcode layout. The
+ * destination instruction ID (IADDR) overlaps a source field. Instruction ID's seem to
+ * be numbered based on the UPLOAD_FROM_ID FIFO command, and is incremented automatically
+ * on each UPLOAD_INST FIFO command.
+ *
+ * Conditional branching is achieved by using the condition tests described above.
+ * There doesn't appear to be dedicated looping instructions, but this can be done
+ * using a temp reg + conditional branching.
+ *
+ * Subroutines may be uploaded before the main program itself, but the first executed
+ * instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */
+#define NV30_VP_INST_OUT_RESULT (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15)
+#define NV30_VP_INST_COND_TEST_ENABLE (1<<14)
+#define NV30_VP_INST_COND_SHIFT 11
+#define NV30_VP_INST_COND_MASK (0x07 << 11)
+# define NV30_VP_INST_COND_FL 0 /* guess */
+# define NV30_VP_INST_COND_LT 1
+# define NV30_VP_INST_COND_EQ 2
+# define NV30_VP_INST_COND_LE 3
+# define NV30_VP_INST_COND_GT 4
+# define NV30_VP_INST_COND_NE 5
+# define NV30_VP_INST_COND_GE 6
+# define NV30_VP_INST_COND_TR 7 /* guess */
+#define NV30_VP_INST_COND_SWZ_X_SHIFT 9
+#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7
+#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5
+#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT 1
+#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0
+#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28
+#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28)
+# define NV30_VP_INST_OP_NOP 0x00
+# define NV30_VP_INST_OP_RCP 0x02
+# define NV30_VP_INST_OP_RCC 0x03
+# define NV30_VP_INST_OP_RSQ 0x04
+# define NV30_VP_INST_OP_EXP 0x05
+# define NV30_VP_INST_OP_LOG 0x06
+# define NV30_VP_INST_OP_LIT 0x07
+# define NV30_VP_INST_OP_BRA 0x09
+# define NV30_VP_INST_OP_CAL 0x0B
+# define NV30_VP_INST_OP_RET 0x0C
+# define NV30_VP_INST_OP_LG2 0x0D
+# define NV30_VP_INST_OP_EX2 0x0E
+# define NV30_VP_INST_OP_SIN 0x0F
+# define NV30_VP_INST_OP_COS 0x10
+#define NV30_VP_INST_VEC_OPCODE_SHIFT 23
+#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23)
+# define NV30_VP_INST_OP_NOPV 0x00
+# define NV30_VP_INST_OP_MOV 0x01
+# define NV30_VP_INST_OP_MUL 0x02
+# define NV30_VP_INST_OP_ADD 0x03
+# define NV30_VP_INST_OP_MAD 0x04
+# define NV30_VP_INST_OP_DP3 0x05
+# define NV30_VP_INST_OP_DP4 0x07
+# define NV30_VP_INST_OP_DPH 0x06
+# define NV30_VP_INST_OP_DST 0x08
+# define NV30_VP_INST_OP_MIN 0x09
+# define NV30_VP_INST_OP_MAX 0x0A
+# define NV30_VP_INST_OP_SLT 0x0B
+# define NV30_VP_INST_OP_SGE 0x0C
+# define NV30_VP_INST_OP_ARL 0x0D
+# define NV30_VP_INST_OP_FRC 0x0E
+# define NV30_VP_INST_OP_FLR 0x0F
+# define NV30_VP_INST_OP_SEQ 0x10
+# define NV30_VP_INST_OP_SFL 0x11
+# define NV30_VP_INST_OP_SGT 0x12
+# define NV30_VP_INST_OP_SLE 0x13
+# define NV30_VP_INST_OP_SNE 0x14
+# define NV30_VP_INST_OP_STR 0x15
+# define NV30_VP_INST_OP_SSG 0x16
+# define NV30_VP_INST_OP_ARR 0x17
+# define NV30_VP_INST_OP_ARA 0x18
+#define NV30_VP_INST_CONST_SRC_SHIFT 14
+#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/
+# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
+# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
+# define NV30_VP_INST_IN_NORMAL 2
+# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */
+# define NV30_VP_INST_IN_COL1 4
+# define NV30_VP_INST_IN_FOGC 5
+# define NV30_VP_INST_IN_TC0 8
+# define NV30_VP_INST_IN_TC(n) (8+n)
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/
+
+/* DWORD 2 */
+#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /*NV20*/
+#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/
+#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /*NV20*/
+#define NV30_VP_INST_IADDR_SHIFT 2
+#define NV30_VP_INST_IADDR_MASK (0xFF << 2) /* guess */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/
+#define NV30_VP_INST_DEST_ID_SHIFT 2
+#define NV30_VP_INST_DEST_ID_MASK (0x0F << 2)
+# define NV30_VP_INST_DEST_POS 0
+# define NV30_VP_INST_DEST_COL0 3
+# define NV30_VP_INST_DEST_COL1 4
+# define NV30_VP_INST_DEST_TC(n) (8+n)
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_REG_NEGATE (1<<14)
+#define NV30_VP_SRC_REG_SWZ_X_SHIFT 12
+#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12)
+#define NV30_VP_SRC_REG_SWZ_Y_SHIFT 10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10)
+#define NV30_VP_SRC_REG_SWZ_Z_SHIFT 8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8)
+#define NV30_VP_SRC_REG_SWZ_W_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6)
+#define NV30_VP_SRC_REG_TEMP_ID_SHIFT 2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0)
+#define NV30_VP_SRC_REG_TYPE_SHIFT 0
+#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP 1
+#define NV30_VP_SRC_REG_TYPE_INPUT 2
+#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ */
+
+//== Opcode / Destination selection ==
+#define NV30_FP_OP_PROGRAM_END (1 << 0)
+#define NV30_FP_OP_OUT_REG_SHIFT 1
+#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV30_FP_OP_UNK0_7 (1 << 7)
+#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV30_FP_OP_OUTMASK_SHIFT 9
+#define NV30_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV30_FP_OP_OUT_X (1<<9)
+# define NV30_FP_OP_OUT_Y (1<<10)
+# define NV30_FP_OP_OUT_Z (1<<11)
+# define NV30_FP_OP_OUT_W (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV30_FP_OP_INPUT_SRC_SHIFT 13
+#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV30_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV30_FP_OP_INPUT_SRC_COL0 0x1
+# define NV30_FP_OP_INPUT_SRC_COL1 0x2
+# define NV30_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV30_FP_OP_INPUT_SRC_TC0 0x4
+# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+#define NV30_FP_OP_TEX_UNIT_SHIFT 17
+#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
+#define NV30_FP_OP_PRECISION_SHIFT 22
+#define NV30_FP_OP_PRECISION_MASK (3 << 22)
+# define NV30_FP_PRECISION_FP32 0
+# define NV30_FP_PRECISION_FP16 1
+# define NV30_FP_PRECISION_FX12 2
+#define NV30_FP_OP_OPCODE_SHIFT 24
+#define NV30_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV30_FP_OP_OPCODE_NOP 0x00
+# define NV30_FP_OP_OPCODE_MOV 0x01
+# define NV30_FP_OP_OPCODE_MUL 0x02
+# define NV30_FP_OP_OPCODE_ADD 0x03
+# define NV30_FP_OP_OPCODE_MAD 0x04
+# define NV30_FP_OP_OPCODE_DP3 0x05
+# define NV30_FP_OP_OPCODE_DP4 0x06
+# define NV30_FP_OP_OPCODE_DST 0x07
+# define NV30_FP_OP_OPCODE_MIN 0x08
+# define NV30_FP_OP_OPCODE_MAX 0x09
+# define NV30_FP_OP_OPCODE_SLT 0x0A
+# define NV30_FP_OP_OPCODE_SGE 0x0B
+# define NV30_FP_OP_OPCODE_SLE 0x0C
+# define NV30_FP_OP_OPCODE_SGT 0x0D
+# define NV30_FP_OP_OPCODE_SNE 0x0E
+# define NV30_FP_OP_OPCODE_SEQ 0x0F
+# define NV30_FP_OP_OPCODE_FRC 0x10
+# define NV30_FP_OP_OPCODE_FLR 0x11
+# define NV30_FP_OP_OPCODE_KIL 0x12
+# define NV30_FP_OP_OPCODE_PK4B 0x13
+# define NV30_FP_OP_OPCODE_UP4B 0x14
+# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
+# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
+# define NV30_FP_OP_OPCODE_TEX 0x17
+# define NV30_FP_OP_OPCODE_TXP 0x18
+# define NV30_FP_OP_OPCODE_TXD 0x19
+# define NV30_FP_OP_OPCODE_RCP 0x1A
+# define NV30_FP_OP_OPCODE_RSQ 0x1B
+# define NV30_FP_OP_OPCODE_EX2 0x1C
+# define NV30_FP_OP_OPCODE_LG2 0x1D
+# define NV30_FP_OP_OPCODE_LIT 0x1E
+# define NV30_FP_OP_OPCODE_LRP 0x1F
+# define NV30_FP_OP_OPCODE_COS 0x22
+# define NV30_FP_OP_OPCODE_SIN 0x23
+# define NV30_FP_OP_OPCODE_PK2H 0x24
+# define NV30_FP_OP_OPCODE_UP2H 0x25
+# define NV30_FP_OP_OPCODE_POW 0x26
+# define NV30_FP_OP_OPCODE_PK4UB 0x27
+# define NV30_FP_OP_OPCODE_UP4UB 0x28
+# define NV30_FP_OP_OPCODE_PK2US 0x29
+# define NV30_FP_OP_OPCODE_UP2US 0x2A
+# define NV30_FP_OP_OPCODE_DP2A 0x2E
+# define NV30_FP_OP_OPCODE_TXB 0x31
+# define NV30_FP_OP_OPCODE_RFL 0x36
+#define NV30_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV30_FP_OP_OUT_ABS (1 << 29)
+#define NV30_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV30_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV30_FP_OP_COND_SHIFT 18
+#define NV30_FP_OP_COND_MASK (0x07 << 18)
+# define NV30_FP_OP_COND_FL 0
+# define NV30_FP_OP_COND_LT 1
+# define NV30_FP_OP_COND_EQ 2
+# define NV30_FP_OP_COND_LE 3
+# define NV30_FP_OP_COND_GT 4
+# define NV30_FP_OP_COND_NE 5
+# define NV30_FP_OP_COND_GE 6
+# define NV30_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV30_FP_OP_SRC_SCALE_SHIFT 28
+#define NV30_FP_OP_SRC_SCALE_MASK (3 << 28)
+
+/* high order bits of SRC2 */
+#define NV30_FP_OP_INDEX_INPUT (1 << 30)
+
+//== Register selection ==
+#define NV30_FP_REG_TYPE_SHIFT 0
+#define NV30_FP_REG_TYPE_MASK (3 << 0)
+# define NV30_FP_REG_TYPE_TEMP 0
+# define NV30_FP_REG_TYPE_INPUT 1
+# define NV30_FP_REG_TYPE_CONST 2
+#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */
+#define NV30_FP_REG_SRC_MASK (31 << 2)
+#define NV30_FP_REG_UNK_0 (1 << 8)
+#define NV30_FP_REG_SWZ_ALL_SHIFT 9
+#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV30_FP_REG_SWZ_X_SHIFT 9
+#define NV30_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV30_FP_REG_SWZ_Y_SHIFT 11
+#define NV30_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV30_FP_REG_SWZ_Z_SHIFT 13
+#define NV30_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV30_FP_REG_SWZ_W_SHIFT 15
+#define NV30_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV30_FP_SWIZZLE_X 0
+# define NV30_FP_SWIZZLE_Y 1
+# define NV30_FP_SWIZZLE_Z 2
+# define NV30_FP_SWIZZLE_W 3
+#define NV30_FP_REG_NEGATE (1 << 17)
+
+#endif
switch(pname)
{
case GL_FOG_MODE:
- BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FOG_MODE, 1);
+ //BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FOG_MODE, 1);
//OUT_RING_CACHE (params);
break;
/* TODO: unsure about the rest.*/
--- /dev/null
+#include "nouveau_context.h"
+#include "nouveau_object.h"
+#include "nouveau_fifo.h"
+#include "nouveau_reg.h"
+
+#include "nouveau_shader.h"
+#include "nv30_shader.h"
+
+extern nvsSwzComp NV20VP_TX_SWIZZLE[4];
+extern void NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz);
+
+/*****************************************************************************
+ * Support routines
+ */
+static void
+NV30VPUploadToHW(GLcontext *ctx, nouveauShader *nvs)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+ int i;
+
+ /* We can do better here and keep more than one VP on the hardware, and
+ * switch between them with PROGRAM_START_ID..
+ */
+ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING(0);
+ for (i=0; i<nvs->program_size; i+=4) {
+ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0, 4);
+ OUT_RING(nvs->program[i + 0]);
+ OUT_RING(nvs->program[i + 1]);
+ OUT_RING(nvs->program[i + 2]);
+ OUT_RING(nvs->program[i + 3]);
+ }
+ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID, 1);
+ OUT_RING(0);
+}
+
+static void
+NV30VPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id)
+{
+ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+ GLfloat *val;
+
+ val = nvs->params[id].source_val ?
+ nvs->params[id].source_val : nvs->params[id].val;
+
+ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (id);
+ OUT_RINGp(val, 4);
+}
+
+/*****************************************************************************
+ * Assembly routines
+ */
+
+/*****************************************************************************
+ * Disassembly routines
+ */
+static unsigned int
+NV30VPGetOpcodeHW(nvsFunc * shader, int slot)
+{
+ int op;
+
+ if (slot) {
+ op = (shader->inst[1] & NV30_VP_INST_SCA_OPCODEL_MASK)
+ >> NV30_VP_INST_SCA_OPCODEL_SHIFT;
+ op |= ((shader->inst[0] & NV30_VP_INST_SCA_OPCODEH_MASK)
+ >> NV30_VP_INST_SCA_OPCODEH_SHIFT) << 4;
+ }
+ else {
+ op = (shader->inst[1] & NV30_VP_INST_VEC_OPCODE_MASK)
+ >> NV30_VP_INST_VEC_OPCODE_SHIFT;
+ }
+
+ return op;
+}
+
+static nvsRegFile
+NV30VPGetDestFile(nvsFunc * shader, int merged)
+{
+ switch (shader->GetOpcode(shader, merged)) {
+ case NVS_OP_ARL:
+ case NVS_OP_ARR:
+ case NVS_OP_ARA:
+ return NVS_FILE_ADDRESS;
+ default:
+ /*FIXME: This probably isn't correct.. */
+ if ((shader->inst[3] & NV30_VP_INST_VDEST_WRITEMASK_MASK) != 0)
+ return NVS_FILE_RESULT;
+ if ((shader->inst[3] & NV30_VP_INST_SDEST_WRITEMASK_MASK) != 0)
+ return NVS_FILE_RESULT;
+ return NVS_FILE_TEMP;
+ }
+}
+
+static unsigned int
+NV30VPGetDestID(nvsFunc * shader, int merged)
+{
+ int id;
+
+ switch (shader->GetDestFile(shader, merged)) {
+ case NVS_FILE_RESULT:
+ id = ((shader->inst[3] & NV30_VP_INST_DEST_ID_MASK)
+ >> NV30_VP_INST_DEST_ID_SHIFT);
+ switch (id) {
+ case NV30_VP_INST_DEST_POS : return NVS_FR_POSITION;
+ case NV30_VP_INST_DEST_COL0 : return NVS_FR_COL0;
+ case NV30_VP_INST_DEST_COL1 : return NVS_FR_COL1;
+ case NV30_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0;
+ case NV30_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1;
+ case NV30_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2;
+ case NV30_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3;
+ case NV30_VP_INST_DEST_TC(4): return NVS_FR_TEXCOORD4;
+ case NV30_VP_INST_DEST_TC(5): return NVS_FR_TEXCOORD5;
+ case NV30_VP_INST_DEST_TC(6): return NVS_FR_TEXCOORD6;
+ case NV30_VP_INST_DEST_TC(7): return NVS_FR_TEXCOORD7;
+ default:
+ return -1;
+ }
+ case NVS_FILE_ADDRESS:
+ case NVS_FILE_TEMP:
+ return (shader->inst[0] & NV30_VP_INST_DEST_TEMP_ID_MASK)
+ >> NV30_VP_INST_DEST_TEMP_ID_SHIFT;
+ default:
+ return -1;
+ }
+}
+
+static unsigned int
+NV30VPGetDestMask(nvsFunc * shader, int merged)
+{
+ int hwmask, mask = 0;
+
+ if (shader->GetDestFile(shader, merged) == NVS_FILE_RESULT)
+ if (shader->GetOpcodeSlot(shader, merged))
+ hwmask = (shader->inst[3] & NV30_VP_INST_SDEST_WRITEMASK_MASK)
+ >> NV30_VP_INST_SDEST_WRITEMASK_SHIFT;
+ else
+ hwmask = (shader->inst[3] & NV30_VP_INST_VDEST_WRITEMASK_MASK)
+ >> NV30_VP_INST_VDEST_WRITEMASK_SHIFT;
+ else if (shader->GetOpcodeSlot(shader, merged))
+ hwmask = (shader->inst[3] & NV30_VP_INST_STEMP_WRITEMASK_MASK)
+ >> NV30_VP_INST_STEMP_WRITEMASK_SHIFT;
+ else
+ hwmask = (shader->inst[3] & NV30_VP_INST_VTEMP_WRITEMASK_MASK)
+ >> NV30_VP_INST_VTEMP_WRITEMASK_SHIFT;
+
+ if (hwmask & (1 << 3)) mask |= SMASK_X;
+ if (hwmask & (1 << 2)) mask |= SMASK_Y;
+ if (hwmask & (1 << 1)) mask |= SMASK_Z;
+ if (hwmask & (1 << 0)) mask |= SMASK_W;
+
+ return mask;
+}
+
+static int
+NV30VPGetSourceID(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_TEMP:
+ src = shader->GetSourceHW(shader, merged, pos);
+ return ((src & NV30_VP_SRC_REG_TEMP_ID_MASK) >>
+ NV30_VP_SRC_REG_TEMP_ID_SHIFT);
+ case NVS_FILE_CONST:
+ return ((shader->inst[1] & NV30_VP_INST_CONST_SRC_MASK)
+ >> NV30_VP_INST_CONST_SRC_SHIFT);
+ case NVS_FILE_ATTRIB:
+ src = ((shader->inst[1] & NV30_VP_INST_INPUT_SRC_MASK)
+ >> NV30_VP_INST_INPUT_SRC_SHIFT);
+ switch (src) {
+ case NV30_VP_INST_IN_POS : return NVS_FR_POSITION;
+ case NV30_VP_INST_IN_COL0 : return NVS_FR_COL0;
+ case NV30_VP_INST_IN_COL1 : return NVS_FR_COL1;
+ case NV30_VP_INST_IN_TC(0): return NVS_FR_TEXCOORD0;
+ case NV30_VP_INST_IN_TC(1): return NVS_FR_TEXCOORD1;
+ case NV30_VP_INST_IN_TC(2): return NVS_FR_TEXCOORD2;
+ case NV30_VP_INST_IN_TC(3): return NVS_FR_TEXCOORD3;
+ case NV30_VP_INST_IN_TC(4): return NVS_FR_TEXCOORD4;
+ case NV30_VP_INST_IN_TC(5): return NVS_FR_TEXCOORD5;
+ case NV30_VP_INST_IN_TC(6): return NVS_FR_TEXCOORD6;
+ case NV30_VP_INST_IN_TC(7): return NVS_FR_TEXCOORD7;
+ default:
+ return NVS_FR_UNKNOWN;
+ }
+ default:
+ return -1;
+ }
+}
+
+static int
+NV30VPGetSourceAbs(nvsFunc * shader, int merged, int pos)
+{
+ struct _op_xlat *opr;
+ static unsigned int abspos[3] = {
+ NV30_VP_INST_SRC0_ABS,
+ NV30_VP_INST_SRC1_ABS,
+ NV30_VP_INST_SRC2_ABS,
+ };
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr || opr->srcpos[pos] == -1 || opr->srcpos[pos] > 2)
+ return 0;
+
+ return ((shader->inst[0] & abspos[opr->srcpos[pos]]) ? 1 : 0);
+}
+
+static int
+NV30VPGetRelAddressRegID(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV30_VP_INST_ADDR_REG_SELECT_1) ? 1 : 0);
+}
+
+static nvsSwzComp
+NV30VPGetRelAddressSwizzle(nvsFunc * shader)
+{
+ nvsSwzComp swz;
+
+ swz = NV20VP_TX_SWIZZLE[(shader->inst[0] & NV30_VP_INST_ADDR_SWZ_MASK)
+ >> NV30_VP_INST_ADDR_SWZ_SHIFT];
+ return swz;
+}
+
+static int
+NV30VPSupportsConditional(nvsFunc * shader)
+{
+ /*FIXME: Is this true of all ops? */
+ return 1;
+}
+
+static int
+NV30VPGetConditionUpdate(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV30_VP_INST_COND_UPDATE_ENABLE) ? 1 : 0);
+}
+
+static int
+NV30VPGetConditionTest(nvsFunc * shader)
+{
+ int op;
+
+ /* The condition test is unconditionally enabled on some
+ * instructions. ie: the condition test bit does *NOT* have
+ * to be set.
+ *
+ * FIXME: check other relevant ops for this situation.
+ */
+ op = shader->GetOpcodeHW(shader, 1);
+ switch (op) {
+ case NV30_VP_INST_OP_BRA:
+ return 1;
+ default:
+ return ((shader->inst[0] & NV30_VP_INST_COND_TEST_ENABLE) ? 1 : 0);
+ }
+}
+
+static nvsCond
+NV30VPGetCondition(nvsFunc * shader)
+{
+ int cond;
+
+ cond = ((shader->inst[0] & NV30_VP_INST_COND_MASK)
+ >> NV30_VP_INST_COND_SHIFT);
+
+ switch (cond) {
+ case NV30_VP_INST_COND_FL: return NVS_COND_FL;
+ case NV30_VP_INST_COND_LT: return NVS_COND_LT;
+ case NV30_VP_INST_COND_EQ: return NVS_COND_EQ;
+ case NV30_VP_INST_COND_LE: return NVS_COND_LE;
+ case NV30_VP_INST_COND_GT: return NVS_COND_GT;
+ case NV30_VP_INST_COND_NE: return NVS_COND_NE;
+ case NV30_VP_INST_COND_GE: return NVS_COND_GE;
+ case NV30_VP_INST_COND_TR: return NVS_COND_TR;
+ default:
+ return NVS_COND_UNKNOWN;
+ }
+}
+
+static void
+NV30VPGetCondRegSwizzle(nvsFunc * shader, nvsSwzComp *swz)
+{
+ int swzbits;
+
+ swzbits = (shader->inst[0] & NV30_VP_INST_COND_SWZ_ALL_MASK)
+ >> NV30_VP_INST_COND_SWZ_ALL_SHIFT;
+ NV20VPTXSwizzle(swzbits, swz);
+}
+
+static int
+NV30VPGetCondRegID(nvsFunc * shader)
+{
+ return 0;
+}
+
+
+static int
+NV30VPGetBranch(nvsFunc * shader)
+{
+ return ((shader->inst[2] & NV30_VP_INST_IADDR_MASK)
+ >> NV30_VP_INST_IADDR_SHIFT);
+}
+
+void
+NV30VPInitShaderFuncs(nvsFunc * shader)
+{
+ /* Inherit NV20 code, a lot of it is the same */
+ NV20VPInitShaderFuncs(shader);
+
+ /* Increase max valid opcode ID, and add new instructions */
+ NVVP_TX_VOP_COUNT = NVVP_TX_NVS_OP_COUNT = 32;
+
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_FRC, NVS_OP_FRC, 0, -1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_FLR, NVS_OP_FLR, 0, -1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SEQ, NVS_OP_SEQ, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SFL, NVS_OP_SFL, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SGT, NVS_OP_SGT, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SLE, NVS_OP_SLE, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SNE, NVS_OP_SNE, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_STR, NVS_OP_STR, 0, 1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SSG, NVS_OP_SSG, 0, -1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_ARR, NVS_OP_ARR, 0, -1, -1);
+ MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_ARA, NVS_OP_ARA, 3, -1, -1);
+
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_BRA, NVS_OP_BRA, -1, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_CAL, NVS_OP_CAL, -1, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_RET, NVS_OP_RET, -1, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_LG2, NVS_OP_LG2, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_EX2, NVS_OP_EX2, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_SIN, NVS_OP_SIN, 2, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_COS, NVS_OP_COS, 2, -1, -1);
+
+ shader->UploadToHW = NV30VPUploadToHW;
+ shader->UpdateConst = NV30VPUpdateConst;
+
+ shader->GetOpcodeHW = NV30VPGetOpcodeHW;
+
+ shader->GetDestFile = NV30VPGetDestFile;
+ shader->GetDestID = NV30VPGetDestID;
+ shader->GetDestMask = NV30VPGetDestMask;
+
+ shader->GetSourceID = NV30VPGetSourceID;
+ shader->GetSourceAbs = NV30VPGetSourceAbs;
+
+ shader->GetRelAddressRegID = NV30VPGetRelAddressRegID;
+ shader->GetRelAddressSwizzle = NV30VPGetRelAddressSwizzle;
+
+ shader->SupportsConditional = NV30VPSupportsConditional;
+ shader->GetConditionUpdate = NV30VPGetConditionUpdate;
+ shader->GetConditionTest = NV30VPGetConditionTest;
+ shader->GetCondition = NV30VPGetCondition;
+ shader->GetCondRegSwizzle = NV30VPGetCondRegSwizzle;
+ shader->GetCondRegID = NV30VPGetCondRegID;
+
+ shader->GetBranch = NV30VPGetBranch;
+}
+
--- /dev/null
+#include "nouveau_shader.h"
+#include "nv40_shader.h"
+
+/* branching ops */
+unsigned int NVFP_TX_BOP_COUNT = 5;
+struct _op_xlat NVFP_TX_BOP[64];
+
+static struct _op_xlat *
+NV40FPGetOPTXRec(nvsFunc * shader, int merged)
+{
+ struct _op_xlat *opr;
+ int op;
+
+ op = shader->GetOpcodeHW(shader, 0);
+ if (shader->inst[2] & NV40_FP_OP_OPCODE_IS_BRANCH) {
+ opr = NVFP_TX_BOP;
+ op &= ~NV40_FP_OP_OPCODE_IS_BRANCH;
+ if (op > NVFP_TX_BOP_COUNT)
+ return NULL;
+ }
+ else {
+ opr = NVFP_TX_AOP;
+ if (op > NVFP_TX_AOP_COUNT)
+ return NULL;
+ }
+
+ if (opr[op].SOP == NVS_OP_UNKNOWN)
+ return NULL;
+ return &opr[op];
+}
+
+static int
+NV40FPGetSourceID(nvsFunc * shader, int merged, int pos)
+{
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_ATTRIB:
+ switch ((shader->inst[0] & NV40_FP_OP_INPUT_SRC_MASK)
+ >> NV40_FP_OP_INPUT_SRC_SHIFT) {
+ case NV40_FP_OP_INPUT_SRC_POSITION: return NVS_FR_POSITION;
+ case NV40_FP_OP_INPUT_SRC_COL0 : return NVS_FR_COL0;
+ case NV40_FP_OP_INPUT_SRC_COL1 : return NVS_FR_COL1;
+ case NV40_FP_OP_INPUT_SRC_FOGC : return NVS_FR_FOGCOORD;
+ case NV40_FP_OP_INPUT_SRC_TC(0) : return NVS_FR_TEXCOORD0;
+ case NV40_FP_OP_INPUT_SRC_TC(1) : return NVS_FR_TEXCOORD1;
+ case NV40_FP_OP_INPUT_SRC_TC(2) : return NVS_FR_TEXCOORD2;
+ case NV40_FP_OP_INPUT_SRC_TC(3) : return NVS_FR_TEXCOORD3;
+ case NV40_FP_OP_INPUT_SRC_TC(4) : return NVS_FR_TEXCOORD4;
+ case NV40_FP_OP_INPUT_SRC_TC(5) : return NVS_FR_TEXCOORD5;
+ case NV40_FP_OP_INPUT_SRC_TC(6) : return NVS_FR_TEXCOORD6;
+ case NV40_FP_OP_INPUT_SRC_TC(7) : return NVS_FR_TEXCOORD7;
+ case NV40_FP_OP_INPUT_SRC_FACING : return NVS_FR_FACING;
+ default:
+ return -1;
+ }
+ break;
+ case NVS_FILE_TEMP:
+ {
+ unsigned int src;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+ return ((src & NV40_FP_REG_SRC_MASK) >> NV40_FP_REG_SRC_SHIFT);
+ }
+ case NVS_FILE_CONST: /* inlined into fragprog */
+ default:
+ return -1;
+ }
+}
+
+static int
+NV40FPGetBranch(nvsFunc * shader)
+{
+ return ((shader->inst[2] & NV40_FP_OP_IADDR_MASK)
+ >> NV40_FP_OP_IADDR_SHIFT);;
+}
+
+static int
+NV40FPGetBranchElse(nvsFunc * shader)
+{
+ return ((shader->inst[2] & NV40_FP_OP_ELSE_ID_MASK)
+ >> NV40_FP_OP_ELSE_ID_SHIFT);
+}
+
+static int
+NV40FPGetBranchEnd(nvsFunc * shader)
+{
+ return ((shader->inst[3] & NV40_FP_OP_END_ID_MASK)
+ >> NV40_FP_OP_END_ID_SHIFT);
+}
+
+static int
+NV40FPGetLoopCount(nvsFunc * shader)
+{
+ return ((shader->inst[2] & NV40_FP_OP_LOOP_COUNT_MASK)
+ >> NV40_FP_OP_LOOP_COUNT_SHIFT);
+}
+
+static int
+NV40FPGetLoopInitial(nvsFunc * shader)
+{
+ return ((shader->inst[2] & NV40_FP_OP_LOOP_INDEX_MASK)
+ >> NV40_FP_OP_LOOP_INDEX_SHIFT);
+}
+
+static int
+NV40FPGetLoopIncrement(nvsFunc * shader)
+{
+ return ((shader->inst[2] & NV40_FP_OP_LOOP_INCR_MASK)
+ >> NV40_FP_OP_LOOP_INCR_SHIFT);
+}
+
+void
+NV40FPInitShaderFuncs(nvsFunc * shader)
+{
+ /* Inherit NV30 FP code, it's mostly the same */
+ NV30FPInitShaderFuncs(shader);
+
+ /* Kill off opcodes seen on NV30, but not seen on NV40 - need to find
+ * out if these actually work or not.
+ *
+ * update: either LIT/RSQ don't work on nv40, or I generate bad code for
+ * them. haven't tested the others yet
+ */
+ MOD_OPCODE(NVFP_TX_AOP, 0x1B, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 RSQ */
+ MOD_OPCODE(NVFP_TX_AOP, 0x1E, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 LIT */
+ MOD_OPCODE(NVFP_TX_AOP, 0x1F, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 LRP */
+ MOD_OPCODE(NVFP_TX_AOP, 0x26, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 POW */
+ MOD_OPCODE(NVFP_TX_AOP, 0x36, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 RFL */
+
+ /* Extra opcodes supported on NV40 */
+ MOD_OPCODE(NVFP_TX_AOP, NV40_FP_OP_OPCODE_DIV , NVS_OP_DIV , 0, 1, -1);
+ MOD_OPCODE(NVFP_TX_AOP, NV40_FP_OP_OPCODE_DP2A , NVS_OP_DP2A, 0, 1, 2);
+ MOD_OPCODE(NVFP_TX_AOP, NV40_FP_OP_OPCODE_TXL , NVS_OP_TXL , 0, -1, -1);
+
+ MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_BRK , NVS_OP_BRK , -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_CAL , NVS_OP_CAL , -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_IF , NVS_OP_IF , -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_LOOP, NVS_OP_LOOP, -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_REP , NVS_OP_REP , -1, -1, -1);
+ MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_RET , NVS_OP_RET , -1, -1, -1);
+
+ /* fragment.facing */
+ shader->GetSourceID = NV40FPGetSourceID;
+
+ /* branching */
+ shader->GetOPTXRec = NV40FPGetOPTXRec;
+ shader->GetBranch = NV40FPGetBranch;
+ shader->GetBranchElse = NV40FPGetBranchElse;
+ shader->GetBranchEnd = NV40FPGetBranchEnd;
+ shader->GetLoopCount = NV40FPGetLoopCount;
+ shader->GetLoopInitial = NV40FPGetLoopInitial;
+ shader->GetLoopIncrement = NV40FPGetLoopIncrement;
+}
--- /dev/null
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK is not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40, it's use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+# define NV40_VP_INST_COND_FL 0
+# define NV40_VP_INST_COND_LT 1
+# define NV40_VP_INST_COND_EQ 2
+# define NV40_VP_INST_COND_LE 3
+# define NV40_VP_INST_COND_GT 4
+# define NV40_VP_INST_COND_NE 5
+# define NV40_VP_INST_COND_GE 6
+# define NV40_VP_INST_COND_TR 7
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_MUL 0x02
+# define NV40_VP_INST_OP_ADD 0x03
+# define NV40_VP_INST_OP_MAD 0x04
+# define NV40_VP_INST_OP_DP3 0x05
+# define NV40_VP_INST_OP_DP4 0x07
+# define NV40_VP_INST_OP_DPH 0x06
+# define NV40_VP_INST_OP_DST 0x08
+# define NV40_VP_INST_OP_MIN 0x09
+# define NV40_VP_INST_OP_MAX 0x0A
+# define NV40_VP_INST_OP_SLT 0x0B
+# define NV40_VP_INST_OP_SGE 0x0C
+# define NV40_VP_INST_OP_ARL 0x0D
+# define NV40_VP_INST_OP_FRC 0x0E
+# define NV40_VP_INST_OP_FLR 0x0F
+# define NV40_VP_INST_OP_SEQ 0x10
+# define NV40_VP_INST_OP_SFL 0x11
+# define NV40_VP_INST_OP_SGT 0x12
+# define NV40_VP_INST_OP_SLE 0x13
+# define NV40_VP_INST_OP_SNE 0x14
+# define NV40_VP_INST_OP_STR 0x15
+# define NV40_VP_INST_OP_SSG 0x16
+# define NV40_VP_INST_OP_ARR 0x17
+# define NV40_VP_INST_OP_ARA 0x18
+# define NV40_VP_INST_OP_TXWHAT 0x19
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27)
+# define NV40_VP_INST_OP_RCP 0x02
+# define NV40_VP_INST_OP_RCC 0x03
+# define NV40_VP_INST_OP_RSQ 0x04
+# define NV40_VP_INST_OP_EXP 0x05
+# define NV40_VP_INST_OP_LOG 0x06
+# define NV40_VP_INST_OP_LIT 0x07
+# define NV40_VP_INST_OP_BRA 0x09
+# define NV40_VP_INST_OP_CAL 0x0B
+# define NV40_VP_INST_OP_RET 0x0C
+# define NV40_VP_INST_OP_LG2 0x0D
+# define NV40_VP_INST_OP_EX2 0x0E
+# define NV40_VP_INST_OP_SIN 0x0F
+# define NV40_VP_INST_OP_COS 0x10
+# define NV40_VP_INST_OP_PUSHA 0x13
+# define NV40_VP_INST_OP_POPA 0x14
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+# define NV40_VP_INST_IN_POS 0
+# define NV40_VP_INST_IN_WEIGHT 1
+# define NV40_VP_INST_IN_NORMAL 2
+# define NV40_VP_INST_IN_COL0 3
+# define NV40_VP_INST_IN_COL1 4
+# define NV40_VP_INST_IN_FOGC 5
+# define NV40_VP_INST_IN_TC0 8
+# define NV40_VP_INST_IN_TC(n) (8+n)
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23)
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7 << 29)
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21)
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+n)
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST_LAST (1 << 0)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth,
+ * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
+ * SWIZZLE_ONE.
+ *
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
+ * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
+ * of the destination.
+ *
+ * Looping
+ * Loops appear to be fairly expensive on NV40 at least, the proprietary
+ * driver goes to a lot of effort to avoid using the native looping
+ * instructions. If the total number of *executed* instructions between
+ * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ * The maximum loop count is 255.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ * DP2 - MUL + ADD
+ * NRM
+ */
+
+//== Opcode / Destination selection ==
+#define NV40_FP_OP_PROGRAM_END (1 << 0)
+#define NV40_FP_OP_OUT_REG_SHIFT 1
+#define NV40_FP_OP_OUT_REG_MASK (31 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV40_FP_OP_UNK0_7 (1 << 7)
+#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV40_FP_OP_OUTMASK_SHIFT 9
+#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV40_FP_OP_OUT_X (1 << 9)
+# define NV40_FP_OP_OUT_Y (1 <<10)
+# define NV40_FP_OP_OUT_Z (1 <<11)
+# define NV40_FP_OP_OUT_W (1 <<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV40_FP_OP_INPUT_SRC_SHIFT 13
+#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV40_FP_OP_INPUT_SRC_COL0 0x1
+# define NV40_FP_OP_INPUT_SRC_COL1 0x2
+# define NV40_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV40_FP_OP_INPUT_SRC_TC0 0x4
+# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n)
+# define NV40_FP_OP_INPUT_SRC_FACING 0xE
+#define NV40_FP_OP_TEX_UNIT_SHIFT 17
+#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17)
+#define NV40_FP_OP_PRECISION_SHIFT 22
+#define NV40_FP_OP_PRECISION_MASK (3 << 22)
+# define NV40_FP_PRECISION_FP32 0
+# define NV40_FP_PRECISION_FP16 1
+# define NV40_FP_PRECISION_FX12 2
+#define NV40_FP_OP_OPCODE_SHIFT 24
+#define NV40_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV40_FP_OP_OPCODE_NOP 0x00
+# define NV40_FP_OP_OPCODE_MOV 0x01
+# define NV40_FP_OP_OPCODE_MUL 0x02
+# define NV40_FP_OP_OPCODE_ADD 0x03
+# define NV40_FP_OP_OPCODE_MAD 0x04
+# define NV40_FP_OP_OPCODE_DP3 0x05
+# define NV40_FP_OP_OPCODE_DP4 0x06
+# define NV40_FP_OP_OPCODE_DST 0x07
+# define NV40_FP_OP_OPCODE_MIN 0x08
+# define NV40_FP_OP_OPCODE_MAX 0x09
+# define NV40_FP_OP_OPCODE_SLT 0x0A
+# define NV40_FP_OP_OPCODE_SGE 0x0B
+# define NV40_FP_OP_OPCODE_SLE 0x0C
+# define NV40_FP_OP_OPCODE_SGT 0x0D
+# define NV40_FP_OP_OPCODE_SNE 0x0E
+# define NV40_FP_OP_OPCODE_SEQ 0x0F
+# define NV40_FP_OP_OPCODE_FRC 0x10
+# define NV40_FP_OP_OPCODE_FLR 0x11
+# define NV40_FP_OP_OPCODE_KIL 0x12
+# define NV40_FP_OP_OPCODE_PK4B 0x13
+# define NV40_FP_OP_OPCODE_UP4B 0x14
+/* DDX/DDY can only write to XY */
+# define NV40_FP_OP_OPCODE_DDX 0x15
+# define NV40_FP_OP_OPCODE_DDY 0x16
+# define NV40_FP_OP_OPCODE_TEX 0x17
+# define NV40_FP_OP_OPCODE_TXP 0x18
+# define NV40_FP_OP_OPCODE_TXD 0x19
+# define NV40_FP_OP_OPCODE_RCP 0x1A
+# define NV40_FP_OP_OPCODE_EX2 0x1C
+# define NV40_FP_OP_OPCODE_LG2 0x1D
+# define NV40_FP_OP_OPCODE_COS 0x22
+# define NV40_FP_OP_OPCODE_SIN 0x23
+# define NV40_FP_OP_OPCODE_PK2H 0x24
+# define NV40_FP_OP_OPCODE_UP2H 0x25
+# define NV40_FP_OP_OPCODE_PK4UB 0x27
+# define NV40_FP_OP_OPCODE_UP4UB 0x28
+# define NV40_FP_OP_OPCODE_PK2US 0x29
+# define NV40_FP_OP_OPCODE_UP2US 0x2A
+# define NV40_FP_OP_OPCODE_DP2A 0x2E
+# define NV40_FP_OP_OPCODE_TXL 0x2F
+# define NV40_FP_OP_OPCODE_TXB 0x31
+# define NV40_FP_OP_OPCODE_DIV 0x3A
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+# define NV40_FP_OP_BRA_OPCODE_BRK 0x0
+# define NV40_FP_OP_BRA_OPCODE_CAL 0x1
+# define NV40_FP_OP_BRA_OPCODE_IF 0x2
+# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
+# define NV40_FP_OP_BRA_OPCODE_REP 0x4
+# define NV40_FP_OP_BRA_OPCODE_RET 0x5
+#define NV40_FP_OP_OUT_SAT (1 << 31)
+
+/* high order bits of SRC0 */
+#define NV40_FP_OP_OUT_ABS (1 << 29)
+#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV40_FP_OP_COND_SHIFT 18
+#define NV40_FP_OP_COND_MASK (0x07 << 18)
+# define NV40_FP_OP_COND_FL 0
+# define NV40_FP_OP_COND_LT 1
+# define NV40_FP_OP_COND_EQ 2
+# define NV40_FP_OP_COND_LE 3
+# define NV40_FP_OP_COND_GT 4
+# define NV40_FP_OP_COND_NE 5
+# define NV40_FP_OP_COND_GE 6
+# define NV40_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31)
+#define NV40_FP_OP_SRC_SCALE_SHIFT 28
+#define NV40_FP_OP_SRC_SCALE_MASK (3 << 28)
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT 19
+#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
+#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
+#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT 2
+#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT 2
+#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+
+/* SRC1 REP
+ * I have no idea why there are 3 count values here.. but they
+ * have always been filled with the same value in my tests so
+ * far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT 2
+#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT 10
+#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT 19
+#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT 2
+#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+
+// SRC2 high-order
+#define NV40_FP_OP_INDEX_INPUT (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
+#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)
+
+//== Register selection ==
+#define NV40_FP_REG_TYPE_SHIFT 0
+#define NV40_FP_REG_TYPE_MASK (3 << 0)
+# define NV40_FP_REG_TYPE_TEMP 0
+# define NV40_FP_REG_TYPE_INPUT 1
+# define NV40_FP_REG_TYPE_CONST 2
+#define NV40_FP_REG_SRC_SHIFT 2
+#define NV40_FP_REG_SRC_MASK (31 << 2)
+#define NV40_FP_REG_UNK_0 (1 << 8)
+#define NV40_FP_REG_SWZ_ALL_SHIFT 9
+#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV40_FP_REG_SWZ_X_SHIFT 9
+#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV40_FP_REG_SWZ_Y_SHIFT 11
+#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV40_FP_REG_SWZ_Z_SHIFT 13
+#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV40_FP_REG_SWZ_W_SHIFT 15
+#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV40_FP_SWIZZLE_X 0
+# define NV40_FP_SWIZZLE_Y 1
+# define NV40_FP_SWIZZLE_Z 2
+# define NV40_FP_SWIZZLE_W 3
+#define NV40_FP_REG_NEGATE (1 << 17)
+
+#endif
--- /dev/null
+#include "nouveau_shader.h"
+#include "nouveau_msg.h"
+#include "nv40_shader.h"
+
+extern nvsSwzComp NV20VP_TX_SWIZZLE[4];
+extern void NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz);
+
+/*****************************************************************************
+ * Assembly routines
+ */
+static int
+NV40VPSupportsOpcode(nvsFunc * shader, nvsOpcode op)
+{
+ if (shader->GetOPTXFromSOP(op, NULL))
+ return 1;
+ return 0;
+}
+
+static void
+NV40VPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot)
+{
+ if (slot) shader->inst[1] |= (opcode << NV40_VP_INST_SCA_OPCODE_SHIFT);
+ else shader->inst[1] |= (opcode << NV40_VP_INST_VEC_OPCODE_SHIFT);
+}
+
+static void
+NV40VPSetCCUpdate(nvsFunc *shader)
+{
+ shader->inst[0] |= NV40_VP_INST_COND_UPDATE_ENABLE;
+}
+
+static void
+NV40VPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg,
+ nvsSwzComp *swizzle)
+{
+ unsigned int hwcond;
+
+ if (on ) shader->inst[0] |= NV40_VP_INST_COND_TEST_ENABLE;
+ if (reg) shader->inst[0] |= NV40_VP_INST_COND_REG_SELECT_1;
+
+ switch (cond) {
+ case NVS_COND_TR: hwcond = NV40_VP_INST_COND_TR; break;
+ case NVS_COND_FL: hwcond = NV40_VP_INST_COND_FL; break;
+ case NVS_COND_LT: hwcond = NV40_VP_INST_COND_LT; break;
+ case NVS_COND_GT: hwcond = NV40_VP_INST_COND_GT; break;
+ case NVS_COND_NE: hwcond = NV40_VP_INST_COND_NE; break;
+ case NVS_COND_EQ: hwcond = NV40_VP_INST_COND_EQ; break;
+ case NVS_COND_GE: hwcond = NV40_VP_INST_COND_GE; break;
+ case NVS_COND_LE: hwcond = NV40_VP_INST_COND_LE; break;
+ default:
+ WARN_ONCE("unknown vp cond %d\n", cond);
+ hwcond = NV40_VP_INST_COND_TR;
+ break;
+ }
+ shader->inst[0] |= (hwcond << NV40_VP_INST_COND_SHIFT);
+
+ shader->inst[0] |= (swizzle[NVS_SWZ_X] << NV40_VP_INST_COND_SWZ_X_SHIFT);
+ shader->inst[0] |= (swizzle[NVS_SWZ_Y] << NV40_VP_INST_COND_SWZ_Y_SHIFT);
+ shader->inst[0] |= (swizzle[NVS_SWZ_Z] << NV40_VP_INST_COND_SWZ_Z_SHIFT);
+ shader->inst[0] |= (swizzle[NVS_SWZ_W] << NV40_VP_INST_COND_SWZ_W_SHIFT);
+}
+
+static void
+NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask,
+ int slot)
+{
+ unsigned int hwmask = 0;
+
+ if (mask & SMASK_X) hwmask |= (1 << 3);
+ if (mask & SMASK_Y) hwmask |= (1 << 2);
+ if (mask & SMASK_Z) hwmask |= (1 << 1);
+ if (mask & SMASK_W) hwmask |= (1 << 0);
+
+ if (dest->file == NVS_FILE_RESULT) {
+ int hwidx;
+
+ switch (dest->index) {
+ case NVS_FR_POSITION : hwidx = NV40_VP_INST_DEST_POS; break;
+ case NVS_FR_COL0 : hwidx = NV40_VP_INST_DEST_COL0; break;
+ case NVS_FR_COL1 : hwidx = NV40_VP_INST_DEST_COL1; break;
+ case NVS_FR_BFC0 : hwidx = NV40_VP_INST_DEST_BFC0; break;
+ case NVS_FR_BFC1 : hwidx = NV40_VP_INST_DEST_BFC1; break;
+ case NVS_FR_FOGCOORD : hwidx = NV40_VP_INST_DEST_FOGC; break;
+ case NVS_FR_POINTSZ : hwidx = NV40_VP_INST_DEST_PSZ; break;
+ case NVS_FR_TEXCOORD0: hwidx = NV40_VP_INST_DEST_TC(0); break;
+ case NVS_FR_TEXCOORD1: hwidx = NV40_VP_INST_DEST_TC(1); break;
+ case NVS_FR_TEXCOORD2: hwidx = NV40_VP_INST_DEST_TC(2); break;
+ case NVS_FR_TEXCOORD3: hwidx = NV40_VP_INST_DEST_TC(3); break;
+ case NVS_FR_TEXCOORD4: hwidx = NV40_VP_INST_DEST_TC(4); break;
+ case NVS_FR_TEXCOORD5: hwidx = NV40_VP_INST_DEST_TC(5); break;
+ case NVS_FR_TEXCOORD6: hwidx = NV40_VP_INST_DEST_TC(6); break;
+ case NVS_FR_TEXCOORD7: hwidx = NV40_VP_INST_DEST_TC(7); break;
+ default:
+ WARN_ONCE("unknown vtxprog output %d\n", dest->index);
+ hwidx = 0;
+ break;
+ }
+ shader->inst[3] |= (hwidx << NV40_VP_INST_DEST_SHIFT);
+
+ if (slot) {
+ shader->inst[3] |= NV40_VP_INST_SCA_RESULT;
+ shader->inst[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+ } else {
+ shader->inst[0] |= NV40_VP_INST_VEC_RESULT;
+ shader->inst[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20);
+ }
+ } else {
+ /* NVS_FILE_TEMP || NVS_FILE_ADDRESS */
+ if (slot)
+ shader->inst[3] |= (dest->index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+ else
+ shader->inst[0] |= (dest->index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ }
+
+ if (slot) shader->inst[3] |= (hwmask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+ else shader->inst[3] |= (hwmask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+}
+
+static void
+NV40VPInsertSource(nvsFunc *shader, unsigned int hw, int pos)
+{
+ switch (pos) {
+ case 0:
+ shader->inst[1] |= ((hw & NV40_VP_SRC0_HIGH_MASK) >>
+ NV40_VP_SRC0_HIGH_SHIFT)
+ << NV40_VP_INST_SRC0H_SHIFT;
+ shader->inst[2] |= (hw & NV40_VP_SRC0_LOW_MASK)
+ << NV40_VP_INST_SRC0L_SHIFT;
+ break;
+ case 1:
+ shader->inst[2] |= hw
+ << NV40_VP_INST_SRC1_SHIFT;
+ break;
+ case 2:
+ shader->inst[2] |= ((hw & NV40_VP_SRC2_HIGH_MASK) >>
+ NV40_VP_SRC2_HIGH_SHIFT)
+ << NV40_VP_INST_SRC2H_SHIFT;
+ shader->inst[3] |= (hw & NV40_VP_SRC2_LOW_MASK)
+ << NV40_VP_INST_SRC2L_SHIFT;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static void
+NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos)
+{
+ unsigned int hw = 0;
+
+ switch (src->file) {
+ case NVS_FILE_ADDRESS:
+ break;
+ case NVS_FILE_ATTRIB:
+ hw |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT);
+
+ shader->inst[1] |= (src->index << NV40_VP_INST_INPUT_SRC_SHIFT);
+ if (src->indexed) {
+ shader->inst[0] |= NV40_VP_INST_INDEX_INPUT;
+ if (src->addr_reg)
+ shader->inst[0] |= NV40_VP_INST_ADDR_REG_SELECT_1;
+ shader->inst[0] |= (src->addr_comp << NV40_VP_INST_ADDR_SWZ_SHIFT);
+ }
+ break;
+ case NVS_FILE_CONST:
+ hw |= (NV40_VP_SRC_REG_TYPE_CONST << NV40_VP_SRC_REG_TYPE_SHIFT);
+
+ shader->inst[1] |= (src->index << NV40_VP_INST_CONST_SRC_SHIFT);
+ if (src->indexed) {
+ shader->inst[3] |= NV40_VP_INST_INDEX_CONST;
+ if (src->addr_reg)
+ shader->inst[0] |= NV40_VP_INST_ADDR_REG_SELECT_1;
+ shader->inst[0] |= (src->addr_comp << NV40_VP_INST_ADDR_SWZ_SHIFT);
+ }
+ break;
+ case NVS_FILE_TEMP:
+ hw |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
+ hw |= (src->index << NV40_VP_SRC_TEMP_SRC_SHIFT);
+ break;
+ default:
+ fprintf(stderr, "unknown source file %d\n", src->file);
+ assert(0);
+ break;
+ }
+
+ if (src->file != NVS_FILE_ADDRESS) {
+ if (src->negate)
+ hw |= NV40_VP_SRC_NEGATE;
+ if (src->abs)
+ shader->inst[0] |= (1 << (21 + pos));
+ hw |= (src->swizzle[0] << NV40_VP_SRC_SWZ_X_SHIFT);
+ hw |= (src->swizzle[1] << NV40_VP_SRC_SWZ_Y_SHIFT);
+ hw |= (src->swizzle[2] << NV40_VP_SRC_SWZ_Z_SHIFT);
+ hw |= (src->swizzle[3] << NV40_VP_SRC_SWZ_W_SHIFT);
+
+ NV40VPInsertSource(shader, hw, pos);
+ }
+}
+
+static void
+NV40VPSetUnusedSource(nvsFunc *shader, int pos)
+{
+ unsigned int hw;
+
+ hw = ((NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT) |
+ (NVS_SWZ_X << NV40_VP_SRC_SWZ_X_SHIFT) |
+ (NVS_SWZ_Y << NV40_VP_SRC_SWZ_Y_SHIFT) |
+ (NVS_SWZ_Z << NV40_VP_SRC_SWZ_Z_SHIFT) |
+ (NVS_SWZ_W << NV40_VP_SRC_SWZ_W_SHIFT));
+
+ NV40VPInsertSource(shader, hw, pos);
+}
+
+static void
+NV40VPSetLastInst(nvsFunc *shader, int pos)
+{
+ shader->inst[3] |= 1;
+}
+
+/*****************************************************************************
+ * Disassembly routines
+ */
+static int
+NV40VPHasMergedInst(nvsFunc * shader)
+{
+ if (shader->GetOpcodeHW(shader, 0) != NV40_VP_INST_OP_NOP &&
+ shader->GetOpcodeHW(shader, 1) != NV40_VP_INST_OP_NOP)
+ return 1;
+ return 0;
+}
+
+static unsigned int
+NV40VPGetOpcodeHW(nvsFunc * shader, int slot)
+{
+ int op;
+
+ if (slot)
+ op = (shader->inst[1] & NV40_VP_INST_SCA_OPCODE_MASK)
+ >> NV40_VP_INST_SCA_OPCODE_SHIFT;
+ else
+ op = (shader->inst[1] & NV40_VP_INST_VEC_OPCODE_MASK)
+ >> NV40_VP_INST_VEC_OPCODE_SHIFT;
+
+ return op;
+}
+
+static nvsRegFile
+NV40VPGetDestFile(nvsFunc * shader, int merged)
+{
+ nvsOpcode op;
+
+ op = shader->GetOpcode(shader, merged);
+ switch (op) {
+ case NVS_OP_ARL:
+ case NVS_OP_ARR:
+ case NVS_OP_ARA:
+ case NVS_OP_POPA:
+ return NVS_FILE_ADDRESS;
+ default:
+ if (shader->GetOpcodeSlot(shader, merged)) {
+ if (shader->inst[3] & NV40_VP_INST_SCA_RESULT)
+ return NVS_FILE_RESULT;
+ }
+ else {
+ if (shader->inst[0] & NV40_VP_INST_VEC_RESULT)
+ return NVS_FILE_RESULT;
+ }
+ return NVS_FILE_TEMP;
+ }
+
+}
+
+static unsigned int
+NV40VPGetDestID(nvsFunc * shader, int merged)
+{
+ int id;
+
+ switch (shader->GetDestFile(shader, merged)) {
+ case NVS_FILE_RESULT:
+ id = ((shader->inst[3] & NV40_VP_INST_DEST_MASK)
+ >> NV40_VP_INST_DEST_SHIFT);
+ switch (id) {
+ case NV40_VP_INST_DEST_POS : return NVS_FR_POSITION;
+ case NV40_VP_INST_DEST_COL0: return NVS_FR_COL0;
+ case NV40_VP_INST_DEST_COL1: return NVS_FR_COL1;
+ case NV40_VP_INST_DEST_BFC0: return NVS_FR_BFC0;
+ case NV40_VP_INST_DEST_BFC1: return NVS_FR_BFC1;
+ case NV40_VP_INST_DEST_FOGC: {
+ int mask = shader->GetDestMask(shader, merged);
+ switch (mask) {
+ case SMASK_X: return NVS_FR_FOGCOORD;
+ case SMASK_Y: return NVS_FR_CLIP0;
+ case SMASK_Z: return NVS_FR_CLIP1;
+ case SMASK_W: return NVS_FR_CLIP2;
+ default:
+ printf("more than 1 mask component set in FOGC writemask!\n");
+ return NVS_FR_UNKNOWN;
+ }
+ }
+ case NV40_VP_INST_DEST_PSZ:
+ {
+ int mask = shader->GetDestMask(shader, merged);
+ switch (mask) {
+ case SMASK_X: return NVS_FR_POINTSZ;
+ case SMASK_Y: return NVS_FR_CLIP3;
+ case SMASK_Z: return NVS_FR_CLIP4;
+ case SMASK_W: return NVS_FR_CLIP5;
+ default:
+ printf("more than 1 mask component set in PSZ writemask!\n");
+ return NVS_FR_UNKNOWN;
+ }
+ }
+ case NV40_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0;
+ case NV40_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1;
+ case NV40_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2;
+ case NV40_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3;
+ case NV40_VP_INST_DEST_TC(4): return NVS_FR_TEXCOORD4;
+ case NV40_VP_INST_DEST_TC(5): return NVS_FR_TEXCOORD5;
+ case NV40_VP_INST_DEST_TC(6): return NVS_FR_TEXCOORD6;
+ case NV40_VP_INST_DEST_TC(7): return NVS_FR_TEXCOORD7;
+ default:
+ return -1;
+ }
+ case NVS_FILE_ADDRESS:
+ /* Instructions that write address regs are encoded as if
+ * they would write temps.
+ */
+ case NVS_FILE_TEMP:
+ if (shader->GetOpcodeSlot(shader, merged))
+ id = ((shader->inst[3] & NV40_VP_INST_SCA_DEST_TEMP_MASK)
+ >> NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+ else
+ id = ((shader->inst[0] & NV40_VP_INST_VEC_DEST_TEMP_MASK)
+ >> NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+ return id;
+ default:
+ return -1;
+ }
+}
+
+static unsigned int
+NV40VPGetDestMask(nvsFunc * shader, int merged)
+{
+ unsigned int mask = 0;
+
+ if (shader->GetOpcodeSlot(shader, merged)) {
+ if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_X) mask |= SMASK_X;
+ if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_Y) mask |= SMASK_Y;
+ if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_Z) mask |= SMASK_Z;
+ if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_W) mask |= SMASK_W;
+ } else {
+ if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_X) mask |= SMASK_X;
+ if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_Y) mask |= SMASK_Y;
+ if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_Z) mask |= SMASK_Z;
+ if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_W) mask |= SMASK_W;
+ }
+
+ return mask;
+}
+
+static unsigned int
+NV40VPGetSourceHW(nvsFunc * shader, int merged, int pos)
+{
+ struct _op_xlat *opr;
+ unsigned int src;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr)
+ return -1;
+
+ switch (opr->srcpos[pos]) {
+ case 0:
+ src = ((shader->inst[1] & NV40_VP_INST_SRC0H_MASK)
+ >> NV40_VP_INST_SRC0H_SHIFT)
+ << NV40_VP_SRC0_HIGH_SHIFT;
+ src |= ((shader->inst[2] & NV40_VP_INST_SRC0L_MASK)
+ >> NV40_VP_INST_SRC0L_SHIFT);
+ break;
+ case 1:
+ src = ((shader->inst[2] & NV40_VP_INST_SRC1_MASK)
+ >> NV40_VP_INST_SRC1_SHIFT);
+ break;
+ case 2:
+ src = ((shader->inst[2] & NV40_VP_INST_SRC2H_MASK)
+ >> NV40_VP_INST_SRC2H_SHIFT)
+ << NV40_VP_SRC2_HIGH_SHIFT;
+ src |= ((shader->inst[3] & NV40_VP_INST_SRC2L_MASK)
+ >> NV40_VP_INST_SRC2L_SHIFT);
+ break;
+ default:
+ src = -1;
+ }
+
+ return src;
+}
+
+static nvsRegFile
+NV40VPGetSourceFile(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+ struct _op_xlat *opr;
+ int file;
+
+ opr = shader->GetOPTXRec(shader, merged);
+ if (!opr || opr->srcpos[pos] == -1)
+ return -1;
+
+ switch (opr->srcpos[pos]) {
+ case SPOS_ADDRESS: return NVS_FILE_ADDRESS;
+ default:
+ src = shader->GetSourceHW(shader, merged, pos);
+ file = (src & NV40_VP_SRC_REG_TYPE_MASK) >> NV40_VP_SRC_REG_TYPE_SHIFT;
+
+ switch (file) {
+ case NV40_VP_SRC_REG_TYPE_TEMP : return NVS_FILE_TEMP;
+ case NV40_VP_SRC_REG_TYPE_INPUT: return NVS_FILE_ATTRIB;
+ case NV40_VP_SRC_REG_TYPE_CONST: return NVS_FILE_CONST;
+ default:
+ return NVS_FILE_UNKNOWN;
+ }
+ }
+}
+
+static int
+NV40VPGetSourceID(nvsFunc * shader, int merged, int pos)
+{
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_ATTRIB:
+ switch ((shader->inst[1] & NV40_VP_INST_INPUT_SRC_MASK)
+ >> NV40_VP_INST_INPUT_SRC_SHIFT) {
+ case NV40_VP_INST_IN_POS: return NVS_FR_POSITION;
+ case NV40_VP_INST_IN_WEIGHT: return NVS_FR_WEIGHT;
+ case NV40_VP_INST_IN_NORMAL: return NVS_FR_NORMAL;
+ case NV40_VP_INST_IN_COL0: return NVS_FR_COL0;
+ case NV40_VP_INST_IN_COL1: return NVS_FR_COL1;
+ case NV40_VP_INST_IN_FOGC: return NVS_FR_FOGCOORD;
+ case NV40_VP_INST_IN_TC(0): return NVS_FR_TEXCOORD0;
+ case NV40_VP_INST_IN_TC(1): return NVS_FR_TEXCOORD1;
+ case NV40_VP_INST_IN_TC(2): return NVS_FR_TEXCOORD2;
+ case NV40_VP_INST_IN_TC(3): return NVS_FR_TEXCOORD3;
+ case NV40_VP_INST_IN_TC(4): return NVS_FR_TEXCOORD4;
+ case NV40_VP_INST_IN_TC(5): return NVS_FR_TEXCOORD5;
+ case NV40_VP_INST_IN_TC(6): return NVS_FR_TEXCOORD6;
+ case NV40_VP_INST_IN_TC(7): return NVS_FR_TEXCOORD7;
+ default:
+ return -1;
+ }
+ break;
+ case NVS_FILE_CONST:
+ return ((shader->inst[1] & NV40_VP_INST_CONST_SRC_MASK)
+ >> NV40_VP_INST_CONST_SRC_SHIFT);
+ case NVS_FILE_TEMP:
+ {
+ unsigned int src;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+ return ((src & NV40_VP_SRC_TEMP_SRC_MASK) >>
+ NV40_VP_SRC_TEMP_SRC_SHIFT);
+ }
+ default:
+ return -1;
+ }
+}
+
+static int
+NV40VPGetSourceNegate(nvsFunc * shader, int merged, int pos)
+{
+ unsigned int src;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+
+ if (src == -1)
+ return -1;
+ return ((src & NV40_VP_SRC_NEGATE) ? 1 : 0);
+}
+
+static void
+NV40VPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz)
+{
+ unsigned int src;
+ int swzbits;
+
+ src = shader->GetSourceHW(shader, merged, pos);
+ swzbits = (src & NV40_VP_SRC_SWZ_ALL_MASK) >> NV40_VP_SRC_SWZ_ALL_SHIFT;
+ NV20VPTXSwizzle(swzbits, swz);
+}
+
+static int
+NV40VPGetSourceIndexed(nvsFunc * shader, int merged, int pos)
+{
+ switch (shader->GetSourceFile(shader, merged, pos)) {
+ case NVS_FILE_ATTRIB:
+ return ((shader->inst[0] & NV40_VP_INST_INDEX_INPUT) ? 1 : 0);
+ case NVS_FILE_CONST:
+ return ((shader->inst[3] & NV40_VP_INST_INDEX_CONST) ? 1 : 0);
+ default:
+ return 0;
+ }
+}
+
+static nvsSwzComp
+NV40VPGetAddressRegSwizzle(nvsFunc * shader)
+{
+ nvsSwzComp swz;
+
+ swz = NV20VP_TX_SWIZZLE[(shader->inst[0] & NV40_VP_INST_ADDR_SWZ_MASK)
+ >> NV40_VP_INST_ADDR_SWZ_SHIFT];
+ return swz;
+}
+
+static int
+NV40VPSupportsConditional(nvsFunc * shader)
+{
+ /*FIXME: Is this true of all ops? */
+ return 1;
+}
+
+static int
+NV40VPGetConditionUpdate(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV40_VP_INST_COND_UPDATE_ENABLE) ? 1 : 0);
+}
+
+static int
+NV40VPGetConditionTest(nvsFunc * shader)
+{
+ int op;
+
+ /* The condition test is unconditionally enabled on some
+ * instructions. ie: the condition test bit does *NOT* have
+ * to be set.
+ *
+ * FIXME: check other relevant ops for this situation.
+ */
+ op = shader->GetOpcodeHW(shader, 1);
+ switch (op) {
+ case NV40_VP_INST_OP_BRA:
+ return 1;
+ default:
+ return ((shader->inst[0] & NV40_VP_INST_COND_TEST_ENABLE) ? 1 : 0);
+ }
+}
+
+static nvsCond
+NV40VPGetCondition(nvsFunc * shader)
+{
+ int cond;
+
+ cond = ((shader->inst[0] & NV40_VP_INST_COND_MASK)
+ >> NV40_VP_INST_COND_SHIFT);
+
+ switch (cond) {
+ case NV40_VP_INST_COND_FL: return NVS_COND_FL;
+ case NV40_VP_INST_COND_LT: return NVS_COND_LT;
+ case NV40_VP_INST_COND_EQ: return NVS_COND_EQ;
+ case NV40_VP_INST_COND_LE: return NVS_COND_LE;
+ case NV40_VP_INST_COND_GT: return NVS_COND_GT;
+ case NV40_VP_INST_COND_NE: return NVS_COND_NE;
+ case NV40_VP_INST_COND_GE: return NVS_COND_GE;
+ case NV40_VP_INST_COND_TR: return NVS_COND_TR;
+ default:
+ return NVS_COND_UNKNOWN;
+ }
+}
+
+static void
+NV40VPGetCondRegSwizzle(nvsFunc * shader, nvsSwzComp *swz)
+{
+ int swzbits;
+
+ swzbits = (shader->inst[0] & NV40_VP_INST_COND_SWZ_ALL_MASK)
+ >> NV40_VP_INST_COND_SWZ_ALL_SHIFT;
+ NV20VPTXSwizzle(swzbits, swz);
+}
+
+static int
+NV40VPGetCondRegID(nvsFunc * shader)
+{
+ return ((shader->inst[0] & NV40_VP_INST_COND_REG_SELECT_1) ? 1 : 0);
+}
+
+static int
+NV40VPGetBranch(nvsFunc * shader)
+{
+ int addr;
+
+ addr = ((shader->inst[2] & NV40_VP_INST_IADDRH_MASK)
+ >> NV40_VP_INST_IADDRH_SHIFT) << 3;
+ addr |= ((shader->inst[3] & NV40_VP_INST_IADDRL_MASK)
+ >> NV40_VP_INST_IADDRL_SHIFT);
+ return addr;
+}
+
+void
+NV40VPInitShaderFuncs(nvsFunc * shader)
+{
+ /* Inherit NV30 VP code, we share some of it */
+ NV30VPInitShaderFuncs(shader);
+
+ /* Limits */
+ shader->MaxInst = 4096;
+ shader->MaxAttrib = 16;
+ shader->MaxTemp = 32;
+ shader->MaxAddress = 2;
+ shader->MaxConst = 256;
+ shader->caps = SCAP_SRC_ABS;
+
+ /* Add extra opcodes for NV40+ */
+// MOD_OPCODE(NVVP_TX_VOP, NV40_VP_INST_OP_TXWHAT, NVS_OP_TEX , 0, 4, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV40_VP_INST_OP_PUSHA, NVS_OP_PUSHA, 3, -1, -1);
+ MOD_OPCODE(NVVP_TX_SOP, NV40_VP_INST_OP_POPA , NVS_OP_POPA , -1, -1, -1);
+
+ shader->SupportsOpcode = NV40VPSupportsOpcode;
+ shader->SetOpcode = NV40VPSetOpcode;
+ shader->SetCCUpdate = NV40VPSetCCUpdate;
+ shader->SetCondition = NV40VPSetCondition;
+ shader->SetResult = NV40VPSetResult;
+ shader->SetSource = NV40VPSetSource;
+ shader->SetUnusedSource = NV40VPSetUnusedSource;
+ shader->SetLastInst = NV40VPSetLastInst;
+
+ shader->HasMergedInst = NV40VPHasMergedInst;
+ shader->GetOpcodeHW = NV40VPGetOpcodeHW;
+
+ shader->GetDestFile = NV40VPGetDestFile;
+ shader->GetDestID = NV40VPGetDestID;
+ shader->GetDestMask = NV40VPGetDestMask;
+
+ shader->GetSourceHW = NV40VPGetSourceHW;
+ shader->GetSourceFile = NV40VPGetSourceFile;
+ shader->GetSourceID = NV40VPGetSourceID;
+ shader->GetSourceNegate = NV40VPGetSourceNegate;
+ shader->GetSourceSwizzle = NV40VPGetSourceSwizzle;
+ shader->GetSourceIndexed = NV40VPGetSourceIndexed;
+
+ shader->GetRelAddressSwizzle = NV40VPGetAddressRegSwizzle;
+
+ shader->SupportsConditional = NV40VPSupportsConditional;
+ shader->GetConditionUpdate = NV40VPGetConditionUpdate;
+ shader->GetConditionTest = NV40VPGetConditionTest;
+ shader->GetCondition = NV40VPGetCondition;
+ shader->GetCondRegSwizzle = NV40VPGetCondRegSwizzle;
+ shader->GetCondRegID = NV40VPGetCondRegID;
+
+ shader->GetBranch = NV40VPGetBranch;
+}