From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sun, 21 Feb 2010 01:26:25 +0000 (+0100)
Subject: nv30, nv40: non-trivially partially unify nv[34]0_shader.h
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d9e396ce4a124529fa92ad967f2b3ff72534079b;p=mesa.git

nv30, nv40: non-trivially partially unify nv[34]0_shader.h

nv30_shader.h and nv40_shader.h are similar, except for the following differences:
1. The instruction sets are not exactly the same, but mostly similar
2. Vertex program fields are in different bit positions

This patch unifies all parts of nv[34]0_shader.h except the vertex
program fields.

Vertex opcode constants are also renamed so that each name includes
SCA if it is a scalar opcode and VEC if it is a vector opcode.
---

diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index ae246ffd647..4ce16b8f0e3 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -19,14 +19,14 @@
 #define MASK_Z 4
 #define MASK_W 8
 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
-#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
-#define DEF_CTEST NV30_FP_OP_COND_TR
-#include "nv30_shader.h"
+#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NVFX_FP_OP_COND_TR
+#include "nvfx_shader.h"
 
-#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv30_sr_neg((s))
-#define abs(s) nv30_sr_abs((s))
-#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
+#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)
 
 #define MAX_CONSTS 128
 #define MAX_IMM 32
@@ -50,21 +50,21 @@ struct nv30_fpc {
 	} consts[MAX_CONSTS];
 	int nr_consts;
 
-	struct nv30_sreg imm[MAX_IMM];
+	struct nvfx_sreg imm[MAX_IMM];
 	unsigned nr_imm;
 };
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 temp(struct nv30_fpc *fpc)
 {
 	int idx;
 
 	idx  = fpc->temp_temp_count++;
 	idx += fpc->high_temp + 1;
-	return nv30_sr(NV30SR_TEMP, idx);
+	return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 constant(struct nv30_fpc *fpc, int pipe, float vals[4])
 {
 	int idx;
@@ -76,14 +76,14 @@ constant(struct nv30_fpc *fpc, int pipe, float vals[4])
 	fpc->consts[idx].pipe = pipe;
 	if (pipe == -1)
 		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
-	return nv30_sr(NV30SR_CONST, idx);
+	return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-	nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
+	nv30_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
 			(d), (m), (s0), (s1), (s2))
 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
-	nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
+	nv30_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
 		    (d), (m), (s0), none, none)
 
 static void
@@ -96,25 +96,25 @@ grow_insns(struct nv30_fpc *fpc, int size)
 }
 
 static void
-emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
+emit_src(struct nv30_fpc *fpc, int pos, struct nvfx_sreg src)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw = &fp->insn[fpc->inst_offset];
 	uint32_t sr = 0;
 
 	switch (src.type) {
-	case NV30SR_INPUT:
-		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
-		hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
+	case NVFXSR_INPUT:
+		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
 		break;
-	case NV30SR_OUTPUT:
-		sr |= NV30_FP_REG_SRC_HALF;
+	case NVFXSR_OUTPUT:
+		sr |= NVFX_FP_REG_SRC_HALF;
 		/* fall-through */
-	case NV30SR_TEMP:
-		sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
-		sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
+	case NVFXSR_TEMP:
+		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
+		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
 		break;
-	case NV30SR_CONST:
+	case NVFXSR_CONST:
 		grow_insns(fpc, 4);
 		hw = &fp->insn[fpc->inst_offset];
 		if (fpc->consts[src.index].pipe >= 0) {
@@ -132,61 +132,61 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
 				sizeof(uint32_t) * 4);
 		}
 
-		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
 		break;
-	case NV30SR_NONE:
-		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+	case NVFXSR_NONE:
+		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
 		break;
 	default:
 		assert(0);
 	}
 
 	if (src.negate)
-		sr |= NV30_FP_REG_NEGATE;
+		sr |= NVFX_FP_REG_NEGATE;
 
 	if (src.abs)
 		hw[1] |= (1 << (29 + pos));
 
-	sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
-	       (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
-	       (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
-	       (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
+	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
+	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
+	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
+	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
 
 	hw[pos + 1] |= sr;
 }
 
 static void
-emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
+emit_dst(struct nv30_fpc *fpc, struct nvfx_sreg dst)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw = &fp->insn[fpc->inst_offset];
 
 	switch (dst.type) {
-	case NV30SR_TEMP:
+	case NVFXSR_TEMP:
 		if (fpc->num_regs < (dst.index + 1))
 			fpc->num_regs = dst.index + 1;
 		break;
-	case NV30SR_OUTPUT:
+	case NVFXSR_OUTPUT:
 		if (dst.index == 1) {
 			fp->fp_control |= 0xe;
 		} else {
-			hw[0] |= NV30_FP_OP_OUT_REG_HALF;
+			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
 		}
 		break;
-	case NV30SR_NONE:
+	case NVFXSR_NONE:
 		hw[0] |= (1 << 30);
 		break;
 	default:
 		assert(0);
 	}
 
-	hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
+	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
 }
 
 static void
 nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
-	      struct nv30_sreg dst, int mask,
-	      struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+	      struct nvfx_sreg dst, int mask,
+	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw;
@@ -196,22 +196,22 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
 	hw = &fp->insn[fpc->inst_offset];
 	memset(hw, 0, sizeof(uint32_t) * 4);
 
-	if (op == NV30_FP_OP_OPCODE_KIL)
+	if (op == NVFX_FP_OP_OPCODE_KIL)
 		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
-	hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
-	hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
-	hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
+	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
+	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
+	hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
 
 	if (sat)
-		hw[0] |= NV30_FP_OP_OUT_SAT;
+		hw[0] |= NVFX_FP_OP_OUT_SAT;
 
 	if (dst.cc_update)
-		hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
-	hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
-	hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
-		  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
-		  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
-		  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
+		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
+	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
+	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
 
 	emit_dst(fpc, dst);
 	emit_src(fpc, 0, s0);
@@ -221,25 +221,25 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
 
 static void
 nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
-	    struct nv30_sreg dst, int mask,
-	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+	    struct nvfx_sreg dst, int mask,
+	    struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 
 	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
 
-	fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+	fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
 	fp->samplers |= (1 << unit);
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
-	struct nv30_sreg src;
+	struct nvfx_sreg src;
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv30_sr(NV30SR_INPUT,
+		src = nvfx_sr(NVFXSR_INPUT,
 			      fpc->attrib_map[fsrc->Register.Index]);
 		break;
 	case TGSI_FILE_CONSTANT:
@@ -250,7 +250,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 		src = fpc->imm[fsrc->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
+		src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index + 1);
 		if (fpc->high_temp < src.index)
 			fpc->high_temp = src.index;
 		break;
@@ -259,9 +259,9 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 	 */
 	case TGSI_FILE_OUTPUT:
 		if (fsrc->Register.Index == fpc->colour_id)
-			return nv30_sr(NV30SR_OUTPUT, 0);
+			return nvfx_sr(NVFXSR_OUTPUT, 0);
 		else
-			return nv30_sr(NV30SR_OUTPUT, 1);
+			return nvfx_sr(NVFXSR_OUTPUT, 1);
 		break;
 	default:
 		NOUVEAU_ERR("bad src file\n");
@@ -277,27 +277,27 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 	return src;
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
 	int idx;
 
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
 		if (fdst->Register.Index == fpc->colour_id)
-			return nv30_sr(NV30SR_OUTPUT, 0);
+			return nvfx_sr(NVFXSR_OUTPUT, 0);
 		else
-			return nv30_sr(NV30SR_OUTPUT, 1);
+			return nvfx_sr(NVFXSR_OUTPUT, 1);
 		break;
 	case TGSI_FILE_TEMPORARY:
 		idx = fdst->Register.Index + 1;
 		if (fpc->high_temp < idx)
 			fpc->high_temp = idx;
-		return nv30_sr(NV30SR_TEMP, idx);
+		return nvfx_sr(NVFXSR_TEMP, idx);
 	case TGSI_FILE_NULL:
-		return nv30_sr(NV30SR_NONE, 0);
+		return nvfx_sr(NVFXSR_NONE, 0);
 	default:
 		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
-		return nv30_sr(NV30SR_NONE, 0);
+		return nvfx_sr(NVFXSR_NONE, 0);
 	}
 }
 
@@ -315,10 +315,10 @@ tgsi_mask(uint tgsi)
 
 static boolean
 src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
-	       struct nv30_sreg *src)
+	       struct nvfx_sreg *src)
 {
-	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
-	struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc);
 	uint mask = 0;
 	uint c;
 
@@ -350,8 +350,8 @@ static boolean
 nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 				const struct tgsi_full_instruction *finst)
 {
-	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
-	struct nv30_sreg src[3], dst, tmp;
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg src[3], dst, tmp;
 	int mask, sat, unit = 0;
 	int ai = -1, ci = -1;
 	int i;
@@ -435,12 +435,12 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_CMP:
-		tmp = nv30_sr(NV30SR_NONE, 0);
+		tmp = nvfx_sr(NVFXSR_NONE, 0);
 		tmp.cc_update = 1;
 		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
-		dst.cc_test = NV30_VP_INST_COND_GE;
+		dst.cc_test = NVFX_VP_INST_COND_GE;
 		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
-		dst.cc_test = NV30_VP_INST_COND_LT;
+		dst.cc_test = NVFX_VP_INST_COND_LT;
 		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
 		break;
 	case TGSI_OPCODE_COS:
@@ -474,10 +474,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		arith(fpc, 0, KIL, none, 0, none, none, none);
 		break;
 	case TGSI_OPCODE_KIL:
-		dst = nv30_sr(NV30SR_NONE, 0);
+		dst = nvfx_sr(NVFXSR_NONE, 0);
 		dst.cc_update = 1;
 		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
-		dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
+		dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT;
 		arith(fpc, 0, KIL, dst, 0, none, none, none);
 		break;
 	case TGSI_OPCODE_LG2:
@@ -485,7 +485,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		break;
 //	case TGSI_OPCODE_LIT:
 	case TGSI_OPCODE_LRP:
-		arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
+		arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
 		break;
 	case TGSI_OPCODE_MAD:
 		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
@@ -503,7 +503,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_POW:
-		arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
+		arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_RCP:
 		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
@@ -512,10 +512,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		assert(0);
 		break;
 	case TGSI_OPCODE_RFL:
-		arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
+		arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_RSQ:
-		arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
+		arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
 		break;
 	case TGSI_OPCODE_SCS:
 		/* avoid overwriting the source */
@@ -590,25 +590,25 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
 
 	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
-		hw = NV30_FP_OP_INPUT_SRC_POSITION;
+		hw = NVFX_FP_OP_INPUT_SRC_POSITION;
 		break;
 	case TGSI_SEMANTIC_COLOR:
 		if (fdec->Semantic.Index == 0) {
-			hw = NV30_FP_OP_INPUT_SRC_COL0;
+			hw = NVFX_FP_OP_INPUT_SRC_COL0;
 		} else
 		if (fdec->Semantic.Index == 1) {
-			hw = NV30_FP_OP_INPUT_SRC_COL1;
+			hw = NVFX_FP_OP_INPUT_SRC_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
 			return FALSE;
 		}
 		break;
 	case TGSI_SEMANTIC_FOG:
-		hw = NV30_FP_OP_INPUT_SRC_FOGC;
+		hw = NVFX_FP_OP_INPUT_SRC_FOGC;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
 		if (fdec->Semantic.Index <= 7) {
-			hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+			hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
 						     Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
@@ -702,7 +702,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
 	tgsi_parse_free(&p);
 
 	/*if (++high_temp) {
-		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
+		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_temp; i++)
 			fpc->r_temp[i] = temp(fpc);
 		fpc->r_temps_discard = 0;
diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h
index dd3a36f78f3..f19efb5aa4d 100644
--- a/src/gallium/drivers/nv30/nv30_shader.h
+++ b/src/gallium/drivers/nv30/nv30_shader.h
@@ -72,14 +72,6 @@
 #define NV30_VP_INST_COND_TEST_ENABLE        (1<<14)
 #define NV30_VP_INST_COND_SHIFT          11
 #define NV30_VP_INST_COND_MASK          (0x07 << 11)
-#  define NV30_VP_INST_COND_FL  0 /* guess */  
-#  define NV30_VP_INST_COND_LT  1  
-#  define NV30_VP_INST_COND_EQ  2
-#  define NV30_VP_INST_COND_LE  3
-#  define NV30_VP_INST_COND_GT  4
-#  define NV30_VP_INST_COND_NE  5
-#  define NV30_VP_INST_COND_GE  6
-#  define NV30_VP_INST_COND_TR  7 /* guess */
 #define NV30_VP_INST_COND_SWZ_X_SHIFT        9
 #define NV30_VP_INST_COND_SWZ_X_MASK        (0x03 <<  9)
 #define NV30_VP_INST_COND_SWZ_Y_SHIFT        7
@@ -98,59 +90,12 @@
 /* DWORD 1 */
 #define NV30_VP_INST_SCA_OPCODEL_SHIFT        28
 #define NV30_VP_INST_SCA_OPCODEL_MASK        (0x0F << 28)
-#  define NV30_VP_INST_OP_NOP  0x00
-#  define NV30_VP_INST_OP_RCP  0x02
-#  define NV30_VP_INST_OP_RCC  0x03
-#  define NV30_VP_INST_OP_RSQ  0x04
-#  define NV30_VP_INST_OP_EXP  0x05
-#  define NV30_VP_INST_OP_LOG  0x06
-#  define NV30_VP_INST_OP_LIT  0x07
-#  define NV30_VP_INST_OP_BRA  0x09
-#  define NV30_VP_INST_OP_CAL  0x0B
-#  define NV30_VP_INST_OP_RET  0x0C
-#  define NV30_VP_INST_OP_LG2  0x0D
-#  define NV30_VP_INST_OP_EX2  0x0E
-#  define NV30_VP_INST_OP_SIN  0x0F
-#  define NV30_VP_INST_OP_COS  0x10
 #define NV30_VP_INST_VEC_OPCODE_SHIFT        23
 #define NV30_VP_INST_VEC_OPCODE_MASK        (0x1F << 23)
-#  define NV30_VP_INST_OP_NOPV  0x00
-#  define NV30_VP_INST_OP_MOV  0x01
-#  define NV30_VP_INST_OP_MUL  0x02
-#  define NV30_VP_INST_OP_ADD  0x03
-#  define NV30_VP_INST_OP_MAD  0x04
-#  define NV30_VP_INST_OP_DP3  0x05
-#  define NV30_VP_INST_OP_DP4  0x07
-#  define NV30_VP_INST_OP_DPH  0x06
-#  define NV30_VP_INST_OP_DST  0x08
-#  define NV30_VP_INST_OP_MIN  0x09
-#  define NV30_VP_INST_OP_MAX  0x0A
-#  define NV30_VP_INST_OP_SLT  0x0B
-#  define NV30_VP_INST_OP_SGE  0x0C
-#  define NV30_VP_INST_OP_ARL  0x0D
-#  define NV30_VP_INST_OP_FRC  0x0E
-#  define NV30_VP_INST_OP_FLR  0x0F
-#  define NV30_VP_INST_OP_SEQ  0x10
-#  define NV30_VP_INST_OP_SFL  0x11
-#  define NV30_VP_INST_OP_SGT  0x12
-#  define NV30_VP_INST_OP_SLE  0x13
-#  define NV30_VP_INST_OP_SNE  0x14
-#  define NV30_VP_INST_OP_STR  0x15
-#  define NV30_VP_INST_OP_SSG  0x16
-#  define NV30_VP_INST_OP_ARR  0x17
-#  define NV30_VP_INST_OP_ARA  0x18
 #define NV30_VP_INST_CONST_SRC_SHIFT        14
 #define NV30_VP_INST_CONST_SRC_MASK        (0xFF << 14)
 #define NV30_VP_INST_INPUT_SRC_SHIFT        9    /*NV20*/
 #define NV30_VP_INST_INPUT_SRC_MASK        (0x0F <<  9)  /*NV20*/
-#  define NV30_VP_INST_IN_POS  0    /* These seem to match the bindings specified in */
-#  define NV30_VP_INST_IN_WEIGHT  1    /* the ARB_v_p spec (2.14.3.1) */
-#  define NV30_VP_INST_IN_NORMAL  2    
-#  define NV30_VP_INST_IN_COL0  3    /* Should probably confirm them all though */
-#  define NV30_VP_INST_IN_COL1  4
-#  define NV30_VP_INST_IN_FOGC  5
-#  define NV30_VP_INST_IN_TC0  8
-#  define NV30_VP_INST_IN_TC(n)  (8+n)
 #define NV30_VP_INST_SRC0H_SHIFT        0    /*NV20*/
 #define NV30_VP_INST_SRC0H_MASK          (0x1FF << 0)  /*NV20*/
 
@@ -190,8 +135,6 @@
 #  define NV30_VP_INST_DEST_PSZ   6
 #  define NV30_VP_INST_DEST_TC(n)  (8+n)
 
-#define NV30_VP_INST_LAST                           (1 << 0)
-
 /* Useful to split the source selection regs into their pieces */
 #define NV30_VP_SRC0_HIGH_SHIFT                                                6
 #define NV30_VP_SRC0_HIGH_MASK                                        0x00007FC0
@@ -221,270 +164,6 @@
 #define NV30_VP_SRC_REG_TYPE_INPUT  2
 #define NV30_VP_SRC_REG_TYPE_CONST  3 /* guess */
 
-/*
- * Each fragment program opcode appears to be comprised of 4 32-bit values.
- *
- *   0 - Opcode, output reg/mask, ATTRIB source
- *   1 - Source 0
- *   2 - Source 1
- *   3 - Source 2
- *
- * There appears to be no special difference between result regs and temp regs.
- *     result.color == R0.xyzw
- *     result.depth == R1.z
- * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
- * otherwise it is set to 1.
- *
- * Constants are inserted directly after the instruction that uses them.
- * 
- * It appears that it's not possible to use two input registers in one
- * instruction as the input sourcing is done in the instruction dword
- * and not the source selection dwords.  As such instructions such as:
- * 
- *     ADD result.color, fragment.color, fragment.texcoord[0];
- *
- * must be split into two MOV's and then an ADD (nvidia does this) but
- * I'm not sure why it's not just one MOV and then source the second input
- * in the ADD instruction..
- *
- * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
- * negation requires multiplication with a const.
- *
- * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
- * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
- * is implemented simply by not writing to the relevant components of the destination.
- *
- * Conditional execution
- *   TODO
- * 
- * Non-native instructions:
- *   LIT
- *   LRP - MAD+MAD
- *   SUB - ADD, negate second source
- *   RSQ - LG2 + EX2
- *   POW - LG2 + MUL + EX2
- *   SCS - COS + SIN
- *   XPD
- */
-
-//== Opcode / Destination selection ==
-#define NV30_FP_OP_PROGRAM_END          (1 << 0)
-#define NV30_FP_OP_OUT_REG_SHIFT        1
-#define NV30_FP_OP_OUT_REG_MASK          (31 << 1)  /* uncertain */
-/* Needs to be set when writing outputs to get expected result.. */
-#define NV30_FP_OP_OUT_REG_HALF          (1 << 7)
-#define NV30_FP_OP_COND_WRITE_ENABLE        (1 << 8)
-#define NV30_FP_OP_OUTMASK_SHIFT        9
-#define NV30_FP_OP_OUTMASK_MASK          (0xF << 9)
-#  define NV30_FP_OP_OUT_X  (1<<9)
-#  define NV30_FP_OP_OUT_Y  (1<<10)
-#  define NV30_FP_OP_OUT_Z  (1<<11)
-#  define NV30_FP_OP_OUT_W  (1<<12)
-/* Uncertain about these, especially the input_src values.. it's possible that
- * they can be dynamically changed.
- */
-#define NV30_FP_OP_INPUT_SRC_SHIFT        13
-#define NV30_FP_OP_INPUT_SRC_MASK        (15 << 13)
-#  define NV30_FP_OP_INPUT_SRC_POSITION  0x0
-#  define NV30_FP_OP_INPUT_SRC_COL0  0x1
-#  define NV30_FP_OP_INPUT_SRC_COL1  0x2
-#  define NV30_FP_OP_INPUT_SRC_FOGC  0x3
-#  define NV30_FP_OP_INPUT_SRC_TC0    0x4
-#  define NV30_FP_OP_INPUT_SRC_TC(n)  (0x4 + n)
-#define NV30_FP_OP_TEX_UNIT_SHIFT        17
-#define NV30_FP_OP_TEX_UNIT_MASK        (0xF << 17) /* guess */
-#define NV30_FP_OP_PRECISION_SHIFT        22
-#define NV30_FP_OP_PRECISION_MASK        (3 << 22)
-#   define NV30_FP_PRECISION_FP32  0
-#   define NV30_FP_PRECISION_FP16  1
-#   define NV30_FP_PRECISION_FX12  2
-#define NV30_FP_OP_OPCODE_SHIFT          24
-#define NV30_FP_OP_OPCODE_MASK          (0x3F << 24)
-#  define NV30_FP_OP_OPCODE_NOP  0x00
-#  define NV30_FP_OP_OPCODE_MOV  0x01
-#  define NV30_FP_OP_OPCODE_MUL  0x02
-#  define NV30_FP_OP_OPCODE_ADD  0x03
-#  define NV30_FP_OP_OPCODE_MAD  0x04
-#  define NV30_FP_OP_OPCODE_DP3  0x05
-#  define NV30_FP_OP_OPCODE_DP4  0x06
-#  define NV30_FP_OP_OPCODE_DST  0x07
-#  define NV30_FP_OP_OPCODE_MIN  0x08
-#  define NV30_FP_OP_OPCODE_MAX  0x09
-#  define NV30_FP_OP_OPCODE_SLT  0x0A
-#  define NV30_FP_OP_OPCODE_SGE  0x0B
-#  define NV30_FP_OP_OPCODE_SLE  0x0C
-#  define NV30_FP_OP_OPCODE_SGT  0x0D
-#  define NV30_FP_OP_OPCODE_SNE  0x0E
-#  define NV30_FP_OP_OPCODE_SEQ  0x0F
-#  define NV30_FP_OP_OPCODE_FRC  0x10
-#  define NV30_FP_OP_OPCODE_FLR  0x11
-#  define NV30_FP_OP_OPCODE_KIL  0x12
-#  define NV30_FP_OP_OPCODE_PK4B   0x13
-#  define NV30_FP_OP_OPCODE_UP4B   0x14
-#  define NV30_FP_OP_OPCODE_DDX  0x15 /* can only write XY */
-#  define NV30_FP_OP_OPCODE_DDY  0x16 /* can only write XY */
-#  define NV30_FP_OP_OPCODE_TEX  0x17
-#  define NV30_FP_OP_OPCODE_TXP  0x18
-#  define NV30_FP_OP_OPCODE_TXD  0x19
-#  define NV30_FP_OP_OPCODE_RCP  0x1A
-#  define NV30_FP_OP_OPCODE_RSQ  0x1B
-#  define NV30_FP_OP_OPCODE_EX2  0x1C
-#  define NV30_FP_OP_OPCODE_LG2  0x1D
-#  define NV30_FP_OP_OPCODE_LIT  0x1E
-#  define NV30_FP_OP_OPCODE_LRP  0x1F
-#  define NV30_FP_OP_OPCODE_STR  0x20 
-#  define NV30_FP_OP_OPCODE_SFL  0x21
-#  define NV30_FP_OP_OPCODE_COS  0x22
-#  define NV30_FP_OP_OPCODE_SIN  0x23
-#  define NV30_FP_OP_OPCODE_PK2H   0x24
-#  define NV30_FP_OP_OPCODE_UP2H   0x25
-#  define NV30_FP_OP_OPCODE_POW  0x26
-#  define NV30_FP_OP_OPCODE_PK4UB  0x27
-#  define NV30_FP_OP_OPCODE_UP4UB  0x28
-#  define NV30_FP_OP_OPCODE_PK2US  0x29
-#  define NV30_FP_OP_OPCODE_UP2US  0x2A
-#  define NV30_FP_OP_OPCODE_DP2A   0x2E
-#  define NV30_FP_OP_OPCODE_TXB  0x31
-#  define NV30_FP_OP_OPCODE_RFL  0x36
-#  define NV30_FP_OP_OPCODE_DIV  0x3A
-#define NV30_FP_OP_OUT_SAT          (1 << 31)
-
-/* high order bits of SRC0 */
-#define NV30_FP_OP_OUT_ABS          (1 << 29)
-#define NV30_FP_OP_COND_SWZ_W_SHIFT        27
-#define NV30_FP_OP_COND_SWZ_W_MASK        (3 << 27)
-#define NV30_FP_OP_COND_SWZ_Z_SHIFT        25
-#define NV30_FP_OP_COND_SWZ_Z_MASK        (3 << 25)
-#define NV30_FP_OP_COND_SWZ_Y_SHIFT        23
-#define NV30_FP_OP_COND_SWZ_Y_MASK        (3 << 23)
-#define NV30_FP_OP_COND_SWZ_X_SHIFT        21
-#define NV30_FP_OP_COND_SWZ_X_MASK        (3 << 21)
-#define NV30_FP_OP_COND_SWZ_ALL_SHIFT        21
-#define NV30_FP_OP_COND_SWZ_ALL_MASK        (0xFF << 21)
-#define NV30_FP_OP_COND_SHIFT          18
-#define NV30_FP_OP_COND_MASK          (0x07 << 18)
-#  define NV30_FP_OP_COND_FL  0
-#  define NV30_FP_OP_COND_LT  1
-#  define NV30_FP_OP_COND_EQ  2
-#  define NV30_FP_OP_COND_LE  3
-#  define NV30_FP_OP_COND_GT  4
-#  define NV30_FP_OP_COND_NE  5
-#  define NV30_FP_OP_COND_GE  6
-#  define NV30_FP_OP_COND_TR  7
-
-/* high order bits of SRC1 */
-#define NV30_FP_OP_DST_SCALE_SHIFT        28
-#define NV30_FP_OP_DST_SCALE_MASK        (3 << 28)
-#define NV30_FP_OP_DST_SCALE_1X                                                0
-#define NV30_FP_OP_DST_SCALE_2X                                                1
-#define NV30_FP_OP_DST_SCALE_4X                                                2
-#define NV30_FP_OP_DST_SCALE_8X                                                3
-#define NV30_FP_OP_DST_SCALE_INV_2X                                            5
-#define NV30_FP_OP_DST_SCALE_INV_4X                                            6
-#define NV30_FP_OP_DST_SCALE_INV_8X                                            7
-
-
-/* high order bits of SRC2 */
-#define NV30_FP_OP_INDEX_INPUT          (1 << 30)
-
-//== Register selection ==
-#define NV30_FP_REG_TYPE_SHIFT          0
-#define NV30_FP_REG_TYPE_MASK          (3 << 0)
-#  define NV30_FP_REG_TYPE_TEMP  0
-#  define NV30_FP_REG_TYPE_INPUT  1
-#  define NV30_FP_REG_TYPE_CONST  2
-#define NV30_FP_REG_SRC_SHIFT          2 /* uncertain */
-#define NV30_FP_REG_SRC_MASK          (31 << 2)
-#define NV30_FP_REG_SRC_HALF          (1 << 8)
-#define NV30_FP_REG_SWZ_ALL_SHIFT        9
-#define NV30_FP_REG_SWZ_ALL_MASK        (255 << 9)
-#define NV30_FP_REG_SWZ_X_SHIFT          9
-#define NV30_FP_REG_SWZ_X_MASK          (3 << 9)
-#define NV30_FP_REG_SWZ_Y_SHIFT          11
-#define NV30_FP_REG_SWZ_Y_MASK          (3 << 11)
-#define NV30_FP_REG_SWZ_Z_SHIFT          13
-#define NV30_FP_REG_SWZ_Z_MASK          (3 << 13)
-#define NV30_FP_REG_SWZ_W_SHIFT          15
-#define NV30_FP_REG_SWZ_W_MASK          (3 << 15)
-#  define NV30_FP_SWIZZLE_X  0
-#  define NV30_FP_SWIZZLE_Y  1
-#  define NV30_FP_SWIZZLE_Z  2
-#  define NV30_FP_SWIZZLE_W  3
-#define NV30_FP_REG_NEGATE          (1 << 17)
-
-#define NV30SR_NONE	0
-#define NV30SR_OUTPUT	1
-#define NV30SR_INPUT	2
-#define NV30SR_TEMP	3
-#define NV30SR_CONST	4
-
-struct nv30_sreg {
-	int type;
-	int index;
-
-	int dst_scale;
-
-	int negate;
-	int abs;
-	int swz[4];
-
-	int cc_update;
-	int cc_update_reg;
-	int cc_test;
-	int cc_test_reg;
-	int cc_swz[4];
-};
-
-static INLINE struct nv30_sreg
-nv30_sr(int type, int index)
-{
-	struct nv30_sreg temp = {
-		.type = type,
-		.index = index,
-		.dst_scale = DEF_SCALE,
-		.abs = 0,
-		.negate = 0,
-		.swz = { 0, 1, 2, 3 },
-		.cc_update = 0,
-		.cc_update_reg = 0,
-		.cc_test = DEF_CTEST,
-		.cc_test_reg = 0,
-		.cc_swz = { 0, 1, 2, 3 },
-	};
-	return temp;
-}
-
-static INLINE struct nv30_sreg
-nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w)
-{
-	struct nv30_sreg dst = src;
-
-	dst.swz[SWZ_X] = src.swz[x];
-	dst.swz[SWZ_Y] = src.swz[y];
-	dst.swz[SWZ_Z] = src.swz[z];
-	dst.swz[SWZ_W] = src.swz[w];
-	return dst;
-}
-
-static INLINE struct nv30_sreg
-nv30_sr_neg(struct nv30_sreg src)
-{
-	src.negate = !src.negate;
-	return src;
-}
-
-static INLINE struct nv30_sreg
-nv30_sr_abs(struct nv30_sreg src)
-{
-	src.abs = 1;
-	return src;
-}
-
-static INLINE struct nv30_sreg
-nv30_sr_scale(struct nv30_sreg src, int scale)
-{
-	src.dst_scale = scale;
-	return src;
-}
+#include "nvfx_shader.h"
 
 #endif
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index cf910e34b11..ec6d63889bc 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -35,9 +35,9 @@
 #define DEF_CTEST 0
 #include "nv30_shader.h"
 
-#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv30_sr_neg((s))
-#define abs(s) nv30_sr_abs((s))
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
 
 struct nv30_vpc {
 	struct nvfx_vertex_program *vp;
@@ -49,21 +49,21 @@ struct nv30_vpc {
 	int high_temp;
 	int temp_temp_count;
 
-	struct nv30_sreg *imm;
+	struct nvfx_sreg *imm;
 	unsigned nr_imm;
 };
 
-static struct nv30_sreg
+static struct nvfx_sreg
 temp(struct nv30_vpc *vpc)
 {
 	int idx;
 
 	idx  = vpc->temp_temp_count++;
 	idx += vpc->high_temp + 1;
-	return nv30_sr(NV30SR_TEMP, idx);
+	return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
-static struct nv30_sreg
+static struct nvfx_sreg
 constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
@@ -73,7 +73,7 @@ constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
 	if (pipe >= 0) {
 		for (idx = 0; idx < vp->nr_consts; idx++) {
 			if (vp->consts[idx].index == pipe)
-				return nv30_sr(NV30SR_CONST, idx);
+				return nvfx_sr(NVFXSR_CONST, idx);
 		}
 	}
 
@@ -86,37 +86,37 @@ constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)
 	vpd->value[1] = y;
 	vpd->value[2] = z;
 	vpd->value[3] = w;
-	return nv30_sr(NV30SR_CONST, idx);
+	return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-	nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+	nv30_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))
 
 static void
-emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
+emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 	uint32_t sr = 0;
 
 	switch (src.type) {
-	case NV30SR_TEMP:
+	case NVFXSR_TEMP:
 		sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);
 		sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);
 		break;
-	case NV30SR_INPUT:
+	case NVFXSR_INPUT:
 		sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
 		       NV30_VP_SRC_REG_TYPE_SHIFT);
 		vp->ir |= (1 << src.index);
 		hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);
 		break;
-	case NV30SR_CONST:
+	case NVFXSR_CONST:
 		sr |= (NV30_VP_SRC_REG_TYPE_CONST <<
 		       NV30_VP_SRC_REG_TYPE_SHIFT);
 		assert(vpc->vpi->const_index == -1 ||
 		       vpc->vpi->const_index == src.index);
 		vpc->vpi->const_index = src.index;
 		break;
-	case NV30SR_NONE:
+	case NVFXSR_NONE:
 		sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<
 		       NV30_VP_SRC_REG_TYPE_SHIFT);
 		break;
@@ -164,15 +164,15 @@ emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)
 }
 
 static void
-emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
+emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 
 	switch (dst.type) {
-	case NV30SR_TEMP:
+	case NVFXSR_TEMP:
 		hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
 		break;
-	case NV30SR_OUTPUT:
+	case NVFXSR_OUTPUT:
 		switch (dst.index) {
 		case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
 		case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
@@ -207,9 +207,9 @@ emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)
 
 static void
 nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
-	      struct nv30_sreg dst, int mask,
-	      struct nv30_sreg s0, struct nv30_sreg s1,
-	      struct nv30_sreg s2)
+	      struct nvfx_sreg dst, int mask,
+	      struct nvfx_sreg s0, struct nvfx_sreg s1,
+	      struct nvfx_sreg s2)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 	uint32_t *hw;
@@ -221,7 +221,7 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
 
 	hw = vpc->vpi->data;
 
-	hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
+	hw[0] |= (NVFX_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);
 	hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |
 		  (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |
 		  (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) |
@@ -231,7 +231,7 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
 //	hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;
 //	hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT);
 
-	if (dst.type == NV30SR_OUTPUT) {
+	if (dst.type == NVFXSR_OUTPUT) {
 		if (slot)
 			hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
 		else
@@ -249,13 +249,13 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,
 	emit_src(vpc, hw, 2, s2);
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
-	struct nv30_sreg src;
+	struct nvfx_sreg src;
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index);
+		src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);
 		break;
 	case TGSI_FILE_CONSTANT:
 		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
@@ -266,7 +266,7 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	case TGSI_FILE_TEMPORARY:
 		if (vpc->high_temp < fsrc->Register.Index)
 			vpc->high_temp = fsrc->Register.Index;
-		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index);
+		src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index);
 		break;
 	default:
 		NOUVEAU_ERR("bad src file\n");
@@ -282,18 +282,18 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	return src;
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
-	struct nv30_sreg dst;
+	struct nvfx_sreg dst;
 
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
-		dst = nv30_sr(NV30SR_OUTPUT,
+		dst = nvfx_sr(NVFXSR_OUTPUT,
 			      vpc->output_map[fdst->Register.Index]);
 
 		break;
 	case TGSI_FILE_TEMPORARY:
-		dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index);
+		dst = nvfx_sr(NVFXSR_TEMP, fdst->Register.Index);
 		if (vpc->high_temp < dst.index)
 			vpc->high_temp = dst.index;
 		break;
@@ -321,8 +321,8 @@ static boolean
 nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 				const struct tgsi_full_instruction *finst)
 {
-	struct nv30_sreg src[3], dst, tmp;
-	struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
+	struct nvfx_sreg src[3], dst, tmp;
+	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
 	int mask;
 	int ai = -1, ci = -1;
 	int i;
@@ -351,7 +351,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -365,7 +365,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -383,96 +383,96 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
-		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+		arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);
 		break;
 	case TGSI_OPCODE_ADD:
-		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+		arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);
 		break;
 	case TGSI_OPCODE_ARL:
-		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+		arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_DP3:
-		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DP4:
-		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DPH:
-		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DST:
-		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_EX2:
-		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_EXP:
-		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_FLR:
-		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+		arith(vpc, VEC, FLR, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_FRC:
-		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+		arith(vpc, VEC, FRC, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_LG2:
-		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_LIT:
-		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_LOG:
-		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_MAD:
-		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
 		break;
 	case TGSI_OPCODE_MAX:
-		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_MIN:
-		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_MOV:
-		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+		arith(vpc, VEC, MOV, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_MUL:
-		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_POW:
 		tmp = temp(vpc);
-		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+		arith(vpc, SCA, LG2, tmp, MASK_X, none, none,
 		      swz(src[0], X, X, X, X));
-		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+		arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
 		      swz(src[1], X, X, X, X), none);
-		arith(vpc, 1, OP_EX2, dst, mask, none, none,
+		arith(vpc, SCA, EX2, dst, mask, none, none,
 		      swz(tmp, X, X, X, X));
 		break;
 	case TGSI_OPCODE_RCP:
-		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_RET:
 		break;
 	case TGSI_OPCODE_RSQ:
-		arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, RSQ, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_SGE:
-		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SGT:
-		arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SLT:
-		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SUB:
-		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+		arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
 		break;
 	case TGSI_OPCODE_XPD:
 		tmp = temp(vpc);
-		arith(vpc, 0, OP_MUL, tmp, mask,
+		arith(vpc, VEC, MUL, tmp, mask,
 		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
-		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+		arith(vpc, VEC, MAD, dst, (mask & ~MASK_W),
 		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
 		      neg(tmp));
 		break;
@@ -564,7 +564,7 @@ nv30_vertprog_prepare(struct nv30_vpc *vpc)
 	tgsi_parse_free(&p);
 
 	if (nr_imm) {
-		vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg));
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));
 		assert(vpc->imm);
 	}
 
@@ -639,7 +639,7 @@ nv30_vertprog_translate(struct nvfx_context *nvfx,
 		}
 	}
 
-	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST;
+	vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
 	vp->translated = TRUE;
 out_err:
 	tgsi_parse_free(&parse);
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index 87d2689d54b..4ed87779fd6 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -8,7 +8,7 @@
 #include "draw/draw_pipe.h"
 
 #include "nv40_context.h"
-#define NV40_SHADER_NO_FUCKEDNESS
+#define NVFX_SHADER_NO_FUCKEDNESS
 #include "nv40_shader.h"
 
 /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 2a0ab0cf310..e044f367a0b 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -18,14 +18,14 @@
 #define MASK_Z 4
 #define MASK_W 8
 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
-#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
-#define DEF_CTEST NV40_FP_OP_COND_TR
-#include "nv40_shader.h"
+#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NVFX_FP_OP_COND_TR
+#include "nvfx_shader.h"
 
-#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv40_sr_neg((s))
-#define abs(s) nv40_sr_abs((s))
-#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
+#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)
 
 #define MAX_CONSTS 128
 #define MAX_IMM 32
@@ -36,8 +36,8 @@ struct nv40_fpc {
 
 	unsigned r_temps;
 	unsigned r_temps_discard;
-	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
-	struct nv40_sreg *r_temp;
+	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+	struct nvfx_sreg *r_temp;
 
 	int num_regs;
 
@@ -50,11 +50,11 @@ struct nv40_fpc {
 	} consts[MAX_CONSTS];
 	int nr_consts;
 
-	struct nv40_sreg imm[MAX_IMM];
+	struct nvfx_sreg imm[MAX_IMM];
 	unsigned nr_imm;
 };
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 temp(struct nv40_fpc *fpc)
 {
 	int idx = ffs(~fpc->r_temps) - 1;
@@ -62,12 +62,12 @@ temp(struct nv40_fpc *fpc)
 	if (idx < 0) {
 		NOUVEAU_ERR("out of temps!!\n");
 		assert(0);
-		return nv40_sr(NV40SR_TEMP, 0);
+		return nvfx_sr(NVFXSR_TEMP, 0);
 	}
 
 	fpc->r_temps |= (1 << idx);
 	fpc->r_temps_discard |= (1 << idx);
-	return nv40_sr(NV40SR_TEMP, idx);
+	return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
 static INLINE void
@@ -77,7 +77,7 @@ release_temps(struct nv40_fpc *fpc)
 	fpc->r_temps_discard = 0;
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 constant(struct nv40_fpc *fpc, int pipe, float vals[4])
 {
 	int idx;
@@ -89,14 +89,14 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4])
 	fpc->consts[idx].pipe = pipe;
 	if (pipe == -1)
 		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
-	return nv40_sr(NV40SR_CONST, idx);
+	return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-	nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
+	nv40_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
 			(d), (m), (s0), (s1), (s2))
 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
-	nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
+	nv40_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
 		    (d), (m), (s0), none, none)
 
 static void
@@ -109,25 +109,25 @@ grow_insns(struct nv40_fpc *fpc, int size)
 }
 
 static void
-emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
+emit_src(struct nv40_fpc *fpc, int pos, struct nvfx_sreg src)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw = &fp->insn[fpc->inst_offset];
 	uint32_t sr = 0;
 
 	switch (src.type) {
-	case NV40SR_INPUT:
-		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
-		hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
+	case NVFXSR_INPUT:
+		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
 		break;
-	case NV40SR_OUTPUT:
-		sr |= NV40_FP_REG_SRC_HALF;
+	case NVFXSR_OUTPUT:
+		sr |= NVFX_FP_REG_SRC_HALF;
 		/* fall-through */
-	case NV40SR_TEMP:
-		sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
-		sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
+	case NVFXSR_TEMP:
+		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
+		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
 		break;
-	case NV40SR_CONST:
+	case NVFXSR_CONST:
 		if (!fpc->have_const) {
 			grow_insns(fpc, 4);
 			fpc->have_const = 1;
@@ -149,61 +149,61 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
 				sizeof(uint32_t) * 4);
 		}
 
-		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
 		break;
-	case NV40SR_NONE:
-		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
+	case NVFXSR_NONE:
+		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
 		break;
 	default:
 		assert(0);
 	}
 
 	if (src.negate)
-		sr |= NV40_FP_REG_NEGATE;
+		sr |= NVFX_FP_REG_NEGATE;
 
 	if (src.abs)
 		hw[1] |= (1 << (29 + pos));
 
-	sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
-	       (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
-	       (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
-	       (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
+	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
+	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
+	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
+	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
 
 	hw[pos + 1] |= sr;
 }
 
 static void
-emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
+emit_dst(struct nv40_fpc *fpc, struct nvfx_sreg dst)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw = &fp->insn[fpc->inst_offset];
 
 	switch (dst.type) {
-	case NV40SR_TEMP:
+	case NVFXSR_TEMP:
 		if (fpc->num_regs < (dst.index + 1))
 			fpc->num_regs = dst.index + 1;
 		break;
-	case NV40SR_OUTPUT:
+	case NVFXSR_OUTPUT:
 		if (dst.index == 1) {
 			fp->fp_control |= 0xe;
 		} else {
-			hw[0] |= NV40_FP_OP_OUT_REG_HALF;
+			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
 		}
 		break;
-	case NV40SR_NONE:
+	case NVFXSR_NONE:
 		hw[0] |= (1 << 30);
 		break;
 	default:
 		assert(0);
 	}
 
-	hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
+	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
 }
 
 static void
 nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
-	      struct nv40_sreg dst, int mask,
-	      struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+	      struct nvfx_sreg dst, int mask,
+	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 	uint32_t *hw;
@@ -214,22 +214,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
 	hw = &fp->insn[fpc->inst_offset];
 	memset(hw, 0, sizeof(uint32_t) * 4);
 
-	if (op == NV40_FP_OP_OPCODE_KIL)
+	if (op == NVFX_FP_OP_OPCODE_KIL)
 		fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
-	hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
-	hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
-	hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
+	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
+	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
+	hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
 
 	if (sat)
-		hw[0] |= NV40_FP_OP_OUT_SAT;
+		hw[0] |= NVFX_FP_OP_OUT_SAT;
 
 	if (dst.cc_update)
-		hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
-	hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
-	hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
-		  (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
-		  (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
-		  (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
+		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
+	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
+	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
 
 	emit_dst(fpc, dst);
 	emit_src(fpc, 0, s0);
@@ -239,25 +239,25 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
 
 static void
 nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
-	    struct nv40_sreg dst, int mask,
-	    struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+	    struct nvfx_sreg dst, int mask,
+	    struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
 	struct nvfx_fragment_program *fp = fpc->fp;
 
 	nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
 
-	fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
+	fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
 	fp->samplers |= (1 << unit);
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
-	struct nv40_sreg src;
+	struct nvfx_sreg src;
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv40_sr(NV40SR_INPUT,
+		src = nvfx_sr(NVFXSR_INPUT,
 			      fpc->attrib_map[fsrc->Register.Index]);
 		break;
 	case TGSI_FILE_CONSTANT:
@@ -288,7 +288,7 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 	return src;
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
@@ -296,10 +296,10 @@ tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
 	case TGSI_FILE_TEMPORARY:
 		return fpc->r_temp[fdst->Register.Index];
 	case TGSI_FILE_NULL:
-		return nv40_sr(NV40SR_NONE, 0);
+		return nvfx_sr(NVFXSR_NONE, 0);
 	default:
 		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
-		return nv40_sr(NV40SR_NONE, 0);
+		return nvfx_sr(NVFXSR_NONE, 0);
 	}
 }
 
@@ -317,10 +317,10 @@ tgsi_mask(uint tgsi)
 
 static boolean
 src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
-	       struct nv40_sreg *src)
+	       struct nvfx_sreg *src)
 {
-	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
-	struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc);
 	uint mask = 0;
 	uint c;
 
@@ -352,8 +352,8 @@ static boolean
 nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 				const struct tgsi_full_instruction *finst)
 {
-	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
-	struct nv40_sreg src[3], dst, tmp;
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg src[3], dst, tmp;
 	int mask, sat, unit;
 	int ai = -1, ci = -1, ii = -1;
 	int i;
@@ -445,12 +445,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_CMP:
-		tmp = nv40_sr(NV40SR_NONE, 0);
+		tmp = nvfx_sr(NVFXSR_NONE, 0);
 		tmp.cc_update = 1;
 		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
-		dst.cc_test = NV40_VP_INST_COND_GE;
+		dst.cc_test = NVFX_VP_INST_COND_GE;
 		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
-		dst.cc_test = NV40_VP_INST_COND_LT;
+		dst.cc_test = NVFX_VP_INST_COND_LT;
 		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
 		break;
 	case TGSI_OPCODE_COS:
@@ -512,10 +512,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		arith(fpc, 0, KIL, none, 0, none, none, none);
 		break;
 	case TGSI_OPCODE_KIL:
-		dst = nv40_sr(NV40SR_NONE, 0);
+		dst = nvfx_sr(NVFXSR_NONE, 0);
 		dst.cc_update = 1;
 		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
-		dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
+		dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT;
 		arith(fpc, 0, KIL, dst, 0, none, none, none);
 		break;
 	case TGSI_OPCODE_LG2:
@@ -662,25 +662,25 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
 
 	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
-		hw = NV40_FP_OP_INPUT_SRC_POSITION;
+		hw = NVFX_FP_OP_INPUT_SRC_POSITION;
 		break;
 	case TGSI_SEMANTIC_COLOR:
 		if (fdec->Semantic.Index == 0) {
-			hw = NV40_FP_OP_INPUT_SRC_COL0;
+			hw = NVFX_FP_OP_INPUT_SRC_COL0;
 		} else
 		if (fdec->Semantic.Index == 1) {
-			hw = NV40_FP_OP_INPUT_SRC_COL1;
+			hw = NVFX_FP_OP_INPUT_SRC_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
 			return FALSE;
 		}
 		break;
 	case TGSI_SEMANTIC_FOG:
-		hw = NV40_FP_OP_INPUT_SRC_FOGC;
+		hw = NVFX_FP_OP_INPUT_SRC_FOGC;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
 		if (fdec->Semantic.Index <= 7) {
-			hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+			hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
 						     Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
@@ -723,7 +723,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
 		return FALSE;
 	}
 
-	fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+	fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
 	fpc->r_temps |= (1 << hw);
 	return TRUE;
 }
@@ -787,7 +787,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
 	tgsi_parse_free(&p);
 
 	if (++high_temp) {
-		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_temp; i++)
 			fpc->r_temp[i] = temp(fpc);
 		fpc->r_temps_discard = 0;
diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h
index 854dccf5486..8d28137e9de 100644
--- a/src/gallium/drivers/nv40/nv40_shader.h
+++ b/src/gallium/drivers/nv40/nv40_shader.h
@@ -48,14 +48,6 @@
 #define NV40_VP_INST_COND_TEST_ENABLE                                  (1 << 13)
 #define NV40_VP_INST_COND_SHIFT                                               10
 #define NV40_VP_INST_COND_MASK                                       (0x7 << 10)
-#    define NV40_VP_INST_COND_FL                                               0
-#    define NV40_VP_INST_COND_LT                                               1
-#    define NV40_VP_INST_COND_EQ                                               2
-#    define NV40_VP_INST_COND_LE                                               3
-#    define NV40_VP_INST_COND_GT                                               4
-#    define NV40_VP_INST_COND_NE                                               5
-#    define NV40_VP_INST_COND_GE                                               6
-#    define NV40_VP_INST_COND_TR                                               7
 #define NV40_VP_INST_COND_SWZ_X_SHIFT                                          8
 #define NV40_VP_INST_COND_SWZ_X_MASK                                    (3 << 8)
 #define NV40_VP_INST_COND_SWZ_Y_SHIFT                                          6
@@ -84,63 +76,12 @@
 /* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
 #define NV40_VP_INST_VEC_OPCODE_SHIFT                                         22
 #define NV40_VP_INST_VEC_OPCODE_MASK                                (0x1F << 22)
-#    define NV40_VP_INST_OP_NOP                                             0x00
-#    define NV40_VP_INST_OP_MOV                                             0x01
-#    define NV40_VP_INST_OP_MUL                                             0x02
-#    define NV40_VP_INST_OP_ADD                                             0x03
-#    define NV40_VP_INST_OP_MAD                                             0x04
-#    define NV40_VP_INST_OP_DP3                                             0x05
-#    define NV40_VP_INST_OP_DPH                                             0x06
-#    define NV40_VP_INST_OP_DP4                                             0x07
-#    define NV40_VP_INST_OP_DST                                             0x08
-#    define NV40_VP_INST_OP_MIN                                             0x09
-#    define NV40_VP_INST_OP_MAX                                             0x0A
-#    define NV40_VP_INST_OP_SLT                                             0x0B
-#    define NV40_VP_INST_OP_SGE                                             0x0C
-#    define NV40_VP_INST_OP_ARL                                             0x0D
-#    define NV40_VP_INST_OP_FRC                                             0x0E
-#    define NV40_VP_INST_OP_FLR                                             0x0F
-#    define NV40_VP_INST_OP_SEQ                                             0x10
-#    define NV40_VP_INST_OP_SFL                                             0x11
-#    define NV40_VP_INST_OP_SGT                                             0x12
-#    define NV40_VP_INST_OP_SLE                                             0x13
-#    define NV40_VP_INST_OP_SNE                                             0x14
-#    define NV40_VP_INST_OP_STR                                             0x15
-#    define NV40_VP_INST_OP_SSG                                             0x16
-#    define NV40_VP_INST_OP_ARR                                             0x17
-#    define NV40_VP_INST_OP_ARA                                             0x18
-#    define NV40_VP_INST_OP_TXL                                             0x19
 #define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27
 #define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27)
-#    define NV40_VP_INST_OP_NOP                                             0x00
-#    define NV40_VP_INST_OP_MOV                                             0x01
-#    define NV40_VP_INST_OP_RCP                                             0x02
-#    define NV40_VP_INST_OP_RCC                                             0x03
-#    define NV40_VP_INST_OP_RSQ                                             0x04
-#    define NV40_VP_INST_OP_EXP                                             0x05
-#    define NV40_VP_INST_OP_LOG                                             0x06
-#    define NV40_VP_INST_OP_LIT                                             0x07
-#    define NV40_VP_INST_OP_BRA                                             0x09
-#    define NV40_VP_INST_OP_CAL                                             0x0B
-#    define NV40_VP_INST_OP_RET                                             0x0C
-#    define NV40_VP_INST_OP_LG2                                             0x0D
-#    define NV40_VP_INST_OP_EX2                                             0x0E
-#    define NV40_VP_INST_OP_SIN                                             0x0F
-#    define NV40_VP_INST_OP_COS                                             0x10
-#    define NV40_VP_INST_OP_PUSHA                                           0x13
-#    define NV40_VP_INST_OP_POPA                                            0x14
 #define NV40_VP_INST_CONST_SRC_SHIFT                                          12
 #define NV40_VP_INST_CONST_SRC_MASK                                 (0xFF << 12)
 #define NV40_VP_INST_INPUT_SRC_SHIFT                                           8
 #define NV40_VP_INST_INPUT_SRC_MASK                                  (0x0F << 8)
-#    define NV40_VP_INST_IN_POS                                                0
-#    define NV40_VP_INST_IN_WEIGHT                                             1
-#    define NV40_VP_INST_IN_NORMAL                                             2
-#    define NV40_VP_INST_IN_COL0                                               3
-#    define NV40_VP_INST_IN_COL1                                               4
-#    define NV40_VP_INST_IN_FOGC                                               5
-#    define NV40_VP_INST_IN_TC0                                                8
-#    define NV40_VP_INST_IN_TC(n)                                          (8+n)
 #define NV40_VP_INST_SRC0H_SHIFT                                               0
 #define NV40_VP_INST_SRC0H_MASK                                      (0xFF << 0)
 #define NV40_VP_INST1_KNOWN ( \
@@ -194,7 +135,6 @@
 #    define NV40_VP_INST_DEST_TC(n)                                        (7+n)
 #    define NV40_VP_INST_DEST_TEMP                                          0x1F
 #define NV40_VP_INST_INDEX_CONST                                        (1 << 1)
-#define NV40_VP_INST_LAST                                               (1 << 0)
 #define NV40_VP_INST3_KNOWN ( \
                 NV40_VP_INST_SRC2L_MASK |\
                 NV40_VP_INST_SCA_WRITEMASK_MASK |\
@@ -232,325 +172,7 @@
 #    define NV40_VP_SRC_REG_TYPE_INPUT                                         2
 #    define NV40_VP_SRC_REG_TYPE_CONST                                         3
 
+#include "nvfx_shader.h"
 
-/*
- * Each fragment program opcode appears to be comprised of 4 32-bit values.
- *
- *         0 - Opcode, output reg/mask, ATTRIB source
- *         1 - Source 0
- *         2 - Source 1
- *         3 - Source 2
- *
- * There appears to be no special difference between result regs and temp regs.
- *                 result.color == R0.xyzw
- *                 result.depth == R1.z
- * When the fragprog contains instructions to write depth,
- * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
- *
- * Constants are inserted directly after the instruction that uses them.
- * 
- * It appears that it's not possible to use two input registers in one
- * instruction as the input sourcing is done in the instruction dword
- * and not the source selection dwords.  As such instructions such as:
- * 
- *                 ADD result.color, fragment.color, fragment.texcoord[0];
- *
- * must be split into two MOV's and then an ADD (nvidia does this) but
- * I'm not sure why it's not just one MOV and then source the second input
- * in the ADD instruction..
- *
- * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
- * negation requires multiplication with a const.
- *
- * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
- * SWIZZLE_ONE.
- *
- * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
- * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
- * of the destination.
- *
- * Looping
- *   Loops appear to be fairly expensive on NV40 at least, the proprietary
- *   driver goes to a lot of effort to avoid using the native looping
- *   instructions.  If the total number of *executed* instructions between
- *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
- *   The maximum loop count is 255.
- *
- * Conditional execution
- *   TODO
- * 
- * Non-native instructions:
- *         LIT
- *         LRP - MAD+MAD
- *         SUB - ADD, negate second source
- *         RSQ - LG2 + EX2
- *         POW - LG2 + MUL + EX2
- *         SCS - COS + SIN
- *         XPD
- *         DP2 - MUL + ADD
- *         NRM
- */
-
-//== Opcode / Destination selection ==
-#define NV40_FP_OP_PROGRAM_END                                          (1 << 0)
-#define NV40_FP_OP_OUT_REG_SHIFT                                               1
-#define NV40_FP_OP_OUT_REG_MASK                                        (63 << 1)
-/* Needs to be set when writing outputs to get expected result.. */
-#define NV40_FP_OP_OUT_REG_HALF                                         (1 << 7)
-#define NV40_FP_OP_COND_WRITE_ENABLE                                    (1 << 8)
-#define NV40_FP_OP_OUTMASK_SHIFT                                               9
-#define NV40_FP_OP_OUTMASK_MASK                                       (0xF << 9)
-#    define NV40_FP_OP_OUT_X                                            (1 << 9)
-#    define NV40_FP_OP_OUT_Y                                            (1 <<10)
-#    define NV40_FP_OP_OUT_Z                                            (1 <<11)
-#    define NV40_FP_OP_OUT_W                                            (1 <<12)
-/* Uncertain about these, especially the input_src values.. it's possible that
- * they can be dynamically changed.
- */
-#define NV40_FP_OP_INPUT_SRC_SHIFT                                            13
-#define NV40_FP_OP_INPUT_SRC_MASK                                     (15 << 13)
-#    define NV40_FP_OP_INPUT_SRC_POSITION                                    0x0
-#    define NV40_FP_OP_INPUT_SRC_COL0                                        0x1
-#    define NV40_FP_OP_INPUT_SRC_COL1                                        0x2
-#    define NV40_FP_OP_INPUT_SRC_FOGC                                        0x3
-#    define NV40_FP_OP_INPUT_SRC_TC0                                         0x4
-#    define NV40_FP_OP_INPUT_SRC_TC(n)                                 (0x4 + n)
-#    define NV40_FP_OP_INPUT_SRC_FACING                                      0xE
-#define NV40_FP_OP_TEX_UNIT_SHIFT                                             17
-#define NV40_FP_OP_TEX_UNIT_MASK                                     (0xF << 17)
-#define NV40_FP_OP_PRECISION_SHIFT                                            22
-#define NV40_FP_OP_PRECISION_MASK                                      (3 << 22)
-#   define NV40_FP_PRECISION_FP32                                              0
-#   define NV40_FP_PRECISION_FP16                                              1
-#   define NV40_FP_PRECISION_FX12                                              2
-#define NV40_FP_OP_OPCODE_SHIFT                                               24
-#define NV40_FP_OP_OPCODE_MASK                                      (0x3F << 24)
-#        define NV40_FP_OP_OPCODE_NOP                                       0x00
-#        define NV40_FP_OP_OPCODE_MOV                                       0x01
-#        define NV40_FP_OP_OPCODE_MUL                                       0x02
-#        define NV40_FP_OP_OPCODE_ADD                                       0x03
-#        define NV40_FP_OP_OPCODE_MAD                                       0x04
-#        define NV40_FP_OP_OPCODE_DP3                                       0x05
-#        define NV40_FP_OP_OPCODE_DP4                                       0x06
-#        define NV40_FP_OP_OPCODE_DST                                       0x07
-#        define NV40_FP_OP_OPCODE_MIN                                       0x08
-#        define NV40_FP_OP_OPCODE_MAX                                       0x09
-#        define NV40_FP_OP_OPCODE_SLT                                       0x0A
-#        define NV40_FP_OP_OPCODE_SGE                                       0x0B
-#        define NV40_FP_OP_OPCODE_SLE                                       0x0C
-#        define NV40_FP_OP_OPCODE_SGT                                       0x0D
-#        define NV40_FP_OP_OPCODE_SNE                                       0x0E
-#        define NV40_FP_OP_OPCODE_SEQ                                       0x0F
-#        define NV40_FP_OP_OPCODE_FRC                                       0x10
-#        define NV40_FP_OP_OPCODE_FLR                                       0x11
-#        define NV40_FP_OP_OPCODE_KIL                                       0x12
-#        define NV40_FP_OP_OPCODE_PK4B                                      0x13
-#        define NV40_FP_OP_OPCODE_UP4B                                      0x14
-/* DDX/DDY can only write to XY */
-#        define NV40_FP_OP_OPCODE_DDX                                       0x15
-#        define NV40_FP_OP_OPCODE_DDY                                       0x16
-#        define NV40_FP_OP_OPCODE_TEX                                       0x17
-#        define NV40_FP_OP_OPCODE_TXP                                       0x18
-#        define NV40_FP_OP_OPCODE_TXD                                       0x19
-#        define NV40_FP_OP_OPCODE_RCP                                       0x1A
-#        define NV40_FP_OP_OPCODE_EX2                                       0x1C
-#        define NV40_FP_OP_OPCODE_LG2                                       0x1D
-#        define NV40_FP_OP_OPCODE_STR                                       0x20
-#        define NV40_FP_OP_OPCODE_SFL                                       0x21
-#        define NV40_FP_OP_OPCODE_COS                                       0x22
-#        define NV40_FP_OP_OPCODE_SIN                                       0x23
-#        define NV40_FP_OP_OPCODE_PK2H                                      0x24
-#        define NV40_FP_OP_OPCODE_UP2H                                      0x25
-#        define NV40_FP_OP_OPCODE_PK4UB                                     0x27
-#        define NV40_FP_OP_OPCODE_UP4UB                                     0x28
-#        define NV40_FP_OP_OPCODE_PK2US                                     0x29
-#        define NV40_FP_OP_OPCODE_UP2US                                     0x2A
-#        define NV40_FP_OP_OPCODE_DP2A                                      0x2E
-#        define NV40_FP_OP_OPCODE_TXL                                       0x2F
-#        define NV40_FP_OP_OPCODE_TXB                                       0x31
-#        define NV40_FP_OP_OPCODE_DIV                                       0x3A
-#        define NV40_FP_OP_OPCODE_UNK_LIT                                   0x3C
-/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
-#        define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0
-#        define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1
-#        define NV40_FP_OP_BRA_OPCODE_IF                                     0x2
-#        define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3
-#        define NV40_FP_OP_BRA_OPCODE_REP                                    0x4
-#        define NV40_FP_OP_BRA_OPCODE_RET                                    0x5
-#define NV40_FP_OP_OUT_SAT                                             (1 << 31)
-
-/* high order bits of SRC0 */
-#define NV40_FP_OP_OUT_ABS                                             (1 << 29)
-#define NV40_FP_OP_COND_SWZ_W_SHIFT                                           27
-#define NV40_FP_OP_COND_SWZ_W_MASK                                     (3 << 27)
-#define NV40_FP_OP_COND_SWZ_Z_SHIFT                                           25
-#define NV40_FP_OP_COND_SWZ_Z_MASK                                     (3 << 25)
-#define NV40_FP_OP_COND_SWZ_Y_SHIFT                                           23
-#define NV40_FP_OP_COND_SWZ_Y_MASK                                     (3 << 23)
-#define NV40_FP_OP_COND_SWZ_X_SHIFT                                           21
-#define NV40_FP_OP_COND_SWZ_X_MASK                                     (3 << 21)
-#define NV40_FP_OP_COND_SWZ_ALL_SHIFT                                         21
-#define NV40_FP_OP_COND_SWZ_ALL_MASK                                (0xFF << 21)
-#define NV40_FP_OP_COND_SHIFT                                                 18
-#define NV40_FP_OP_COND_MASK                                        (0x07 << 18)
-#        define NV40_FP_OP_COND_FL                                             0
-#        define NV40_FP_OP_COND_LT                                             1
-#        define NV40_FP_OP_COND_EQ                                             2
-#        define NV40_FP_OP_COND_LE                                             3
-#        define NV40_FP_OP_COND_GT                                             4
-#        define NV40_FP_OP_COND_NE                                             5
-#        define NV40_FP_OP_COND_GE                                             6
-#        define NV40_FP_OP_COND_TR                                             7
-
-/* high order bits of SRC1 */
-#define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31)
-#define NV40_FP_OP_DST_SCALE_SHIFT                                            28
-#define NV40_FP_OP_DST_SCALE_MASK                                      (3 << 28)
-#define NV40_FP_OP_DST_SCALE_1X                                                0
-#define NV40_FP_OP_DST_SCALE_2X                                                1
-#define NV40_FP_OP_DST_SCALE_4X                                                2
-#define NV40_FP_OP_DST_SCALE_8X                                                3
-#define NV40_FP_OP_DST_SCALE_INV_2X                                            5
-#define NV40_FP_OP_DST_SCALE_INV_4X                                            6
-#define NV40_FP_OP_DST_SCALE_INV_8X                                            7
-
-/* SRC1 LOOP */
-#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19
-#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19)
-#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10
-#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10)
-#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2
-#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2)
-
-/* SRC1 IF */
-#define NV40_FP_OP_ELSE_ID_SHIFT                                               2
-#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2)
-
-/* SRC1 CAL */
-#define NV40_FP_OP_IADDR_SHIFT                                                 2
-#define NV40_FP_OP_IADDR_MASK                                        (0xFF << 2)
-
-/* SRC1 REP
- *   I have no idea why there are 3 count values here..  but they
- *   have always been filled with the same value in my tests so
- *   far..
- */
-#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2
-#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2)
-#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10
-#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10)
-#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19
-#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19)
-
-/* SRC2 REP/IF */
-#define NV40_FP_OP_END_ID_SHIFT                                                2
-#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2)
-
-// SRC2 high-order
-#define NV40_FP_OP_INDEX_INPUT                                         (1 << 30)
-#define NV40_FP_OP_ADDR_INDEX_SHIFT                                           19
-#define NV40_FP_OP_ADDR_INDEX_MASK                                   (0xF << 19)
-
-//== Register selection ==
-#define NV40_FP_REG_TYPE_SHIFT                                                 0
-#define NV40_FP_REG_TYPE_MASK                                           (3 << 0)
-#        define NV40_FP_REG_TYPE_TEMP                                          0
-#        define NV40_FP_REG_TYPE_INPUT                                         1
-#        define NV40_FP_REG_TYPE_CONST                                         2
-#define NV40_FP_REG_SRC_SHIFT                                                  2
-#define NV40_FP_REG_SRC_MASK                                           (63 << 2)
-#define NV40_FP_REG_SRC_HALF                                            (1 << 8)
-#define NV40_FP_REG_SWZ_ALL_SHIFT                                              9
-#define NV40_FP_REG_SWZ_ALL_MASK                                      (255 << 9)
-#define NV40_FP_REG_SWZ_X_SHIFT                                                9
-#define NV40_FP_REG_SWZ_X_MASK                                          (3 << 9)
-#define NV40_FP_REG_SWZ_Y_SHIFT                                               11
-#define NV40_FP_REG_SWZ_Y_MASK                                         (3 << 11)
-#define NV40_FP_REG_SWZ_Z_SHIFT                                               13
-#define NV40_FP_REG_SWZ_Z_MASK                                         (3 << 13)
-#define NV40_FP_REG_SWZ_W_SHIFT                                               15
-#define NV40_FP_REG_SWZ_W_MASK                                         (3 << 15)
-#        define NV40_FP_SWIZZLE_X                                              0
-#        define NV40_FP_SWIZZLE_Y                                              1
-#        define NV40_FP_SWIZZLE_Z                                              2
-#        define NV40_FP_SWIZZLE_W                                              3
-#define NV40_FP_REG_NEGATE                                             (1 << 17)
-
-#ifndef NV40_SHADER_NO_FUCKEDNESS
-#define NV40SR_NONE	0
-#define NV40SR_OUTPUT	1
-#define NV40SR_INPUT	2
-#define NV40SR_TEMP	3
-#define NV40SR_CONST	4
-
-struct nv40_sreg {
-	int type;
-	int index;
-
-	int dst_scale;
-
-	int negate;
-	int abs;
-	int swz[4];
-
-	int cc_update;
-	int cc_update_reg;
-	int cc_test;
-	int cc_test_reg;
-	int cc_swz[4];
-};
-
-static INLINE struct nv40_sreg
-nv40_sr(int type, int index)
-{
-	struct nv40_sreg temp = {
-		.type = type,
-		.index = index,
-		.dst_scale = DEF_SCALE,
-		.abs = 0,
-		.negate = 0,
-		.swz = { 0, 1, 2, 3 },
-		.cc_update = 0,
-		.cc_update_reg = 0,
-		.cc_test = DEF_CTEST,
-		.cc_test_reg = 0,
-		.cc_swz = { 0, 1, 2, 3 },
-	};
-	return temp;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
-{
-	struct nv40_sreg dst = src;
-
-	dst.swz[SWZ_X] = src.swz[x];
-	dst.swz[SWZ_Y] = src.swz[y];
-	dst.swz[SWZ_Z] = src.swz[z];
-	dst.swz[SWZ_W] = src.swz[w];
-	return dst;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_neg(struct nv40_sreg src)
-{
-	src.negate = !src.negate;
-	return src;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_abs(struct nv40_sreg src)
-{
-	src.abs = 1;
-	return src;
-}
-
-static INLINE struct nv40_sreg
-nv40_sr_scale(struct nv40_sreg src, int scale)
-{
-	src.dst_scale = scale;
-	return src;
-}
 #endif
 
-#endif
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index a199f0766e4..752cd0d1b3d 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -34,9 +34,9 @@
 #define DEF_CTEST 0
 #include "nv40_shader.h"
 
-#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv40_sr_neg((s))
-#define abs(s) nv40_sr_abs((s))
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
 
 #define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
 
@@ -47,17 +47,17 @@ struct nv40_vpc {
 
 	unsigned r_temps;
 	unsigned r_temps_discard;
-	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
-	struct nv40_sreg *r_address;
-	struct nv40_sreg *r_temp;
+	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+	struct nvfx_sreg *r_address;
+	struct nvfx_sreg *r_temp;
 
-	struct nv40_sreg *imm;
+	struct nvfx_sreg *imm;
 	unsigned nr_imm;
 
 	unsigned hpos_idx;
 };
 
-static struct nv40_sreg
+static struct nvfx_sreg
 temp(struct nv40_vpc *vpc)
 {
 	int idx = ffs(~vpc->r_temps) - 1;
@@ -65,12 +65,12 @@ temp(struct nv40_vpc *vpc)
 	if (idx < 0) {
 		NOUVEAU_ERR("out of temps!!\n");
 		assert(0);
-		return nv40_sr(NV40SR_TEMP, 0);
+		return nvfx_sr(NVFXSR_TEMP, 0);
 	}
 
 	vpc->r_temps |= (1 << idx);
 	vpc->r_temps_discard |= (1 << idx);
-	return nv40_sr(NV40SR_TEMP, idx);
+	return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
 static INLINE void
@@ -80,7 +80,7 @@ release_temps(struct nv40_vpc *vpc)
 	vpc->r_temps_discard = 0;
 }
 
-static struct nv40_sreg
+static struct nvfx_sreg
 constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
@@ -90,7 +90,7 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 	if (pipe >= 0) {
 		for (idx = 0; idx < vp->nr_consts; idx++) {
 			if (vp->consts[idx].index == pipe)
-				return nv40_sr(NV40SR_CONST, idx);
+				return nvfx_sr(NVFXSR_CONST, idx);
 		}
 	}
 
@@ -103,37 +103,37 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 	vpd->value[1] = y;
 	vpd->value[2] = z;
 	vpd->value[3] = w;
-	return nv40_sr(NV40SR_CONST, idx);
+	return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-	nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+	nv40_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))
 
 static void
-emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
+emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 	uint32_t sr = 0;
 
 	switch (src.type) {
-	case NV40SR_TEMP:
+	case NVFXSR_TEMP:
 		sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
 		sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
 		break;
-	case NV40SR_INPUT:
+	case NVFXSR_INPUT:
 		sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
 		       NV40_VP_SRC_REG_TYPE_SHIFT);
 		vp->ir |= (1 << src.index);
 		hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
 		break;
-	case NV40SR_CONST:
+	case NVFXSR_CONST:
 		sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
 		       NV40_VP_SRC_REG_TYPE_SHIFT);
 		assert(vpc->vpi->const_index == -1 ||
 		       vpc->vpi->const_index == src.index);
 		vpc->vpi->const_index = src.index;
 		break;
-	case NV40SR_NONE:
+	case NVFXSR_NONE:
 		sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
 		       NV40_VP_SRC_REG_TYPE_SHIFT);
 		break;
@@ -174,12 +174,12 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
 }
 
 static void
-emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
+emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 
 	switch (dst.type) {
-	case NV40SR_TEMP:
+	case NVFXSR_TEMP:
 		hw[3] |= NV40_VP_INST_DEST_MASK;
 		if (slot == 0) {
 			hw[0] |= (dst.index <<
@@ -189,7 +189,7 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
 				  NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
 		}
 		break;
-	case NV40SR_OUTPUT:
+	case NVFXSR_OUTPUT:
 		switch (dst.index) {
 		case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
 		case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
@@ -255,9 +255,9 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
 
 static void
 nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
-	      struct nv40_sreg dst, int mask,
-	      struct nv40_sreg s0, struct nv40_sreg s1,
-	      struct nv40_sreg s2)
+	      struct nvfx_sreg dst, int mask,
+	      struct nvfx_sreg s0, struct nvfx_sreg s1,
+	      struct nvfx_sreg s2)
 {
 	struct nvfx_vertex_program *vp = vpc->vp;
 	uint32_t *hw;
@@ -269,7 +269,7 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
 
 	hw = vpc->vpi->data;
 
-	hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
+	hw[0] |= (NVFX_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
 	hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
 		  (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |
 		  (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) |
@@ -291,13 +291,13 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
 	emit_src(vpc, hw, 2, s2);
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
-	struct nv40_sreg src;
+	struct nvfx_sreg src;
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index);
+		src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);
 		break;
 	case TGSI_FILE_CONSTANT:
 		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
@@ -322,9 +322,9 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	return src;
 }
 
-static INLINE struct nv40_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
-	struct nv40_sreg dst;
+	struct nvfx_sreg dst;
 
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
@@ -358,10 +358,10 @@ tgsi_mask(uint tgsi)
 
 static boolean
 src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
-	       struct nv40_sreg *src)
+	       struct nvfx_sreg *src)
 {
-	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
-	struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+	struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc);
 	uint mask = 0;
 	uint c;
 
@@ -384,7 +384,7 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 	*src = temp(vpc);
 
 	if (mask)
-		arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+		arith(vpc, VEC, MOV, *src, mask, tgsi, none, none);
 
 	return FALSE;
 }
@@ -393,8 +393,8 @@ static boolean
 nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				const struct tgsi_full_instruction *finst)
 {
-	struct nv40_sreg src[3], dst, tmp;
-	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	struct nvfx_sreg src[3], dst, tmp;
+	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
 	int mask;
 	int ai = -1, ci = -1, ii = -1;
 	int i;
@@ -434,7 +434,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -445,7 +445,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -456,7 +456,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
-				arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+				arith(vpc, VEC, MOV, src[i], MASK_ALL,
 				      tgsi_src(vpc, fsrc), none, none);
 			}
 			break;
@@ -474,93 +474,93 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
-		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+		arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);
 		break;
 	case TGSI_OPCODE_ADD:
-		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+		arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);
 		break;
 	case TGSI_OPCODE_ARL:
-		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+		arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_DP3:
-		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DP4:
-		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DPH:
-		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_DST:
-		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_EX2:
-		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_EXP:
-		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_FLR:
-		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+		arith(vpc, VEC, FLR, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_FRC:
-		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+		arith(vpc, VEC, FRC, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_LG2:
-		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_LIT:
-		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_LOG:
-		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_MAD:
-		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
 		break;
 	case TGSI_OPCODE_MAX:
-		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_MIN:
-		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_MOV:
-		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+		arith(vpc, VEC, MOV, dst, mask, src[0], none, none);
 		break;
 	case TGSI_OPCODE_MUL:
-		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_POW:
 		tmp = temp(vpc);
-		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+		arith(vpc, SCA, LG2, tmp, MASK_X, none, none,
 		      swz(src[0], X, X, X, X));
-		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+		arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
 		      swz(src[1], X, X, X, X), none);
-		arith(vpc, 1, OP_EX2, dst, mask, none, none,
+		arith(vpc, SCA, EX2, dst, mask, none, none,
 		      swz(tmp, X, X, X, X));
 		break;
 	case TGSI_OPCODE_RCP:
-		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+		arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);
 		break;
 	case TGSI_OPCODE_RET:
 		break;
 	case TGSI_OPCODE_RSQ:
-		arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
+		arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));
 		break;
 	case TGSI_OPCODE_SGE:
-		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SLT:
-		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+		arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SUB:
-		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+		arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
 		break;
 	case TGSI_OPCODE_XPD:
 		tmp = temp(vpc);
-		arith(vpc, 0, OP_MUL, tmp, mask,
+		arith(vpc, VEC, MUL, tmp, mask,
 		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
-		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+		arith(vpc, VEC, MAD, dst, (mask & ~MASK_W),
 		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
 		      neg(tmp));
 		break;
@@ -630,7 +630,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 		return FALSE;
 	}
 
-	vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
+	vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
 	return TRUE;
 }
 
@@ -702,18 +702,18 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
 	tgsi_parse_free(&p);
 
 	if (nr_imm) {
-		vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg));
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));
 		assert(vpc->imm);
 	}
 
 	if (++high_temp) {
-		vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_temp; i++)
 			vpc->r_temp[i] = temp(vpc);
 	}
 
 	if (++high_addr) {
-		vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+		vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg));
 		for (i = 0; i < high_addr; i++)
 			vpc->r_address[i] = temp(vpc);
 	}
@@ -728,7 +728,7 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,
 {
 	struct tgsi_parse_context parse;
 	struct nv40_vpc *vpc = NULL;
-	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
 	int i;
 
 	vpc = CALLOC(1, sizeof(struct nv40_vpc));
@@ -785,24 +785,24 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,
 	}
 
 	/* Write out HPOS if it was redirected to a temp earlier */
-	if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
-		struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+	if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
+		struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT,
 						NV40_VP_INST_DEST_POS);
-		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+		struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
 
-		arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+		arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none);
 	}
 
 	/* Insert code to handle user clip planes */
 	for (i = 0; i < vp->ucp.nr; i++) {
-		struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+		struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT,
 						NV40_VP_INST_DEST_CLIP(i));
-		struct nv40_sreg ceqn = constant(vpc, -1,
+		struct nvfx_sreg ceqn = constant(vpc, -1,
 						 nvfx->clip.ucp[i][0],
 						 nvfx->clip.ucp[i][1],
 						 nvfx->clip.ucp[i][2],
 						 nvfx->clip.ucp[i][3]);
-		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+		struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];
 		unsigned mask;
 
 		switch (i) {
@@ -814,10 +814,10 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,
 			goto out_err;
 		}
 
-		arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+		arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none);
 	}
 
-	vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
+	vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
 	vp->translated = TRUE;
 out_err:
 	tgsi_parse_free(&parse);
diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h
new file mode 100644
index 00000000000..191131a40a1
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_shader.h
@@ -0,0 +1,407 @@
+#ifndef __NVFX_SHADER_H__
+#define __NVFX_SHADER_H__
+
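+/* Resolves to either the NV30 or the NV40 value of a vertex program field,
+ * depending on the hardware of the `nvfx` context that must be in scope. */
+/* unusual, but fast and compact: branch-free select; is_nv4x == 0 picks NV30, an all-ones mask (~0) picks NV40 */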
+#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c))))
+
+#define NVFX_VP_INST_SLOT_VEC 0
+#define NVFX_VP_INST_SLOT_SCA 1
+
+#define NVFX_VP_INST_COND_FL  0 /* guess */
+#define NVFX_VP_INST_COND_LT  1
+#define NVFX_VP_INST_COND_EQ  2
+#define NVFX_VP_INST_COND_LE  3
+#define NVFX_VP_INST_COND_GT  4
+#define NVFX_VP_INST_COND_NE  5
+#define NVFX_VP_INST_COND_GE  6
+#define NVFX_VP_INST_COND_TR  7 /* guess */
+
+#define NVFX_VP_INST_IN_POS  0    /* These seem to match the bindings specified in */
+#define NVFX_VP_INST_IN_WEIGHT  1    /* the ARB_v_p spec (2.14.3.1) */
+#define NVFX_VP_INST_IN_NORMAL  2
+#define NVFX_VP_INST_IN_COL0  3    /* Should probably confirm them all though */
+#define NVFX_VP_INST_IN_COL1  4
+#define NVFX_VP_INST_IN_FOGC  5
+#define NVFX_VP_INST_IN_TC0  8
+#define NVFX_VP_INST_IN_TC(n)  (8+(n)) /* argument parenthesized so expression arguments expand correctly */
+
+#define NVFX_VP_INST_SCA_OP_NOP 0x00
+#define NVFX_VP_INST_SCA_OP_MOV 0x01
+#define NVFX_VP_INST_SCA_OP_RCP 0x02
+#define NVFX_VP_INST_SCA_OP_RCC 0x03
+#define NVFX_VP_INST_SCA_OP_RSQ 0x04
+#define NVFX_VP_INST_SCA_OP_EXP 0x05
+#define NVFX_VP_INST_SCA_OP_LOG 0x06
+#define NVFX_VP_INST_SCA_OP_LIT 0x07
+#define NVFX_VP_INST_SCA_OP_BRA 0x09
+#define NVFX_VP_INST_SCA_OP_CAL 0x0B
+#define NVFX_VP_INST_SCA_OP_RET 0x0C
+#define NVFX_VP_INST_SCA_OP_LG2 0x0D
+#define NVFX_VP_INST_SCA_OP_EX2 0x0E
+#define NVFX_VP_INST_SCA_OP_SIN 0x0F
+#define NVFX_VP_INST_SCA_OP_COS 0x10
+
+#define NV40_VP_INST_SCA_OP_PUSHA 0x13
+#define NV40_VP_INST_SCA_OP_POPA 0x14
+
+#define NVFX_VP_INST_VEC_OP_NOP 0x00
+#define NVFX_VP_INST_VEC_OP_MOV 0x01
+#define NVFX_VP_INST_VEC_OP_MUL 0x02
+#define NVFX_VP_INST_VEC_OP_ADD 0x03
+#define NVFX_VP_INST_VEC_OP_MAD 0x04
+#define NVFX_VP_INST_VEC_OP_DP3 0x05
+#define NVFX_VP_INST_VEC_OP_DPH 0x06
+#define NVFX_VP_INST_VEC_OP_DP4 0x07
+#define NVFX_VP_INST_VEC_OP_DST 0x08
+#define NVFX_VP_INST_VEC_OP_MIN 0x09
+#define NVFX_VP_INST_VEC_OP_MAX 0x0A
+#define NVFX_VP_INST_VEC_OP_SLT 0x0B
+#define NVFX_VP_INST_VEC_OP_SGE 0x0C
+#define NVFX_VP_INST_VEC_OP_ARL 0x0D
+#define NVFX_VP_INST_VEC_OP_FRC 0x0E
+#define NVFX_VP_INST_VEC_OP_FLR 0x0F
+#define NVFX_VP_INST_VEC_OP_SEQ 0x10
+#define NVFX_VP_INST_VEC_OP_SFL 0x11
+#define NVFX_VP_INST_VEC_OP_SGT 0x12
+#define NVFX_VP_INST_VEC_OP_SLE 0x13
+#define NVFX_VP_INST_VEC_OP_SNE 0x14
+#define NVFX_VP_INST_VEC_OP_STR 0x15
+#define NVFX_VP_INST_VEC_OP_SSG 0x16
+#define NVFX_VP_INST_VEC_OP_ARR 0x17
+#define NVFX_VP_INST_VEC_OP_ARA 0x18
+
+#define NV40_VP_INST_VEC_OP_TXL 0x19
+
+/* DWORD 3 */
+#define NVFX_VP_INST_LAST                           (1 << 0)
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ *   0 - Opcode, output reg/mask, ATTRIB source
+ *   1 - Source 0
+ *   2 - Source 1
+ *   3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ *     result.color == R0.xyzw
+ *     result.depth == R1.z
+ * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0
+ * otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords.  As such instructions such as:
+ *
+ *     ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOVs and then an ADD (nvidia does this), but
+ * I'm not sure why it isn't just one MOV, with the second input sourced
+ * directly in the ADD instruction.
+ *
+ * Negation of the full source is done with NVFX_FP_REG_NEGATE; arbitrary
+ * negation requires multiplication by a constant.
+ *
+ * Arbitrary swizzling is supported, with the exception of SWIZZLE_ZERO/SWIZZLE_ONE.
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0), as SWIZZLE_ZERO
+ * is implemented simply by not writing to the relevant components of the destination.
+ *
+ * Conditional execution
+ *   TODO
+ *
+ * Non-native instructions:
+ *   LIT
+ *   LRP - MAD+MAD
+ *   SUB - ADD, negate second source
+ *   RSQ - LG2 + EX2
+ *   POW - LG2 + MUL + EX2
+ *   SCS - COS + SIN
+ *   XPD
+ *
+ * NV40 Looping
+ *   Loops appear to be fairly expensive on NV40 at least, the proprietary
+ *   driver goes to a lot of effort to avoid using the native looping
+ *   instructions.  If the total number of *executed* instructions between
+ *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ *   The maximum loop count is 255.
+ *
+ */
+
+//== Opcode / Destination selection ==
+#define NVFX_FP_OP_PROGRAM_END          (1 << 0)
+#define NVFX_FP_OP_OUT_REG_SHIFT        1
+#define NV30_FP_OP_OUT_REG_MASK          (31 << 1)  /* uncertain */
+#define NV40_FP_OP_OUT_REG_MASK          (63 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NVFX_FP_OP_OUT_REG_HALF          (1 << 7)
+#define NVFX_FP_OP_COND_WRITE_ENABLE        (1 << 8)
+#define NVFX_FP_OP_OUTMASK_SHIFT        9
+#define NVFX_FP_OP_OUTMASK_MASK          (0xF << 9)
+#  define NVFX_FP_OP_OUT_X  (1<<9)
+#  define NVFX_FP_OP_OUT_Y  (1<<10)
+#  define NVFX_FP_OP_OUT_Z  (1<<11)
+#  define NVFX_FP_OP_OUT_W  (1<<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NVFX_FP_OP_INPUT_SRC_SHIFT        13
+#define NVFX_FP_OP_INPUT_SRC_MASK        (15 << 13)
+#  define NVFX_FP_OP_INPUT_SRC_POSITION  0x0
+#  define NVFX_FP_OP_INPUT_SRC_COL0  0x1
+#  define NVFX_FP_OP_INPUT_SRC_COL1  0x2
+#  define NVFX_FP_OP_INPUT_SRC_FOGC  0x3
+#  define NVFX_FP_OP_INPUT_SRC_TC0    0x4
+#  define NVFX_FP_OP_INPUT_SRC_TC(n)  (0x4 + (n)) /* argument parenthesized so expression arguments expand correctly */
+#  define NV40_FP_OP_INPUT_SRC_FACING  0xE
+#define NVFX_FP_OP_TEX_UNIT_SHIFT        17
+#define NVFX_FP_OP_TEX_UNIT_MASK        (0xF << 17) /* guess */
+#define NVFX_FP_OP_PRECISION_SHIFT        22
+#define NVFX_FP_OP_PRECISION_MASK        (3 << 22)
+#   define NVFX_FP_PRECISION_FP32  0
+#   define NVFX_FP_PRECISION_FP16  1
+#   define NVFX_FP_PRECISION_FX12  2
+#define NVFX_FP_OP_OPCODE_SHIFT          24
+#define NVFX_FP_OP_OPCODE_MASK          (0x3F << 24)
+/* NV30/NV40 fragment program opcodes */
+#define NVFX_FP_OP_OPCODE_NOP 0x00
+#define NVFX_FP_OP_OPCODE_MOV 0x01
+#define NVFX_FP_OP_OPCODE_MUL 0x02
+#define NVFX_FP_OP_OPCODE_ADD 0x03
+#define NVFX_FP_OP_OPCODE_MAD 0x04
+#define NVFX_FP_OP_OPCODE_DP3 0x05
+#define NVFX_FP_OP_OPCODE_DP4 0x06
+#define NVFX_FP_OP_OPCODE_DST 0x07
+#define NVFX_FP_OP_OPCODE_MIN 0x08
+#define NVFX_FP_OP_OPCODE_MAX 0x09
+#define NVFX_FP_OP_OPCODE_SLT 0x0A
+#define NVFX_FP_OP_OPCODE_SGE 0x0B
+#define NVFX_FP_OP_OPCODE_SLE 0x0C
+#define NVFX_FP_OP_OPCODE_SGT 0x0D
+#define NVFX_FP_OP_OPCODE_SNE 0x0E
+#define NVFX_FP_OP_OPCODE_SEQ 0x0F
+#define NVFX_FP_OP_OPCODE_FRC 0x10
+#define NVFX_FP_OP_OPCODE_FLR 0x11
+#define NVFX_FP_OP_OPCODE_KIL 0x12
+#define NVFX_FP_OP_OPCODE_PK4B 0x13
+#define NVFX_FP_OP_OPCODE_UP4B 0x14
+#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */
+#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */
+#define NVFX_FP_OP_OPCODE_TEX 0x17
+#define NVFX_FP_OP_OPCODE_TXP 0x18
+#define NVFX_FP_OP_OPCODE_TXD 0x19
+#define NVFX_FP_OP_OPCODE_RCP 0x1A
+#define NVFX_FP_OP_OPCODE_EX2 0x1C
+#define NVFX_FP_OP_OPCODE_LG2 0x1D
+#define NVFX_FP_OP_OPCODE_STR 0x20
+#define NVFX_FP_OP_OPCODE_SFL 0x21
+#define NVFX_FP_OP_OPCODE_COS 0x22
+#define NVFX_FP_OP_OPCODE_SIN 0x23
+#define NVFX_FP_OP_OPCODE_PK2H 0x24
+#define NVFX_FP_OP_OPCODE_UP2H 0x25
+#define NVFX_FP_OP_OPCODE_PK4UB 0x27
+#define NVFX_FP_OP_OPCODE_UP4UB 0x28
+#define NVFX_FP_OP_OPCODE_PK2US 0x29
+#define NVFX_FP_OP_OPCODE_UP2US 0x2A
+#define NVFX_FP_OP_OPCODE_DP2A 0x2E
+#define NVFX_FP_OP_OPCODE_TXB 0x31
+#define NVFX_FP_OP_OPCODE_DIV 0x3A
+
+/* NV30 only fragment program opcodes */
+#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B
+#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E
+#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F
+#define NVFX_FP_OP_OPCODE_POW_NV30 0x26
+#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36
+
+/* NV40 only fragment program opcodes */
+#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+#define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0
+#define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1
+#define NV40_FP_OP_BRA_OPCODE_IF                                     0x2
+#define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3
+#define NV40_FP_OP_BRA_OPCODE_REP                                    0x4
+#define NV40_FP_OP_BRA_OPCODE_RET                                    0x5
+
+#define NVFX_FP_OP_OUT_SAT          (1u << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+
+/* high order bits of SRC0 */
+#define NVFX_FP_OP_OUT_ABS          (1 << 29)
+#define NVFX_FP_OP_COND_SWZ_W_SHIFT        27
+#define NVFX_FP_OP_COND_SWZ_W_MASK        (3 << 27)
+#define NVFX_FP_OP_COND_SWZ_Z_SHIFT        25
+#define NVFX_FP_OP_COND_SWZ_Z_MASK        (3 << 25)
+#define NVFX_FP_OP_COND_SWZ_Y_SHIFT        23
+#define NVFX_FP_OP_COND_SWZ_Y_MASK        (3 << 23)
+#define NVFX_FP_OP_COND_SWZ_X_SHIFT        21
+#define NVFX_FP_OP_COND_SWZ_X_MASK        (3 << 21)
+#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT        21
+#define NVFX_FP_OP_COND_SWZ_ALL_MASK        (0xFF << 21)
+#define NVFX_FP_OP_COND_SHIFT          18
+#define NVFX_FP_OP_COND_MASK          (0x07 << 18)
+#  define NVFX_FP_OP_COND_FL  0
+#  define NVFX_FP_OP_COND_LT  1
+#  define NVFX_FP_OP_COND_EQ  2
+#  define NVFX_FP_OP_COND_LE  3
+#  define NVFX_FP_OP_COND_GT  4
+#  define NVFX_FP_OP_COND_NE  5
+#  define NVFX_FP_OP_COND_GE  6
+#  define NVFX_FP_OP_COND_TR  7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH                                     (1u<<31) /* unsigned: 1<<31 overflows a 32-bit int */
+#define NVFX_FP_OP_DST_SCALE_SHIFT        28
+#define NVFX_FP_OP_DST_SCALE_MASK        (3 << 28)
+#define NVFX_FP_OP_DST_SCALE_1X                                                0
+#define NVFX_FP_OP_DST_SCALE_2X                                                1
+#define NVFX_FP_OP_DST_SCALE_4X                                                2
+#define NVFX_FP_OP_DST_SCALE_8X                                                3
+#define NVFX_FP_OP_DST_SCALE_INV_2X                                            5
+#define NVFX_FP_OP_DST_SCALE_INV_4X                                            6
+#define NVFX_FP_OP_DST_SCALE_INV_8X                                            7
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19
+#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10
+#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2
+#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT                                               2
+#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT                                                 2
+#define NV40_FP_OP_IADDR_MASK                                        (0xFF << 2)
+
+/* SRC1 REP
+ *   I have no idea why there are 3 count values here..  but they
+ *   have always been filled with the same value in my tests so
+ *   far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2
+#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10
+#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19
+#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT                                                2
+#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2)
+
+/* high order bits of SRC2 */
+#define NVFX_FP_OP_INDEX_INPUT          (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT        19
+#define NV40_FP_OP_ADDR_INDEX_MASK        (0xF << 19)
+
+//== Register selection ==
+#define NVFX_FP_REG_TYPE_SHIFT           0
+#define NVFX_FP_REG_TYPE_MASK           (3 << 0)
+#  define NVFX_FP_REG_TYPE_TEMP   0
+#  define NVFX_FP_REG_TYPE_INPUT  1
+#  define NVFX_FP_REG_TYPE_CONST  2
+#define NVFX_FP_REG_SRC_SHIFT            2
+#define NV30_FP_REG_SRC_MASK              (31 << 2)
+#define NV40_FP_REG_SRC_MASK              (63 << 2)
+#define NVFX_FP_REG_SRC_HALF            (1 << 8)
+#define NVFX_FP_REG_SWZ_ALL_SHIFT        9
+#define NVFX_FP_REG_SWZ_ALL_MASK        (255 << 9)
+#define NVFX_FP_REG_SWZ_X_SHIFT          9
+#define NVFX_FP_REG_SWZ_X_MASK          (3 << 9)
+#define NVFX_FP_REG_SWZ_Y_SHIFT          11
+#define NVFX_FP_REG_SWZ_Y_MASK          (3 << 11)
+#define NVFX_FP_REG_SWZ_Z_SHIFT          13
+#define NVFX_FP_REG_SWZ_Z_MASK          (3 << 13)
+#define NVFX_FP_REG_SWZ_W_SHIFT          15
+#define NVFX_FP_REG_SWZ_W_MASK          (3 << 15)
+#  define NVFX_FP_SWIZZLE_X  0
+#  define NVFX_FP_SWIZZLE_Y  1
+#  define NVFX_FP_SWIZZLE_Z  2
+#  define NVFX_FP_SWIZZLE_W  3
+#define NVFX_FP_REG_NEGATE          (1 << 17)
+
+#ifndef NVFX_SHADER_NO_FUCKEDNESS
+#define NVFXSR_NONE	0
+#define NVFXSR_OUTPUT	1
+#define NVFXSR_INPUT	2
+#define NVFXSR_TEMP	3
+#define NVFXSR_CONST	4
+
+struct nvfx_sreg { /* a shader register reference together with its per-use modifiers */
+	int type; /* one of the NVFXSR_* values, as passed to nvfx_sr() */
+	int index; /* second argument of nvfx_sr(); presumably the register number within that class */
+
+	int dst_scale; /* initialised to DEF_SCALE by nvfx_sr(); replaced by nvfx_sr_scale() */
+
+	int negate; /* toggled by nvfx_sr_neg() */
+	int abs; /* set to 1 by nvfx_sr_abs() */
+	int swz[4]; /* component selected for each of x, y, z, w; nvfx_sr() starts with { 0, 1, 2, 3 } */
+
+	int cc_update; /* NOTE(review): presumably "update condition codes" flag - confirm against the emitters */
+	int cc_update_reg; /* NOTE(review): presumably the condition register to update - confirm */
+	int cc_test; /* initialised to DEF_CTEST by nvfx_sr() */
+	int cc_test_reg; /* NOTE(review): presumably the condition register to test - confirm */
+	int cc_swz[4]; /* nvfx_sr() starts with { 0, 1, 2, 3 }; presumably the swizzle applied to the tested condition */
+};
+
+static INLINE struct nvfx_sreg
+nvfx_sr(int type, int index)
+{ /* Builds and returns a register descriptor for (type, index) with all modifiers at their defaults. */
+	struct nvfx_sreg temp = {
+		.type = type,
+		.index = index,
+		.dst_scale = DEF_SCALE, /* not defined in this header: the including file must #define it first */
+		.abs = 0,
+		.negate = 0,
+		.swz = { 0, 1, 2, 3 }, /* each slot holds its own position, i.e. no reordering */
+		.cc_update = 0,
+		.cc_update_reg = 0,
+		.cc_test = DEF_CTEST, /* likewise supplied by the including file */
+		.cc_test_reg = 0,
+		.cc_swz = { 0, 1, 2, 3 },
+	};
+	return temp;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w)
+{ /* Returns a copy of src whose swizzle is re-selected by x, y, z, w (indices into src.swz). */
+	struct nvfx_sreg dst = src;
+
+	dst.swz[SWZ_X] = src.swz[x]; /* reads go through src.swz, so this stacks on any swizzle already applied */
+	dst.swz[SWZ_Y] = src.swz[y];
+	dst.swz[SWZ_Z] = src.swz[z];
+	dst.swz[SWZ_W] = src.swz[w];
+	return dst;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_neg(struct nvfx_sreg src)
+{ /* Returns a copy of src with the negate flag flipped; the caller's struct is passed by value and not modified. */
+	src.negate = !src.negate; /* a toggle, not a set: negating twice cancels out */
+	return src;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_abs(struct nvfx_sreg src)
+{ /* Returns a copy of src with the abs flag set; the caller's struct is passed by value and not modified. */
+	src.abs = 1; /* the negate flag is left as it was */
+	return src;
+}
+
+static INLINE struct nvfx_sreg
+nvfx_sr_scale(struct nvfx_sreg src, int scale)
+{ /* Returns a copy of src with dst_scale replaced by scale; the caller's struct is passed by value and not modified. */
+	src.dst_scale = scale; /* overwrites the previous value, it does not accumulate */
+	return src;
+}
+#endif
+
+#endif