nv30, nv40: non-trivially partially unify nv[34]0_shader.h
[mesa.git] / src / gallium / drivers / nv30 / nv30_fragprog.c
index 0ce702d6f8449d6ca8399d03ce0d7f0943df7b48..4ce16b8f0e39af5859f06955a834902a401454cc 100644 (file)
@@ -1,7 +1,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
 
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_dump.h"
 #define MASK_Z 4
 #define MASK_W 8
 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
-#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X
-#define DEF_CTEST NV30_FP_OP_COND_TR
-#include "nv30_shader.h"
+#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NVFX_FP_OP_COND_TR
+#include "nvfx_shader.h"
 
-#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
-#define neg(s) nv30_sr_neg((s))
-#define abs(s) nv30_sr_abs((s))
-#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v)
+#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nvfx_sr_neg((s))
+#define abs(s) nvfx_sr_abs((s))
+#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)
 
 #define MAX_CONSTS 128
 #define MAX_IMM 32
 struct nv30_fpc {
-       struct nv30_fragment_program *fp;
+       struct nvfx_fragment_program *fp;
 
        uint attrib_map[PIPE_MAX_SHADER_INPUTS];
 
@@ -50,21 +50,21 @@ struct nv30_fpc {
        } consts[MAX_CONSTS];
        int nr_consts;
 
-       struct nv30_sreg imm[MAX_IMM];
+       struct nvfx_sreg imm[MAX_IMM];
        unsigned nr_imm;
 };
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 temp(struct nv30_fpc *fpc)
 {
        int idx;
 
        idx  = fpc->temp_temp_count++;
        idx += fpc->high_temp + 1;
-       return nv30_sr(NV30SR_TEMP, idx);
+       return nvfx_sr(NVFXSR_TEMP, idx);
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 constant(struct nv30_fpc *fpc, int pipe, float vals[4])
 {
        int idx;
@@ -76,49 +76,49 @@ constant(struct nv30_fpc *fpc, int pipe, float vals[4])
        fpc->consts[idx].pipe = pipe;
        if (pipe == -1)
                memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
-       return nv30_sr(NV30SR_CONST, idx);
+       return nvfx_sr(NVFXSR_CONST, idx);
 }
 
 #define arith(cc,s,o,d,m,s0,s1,s2) \
-       nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \
+       nv30_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
                        (d), (m), (s0), (s1), (s2))
 #define tex(cc,s,o,u,d,m,s0,s1,s2) \
-       nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \
+       nv30_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
                    (d), (m), (s0), none, none)
 
 static void
 grow_insns(struct nv30_fpc *fpc, int size)
 {
-       struct nv30_fragment_program *fp = fpc->fp;
+       struct nvfx_fragment_program *fp = fpc->fp;
 
        fp->insn_len += size;
        fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
 }
 
 static void
-emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
+emit_src(struct nv30_fpc *fpc, int pos, struct nvfx_sreg src)
 {
-       struct nv30_fragment_program *fp = fpc->fp;
+       struct nvfx_fragment_program *fp = fpc->fp;
        uint32_t *hw = &fp->insn[fpc->inst_offset];
        uint32_t sr = 0;
 
        switch (src.type) {
-       case NV30SR_INPUT:
-               sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
-               hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT);
+       case NVFXSR_INPUT:
+               sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+               hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
                break;
-       case NV30SR_OUTPUT:
-               sr |= NV30_FP_REG_SRC_HALF;
+       case NVFXSR_OUTPUT:
+               sr |= NVFX_FP_REG_SRC_HALF;
                /* fall-through */
-       case NV30SR_TEMP:
-               sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT);
-               sr |= (src.index << NV30_FP_REG_SRC_SHIFT);
+       case NVFXSR_TEMP:
+               sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
+               sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
                break;
-       case NV30SR_CONST:
+       case NVFXSR_CONST:
                grow_insns(fpc, 4);
                hw = &fp->insn[fpc->inst_offset];
                if (fpc->consts[src.index].pipe >= 0) {
-                       struct nv30_fragment_program_data *fpd;
+                       struct nvfx_fragment_program_data *fpd;
 
                        fp->consts = realloc(fp->consts, ++fp->nr_consts *
                                             sizeof(*fpd));
@@ -132,63 +132,63 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
                                sizeof(uint32_t) * 4);
                }
 
-               sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+               sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
                break;
-       case NV30SR_NONE:
-               sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
+       case NVFXSR_NONE:
+               sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
                break;
        default:
                assert(0);
        }
 
        if (src.negate)
-               sr |= NV30_FP_REG_NEGATE;
+               sr |= NVFX_FP_REG_NEGATE;
 
        if (src.abs)
                hw[1] |= (1 << (29 + pos));
 
-       sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) |
-              (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) |
-              (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) |
-              (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT));
+       sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
+              (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
+              (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
+              (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));
 
        hw[pos + 1] |= sr;
 }
 
 static void
-emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst)
+emit_dst(struct nv30_fpc *fpc, struct nvfx_sreg dst)
 {
-       struct nv30_fragment_program *fp = fpc->fp;
+       struct nvfx_fragment_program *fp = fpc->fp;
        uint32_t *hw = &fp->insn[fpc->inst_offset];
 
        switch (dst.type) {
-       case NV30SR_TEMP:
+       case NVFXSR_TEMP:
                if (fpc->num_regs < (dst.index + 1))
                        fpc->num_regs = dst.index + 1;
                break;
-       case NV30SR_OUTPUT:
+       case NVFXSR_OUTPUT:
                if (dst.index == 1) {
                        fp->fp_control |= 0xe;
                } else {
-                       hw[0] |= NV30_FP_OP_OUT_REG_HALF;
+                       hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
                }
                break;
-       case NV30SR_NONE:
+       case NVFXSR_NONE:
                hw[0] |= (1 << 30);
                break;
        default:
                assert(0);
        }
 
-       hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT);
+       hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
 }
 
 static void
 nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
-             struct nv30_sreg dst, int mask,
-             struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+             struct nvfx_sreg dst, int mask,
+             struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
-       struct nv30_fragment_program *fp = fpc->fp;
+       struct nvfx_fragment_program *fp = fpc->fp;
        uint32_t *hw;
 
        fpc->inst_offset = fp->insn_len;
@@ -196,22 +196,22 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
        hw = &fp->insn[fpc->inst_offset];
        memset(hw, 0, sizeof(uint32_t) * 4);
 
-       if (op == NV30_FP_OP_OPCODE_KIL)
+       if (op == NVFX_FP_OP_OPCODE_KIL)
                fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
-       hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT);
-       hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT);
-       hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT);
+       hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
+       hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
+       hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);
 
        if (sat)
-               hw[0] |= NV30_FP_OP_OUT_SAT;
+               hw[0] |= NVFX_FP_OP_OUT_SAT;
 
        if (dst.cc_update)
-               hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE;
-       hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT);
-       hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) |
-                 (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) |
-                 (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) |
-                 (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT));
+               hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
+       hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
+       hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
+                 (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
+                 (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
+                 (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));
 
        emit_dst(fpc, dst);
        emit_src(fpc, 0, s0);
@@ -221,36 +221,36 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,
 
 static void
 nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit,
-           struct nv30_sreg dst, int mask,
-           struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2)
+           struct nvfx_sreg dst, int mask,
+           struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
 {
-       struct nv30_fragment_program *fp = fpc->fp;
+       struct nvfx_fragment_program *fp = fpc->fp;
 
        nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
 
-       fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT);
+       fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
        fp->samplers |= (1 << unit);
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
-       struct nv30_sreg src;
+       struct nvfx_sreg src;
 
-       switch (fsrc->SrcRegister.File) {
+       switch (fsrc->Register.File) {
        case TGSI_FILE_INPUT:
-               src = nv30_sr(NV30SR_INPUT,
-                             fpc->attrib_map[fsrc->SrcRegister.Index]);
+               src = nvfx_sr(NVFXSR_INPUT,
+                             fpc->attrib_map[fsrc->Register.Index]);
                break;
        case TGSI_FILE_CONSTANT:
-               src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+               src = constant(fpc, fsrc->Register.Index, NULL);
                break;
        case TGSI_FILE_IMMEDIATE:
-               assert(fsrc->SrcRegister.Index < fpc->nr_imm);
-               src = fpc->imm[fsrc->SrcRegister.Index];
+               assert(fsrc->Register.Index < fpc->nr_imm);
+               src = fpc->imm[fsrc->Register.Index];
                break;
        case TGSI_FILE_TEMPORARY:
-               src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+               src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index + 1);
                if (fpc->high_temp < src.index)
                        fpc->high_temp = src.index;
                break;
@@ -258,46 +258,46 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
         * Luckily fragprog results are just temp regs..
         */
        case TGSI_FILE_OUTPUT:
-               if (fsrc->SrcRegister.Index == fpc->colour_id)
-                       return nv30_sr(NV30SR_OUTPUT, 0);
+               if (fsrc->Register.Index == fpc->colour_id)
+                       return nvfx_sr(NVFXSR_OUTPUT, 0);
                else
-                       return nv30_sr(NV30SR_OUTPUT, 1);
+                       return nvfx_sr(NVFXSR_OUTPUT, 1);
                break;
        default:
                NOUVEAU_ERR("bad src file\n");
                break;
        }
 
-       src.abs = fsrc->SrcRegisterExtMod.Absolute;
-       src.negate = fsrc->SrcRegister.Negate;
-       src.swz[0] = fsrc->SrcRegister.SwizzleX;
-       src.swz[1] = fsrc->SrcRegister.SwizzleY;
-       src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-       src.swz[3] = fsrc->SrcRegister.SwizzleW;
+       src.abs = fsrc->Register.Absolute;
+       src.negate = fsrc->Register.Negate;
+       src.swz[0] = fsrc->Register.SwizzleX;
+       src.swz[1] = fsrc->Register.SwizzleY;
+       src.swz[2] = fsrc->Register.SwizzleZ;
+       src.swz[3] = fsrc->Register.SwizzleW;
        return src;
 }
 
-static INLINE struct nv30_sreg
+static INLINE struct nvfx_sreg
 tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
        int idx;
 
-       switch (fdst->DstRegister.File) {
+       switch (fdst->Register.File) {
        case TGSI_FILE_OUTPUT:
-               if (fdst->DstRegister.Index == fpc->colour_id)
-                       return nv30_sr(NV30SR_OUTPUT, 0);
+               if (fdst->Register.Index == fpc->colour_id)
+                       return nvfx_sr(NVFXSR_OUTPUT, 0);
                else
-                       return nv30_sr(NV30SR_OUTPUT, 1);
+                       return nvfx_sr(NVFXSR_OUTPUT, 1);
                break;
        case TGSI_FILE_TEMPORARY:
-               idx = fdst->DstRegister.Index + 1;
+               idx = fdst->Register.Index + 1;
                if (fpc->high_temp < idx)
                        fpc->high_temp = idx;
-               return nv30_sr(NV30SR_TEMP, idx);
+               return nvfx_sr(NVFXSR_TEMP, idx);
        case TGSI_FILE_NULL:
-               return nv30_sr(NV30SR_NONE, 0);
+               return nvfx_sr(NVFXSR_NONE, 0);
        default:
-               NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
-               return nv30_sr(NV30SR_NONE, 0);
+               NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
+               return nvfx_sr(NVFXSR_NONE, 0);
        }
 }
 
@@ -315,10 +315,10 @@ tgsi_mask(uint tgsi)
 
 static boolean
 src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
-              struct nv30_sreg *src)
+              struct nvfx_sreg *src)
 {
-       const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
-       struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
+       const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+       struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc);
        uint mask = 0;
        uint c;
 
@@ -350,8 +350,8 @@ static boolean
 nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                                const struct tgsi_full_instruction *finst)
 {
-       const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
-       struct nv30_sreg src[3], dst, tmp;
+       const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
+       struct nvfx_sreg src[3], dst, tmp;
        int mask, sat, unit = 0;
        int ai = -1, ci = -1;
        int i;
@@ -363,8 +363,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
-               fsrc = &finst->FullSrcRegisters[i];
-               if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+               fsrc = &finst->Src[i];
+               if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
                        src[i] = tgsi_src(fpc, fsrc);
                }
        }
@@ -372,9 +372,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
-               fsrc = &finst->FullSrcRegisters[i];
+               fsrc = &finst->Src[i];
 
-               switch (fsrc->SrcRegister.File) {
+               switch (fsrc->Register.File) {
                case TGSI_FILE_INPUT:
                case TGSI_FILE_CONSTANT:
                case TGSI_FILE_TEMPORARY:
@@ -385,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                        break;
                }
 
-               switch (fsrc->SrcRegister.File) {
+               switch (fsrc->Register.File) {
                case TGSI_FILE_INPUT:
-                       if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-                               ai = fsrc->SrcRegister.Index;
+                       if (ai == -1 || ai == fsrc->Register.Index) {
+                               ai = fsrc->Register.Index;
                                src[i] = tgsi_src(fpc, fsrc);
                        } else {
                                NOUVEAU_MSG("extra src attr %d\n",
-                                        fsrc->SrcRegister.Index);
+                                        fsrc->Register.Index);
                                src[i] = temp(fpc);
                                arith(fpc, 0, MOV, src[i], MASK_ALL,
                                      tgsi_src(fpc, fsrc), none, none);
@@ -400,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                        break;
                case TGSI_FILE_CONSTANT:
                case TGSI_FILE_IMMEDIATE:
-                       if (ci == -1 || ci == fsrc->SrcRegister.Index) {
-                               ci = fsrc->SrcRegister.Index;
+                       if (ci == -1 || ci == fsrc->Register.Index) {
+                               ci = fsrc->Register.Index;
                                src[i] = tgsi_src(fpc, fsrc);
                        } else {
                                src[i] = temp(fpc);
@@ -413,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                        /* handled above */
                        break;
                case TGSI_FILE_SAMPLER:
-                       unit = fsrc->SrcRegister.Index;
+                       unit = fsrc->Register.Index;
                        break;
                case TGSI_FILE_OUTPUT:
                        break;
@@ -423,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                }
        }
 
-       dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
-       mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+       dst  = tgsi_dst(fpc, &finst->Dst[0]);
+       mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
        sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
 
        switch (finst->Instruction.Opcode) {
@@ -435,11 +435,12 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_CMP:
-               tmp = temp(fpc);
-               arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+               tmp = nvfx_sr(NVFXSR_NONE, 0);
                tmp.cc_update = 1;
                arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
-               dst.cc_test = NV30_VP_INST_COND_LT;
+               dst.cc_test = NVFX_VP_INST_COND_GE;
+               arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+               dst.cc_test = NVFX_VP_INST_COND_LT;
                arith(fpc, sat, MOV, dst, mask, src[1], none, none);
                break;
        case TGSI_OPCODE_COS:
@@ -473,10 +474,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                arith(fpc, 0, KIL, none, 0, none, none, none);
                break;
        case TGSI_OPCODE_KIL:
-               dst = nv30_sr(NV30SR_NONE, 0);
+               dst = nvfx_sr(NVFXSR_NONE, 0);
                dst.cc_update = 1;
                arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
-               dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT;
+               dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT;
                arith(fpc, 0, KIL, dst, 0, none, none, none);
                break;
        case TGSI_OPCODE_LG2:
@@ -484,7 +485,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                break;
 //     case TGSI_OPCODE_LIT:
        case TGSI_OPCODE_LRP:
-               arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
+               arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
                break;
        case TGSI_OPCODE_MAD:
                arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
@@ -502,7 +503,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_POW:
-               arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
+               arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_RCP:
                arith(fpc, sat, RCP, dst, mask, src[0], none, none);
@@ -511,19 +512,34 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                assert(0);
                break;
        case TGSI_OPCODE_RFL:
-               arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
+               arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_RSQ:
-               arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
+               arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
                break;
        case TGSI_OPCODE_SCS:
-               if (mask & MASK_X) {
-                       arith(fpc, sat, COS, dst, MASK_X,
-                             swz(src[0], X, X, X, X), none, none);
+               /* avoid overwriting the source */
+               if(src[0].swz[SWZ_X] != SWZ_X)
+               {
+                       if (mask & MASK_X) {
+                               arith(fpc, sat, COS, dst, MASK_X,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
+                       if (mask & MASK_Y) {
+                               arith(fpc, sat, SIN, dst, MASK_Y,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
                }
-               if (mask & MASK_Y) {
-                       arith(fpc, sat, SIN, dst, MASK_Y,
-                             swz(src[0], X, X, X, X), none, none);
+               else
+               {
+                       if (mask & MASK_Y) {
+                               arith(fpc, sat, SIN, dst, MASK_Y,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
+                       if (mask & MASK_X) {
+                               arith(fpc, sat, COS, dst, MASK_X,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
                }
                break;
        case TGSI_OPCODE_SIN:
@@ -572,28 +588,28 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
 {
        int hw;
 
-       switch (fdec->Semantic.SemanticName) {
+       switch (fdec->Semantic.Name) {
        case TGSI_SEMANTIC_POSITION:
-               hw = NV30_FP_OP_INPUT_SRC_POSITION;
+               hw = NVFX_FP_OP_INPUT_SRC_POSITION;
                break;
        case TGSI_SEMANTIC_COLOR:
-               if (fdec->Semantic.SemanticIndex == 0) {
-                       hw = NV30_FP_OP_INPUT_SRC_COL0;
+               if (fdec->Semantic.Index == 0) {
+                       hw = NVFX_FP_OP_INPUT_SRC_COL0;
                } else
-               if (fdec->Semantic.SemanticIndex == 1) {
-                       hw = NV30_FP_OP_INPUT_SRC_COL1;
+               if (fdec->Semantic.Index == 1) {
+                       hw = NVFX_FP_OP_INPUT_SRC_COL1;
                } else {
                        NOUVEAU_ERR("bad colour semantic index\n");
                        return FALSE;
                }
                break;
        case TGSI_SEMANTIC_FOG:
-               hw = NV30_FP_OP_INPUT_SRC_FOGC;
+               hw = NVFX_FP_OP_INPUT_SRC_FOGC;
                break;
        case TGSI_SEMANTIC_GENERIC:
-               if (fdec->Semantic.SemanticIndex <= 7) {
-                       hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
-                                                    SemanticIndex);
+               if (fdec->Semantic.Index <= 7) {
+                       hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
+                                                    Index);
                } else {
                        NOUVEAU_ERR("bad generic semantic index\n");
                        return FALSE;
@@ -604,7 +620,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
                return FALSE;
        }
 
-       fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+       fpc->attrib_map[fdec->Range.First] = hw;
        return TRUE;
 }
 
@@ -612,12 +628,12 @@ static boolean
 nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
                                const struct tgsi_full_declaration *fdec)
 {
-       switch (fdec->Semantic.SemanticName) {
+       switch (fdec->Semantic.Name) {
        case TGSI_SEMANTIC_POSITION:
-               fpc->depth_id = fdec->DeclarationRange.First;
+               fpc->depth_id = fdec->Range.First;
                break;
        case TGSI_SEMANTIC_COLOR:
-               fpc->colour_id = fdec->DeclarationRange.First;
+               fpc->colour_id = fdec->Range.First;
                break;
        default:
                NOUVEAU_ERR("bad output semantic\n");
@@ -653,9 +669,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
                                        goto out_err;
                                break;
                        /*case TGSI_FILE_TEMPORARY:
-                               if (fdec->DeclarationRange.Last > high_temp) {
+                               if (fdec->Range.Last > high_temp) {
                                        high_temp =
-                                               fdec->DeclarationRange.Last;
+                                               fdec->Range.Last;
                                }
                                break;*/
                        default:
@@ -686,7 +702,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
        tgsi_parse_free(&p);
 
        /*if (++high_temp) {
-               fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
+               fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
                for (i = 0; i < high_temp; i++)
                        fpc->r_temp[i] = temp(fpc);
                fpc->r_temps_discard = 0;
@@ -702,8 +718,8 @@ out_err:
 }
 
 static void
-nv30_fragprog_translate(struct nv30_context *nv30,
-                       struct nv30_fragment_program *fp)
+nv30_fragprog_translate(struct nvfx_context *nvfx,
+                       struct nvfx_fragment_program *fp)
 {
        struct tgsi_parse_context parse;
        struct nv30_fpc *fpc = NULL;
@@ -743,7 +759,6 @@ nv30_fragprog_translate(struct nv30_context *nv30,
        }
 
        fp->fp_control |= (fpc->num_regs-1)/2;
-       fp->fp_reg_control = (1<<16)|0x4;
 
        /* Terminate final instruction */
        fp->insn[fpc->inst_offset] |= 0x00000001;
@@ -757,17 +772,16 @@ nv30_fragprog_translate(struct nv30_context *nv30,
        fp->insn[fpc->inst_offset + 3] = 0x00000000;
 
        fp->translated = TRUE;
-       fp->on_hw = FALSE;
 out_err:
        tgsi_parse_free(&parse);
        FREE(fpc);
 }
 
 static void
-nv30_fragprog_upload(struct nv30_context *nv30,
-                    struct nv30_fragment_program *fp)
+nv30_fragprog_upload(struct nvfx_context *nvfx,
+                    struct nvfx_fragment_program *fp)
 {
-       struct pipe_screen *pscreen = nv30->pipe.screen;
+       struct pipe_screen *pscreen = nvfx->pipe.screen;
        const uint32_t le = 1;
        uint32_t *map;
        int i;
@@ -798,12 +812,12 @@ nv30_fragprog_upload(struct nv30_context *nv30,
 }
 
 static boolean
-nv30_fragprog_validate(struct nv30_context *nv30)
+nv30_fragprog_validate(struct nvfx_context *nvfx)
 {
-       struct nv30_fragment_program *fp = nv30->fragprog;
+       struct nvfx_fragment_program *fp = nvfx->fragprog;
        struct pipe_buffer *constbuf =
-               nv30->constbuf[PIPE_SHADER_FRAGMENT];
-       struct pipe_screen *pscreen = nv30->pipe.screen;
+               nvfx->constbuf[PIPE_SHADER_FRAGMENT];
+       struct pipe_screen *pscreen = nvfx->pipe.screen;
        struct nouveau_stateobj *so;
        boolean new_consts = FALSE;
        int i;
@@ -811,27 +825,27 @@ nv30_fragprog_validate(struct nv30_context *nv30)
        if (fp->translated)
                goto update_constants;
 
-       /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
-       nv30_fragprog_translate(nv30, fp);
+       /*nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG;*/
+       nv30_fragprog_translate(nvfx, fp);
        if (!fp->translated) {
-               /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
+               /*nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG;*/
                return FALSE;
        }
 
        fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
-       nv30_fragprog_upload(nv30, fp);
+       nv30_fragprog_upload(nvfx, fp);
 
-       so = so_new(8, 1);
-       so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
+       so = so_new(4, 4, 1);
+       so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1);
        so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
                      NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
                      NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
                      NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
-       so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
+       so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1);
        so_data  (so, fp->fp_control);
-       so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
-       so_data  (so, fp->fp_reg_control);
-       so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
+       so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1);
+       so_data  (so, (1<<16)|0x4);
+       so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1);
        so_data  (so, fp->samplers);
        so_ref(so, &fp->so);
        so_ref(NULL, &so);
@@ -843,7 +857,7 @@ update_constants:
                map = pipe_buffer_map(pscreen, constbuf,
                                      PIPE_BUFFER_USAGE_CPU_READ);
                for (i = 0; i < fp->nr_consts; i++) {
-                       struct nv30_fragment_program_data *fpd = &fp->consts[i];
+                       struct nvfx_fragment_program_data *fpd = &fp->consts[i];
                        uint32_t *p = &fp->insn[fpd->offset];
                        uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
 
@@ -855,11 +869,11 @@ update_constants:
                pipe_buffer_unmap(pscreen, constbuf);
 
                if (new_consts)
-                       nv30_fragprog_upload(nv30, fp);
+                       nv30_fragprog_upload(nvfx, fp);
        }
 
-       if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
-               so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
+       if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) {
+               so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]);
                return TRUE;
        }
 
@@ -867,17 +881,23 @@ update_constants:
 }
 
 void
-nv30_fragprog_destroy(struct nv30_context *nv30,
-                     struct nv30_fragment_program *fp)
+nv30_fragprog_destroy(struct nvfx_context *nvfx,
+                     struct nvfx_fragment_program *fp)
 {
+       if (fp->buffer)
+               pipe_buffer_reference(&fp->buffer, NULL);
+
+       if (fp->so)
+               so_ref(NULL, &fp->so);
+
        if (fp->insn_len)
                FREE(fp->insn);
 }
 
-struct nv30_state_entry nv30_state_fragprog = {
+struct nvfx_state_entry nv30_state_fragprog = {
        .validate = nv30_fragprog_validate,
        .dirty = {
-               .pipe = NV30_NEW_FRAGPROG,
-               .hw = NV30_STATE_FRAGPROG
+               .pipe = NVFX_NEW_FRAGPROG,
+               .hw = NVFX_STATE_FRAGPROG
        }
 };