i965: Fix ENDLOOP to only patch up this loop's BREAK and CONT.
[mesa.git] / src / gallium / drivers / nv30 / nv30_fragprog.c
index a48ba9782b329e47e32431f5fb48379ed36ef1c3..2c432c6dfa7caff343a5b7effa8eb0bea3a4ca5d 100644 (file)
@@ -1,9 +1,10 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
 
 #include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 
@@ -131,7 +132,7 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
                                sizeof(uint32_t) * 4);
                }
 
-               sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);       
+               sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
                break;
        case NV30SR_NONE:
                sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
@@ -236,20 +237,20 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
        struct nv30_sreg src;
 
-       switch (fsrc->SrcRegister.File) {
+       switch (fsrc->Register.File) {
        case TGSI_FILE_INPUT:
                src = nv30_sr(NV30SR_INPUT,
-                             fpc->attrib_map[fsrc->SrcRegister.Index]);
+                             fpc->attrib_map[fsrc->Register.Index]);
                break;
        case TGSI_FILE_CONSTANT:
-               src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+               src = constant(fpc, fsrc->Register.Index, NULL);
                break;
        case TGSI_FILE_IMMEDIATE:
-               assert(fsrc->SrcRegister.Index < fpc->nr_imm);
-               src = fpc->imm[fsrc->SrcRegister.Index];
+               assert(fsrc->Register.Index < fpc->nr_imm);
+               src = fpc->imm[fsrc->Register.Index];
                break;
        case TGSI_FILE_TEMPORARY:
-               src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+               src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
                if (fpc->high_temp < src.index)
                        fpc->high_temp = src.index;
                break;
@@ -257,7 +258,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
         * Luckily fragprog results are just temp regs..
         */
        case TGSI_FILE_OUTPUT:
-               if (fsrc->SrcRegister.Index == fpc->colour_id)
+               if (fsrc->Register.Index == fpc->colour_id)
                        return nv30_sr(NV30SR_OUTPUT, 0);
                else
                        return nv30_sr(NV30SR_OUTPUT, 1);
@@ -267,12 +268,12 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
                break;
        }
 
-       src.abs = fsrc->SrcRegisterExtMod.Absolute;
-       src.negate = fsrc->SrcRegister.Negate;
-       src.swz[0] = fsrc->SrcRegister.SwizzleX;
-       src.swz[1] = fsrc->SrcRegister.SwizzleY;
-       src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-       src.swz[3] = fsrc->SrcRegister.SwizzleW;
+       src.abs = fsrc->Register.Absolute;
+       src.negate = fsrc->Register.Negate;
+       src.swz[0] = fsrc->Register.SwizzleX;
+       src.swz[1] = fsrc->Register.SwizzleY;
+       src.swz[2] = fsrc->Register.SwizzleZ;
+       src.swz[3] = fsrc->Register.SwizzleW;
        return src;
 }
 
@@ -280,22 +281,22 @@ static INLINE struct nv30_sreg
 tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
        int idx;
 
-       switch (fdst->DstRegister.File) {
+       switch (fdst->Register.File) {
        case TGSI_FILE_OUTPUT:
-               if (fdst->DstRegister.Index == fpc->colour_id)
+               if (fdst->Register.Index == fpc->colour_id)
                        return nv30_sr(NV30SR_OUTPUT, 0);
                else
                        return nv30_sr(NV30SR_OUTPUT, 1);
                break;
        case TGSI_FILE_TEMPORARY:
-               idx = fdst->DstRegister.Index + 1;
+               idx = fdst->Register.Index + 1;
                if (fpc->high_temp < idx)
                        fpc->high_temp = idx;
                return nv30_sr(NV30SR_TEMP, idx);
        case TGSI_FILE_NULL:
                return nv30_sr(NV30SR_NONE, 0);
        default:
-               NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+               NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
                return nv30_sr(NV30SR_NONE, 0);
        }
 }
@@ -318,38 +319,23 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 {
        const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
        struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
-       uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
-       uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
-                       fsrc->SrcRegisterExtSwz.NegateY,
-                       fsrc->SrcRegisterExtSwz.NegateZ,
-                       fsrc->SrcRegisterExtSwz.NegateW };
+       uint mask = 0;
        uint c;
 
        for (c = 0; c < 4; c++) {
-               switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
-               case TGSI_EXTSWIZZLE_X:
-               case TGSI_EXTSWIZZLE_Y:
-               case TGSI_EXTSWIZZLE_Z:
-               case TGSI_EXTSWIZZLE_W:
+               switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+               case TGSI_SWIZZLE_X:
+               case TGSI_SWIZZLE_Y:
+               case TGSI_SWIZZLE_Z:
+               case TGSI_SWIZZLE_W:
                        mask |= (1 << c);
                        break;
-               case TGSI_EXTSWIZZLE_ZERO:
-                       zero_mask |= (1 << c);
-                       tgsi.swz[c] = SWZ_X;
-                       break;
-               case TGSI_EXTSWIZZLE_ONE:
-                       one_mask |= (1 << c);
-                       tgsi.swz[c] = SWZ_X;
-                       break;
                default:
                        assert(0);
                }
-
-               if (!tgsi.negate && neg[c])
-                       neg_mask |= (1 << c);
        }
 
-       if (mask == MASK_ALL && !neg_mask)
+       if (mask == MASK_ALL)
                return TRUE;
 
        *src = temp(fpc);
@@ -357,18 +343,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
        if (mask)
                arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
 
-       if (zero_mask)
-               arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
-
-       if (one_mask)
-               arith(fpc, 0, STR, *src, one_mask, *src, none, none);
-
-       if (neg_mask) {
-               struct nv30_sreg one = temp(fpc);
-               arith(fpc, 0, STR, one, neg_mask, one, none, none);
-               arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
-       }
-
        return FALSE;
 }
 
@@ -389,8 +363,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
-               fsrc = &finst->FullSrcRegisters[i];
-               if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+               fsrc = &finst->Src[i];
+               if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
                        src[i] = tgsi_src(fpc, fsrc);
                }
        }
@@ -398,9 +372,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
-               fsrc = &finst->FullSrcRegisters[i];
+               fsrc = &finst->Src[i];
 
-               switch (fsrc->SrcRegister.File) {
+               switch (fsrc->Register.File) {
                case TGSI_FILE_INPUT:
                case TGSI_FILE_CONSTANT:
                case TGSI_FILE_TEMPORARY:
@@ -411,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                        break;
                }
 
-               switch (fsrc->SrcRegister.File) {
+               switch (fsrc->Register.File) {
                case TGSI_FILE_INPUT:
-                       if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-                               ai = fsrc->SrcRegister.Index;
+                       if (ai == -1 || ai == fsrc->Register.Index) {
+                               ai = fsrc->Register.Index;
                                src[i] = tgsi_src(fpc, fsrc);
                        } else {
                                NOUVEAU_MSG("extra src attr %d\n",
-                                        fsrc->SrcRegister.Index);
+                                        fsrc->Register.Index);
                                src[i] = temp(fpc);
                                arith(fpc, 0, MOV, src[i], MASK_ALL,
                                      tgsi_src(fpc, fsrc), none, none);
@@ -426,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                        break;
                case TGSI_FILE_CONSTANT:
                case TGSI_FILE_IMMEDIATE:
-                       if (ci == -1 || ci == fsrc->SrcRegister.Index) {
-                               ci = fsrc->SrcRegister.Index;
+                       if (ci == -1 || ci == fsrc->Register.Index) {
+                               ci = fsrc->Register.Index;
                                src[i] = tgsi_src(fpc, fsrc);
                        } else {
                                src[i] = temp(fpc);
@@ -439,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                        /* handled above */
                        break;
                case TGSI_FILE_SAMPLER:
-                       unit = fsrc->SrcRegister.Index;
+                       unit = fsrc->Register.Index;
                        break;
                case TGSI_FILE_OUTPUT:
                        break;
@@ -449,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                }
        }
 
-       dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
-       mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+       dst  = tgsi_dst(fpc, &finst->Dst[0]);
+       mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
        sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
 
        switch (finst->Instruction.Opcode) {
@@ -461,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_CMP:
-               tmp = temp(fpc);
-               arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+               tmp = nv30_sr(NV30SR_NONE, 0);
                tmp.cc_update = 1;
                arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+               dst.cc_test = NV30_VP_INST_COND_GE;
+               arith(fpc, sat, MOV, dst, mask, src[2], none, none);
                dst.cc_test = NV30_VP_INST_COND_LT;
                arith(fpc, sat, MOV, dst, mask, src[1], none, none);
                break;
@@ -527,12 +502,6 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        case TGSI_OPCODE_MUL:
                arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
                break;
-       case TGSI_OPCODE_NOISE1:
-       case TGSI_OPCODE_NOISE2:
-       case TGSI_OPCODE_NOISE3:
-       case TGSI_OPCODE_NOISE4:
-               arith(fpc, sat, SFL, dst, mask, none, none, none);
-               break;
        case TGSI_OPCODE_POW:
                arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
                break;
@@ -549,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
                break;
        case TGSI_OPCODE_SCS:
-               if (mask & MASK_X) {
-                       arith(fpc, sat, COS, dst, MASK_X,
-                             swz(src[0], X, X, X, X), none, none);
+               /* avoid overwriting the source */
+               if(src[0].swz[SWZ_X] != SWZ_X)
+               {
+                       if (mask & MASK_X) {
+                               arith(fpc, sat, COS, dst, MASK_X,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
+                       if (mask & MASK_Y) {
+                               arith(fpc, sat, SIN, dst, MASK_Y,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
                }
-               if (mask & MASK_Y) {
-                       arith(fpc, sat, SIN, dst, MASK_Y,
-                             swz(src[0], X, X, X, X), none, none);
+               else
+               {
+                       if (mask & MASK_Y) {
+                               arith(fpc, sat, SIN, dst, MASK_Y,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
+                       if (mask & MASK_X) {
+                               arith(fpc, sat, COS, dst, MASK_X,
+                                     swz(src[0], X, X, X, X), none, none);
+                       }
                }
                break;
        case TGSI_OPCODE_SIN:
@@ -604,15 +588,15 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
 {
        int hw;
 
-       switch (fdec->Semantic.SemanticName) {
+       switch (fdec->Semantic.Name) {
        case TGSI_SEMANTIC_POSITION:
                hw = NV30_FP_OP_INPUT_SRC_POSITION;
                break;
        case TGSI_SEMANTIC_COLOR:
-               if (fdec->Semantic.SemanticIndex == 0) {
+               if (fdec->Semantic.Index == 0) {
                        hw = NV30_FP_OP_INPUT_SRC_COL0;
                } else
-               if (fdec->Semantic.SemanticIndex == 1) {
+               if (fdec->Semantic.Index == 1) {
                        hw = NV30_FP_OP_INPUT_SRC_COL1;
                } else {
                        NOUVEAU_ERR("bad colour semantic index\n");
@@ -623,9 +607,9 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
                hw = NV30_FP_OP_INPUT_SRC_FOGC;
                break;
        case TGSI_SEMANTIC_GENERIC:
-               if (fdec->Semantic.SemanticIndex <= 7) {
+               if (fdec->Semantic.Index <= 7) {
                        hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
-                                                    SemanticIndex);
+                                                    Index);
                } else {
                        NOUVEAU_ERR("bad generic semantic index\n");
                        return FALSE;
@@ -636,7 +620,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
                return FALSE;
        }
 
-       fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+       fpc->attrib_map[fdec->Range.First] = hw;
        return TRUE;
 }
 
@@ -644,12 +628,12 @@ static boolean
 nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
                                const struct tgsi_full_declaration *fdec)
 {
-       switch (fdec->Semantic.SemanticName) {
+       switch (fdec->Semantic.Name) {
        case TGSI_SEMANTIC_POSITION:
-               fpc->depth_id = fdec->DeclarationRange.First;
+               fpc->depth_id = fdec->Range.First;
                break;
        case TGSI_SEMANTIC_COLOR:
-               fpc->colour_id = fdec->DeclarationRange.First;
+               fpc->colour_id = fdec->Range.First;
                break;
        default:
                NOUVEAU_ERR("bad output semantic\n");
@@ -685,9 +669,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
                                        goto out_err;
                                break;
                        /*case TGSI_FILE_TEMPORARY:
-                               if (fdec->DeclarationRange.Last > high_temp) {
+                               if (fdec->Range.Last > high_temp) {
                                        high_temp =
-                                               fdec->DeclarationRange.Last;
+                                               fdec->Range.Last;
                                }
                                break;*/
                        default:
@@ -699,7 +683,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
                {
                        struct tgsi_full_immediate *imm;
                        float vals[4];
-                       
+
                        imm = &p.FullToken.FullImmediate;
                        assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
                        assert(fpc->nr_imm < MAX_IMM);
@@ -787,7 +771,7 @@ nv30_fragprog_translate(struct nv30_context *nv30,
        fp->insn[fpc->inst_offset + 1] = 0x00000000;
        fp->insn[fpc->inst_offset + 2] = 0x00000000;
        fp->insn[fpc->inst_offset + 3] = 0x00000000;
-       
+
        fp->translated = TRUE;
        fp->on_hw = FALSE;
 out_err:
@@ -853,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
        fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
        nv30_fragprog_upload(nv30, fp);
 
-       so = so_new(8, 1);
+       so = so_new(4, 4, 1);
        so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
        so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
                      NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
@@ -871,7 +855,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
 update_constants:
        if (fp->nr_consts) {
                float *map;
-               
+
                map = pipe_buffer_map(pscreen, constbuf,
                                      PIPE_BUFFER_USAGE_CPU_READ);
                for (i = 0; i < fp->nr_consts; i++) {
@@ -902,6 +886,12 @@ void
 nv30_fragprog_destroy(struct nv30_context *nv30,
                      struct nv30_fragment_program *fp)
 {
+       if (fp->buffer)
+               pipe_buffer_reference(&fp->buffer, NULL);
+
+       if (fp->so)
+               so_ref(NULL, &fp->so);
+
        if (fp->insn_len)
                FREE(fp->insn);
 }