r300: respect radeon common code fallbacks
[mesa.git] / src / gallium / drivers / nv40 / nv40_vertprog.c
index e10250528e2a406e0631139e40981be009821b4a..c93c5d127c44d634f97e8e0193dddb547d85a85d 100644 (file)
@@ -1,10 +1,11 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
+#include "util/u_inlines.h"
 
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
 
 #include "nv40_context.h"
 #include "nv40_state.h"
@@ -294,30 +295,30 @@ static INLINE struct nv40_sreg
 tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
        struct nv40_sreg src;
 
-       switch (fsrc->SrcRegister.File) {
+       switch (fsrc->Register.File) {
        case TGSI_FILE_INPUT:
-               src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
+               src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index);
                break;
        case TGSI_FILE_CONSTANT:
-               src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+               src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
                break;
        case TGSI_FILE_IMMEDIATE:
-               src = vpc->imm[fsrc->SrcRegister.Index];
+               src = vpc->imm[fsrc->Register.Index];
                break;
        case TGSI_FILE_TEMPORARY:
-               src = vpc->r_temp[fsrc->SrcRegister.Index];
+               src = vpc->r_temp[fsrc->Register.Index];
                break;
        default:
                NOUVEAU_ERR("bad src file\n");
                break;
        }
 
-       src.abs = fsrc->SrcRegisterExtMod.Absolute;
-       src.negate = fsrc->SrcRegister.Negate;
-       src.swz[0] = fsrc->SrcRegister.SwizzleX;
-       src.swz[1] = fsrc->SrcRegister.SwizzleY;
-       src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-       src.swz[3] = fsrc->SrcRegister.SwizzleW;
+       src.abs = fsrc->Register.Absolute;
+       src.negate = fsrc->Register.Negate;
+       src.swz[0] = fsrc->Register.SwizzleX;
+       src.swz[1] = fsrc->Register.SwizzleY;
+       src.swz[2] = fsrc->Register.SwizzleZ;
+       src.swz[3] = fsrc->Register.SwizzleW;
        return src;
 }
 
@@ -325,15 +326,15 @@ static INLINE struct nv40_sreg
 tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
        struct nv40_sreg dst;
 
-       switch (fdst->DstRegister.File) {
+       switch (fdst->Register.File) {
        case TGSI_FILE_OUTPUT:
-               dst = vpc->r_result[fdst->DstRegister.Index];
+               dst = vpc->r_result[fdst->Register.Index];
                break;
        case TGSI_FILE_TEMPORARY:
-               dst = vpc->r_temp[fdst->DstRegister.Index];
+               dst = vpc->r_temp[fdst->Register.Index];
                break;
        case TGSI_FILE_ADDRESS:
-               dst = vpc->r_address[fdst->DstRegister.Index];
+               dst = vpc->r_address[fdst->Register.Index];
                break;
        default:
                NOUVEAU_ERR("bad dst file\n");
@@ -361,38 +362,23 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 {
        const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
        struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
-       uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
-       uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
-                       fsrc->SrcRegisterExtSwz.NegateY,
-                       fsrc->SrcRegisterExtSwz.NegateZ,
-                       fsrc->SrcRegisterExtSwz.NegateW };
+       uint mask = 0;
        uint c;
 
        for (c = 0; c < 4; c++) {
-               switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
-               case TGSI_EXTSWIZZLE_X:
-               case TGSI_EXTSWIZZLE_Y:
-               case TGSI_EXTSWIZZLE_Z:
-               case TGSI_EXTSWIZZLE_W:
+               switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+               case TGSI_SWIZZLE_X:
+               case TGSI_SWIZZLE_Y:
+               case TGSI_SWIZZLE_Z:
+               case TGSI_SWIZZLE_W:
                        mask |= tgsi_mask(1 << c);
                        break;
-               case TGSI_EXTSWIZZLE_ZERO:
-                       zero_mask |= tgsi_mask(1 << c);
-                       tgsi.swz[c] = SWZ_X;
-                       break;
-               case TGSI_EXTSWIZZLE_ONE:
-                       one_mask |= tgsi_mask(1 << c);
-                       tgsi.swz[c] = SWZ_X;
-                       break;
                default:
                        assert(0);
                }
-
-               if (!tgsi.negate && neg[c])
-                       neg_mask |= tgsi_mask(1 << c);
        }
 
-       if (mask == MASK_ALL && !neg_mask)
+       if (mask == MASK_ALL)
                return TRUE;
 
        *src = temp(vpc);
@@ -400,18 +386,6 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
        if (mask)
                arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
 
-       if (zero_mask)
-               arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
-
-       if (one_mask)
-               arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
-
-       if (neg_mask) {
-               struct nv40_sreg one = temp(vpc);
-               arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
-               arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
-       }
-
        return FALSE;
 }
 
@@ -431,8 +405,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
-               fsrc = &finst->FullSrcRegisters[i];
-               if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+               fsrc = &finst->Src[i];
+               if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
                        src[i] = tgsi_src(vpc, fsrc);
                }
        }
@@ -440,9 +414,9 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
-               fsrc = &finst->FullSrcRegisters[i];
+               fsrc = &finst->Src[i];
 
-               switch (fsrc->SrcRegister.File) {
+               switch (fsrc->Register.File) {
                case TGSI_FILE_INPUT:
                case TGSI_FILE_CONSTANT:
                case TGSI_FILE_TEMPORARY:
@@ -453,10 +427,10 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                        break;
                }
 
-               switch (fsrc->SrcRegister.File) {
+               switch (fsrc->Register.File) {
                case TGSI_FILE_INPUT:
-                       if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-                               ai = fsrc->SrcRegister.Index;
+                       if (ai == -1 || ai == fsrc->Register.Index) {
+                               ai = fsrc->Register.Index;
                                src[i] = tgsi_src(vpc, fsrc);
                        } else {
                                src[i] = temp(vpc);
@@ -466,8 +440,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                        break;
                case TGSI_FILE_CONSTANT:
                        if ((ci == -1 && ii == -1) ||
-                           ci == fsrc->SrcRegister.Index) {
-                               ci = fsrc->SrcRegister.Index;
+                           ci == fsrc->Register.Index) {
+                               ci = fsrc->Register.Index;
                                src[i] = tgsi_src(vpc, fsrc);
                        } else {
                                src[i] = temp(vpc);
@@ -477,8 +451,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                        break;
                case TGSI_FILE_IMMEDIATE:
                        if ((ci == -1 && ii == -1) ||
-                           ii == fsrc->SrcRegister.Index) {
-                               ii = fsrc->SrcRegister.Index;
+                           ii == fsrc->Register.Index) {
+                               ii = fsrc->Register.Index;
                                src[i] = tgsi_src(vpc, fsrc);
                        } else {
                                src[i] = temp(vpc);
@@ -495,8 +469,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                }
        }
 
-       dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
-       mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+       dst  = tgsi_dst(vpc, &finst->Dst[0]);
+       mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
 
        switch (finst->Instruction.Opcode) {
        case TGSI_OPCODE_ABS:
@@ -571,7 +545,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        case TGSI_OPCODE_RET:
                break;
        case TGSI_OPCODE_RSQ:
-               arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+               arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
                break;
        case TGSI_OPCODE_SGE:
                arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
@@ -603,19 +577,19 @@ static boolean
 nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
                                const struct tgsi_full_declaration *fdec)
 {
-       unsigned idx = fdec->u.DeclarationRange.First;
+       unsigned idx = fdec->Range.First;
        int hw;
 
-       switch (fdec->Semantic.SemanticName) {
+       switch (fdec->Semantic.Name) {
        case TGSI_SEMANTIC_POSITION:
                hw = NV40_VP_INST_DEST_POS;
                vpc->hpos_idx = idx;
                break;
        case TGSI_SEMANTIC_COLOR:
-               if (fdec->Semantic.SemanticIndex == 0) {
+               if (fdec->Semantic.Index == 0) {
                        hw = NV40_VP_INST_DEST_COL0;
                } else
-               if (fdec->Semantic.SemanticIndex == 1) {
+               if (fdec->Semantic.Index == 1) {
                        hw = NV40_VP_INST_DEST_COL1;
                } else {
                        NOUVEAU_ERR("bad colour semantic index\n");
@@ -623,10 +597,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
                }
                break;
        case TGSI_SEMANTIC_BCOLOR:
-               if (fdec->Semantic.SemanticIndex == 0) {
+               if (fdec->Semantic.Index == 0) {
                        hw = NV40_VP_INST_DEST_BFC0;
                } else
-               if (fdec->Semantic.SemanticIndex == 1) {
+               if (fdec->Semantic.Index == 1) {
                        hw = NV40_VP_INST_DEST_BFC1;
                } else {
                        NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -640,13 +614,17 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
                hw = NV40_VP_INST_DEST_PSZ;
                break;
        case TGSI_SEMANTIC_GENERIC:
-               if (fdec->Semantic.SemanticIndex <= 7) {
-                       hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+               if (fdec->Semantic.Index <= 7) {
+                       hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index);
                } else {
                        NOUVEAU_ERR("bad generic semantic index\n");
                        return FALSE;
                }
                break;
+       case TGSI_SEMANTIC_EDGEFLAG:
+               /* not really an error, just a fallback */
+               NOUVEAU_ERR("cannot handle edgeflag output\n");
+               return FALSE;
        default:
                NOUVEAU_ERR("bad output semantic\n");
                return FALSE;
@@ -678,16 +656,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
                        fdec = &p.FullToken.FullDeclaration;
                        switch (fdec->Declaration.File) {
                        case TGSI_FILE_TEMPORARY:
-                               if (fdec->u.DeclarationRange.Last > high_temp) {
+                               if (fdec->Range.Last > high_temp) {
                                        high_temp =
-                                               fdec->u.DeclarationRange.Last;
+                                               fdec->Range.Last;
                                }
                                break;
 #if 0 /* this would be nice.. except gallium doesn't track it */
                        case TGSI_FILE_ADDRESS:
-                               if (fdec->u.DeclarationRange.Last > high_addr) {
+                               if (fdec->Range.Last > high_addr) {
                                        high_addr =
-                                               fdec->u.DeclarationRange.Last;
+                                               fdec->Range.Last;
                                }
                                break;
 #endif
@@ -707,11 +685,11 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
                        const struct tgsi_full_dst_register *fdst;
 
                        finst = &p.FullToken.FullInstruction;
-                       fdst = &finst->FullDstRegisters[0];
+                       fdst = &finst->Dst[0];
 
-                       if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
-                               if (fdst->DstRegister.Index > high_addr)
-                                       high_addr = fdst->DstRegister.Index;
+                       if (fdst->Register.File == TGSI_FILE_ADDRESS) {
+                               if (fdst->Register.Index > high_addr)
+                                       high_addr = fdst->Register.Index;
                        }
                
                }
@@ -764,7 +742,7 @@ nv40_vertprog_translate(struct nv40_context *nv40,
        }
 
        /* Redirect post-transform vertex position to a temp if user clip
-        * planes are enabled.  We need to append code the the vtxprog
+        * planes are enabled.  We need to append code to the vtxprog
         * to handle clip planes later.
         */
        if (vp->ucp.nr)  {
@@ -784,13 +762,13 @@ nv40_vertprog_translate(struct nv40_context *nv40,
 
                        imm = &parse.FullToken.FullImmediate;
                        assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
-//                     assert(imm->Immediate.Size == 4);
+                       assert(imm->Immediate.NrTokens == 4 + 1);
                        vpc->imm[vpc->nr_imm++] =
                                constant(vpc, -1,
-                                        imm->u.ImmediateFloat32[0].Float,
-                                        imm->u.ImmediateFloat32[1].Float,
-                                        imm->u.ImmediateFloat32[2].Float,
-                                        imm->u.ImmediateFloat32[3].Float);
+                                        imm->u[0].Float,
+                                        imm->u[1].Float,
+                                        imm->u[2].Float,
+                                        imm->u[3].Float);
                }
                        break;
                case TGSI_TOKEN_TYPE_INSTRUCTION:
@@ -843,15 +821,22 @@ nv40_vertprog_translate(struct nv40_context *nv40,
        vp->translated = TRUE;
 out_err:
        tgsi_parse_free(&parse);
+       if (vpc->r_temp)
+               FREE(vpc->r_temp); 
+       if (vpc->r_address)
+               FREE(vpc->r_address); 
+       if (vpc->imm)   
+               FREE(vpc->imm); 
        FREE(vpc);
 }
 
 static boolean
 nv40_vertprog_validate(struct nv40_context *nv40)
 { 
-       struct nouveau_winsys *nvws = nv40->nvws;
-       struct pipe_winsys *ws = nv40->pipe.winsys;
-       struct nouveau_grobj *curie = nv40->screen->curie;
+       struct pipe_screen *pscreen = nv40->pipe.screen;
+       struct nv40_screen *screen = nv40->screen;
+       struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_grobj *curie = screen->curie;
        struct nv40_vertex_program *vp;
        struct pipe_buffer *constbuf;
        boolean upload_code = FALSE, upload_data = FALSE;
@@ -889,19 +874,19 @@ check_gpu_resources:
                struct nouveau_stateobj *so;
                uint vplen = vp->nr_insns;
 
-               if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+               if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
                        while (heap->next && heap->size < vplen) {
                                struct nv40_vertex_program *evict;
                                
                                evict = heap->next->priv;
-                               nvws->res_free(&evict->exec);
+                               nouveau_resource_free(&evict->exec);
                        }
 
-                       if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+                       if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
                                assert(0);
                }
 
-               so = so_new(7, 0);
+               so = so_new(3, 4, 0);
                so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
                so_data  (so, vp->exec->start);
                so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
@@ -910,6 +895,7 @@ check_gpu_resources:
                so_method(so, curie,  NV40TCL_CLIP_PLANE_ENABLE, 1);
                so_data  (so, vp->clip_ctrl);
                so_ref(so, &vp->so);
+               so_ref(NULL, &so);
 
                upload_code = TRUE;
        }
@@ -918,15 +904,15 @@ check_gpu_resources:
        if (vp->nr_consts && !vp->data) {
                struct nouveau_resource *heap = nv40->screen->vp_data_heap;
 
-               if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+               if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
                        while (heap->next && heap->size < vp->nr_consts) {
                                struct nv40_vertex_program *evict;
                                
                                evict = heap->next->priv;
-                               nvws->res_free(&evict->data);
+                               nouveau_resource_free(&evict->data);
                        }
 
-                       if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+                       if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
                                assert(0);
                }
 
@@ -974,8 +960,8 @@ check_gpu_resources:
                float *map = NULL;
 
                if (constbuf) {
-                       map = ws->buffer_map(ws, constbuf,
-                                            PIPE_BUFFER_USAGE_CPU_READ);
+                       map = pipe_buffer_map(pscreen, constbuf,
+                                             PIPE_BUFFER_USAGE_CPU_READ);
                }
 
                for (i = 0; i < vp->nr_consts; i++) {
@@ -990,13 +976,13 @@ check_gpu_resources:
                                       4 * sizeof(float));
                        }
 
-                       BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
-                       OUT_RING  (i + vp->data->start);
-                       OUT_RINGp ((uint32_t *)vpd->value, 4);
+                       BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+                       OUT_RING  (chan, i + vp->data->start);
+                       OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
                }
 
                if (constbuf)
-                       ws->buffer_unmap(ws, constbuf);
+                       pscreen->buffer_unmap(pscreen, constbuf);
        }
 
        /* Upload vtxprog */
@@ -1009,11 +995,11 @@ check_gpu_resources:
                        NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
                }
 #endif
-               BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
-               OUT_RING  (vp->exec->start);
+               BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+               OUT_RING  (chan, vp->exec->start);
                for (i = 0; i < vp->nr_insns; i++) {
-                       BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
-                       OUT_RINGp (vp->insns[i].data, 4);
+                       BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+                       OUT_RINGp (chan, vp->insns[i].data, 4);
                }
        }
 
@@ -1028,8 +1014,6 @@ check_gpu_resources:
 void
 nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
 {
-       struct nouveau_winsys *nvws = nv40->screen->nvws;
-
        vp->translated = FALSE;
 
        if (vp->nr_insns) {
@@ -1044,9 +1028,9 @@ nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
                vp->nr_consts = 0;
        }
 
-       nvws->res_free(&vp->exec);
+       nouveau_resource_free(&vp->exec);
        vp->exec_start = 0;
-       nvws->res_free(&vp->data);
+       nouveau_resource_free(&vp->data);
        vp->data_start = 0;
        vp->data_start_min = 0;