nv40: implement user clip planes
authorBen Skeggs <skeggsb@gmail.com>
Mon, 7 Apr 2008 10:10:40 +0000 (20:10 +1000)
committerBen Skeggs <skeggsb@gmail.com>
Mon, 7 Apr 2008 11:43:29 +0000 (21:43 +1000)
It turns out the user planes handed to the driver are already in clip space.
Hence, we no longer need to transform incoming vertices before computing the
clip distance, and no longer need to change the interface provided by
gallium.  Yay :)

The clip state change handling could be better, but this works.

src/gallium/drivers/nv40/Makefile
src/gallium/drivers/nv40/nv40_context.h
src/gallium/drivers/nv40/nv40_draw.c
src/gallium/drivers/nv40/nv40_state.h
src/gallium/drivers/nv40/nv40_state_clip.c [deleted file]
src/gallium/drivers/nv40/nv40_state_emit.c
src/gallium/drivers/nv40/nv40_vertprog.c

index 3369a21574fd322c5e0685e47cfdf4750ed6dbe3..9c8eadf7e440c07694da8a90bfb762f059fbac02 100644 (file)
@@ -14,7 +14,6 @@ DRIVER_SOURCES = \
        nv40_screen.c \
        nv40_state.c \
        nv40_state_blend.c \
-       nv40_state_clip.c \
        nv40_state_emit.c \
        nv40_state_fb.c \
        nv40_state_rasterizer.c \
index a1b5c88a0636c478ea9e19e224d9b2b05e6158ba..436351b6bcc5d9627c2708166e5ce27b007319c3 100644 (file)
@@ -76,9 +76,6 @@ enum nv40_state_index {
 #define NV40_NEW_ARRAYS                (1 << 11)
 #define NV40_NEW_UCP           (1 << 12)
 
-#define NV40_FALLBACK_TNL (1 << 0)
-#define NV40_FALLBACK_RAST (1 << 1)
-
 struct nv40_rasterizer_state {
        struct pipe_rasterizer_state pipe;
        struct nouveau_stateobj *so;
@@ -203,7 +200,6 @@ extern void nv40_fragtex_bind(struct nv40_context *);
 extern boolean nv40_state_validate(struct nv40_context *nv40);
 extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
 extern void nv40_state_emit(struct nv40_context *nv40);
-extern struct nv40_state_entry nv40_state_clip;
 extern struct nv40_state_entry nv40_state_rasterizer;
 extern struct nv40_state_entry nv40_state_scissor;
 extern struct nv40_state_entry nv40_state_stipple;
index 9cd8fa6a497ae5ac40538ba02106b4e99d523f56..7f008aca3a806aafd324456f214dfc85e70a21fd 100644 (file)
@@ -236,7 +236,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
 
        if (!nv40_state_validate_swtnl(nv40))
                return FALSE;
-       nv40->dirty &= ~(1ULL << NV40_STATE_VTXBUF);
+       nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
        nv40_state_emit(nv40);
 
        for (i = 0; i < PIPE_ATTRIB_MAX; i++) {
index e018464c9f8431ad0e2e1e45f26621f94ff2f351..2b4225deb2e748006d51e86e0f206a44f040beef 100644 (file)
@@ -29,6 +29,9 @@ struct nv40_vertex_program {
        struct draw_vertex_shader *draw;
 
        boolean translated;
+
+       struct pipe_clip_state ucp;
+
        struct nv40_vertex_program_exec *insns;
        unsigned nr_insns;
        struct nv40_vertex_program_data *consts;
diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c
deleted file mode 100644 (file)
index c52390f..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "nv40_context.h"
-
-static boolean
-nv40_state_clip_validate(struct nv40_context *nv40)
-{
-
-       if (nv40->render_mode == HW) {
-               nv40->fallback_swtnl &= ~NV40_NEW_UCP;
-               if (nv40->clip.nr)
-                       nv40->fallback_swtnl |= NV40_NEW_UCP;
-       }
-
-       return FALSE;
-}
-
-struct nv40_state_entry nv40_state_clip = {
-       .validate = nv40_state_clip_validate,
-       .dirty = {
-               .pipe = NV40_NEW_UCP,
-               .hw = 0
-       }
-};
index c742e4f421eaded543b616ae444fab97086c36fa..864dfc2e0c3d3b1520fb6be91e5965995be8f8c5 100644 (file)
@@ -5,7 +5,6 @@
 static struct nv40_state_entry *render_states[] = {
        &nv40_state_framebuffer,
        &nv40_state_rasterizer,
-       &nv40_state_clip,
        &nv40_state_scissor,
        &nv40_state_stipple,
        &nv40_state_fragprog,
@@ -22,7 +21,6 @@ static struct nv40_state_entry *render_states[] = {
 static struct nv40_state_entry *swtnl_states[] = {
        &nv40_state_framebuffer,
        &nv40_state_rasterizer,
-       &nv40_state_clip,
        &nv40_state_scissor,
        &nv40_state_stipple,
        &nv40_state_fragprog,
@@ -127,8 +125,7 @@ nv40_state_validate(struct nv40_context *nv40)
                nv40->pipe.flush(&nv40->pipe, 0, NULL);
                nv40->dirty |= (NV40_NEW_VIEWPORT |
                                NV40_NEW_VERTPROG |
-                               NV40_NEW_ARRAYS |
-                               NV40_NEW_UCP);
+                               NV40_NEW_ARRAYS);
                nv40->render_mode = HW;
        }
 
@@ -153,8 +150,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
                nv40->pipe.flush(&nv40->pipe, 0, NULL);
                nv40->dirty |= (NV40_NEW_VIEWPORT |
                                NV40_NEW_VERTPROG |
-                               NV40_NEW_ARRAYS |
-                               NV40_NEW_UCP);
+                               NV40_NEW_ARRAYS);
                nv40->render_mode = SWTNL;
        }
 
index 08d3f387e090c6d2dd7e96479801c9f7f13929b5..e10250528e2a406e0631139e40981be009821b4a 100644 (file)
@@ -52,6 +52,8 @@ struct nv40_vpc {
 
        struct nv40_sreg *imm;
        unsigned nr_imm;
+
+       unsigned hpos_idx;
 };
 
 static struct nv40_sreg
@@ -423,11 +425,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        int ai = -1, ci = -1, ii = -1;
        int i;
 
-       struct {
-               struct nv40_sreg dst;
-               unsigned m;
-       } clip;
-
        if (finst->Instruction.Opcode == TGSI_OPCODE_END)
                return TRUE;
 
@@ -501,47 +498,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
        mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
 
-       /* If writing to clip distance regs, need to modify instruction to
-        * change which component is written to.  On NV40 the clip regs
-        * are the unused components (yzw) of FOGC/PSZ.
-        */
-       clip.dst = none;
-       if (dst.type == NV40SR_OUTPUT &&
-           dst.index >= NV40_VP_INST_DEST_CLIP(0) &&
-           dst.index <= NV40_VP_INST_DEST_CLIP(5)) {
-               unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0);
-               unsigned m[] =
-                       { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W };
-
-               /* Some instructions we can get away with swizzling and/or
-                * changing the writemask.  Others, we'll use a temp reg.
-                */
-               switch (finst->Instruction.Opcode) {
-               case TGSI_OPCODE_DST:
-               case TGSI_OPCODE_EXP:
-               case TGSI_OPCODE_LIT:
-               case TGSI_OPCODE_LOG:
-               case TGSI_OPCODE_XPD:
-                       clip.dst = dst;
-                       clip.m = m[n];
-                       dst = temp(vpc);
-                       break;
-               case TGSI_OPCODE_DP3:
-               case TGSI_OPCODE_DP4:
-               case TGSI_OPCODE_DPH:
-               case TGSI_OPCODE_POW:
-               case TGSI_OPCODE_RCP:
-               case TGSI_OPCODE_RSQ:
-                       mask = m[n];
-                       break;
-               default:
-                       for (i = 0; i < finst->Instruction.NumSrcRegs; i++)
-                               src[i] = swz(src[i], X, X, X, X);
-                       mask = m[n];
-                       break;
-               }
-       }
-
        switch (finst->Instruction.Opcode) {
        case TGSI_OPCODE_ABS:
                arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
@@ -639,11 +595,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                return FALSE;
        }
 
-       if (clip.dst.type != NV40SR_NONE) {
-               arith(vpc, 0, OP_MOV, clip.dst, clip.m,
-                     swz(dst, X, X, X, X), none, none);
-       }
-
        release_temps(vpc);
        return TRUE;
 }
@@ -658,6 +609,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
        switch (fdec->Semantic.SemanticName) {
        case TGSI_SEMANTIC_POSITION:
                hw = NV40_VP_INST_DEST_POS;
+               vpc->hpos_idx = idx;
                break;
        case TGSI_SEMANTIC_COLOR:
                if (fdec->Semantic.SemanticIndex == 0) {
@@ -695,15 +647,6 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
                        return FALSE;
                }
                break;
-#if 0
-       case TGSI_SEMANTIC_CLIP:
-               if (fdec->Semantic.SemanticIndex >= 6) {
-                       NOUVEAU_ERR("bad clip distance index\n");
-                       return FALSE;
-               }
-               hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex);
-               break;
-#endif
        default:
                NOUVEAU_ERR("bad output semantic\n");
                return FALSE;
@@ -748,6 +691,10 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
                                }
                                break;
 #endif
+                       case TGSI_FILE_OUTPUT:
+                               if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+                                       return FALSE;
+                               break;
                        default:
                                break;
                        }
@@ -803,6 +750,8 @@ nv40_vertprog_translate(struct nv40_context *nv40,
 {
        struct tgsi_parse_context parse;
        struct nv40_vpc *vpc = NULL;
+       struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+       int i;
 
        vpc = CALLOC(1, sizeof(struct nv40_vpc));
        if (!vpc)
@@ -814,26 +763,21 @@ nv40_vertprog_translate(struct nv40_context *nv40,
                return;
        }
 
+       /* Redirect post-transform vertex position to a temp if user clip
+        * planes are enabled.  We need to append code the the vtxprog
+        * to handle clip planes later.
+        */
+       if (vp->ucp.nr)  {
+               vpc->r_result[vpc->hpos_idx] = temp(vpc);
+               vpc->r_temps_discard = 0;
+       }
+
        tgsi_parse_init(&parse, vp->pipe.tokens);
 
        while (!tgsi_parse_end_of_tokens(&parse)) {
                tgsi_parse_token(&parse);
 
                switch (parse.FullToken.Token.Type) {
-               case TGSI_TOKEN_TYPE_DECLARATION:
-               {
-                       const struct tgsi_full_declaration *fdec;
-                       fdec = &parse.FullToken.FullDeclaration;
-                       switch (fdec->Declaration.File) {
-                       case TGSI_FILE_OUTPUT:
-                               if (!nv40_vertprog_parse_decl_output(vpc, fdec))
-                                       goto out_err;
-                               break;
-                       default:
-                               break;
-                       }
-               }
-                       break;
                case TGSI_TOKEN_TYPE_IMMEDIATE:
                {
                        const struct tgsi_full_immediate *imm;
@@ -862,6 +806,39 @@ nv40_vertprog_translate(struct nv40_context *nv40,
                }
        }
 
+       /* Write out HPOS if it was redirected to a temp earlier */
+       if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
+               struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+                                               NV40_VP_INST_DEST_POS);
+               struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+               arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+       }
+
+       /* Insert code to handle user clip planes */
+       for (i = 0; i < vp->ucp.nr; i++) {
+               struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+                                               NV40_VP_INST_DEST_CLIP(i));
+               struct nv40_sreg ceqn = constant(vpc, -1,
+                                                nv40->clip.ucp[i][0],
+                                                nv40->clip.ucp[i][1],
+                                                nv40->clip.ucp[i][2],
+                                                nv40->clip.ucp[i][3]);
+               struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+               unsigned mask;
+
+               switch (i) {
+               case 0: case 3: mask = MASK_Y; break;
+               case 1: case 4: mask = MASK_Z; break;
+               case 2: case 5: mask = MASK_W; break;
+               default:
+                       NOUVEAU_ERR("invalid clip dist #%d\n", i);
+                       goto out_err;
+               }
+
+               arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+       }
+
        vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
        vp->translated = TRUE;
 out_err:
@@ -883,6 +860,12 @@ nv40_vertprog_validate(struct nv40_context *nv40)
        if (nv40->render_mode == HW) {
                vp = nv40->vertprog;
                constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+
+               if ((nv40->dirty & NV40_NEW_UCP) ||
+                   memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) {
+                       nv40_vertprog_destroy(nv40, vp);
+                       memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp));
+               }
        } else {
                vp = nv40->swtnl.vertprog;
                constbuf = NULL;
@@ -1045,16 +1028,36 @@ check_gpu_resources:
 void
 nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
 {
-       if (vp->nr_consts)
-               FREE(vp->consts);
-       if (vp->nr_insns)
+       struct nouveau_winsys *nvws = nv40->screen->nvws;
+
+       vp->translated = FALSE;
+
+       if (vp->nr_insns) {
                FREE(vp->insns);
+               vp->insns = NULL;
+               vp->nr_insns = 0;
+       }
+
+       if (vp->nr_consts) {
+               FREE(vp->consts);
+               vp->consts = NULL;
+               vp->nr_consts = 0;
+       }
+
+       nvws->res_free(&vp->exec);
+       vp->exec_start = 0;
+       nvws->res_free(&vp->data);
+       vp->data_start = 0;
+       vp->data_start_min = 0;
+
+       vp->ir = vp->or = vp->clip_ctrl = 0;
+       so_ref(NULL, &vp->so);
 }
 
 struct nv40_state_entry nv40_state_vertprog = {
        .validate = nv40_vertprog_validate,
        .dirty = {
-               .pipe = NV40_NEW_VERTPROG,
+               .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP,
                .hw = NV40_STATE_VERTPROG,
        }
 };