nouveau: call notifier/grobj etc funcs directly
[mesa.git] / src / gallium / drivers / nv40 / nv40_vertprog.c
index 9f1ee575ce9d0b4cf9ac57b048acca52c3fd236b..f32d4d690ce5cfbb21905d17fb79b997bbe319ac 100644 (file)
@@ -3,14 +3,14 @@
 #include "pipe/p_state.h"
 
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
 
 #include "nv40_context.h"
 #include "nv40_state.h"
 
 /* TODO (at least...):
  *  1. Indexed consts  + ARL
- *  2. Arb. swz/negation
  *  3. NV_vp11, NV_vp2, NV_vp3 features
  *       - extra arith opcodes
  *       - branching
 #define neg(s) nv40_sr_neg((s))
 #define abs(s) nv40_sr_abs((s))
 
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
 struct nv40_vpc {
        struct nv40_vertex_program *vp;
 
        struct nv40_vertex_program_exec *vpi;
 
-       unsigned output_map[PIPE_MAX_SHADER_OUTPUTS];
-
-       int high_temp;
-       int temp_temp_count;
+       unsigned r_temps;
+       unsigned r_temps_discard;
+       struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
+       struct nv40_sreg *r_address;
+       struct nv40_sreg *r_temp;
 
        struct nv40_sreg *imm;
        unsigned nr_imm;
+
+       unsigned hpos_idx;
 };
 
 static struct nv40_sreg
 temp(struct nv40_vpc *vpc)
 {
-       int idx;
+       int idx = ffs(~vpc->r_temps) - 1;
 
-       idx  = vpc->temp_temp_count++;
-       idx += vpc->high_temp + 1;
+       if (idx < 0) {
+               NOUVEAU_ERR("out of temps!!\n");
+               assert(0);
+               return nv40_sr(NV40SR_TEMP, 0);
+       }
+
+       vpc->r_temps |= (1 << idx);
+       vpc->r_temps_discard |= (1 << idx);
        return nv40_sr(NV40SR_TEMP, idx);
 }
 
+static INLINE void
+release_temps(struct nv40_vpc *vpc)
+{
+       vpc->r_temps &= ~vpc->r_temps_discard;
+       vpc->r_temps_discard = 0;
+}
+
 static struct nv40_sreg
 constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
 {
@@ -186,6 +204,36 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
                case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
                case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
                case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+               case NV40_VP_INST_DEST_CLIP(0):
+                       vp->or |= (1 << 6);
+                       vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+                       dst.index = NV40_VP_INST_DEST_FOGC;
+                       break;
+               case NV40_VP_INST_DEST_CLIP(1):
+                       vp->or |= (1 << 7);
+                       vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+                       dst.index = NV40_VP_INST_DEST_FOGC;
+                       break;
+               case NV40_VP_INST_DEST_CLIP(2):
+                       vp->or |= (1 << 8);
+                       vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+                       dst.index = NV40_VP_INST_DEST_FOGC;
+                       break;
+               case NV40_VP_INST_DEST_CLIP(3):
+                       vp->or |= (1 << 9);
+                       vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+                       dst.index = NV40_VP_INST_DEST_PSZ;
+                       break;
+               case NV40_VP_INST_DEST_CLIP(4):
+                       vp->or |= (1 << 10);
+                       vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+                       dst.index = NV40_VP_INST_DEST_PSZ;
+                       break;
+               case NV40_VP_INST_DEST_CLIP(5):
+                       vp->or |= (1 << 11);
+                       vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+                       dst.index = NV40_VP_INST_DEST_PSZ;
+                       break;
                default:
                        break;
                }
@@ -257,9 +305,7 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
                src = vpc->imm[fsrc->SrcRegister.Index];
                break;
        case TGSI_FILE_TEMPORARY:
-               if (vpc->high_temp < fsrc->SrcRegister.Index)
-                       vpc->high_temp = fsrc->SrcRegister.Index;
-               src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index);
+               src = vpc->r_temp[fsrc->SrcRegister.Index];
                break;
        default:
                NOUVEAU_ERR("bad src file\n");
@@ -281,14 +327,13 @@ tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
 
        switch (fdst->DstRegister.File) {
        case TGSI_FILE_OUTPUT:
-               dst = nv40_sr(NV40SR_OUTPUT,
-                             vpc->output_map[fdst->DstRegister.Index]);
-
+               dst = vpc->r_result[fdst->DstRegister.Index];
                break;
        case TGSI_FILE_TEMPORARY:
-               dst = nv40_sr(NV40SR_TEMP, fdst->DstRegister.Index);
-               if (vpc->high_temp < dst.index)
-                       vpc->high_temp = dst.index;
+               dst = vpc->r_temp[fdst->DstRegister.Index];
+               break;
+       case TGSI_FILE_ADDRESS:
+               dst = vpc->r_address[fdst->DstRegister.Index];
                break;
        default:
                NOUVEAU_ERR("bad dst file\n");
@@ -310,6 +355,66 @@ tgsi_mask(uint tgsi)
        return mask;
 }
 
+static boolean
+src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
+              struct nv40_sreg *src)
+{
+       const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+       struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
+       uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
+       uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
+                       fsrc->SrcRegisterExtSwz.NegateY,
+                       fsrc->SrcRegisterExtSwz.NegateZ,
+                       fsrc->SrcRegisterExtSwz.NegateW };
+       uint c;
+
+       for (c = 0; c < 4; c++) {
+               switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
+               case TGSI_EXTSWIZZLE_X:
+               case TGSI_EXTSWIZZLE_Y:
+               case TGSI_EXTSWIZZLE_Z:
+               case TGSI_EXTSWIZZLE_W:
+                       mask |= tgsi_mask(1 << c);
+                       break;
+               case TGSI_EXTSWIZZLE_ZERO:
+                       zero_mask |= tgsi_mask(1 << c);
+                       tgsi.swz[c] = SWZ_X;
+                       break;
+               case TGSI_EXTSWIZZLE_ONE:
+                       one_mask |= tgsi_mask(1 << c);
+                       tgsi.swz[c] = SWZ_X;
+                       break;
+               default:
+                       assert(0);
+               }
+
+               if (!tgsi.negate && neg[c])
+                       neg_mask |= tgsi_mask(1 << c);
+       }
+
+       if (mask == MASK_ALL && !neg_mask)
+               return TRUE;
+
+       *src = temp(vpc);
+
+       if (mask)
+               arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
+
+       if (zero_mask)
+               arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
+
+       if (one_mask)
+               arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
+
+       if (neg_mask) {
+               struct nv40_sreg one = temp(vpc);
+               arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
+               arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
+       }
+
+       return FALSE;
+}
+
 static boolean
 nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                                const struct tgsi_full_instruction *finst)
@@ -317,13 +422,12 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        struct nv40_sreg src[3], dst, tmp;
        struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
        int mask;
-       int ai = -1, ci = -1;
+       int ai = -1, ci = -1, ii = -1;
        int i;
 
        if (finst->Instruction.Opcode == TGSI_OPCODE_END)
                return TRUE;
 
-       vpc->temp_temp_count = 0;
        for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *fsrc;
 
@@ -337,6 +441,18 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                const struct tgsi_full_src_register *fsrc;
 
                fsrc = &finst->FullSrcRegisters[i];
+
+               switch (fsrc->SrcRegister.File) {
+               case TGSI_FILE_INPUT:
+               case TGSI_FILE_CONSTANT:
+               case TGSI_FILE_TEMPORARY:
+                       if (!src_native_swz(vpc, fsrc, &src[i]))
+                               continue;
+                       break;
+               default:
+                       break;
+               }
+
                switch (fsrc->SrcRegister.File) {
                case TGSI_FILE_INPUT:
                        if (ai == -1 || ai == fsrc->SrcRegister.Index) {
@@ -348,12 +464,9 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                                      tgsi_src(vpc, fsrc), none, none);
                        }
                        break;
-               /*XXX: index comparison is broken now that consts come from
-                *     two different register files.
-                */
                case TGSI_FILE_CONSTANT:
-               case TGSI_FILE_IMMEDIATE:
-                       if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+                       if ((ci == -1 && ii == -1) ||
+                           ci == fsrc->SrcRegister.Index) {
                                ci = fsrc->SrcRegister.Index;
                                src[i] = tgsi_src(vpc, fsrc);
                        } else {
@@ -362,6 +475,17 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                                      tgsi_src(vpc, fsrc), none, none);
                        }
                        break;
+               case TGSI_FILE_IMMEDIATE:
+                       if ((ci == -1 && ii == -1) ||
+                           ii == fsrc->SrcRegister.Index) {
+                               ii = fsrc->SrcRegister.Index;
+                               src[i] = tgsi_src(vpc, fsrc);
+                       } else {
+                               src[i] = temp(vpc);
+                               arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+                                     tgsi_src(vpc, fsrc), none, none);
+                       }
+                       break;
                case TGSI_FILE_TEMPORARY:
                        /* handled above */
                        break;
@@ -447,7 +571,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
        case TGSI_OPCODE_RET:
                break;
        case TGSI_OPCODE_RSQ:
-               arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+               arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0]));
                break;
        case TGSI_OPCODE_SGE:
                arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
@@ -471,6 +595,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
                return FALSE;
        }
 
+       release_temps(vpc);
        return TRUE;
 }
 
@@ -478,11 +603,13 @@ static boolean
 nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
                                const struct tgsi_full_declaration *fdec)
 {
+       unsigned idx = fdec->DeclarationRange.First;
        int hw;
 
        switch (fdec->Semantic.SemanticName) {
        case TGSI_SEMANTIC_POSITION:
                hw = NV40_VP_INST_DEST_POS;
+               vpc->hpos_idx = idx;
                break;
        case TGSI_SEMANTIC_COLOR:
                if (fdec->Semantic.SemanticIndex == 0) {
@@ -525,7 +652,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
                return FALSE;
        }
 
-       vpc->output_map[fdec->u.DeclarationRange.First] = hw;
+       vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw);
        return TRUE;
 }
 
@@ -533,7 +660,7 @@ static boolean
 nv40_vertprog_prepare(struct nv40_vpc *vpc)
 {
        struct tgsi_parse_context p;
-       int nr_imm = 0;
+       int high_temp = -1, high_addr = -1, nr_imm = 0, i;
 
        tgsi_parse_init(&p, vpc->vp->pipe.tokens);
        while (!tgsi_parse_end_of_tokens(&p)) {
@@ -544,6 +671,52 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
                case TGSI_TOKEN_TYPE_IMMEDIATE:
                        nr_imm++;
                        break;
+               case TGSI_TOKEN_TYPE_DECLARATION:
+               {
+                       const struct tgsi_full_declaration *fdec;
+
+                       fdec = &p.FullToken.FullDeclaration;
+                       switch (fdec->Declaration.File) {
+                       case TGSI_FILE_TEMPORARY:
+                               if (fdec->DeclarationRange.Last > high_temp) {
+                                       high_temp =
+                                               fdec->DeclarationRange.Last;
+                               }
+                               break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+                       case TGSI_FILE_ADDRESS:
+                               if (fdec->DeclarationRange.Last > high_addr) {
+                                       high_addr =
+                                               fdec->DeclarationRange.Last;
+                               }
+                               break;
+#endif
+                       case TGSI_FILE_OUTPUT:
+                               if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+                                       return FALSE;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+                       break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+               case TGSI_TOKEN_TYPE_INSTRUCTION:
+               {
+                       const struct tgsi_full_instruction *finst;
+                       const struct tgsi_full_dst_register *fdst;
+
+                       finst = &p.FullToken.FullInstruction;
+                       fdst = &finst->FullDstRegisters[0];
+
+                       if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
+                               if (fdst->DstRegister.Index > high_addr)
+                                       high_addr = fdst->DstRegister.Index;
+                       }
+               
+               }
+                       break;
+#endif
                default:
                        break;
                }
@@ -555,6 +728,19 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
                assert(vpc->imm);
        }
 
+       if (++high_temp) {
+               vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+               for (i = 0; i < high_temp; i++)
+                       vpc->r_temp[i] = temp(vpc);
+       }
+
+       if (++high_addr) {
+               vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+               for (i = 0; i < high_addr; i++)
+                       vpc->r_address[i] = temp(vpc);
+       }
+
+       vpc->r_temps_discard = 0;
        return TRUE;
 }
 
@@ -564,45 +750,41 @@ nv40_vertprog_translate(struct nv40_context *nv40,
 {
        struct tgsi_parse_context parse;
        struct nv40_vpc *vpc = NULL;
+       struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+       int i;
 
        vpc = CALLOC(1, sizeof(struct nv40_vpc));
        if (!vpc)
                return;
        vpc->vp = vp;
-       vpc->high_temp = -1;
 
        if (!nv40_vertprog_prepare(vpc)) {
-               free(vpc);
+               FREE(vpc);
                return;
        }
 
+       /* Redirect post-transform vertex position to a temp if user clip
+        * planes are enabled.  We need to append code to the vtxprog
+        * to handle clip planes later.
+        */
+       if (vp->ucp.nr)  {
+               vpc->r_result[vpc->hpos_idx] = temp(vpc);
+               vpc->r_temps_discard = 0;
+       }
+
        tgsi_parse_init(&parse, vp->pipe.tokens);
 
        while (!tgsi_parse_end_of_tokens(&parse)) {
                tgsi_parse_token(&parse);
 
                switch (parse.FullToken.Token.Type) {
-               case TGSI_TOKEN_TYPE_DECLARATION:
-               {
-                       const struct tgsi_full_declaration *fdec;
-                       fdec = &parse.FullToken.FullDeclaration;
-                       switch (fdec->Declaration.File) {
-                       case TGSI_FILE_OUTPUT:
-                               if (!nv40_vertprog_parse_decl_output(vpc, fdec))
-                                       goto out_err;
-                               break;
-                       default:
-                               break;
-                       }
-               }
-                       break;
                case TGSI_TOKEN_TYPE_IMMEDIATE:
                {
                        const struct tgsi_full_immediate *imm;
 
                        imm = &parse.FullToken.FullImmediate;
                        assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
-//                     assert(imm->Immediate.Size == 4);
+                       assert(imm->Immediate.NrTokens == 4 + 1);
                        vpc->imm[vpc->nr_imm++] =
                                constant(vpc, -1,
                                         imm->u.ImmediateFloat32[0].Float,
@@ -624,18 +806,57 @@ nv40_vertprog_translate(struct nv40_context *nv40,
                }
        }
 
+       /* Write out HPOS if it was redirected to a temp earlier */
+       if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) {
+               struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT,
+                                               NV40_VP_INST_DEST_POS);
+               struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+
+               arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none);
+       }
+
+       /* Insert code to handle user clip planes */
+       for (i = 0; i < vp->ucp.nr; i++) {
+               struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT,
+                                               NV40_VP_INST_DEST_CLIP(i));
+               struct nv40_sreg ceqn = constant(vpc, -1,
+                                                nv40->clip.ucp[i][0],
+                                                nv40->clip.ucp[i][1],
+                                                nv40->clip.ucp[i][2],
+                                                nv40->clip.ucp[i][3]);
+               struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx];
+               unsigned mask;
+
+               switch (i) {
+               case 0: case 3: mask = MASK_Y; break;
+               case 1: case 4: mask = MASK_Z; break;
+               case 2: case 5: mask = MASK_W; break;
+               default:
+                       NOUVEAU_ERR("invalid clip dist #%d\n", i);
+                       goto out_err;
+               }
+
+               arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none);
+       }
+
        vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST;
        vp->translated = TRUE;
 out_err:
        tgsi_parse_free(&parse);
-       free(vpc);
+       if (vpc->r_temp)
+               FREE(vpc->r_temp); 
+       if (vpc->r_address)
+               FREE(vpc->r_address); 
+       if (vpc->imm)   
+               FREE(vpc->imm); 
+       FREE(vpc);
 }
 
 static boolean
 nv40_vertprog_validate(struct nv40_context *nv40)
 { 
-       struct nouveau_winsys *nvws = nv40->nvws;
        struct pipe_winsys *ws = nv40->pipe.winsys;
+       struct nouveau_grobj *curie = nv40->screen->curie;
        struct nv40_vertex_program *vp;
        struct pipe_buffer *constbuf;
        boolean upload_code = FALSE, upload_data = FALSE;
@@ -644,6 +865,12 @@ nv40_vertprog_validate(struct nv40_context *nv40)
        if (nv40->render_mode == HW) {
                vp = nv40->vertprog;
                constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+
+               if ((nv40->dirty & NV40_NEW_UCP) ||
+                   memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) {
+                       nv40_vertprog_destroy(nv40, vp);
+                       memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp));
+               }
        } else {
                vp = nv40->swtnl.vertprog;
                constbuf = NULL;
@@ -667,25 +894,28 @@ check_gpu_resources:
                struct nouveau_stateobj *so;
                uint vplen = vp->nr_insns;
 
-               if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) {
+               if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
                        while (heap->next && heap->size < vplen) {
                                struct nv40_vertex_program *evict;
                                
                                evict = heap->next->priv;
-                               nvws->res_free(&evict->exec);
+                               nouveau_resource_free(&evict->exec);
                        }
 
-                       if (nvws->res_alloc(heap, vplen, vp, &vp->exec))
+                       if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec))
                                assert(0);
                }
 
-               so = so_new(5, 0);
-               so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1);
+               so = so_new(7, 0);
+               so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
                so_data  (so, vp->exec->start);
-               so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2);
+               so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
                so_data  (so, vp->ir);
                so_data  (so, vp->or);
+               so_method(so, curie,  NV40TCL_CLIP_PLANE_ENABLE, 1);
+               so_data  (so, vp->clip_ctrl);
                so_ref(so, &vp->so);
+               so_ref(NULL, &so);
 
                upload_code = TRUE;
        }
@@ -694,15 +924,15 @@ check_gpu_resources:
        if (vp->nr_consts && !vp->data) {
                struct nouveau_resource *heap = nv40->screen->vp_data_heap;
 
-               if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) {
+               if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) {
                        while (heap->next && heap->size < vp->nr_consts) {
                                struct nv40_vertex_program *evict;
                                
                                evict = heap->next->priv;
-                               nvws->res_free(&evict->data);
+                               nouveau_resource_free(&evict->data);
                        }
 
-                       if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data))
+                       if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data))
                                assert(0);
                }
 
@@ -804,16 +1034,34 @@ check_gpu_resources:
 void
 nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp)
 {
-       if (vp->nr_consts)
-               free(vp->consts);
-       if (vp->nr_insns)
-               free(vp->insns);
+       vp->translated = FALSE;
+
+       if (vp->nr_insns) {
+               FREE(vp->insns);
+               vp->insns = NULL;
+               vp->nr_insns = 0;
+       }
+
+       if (vp->nr_consts) {
+               FREE(vp->consts);
+               vp->consts = NULL;
+               vp->nr_consts = 0;
+       }
+
+       nouveau_resource_free(&vp->exec);
+       vp->exec_start = 0;
+       nouveau_resource_free(&vp->data);
+       vp->data_start = 0;
+       vp->data_start_min = 0;
+
+       vp->ir = vp->or = vp->clip_ctrl = 0;
+       so_ref(NULL, &vp->so);
 }
 
 struct nv40_state_entry nv40_state_vertprog = {
        .validate = nv40_vertprog_validate,
        .dirty = {
-               .pipe = NV40_NEW_VERTPROG,
+               .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP,
                .hw = NV40_STATE_VERTPROG,
        }
 };