nv50: negate sources directly where supported
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
index 30aa35886041f905e101085229250e87d8aa5c7b..aada285f2c006785178fc6ce7e01908845693b11 100644 (file)
@@ -214,6 +214,22 @@ assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
        FREE(src);
 }
 
+/* release the hardware resource held by r */
+static void
+release_hw(struct nv50_pc *pc, struct nv50_reg *r)
+{
+       assert(r->type == P_TEMP);
+       if (r->hw == -1)
+               return;
+
+       assert(pc->r_temp[r->hw] == r);
+       pc->r_temp[r->hw] = NULL;
+
+       r->acc = 0;
+       if (r->index == -1)
+               FREE(r);
+}
+
 static void
 free_temp(struct nv50_pc *pc, struct nv50_reg *r)
 {
@@ -393,8 +409,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 static INLINE void
 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 {
-       assert(imm->hw >= pc->param_nr * 4);
-       unsigned val = fui(pc->immd_buf[imm->hw - pc->param_nr * 4]);
+       float f = pc->immd_buf[imm->hw];
+       unsigned val = fui(imm->neg ? -f : f);
 
        set_long(pc, e);
        /*XXX: can't be predicated - bits overlap.. catch cases where both
@@ -446,22 +462,12 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
         struct nv50_program_exec *e)
 {
        set_long(pc, e);
-#if 1
-       e->inst[1] |= (1 << 22);
-#else
-       if (src->type == P_IMMD) {
-               e->inst[1] |= (NV50_CB_PMISC << 22);
-       } else {
-               if (pc->p->type == PIPE_SHADER_VERTEX)
-                       e->inst[1] |= (NV50_CB_PVP << 22);
-               else
-                       e->inst[1] |= (NV50_CB_PFP << 22);
-       }
-#endif
 
        e->param.index = src->hw;
        e->param.shift = s;
        e->param.mask = m << (s % 32);
+
+       e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
 }
 
 static void
@@ -622,10 +628,19 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
        check_swap_src_0_1(pc, &src0, &src1);
        set_dst(pc, dst, e);
        set_src_0(pc, src0, e);
-       if (src1->type == P_IMMD && !is_long(e))
+       if (src1->type == P_IMMD && !is_long(e)) {
+               if (src0->neg)
+                       e->inst[0] |= 0x00008000;
                set_immd(pc, src1, e);
-       else
+       } else {
                set_src_1(pc, src1, e);
+               if (src0->neg ^ src1->neg) {
+                       if (is_long(e))
+                               e->inst[1] |= 0x08000000;
+                       else
+                               e->inst[0] |= 0x00008000;
+               }
+       }
 
        emit(pc, e);
 }
@@ -638,13 +653,16 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
 
        e->inst[0] |= 0xb0000000;
 
-       if (!pc->allow32)
+       check_swap_src_0_1(pc, &src0, &src1);
+
+       if (!pc->allow32 || src0->neg || src1->neg) {
                set_long(pc, e);
+               e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
+       }
 
-       check_swap_src_0_1(pc, &src0, &src1);
        set_dst(pc, dst, e);
        set_src_0(pc, src0, e);
-       if (is_long(e) || src1->type == P_CONST || src1->type == P_ATTR)
+       if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e))
                set_src_2(pc, src1, e);
        else
        if (src1->type == P_IMMD)
@@ -673,25 +691,13 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
        emit(pc, e);
 }
 
-static void
+static INLINE void
 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
         struct nv50_reg *src1)
 {
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] |= 0xb0000000;
-
-       set_long(pc, e);
-       if (check_swap_src_0_1(pc, &src0, &src1))
-               e->inst[1] |= 0x04000000;
-       else
-               e->inst[1] |= 0x08000000;
-
-       set_dst(pc, dst, e);
-       set_src_0(pc, src0, e);
-       set_src_2(pc, src1, e);
-
-       emit(pc, e);
+       src1->neg ^= 1;
+       emit_add(pc, dst, src0, src1);
+       src1->neg ^= 1;
 }
 
 static void
@@ -708,26 +714,21 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
        set_src_1(pc, src1, e);
        set_src_2(pc, src2, e);
 
+       if (src0->neg ^ src1->neg)
+               e->inst[1] |= 0x04000000;
+       if (src2->neg)
+               e->inst[1] |= 0x08000000;
+
        emit(pc, e);
 }
 
-static void
+static INLINE void
 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
         struct nv50_reg *src1, struct nv50_reg *src2)
 {
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] |= 0xe0000000;
-       set_long(pc, e);
-       e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
-
-       check_swap_src_0_1(pc, &src0, &src1);
-       set_dst(pc, dst, e);
-       set_src_0(pc, src0, e);
-       set_src_1(pc, src1, e);
-       set_src_2(pc, src2, e);
-
-       emit(pc, e);
+       src2->neg ^= 1;
+       emit_mad(pc, dst, src0, src1, src2);
+       src2->neg ^= 1;
 }
 
 static void
@@ -778,6 +779,48 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
        emit(pc, e);
 }
 
+#define CVTOP_RN       0x01
+#define CVTOP_FLOOR    0x03
+#define CVTOP_CEIL     0x05
+#define CVTOP_TRUNC    0x07
+#define CVTOP_SAT      0x08
+#define CVTOP_ABS      0x10
+
+#define CVT_F32_F32 0xc4
+#define CVT_F32_S32 0x44
+#define CVT_F32_U32 0x64
+#define CVT_S32_F32 0x8c
+#define CVT_S32_S32 0x0c
+#define CVT_F32_F32_ROP 0xcc
+
+static void
+emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+        int wp, unsigned cop, unsigned fmt)
+{
+       struct nv50_program_exec *e;
+
+       e = exec(pc);
+       set_long(pc, e);
+
+       e->inst[0] |= 0xa0000000;
+       e->inst[1] |= 0x00004000;
+       e->inst[1] |= (cop << 16);
+       e->inst[1] |= (fmt << 24);
+       set_src_0(pc, src, e);
+
+       if (wp >= 0)
+               set_pred_wr(pc, 1, wp, e);
+
+       if (dst)
+               set_dst(pc, dst, e);
+       else {
+               e->inst[0] |= 0x000001fc;
+               e->inst[1] |= 0x00000008;
+       }
+
+       emit(pc, e);
+}
+
 static void
 emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
         struct nv50_reg *src0, struct nv50_reg *src1)
@@ -821,22 +864,10 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
                free_temp(pc, dst);
 }
 
-static void
+static INLINE void
 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] = 0xa0000000; /* cvt */
-       set_long(pc, e);
-       e->inst[1] |= (6 << 29); /* cvt */
-       e->inst[1] |= 0x08000000; /* integer mode */
-       e->inst[1] |= 0x04000000; /* 32 bit */
-       e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
-       e->inst[1] |= (1 << 14); /* src .f32 */
-       set_dst(pc, dst, e);
-       set_src_0(pc, src, e);
-
-       emit(pc, e);
+       emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP);
 }
 
 static void
@@ -853,21 +884,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
        free_temp(pc, temp);
 }
 
-static void
+static INLINE void
 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] = 0xa0000000; /* cvt */
-       set_long(pc, e);
-       e->inst[1] |= (6 << 29); /* cvt */
-       e->inst[1] |= 0x04000000; /* 32 bit */
-       e->inst[1] |= (1 << 14); /* src .f32 */
-       e->inst[1] |= ((1 << 6) << 14); /* .abs */
-       set_dst(pc, dst, e);
-       set_src_0(pc, src, e);
-
-       emit(pc, e);
+       emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
 }
 
 static void
@@ -952,6 +972,8 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
        e->inst[1] = 0xc4014788;
        set_src_0(pc, src, e);
        set_pred_wr(pc, 1, r_pred, e);
+       if (src->neg)
+               e->inst[1] |= 0x20000000;
        emit(pc, e);
 
        /* This is probably KILP */
@@ -962,6 +984,113 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
        emit(pc, e);
 }
 
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+        struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+{
+       struct nv50_reg *temp, *t[4];
+       struct nv50_program_exec *e;
+
+       unsigned c, mode, dim;
+
+       switch (type) {
+       case TGSI_TEXTURE_1D:
+               dim = 1;
+               break;
+       case TGSI_TEXTURE_UNKNOWN:
+       case TGSI_TEXTURE_2D:
+       case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
+       case TGSI_TEXTURE_RECT:
+               dim = 2;
+               break;
+       case TGSI_TEXTURE_3D:
+       case TGSI_TEXTURE_CUBE:
+       case TGSI_TEXTURE_SHADOW2D:
+       case TGSI_TEXTURE_SHADOWRECT: /* XXX */
+               dim = 3;
+               break;
+       default:
+               assert(0);
+               break;
+       }
+
+       alloc_temp4(pc, t, 0);
+
+       if (proj) {
+               if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+                       mode = pc->interp_mode[src[0]->index];
+
+                       t[3]->rhw = src[3]->rhw;
+                       emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+                       emit_flop(pc, 0, t[3], t[3]);
+
+                       for (c = 0; c < dim; c++) {
+                               t[c]->rhw = src[c]->rhw;
+                               emit_interp(pc, t[c], t[3],
+                                           (mode | INTERP_PERSPECTIVE));
+                       }
+               } else {
+                       emit_flop(pc, 0, t[3], src[3]);
+                       for (c = 0; c < dim; c++)
+                               emit_mul(pc, t[c], src[c], t[3]);
+
+                       /* XXX: for some reason the blob sometimes uses MAD:
+                        * emit_mad(pc, t[c], src[0][c], t[3], t[3])
+                        * pc->p->exec_tail->inst[1] |= 0x080fc000;
+                        */
+               }
+       } else {
+               if (type == TGSI_TEXTURE_CUBE) {
+                       temp = temp_temp(pc);
+                       emit_minmax(pc, 4, temp, src[0], src[1]);
+                       emit_minmax(pc, 4, temp, temp, src[2]);
+                       emit_flop(pc, 0, temp, temp);
+                       for (c = 0; c < 3; c++)
+                               emit_mul(pc, t[c], src[c], temp);
+               } else {
+                       for (c = 0; c < dim; c++)
+                               emit_mov(pc, t[c], src[c]);
+               }
+       }
+
+       e = exec(pc);
+       set_long(pc, e);
+       e->inst[0] |= 0xf0000000;
+       e->inst[1] |= 0x00000004;
+       set_dst(pc, t[0], e);
+       e->inst[0] |= (unit << 9);
+
+       if (dim == 2)
+               e->inst[0] |= 0x00400000;
+       else
+       if (dim == 3)
+               e->inst[0] |= 0x00800000;
+
+       e->inst[0] |= (mask & 0x3) << 25;
+       e->inst[1] |= (mask & 0xc) << 12;
+
+       emit(pc, e);
+
+#if 1
+       if (mask & 1) emit_mov(pc, dst[0], t[0]);
+       if (mask & 2) emit_mov(pc, dst[1], t[1]);
+       if (mask & 4) emit_mov(pc, dst[2], t[2]);
+       if (mask & 8) emit_mov(pc, dst[3], t[3]);
+
+       free_temp4(pc, t);
+#else
+       /* XXX: if p.e. MUL is used directly after TEX, it would still use
+        * the texture coordinates, not the fetched values: latency ? */
+
+       for (c = 0; c < 4; c++) {
+               if (mask & (1 << c))
+                       assimilate_temp(pc, dst[c], t[c]);
+               else
+                       free_temp(pc, t[c]);
+       }
+#endif
+}
+
 static void
 convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 {
@@ -1010,6 +1139,25 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
        e->inst[1] |= q;
 }
 
+static boolean
+negate_supported(const struct tgsi_full_instruction *insn, int i)
+{
+       switch (insn->Instruction.Opcode) {
+       case TGSI_OPCODE_DP3:
+       case TGSI_OPCODE_DP4:
+       case TGSI_OPCODE_MUL:
+       case TGSI_OPCODE_KIL:
+       case TGSI_OPCODE_ADD:
+       case TGSI_OPCODE_SUB:
+       case TGSI_OPCODE_MAD:
+               return TRUE;
+       case TGSI_OPCODE_POW:
+               return (i == 1) ? TRUE : FALSE;
+       default:
+               return FALSE;
+       }
+}
+
 static struct nv50_reg *
 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 {
@@ -1028,7 +1176,8 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 }
 
 static struct nv50_reg *
-tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
+        boolean neg)
 {
        struct nv50_reg *r = NULL;
        struct nv50_reg *temp;
@@ -1083,14 +1232,21 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
                r = temp;
                break;
        case TGSI_UTIL_SIGN_TOGGLE:
-               temp = temp_temp(pc);
-               emit_neg(pc, temp, r);
-               r = temp;
+               if (neg)
+                       r->neg = 1;
+               else {
+                       temp = temp_temp(pc);
+                       emit_neg(pc, temp, r);
+                       r = temp;
+               }
                break;
        case TGSI_UTIL_SIGN_SET:
                temp = temp_temp(pc);
                emit_abs(pc, temp, r);
-               emit_neg(pc, temp, temp);
+               if (neg)
+                       temp->neg = 1;
+               else
+                       emit_neg(pc, temp, temp);
                r = temp;
                break;
        default:
@@ -1158,7 +1314,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
                        unit = fs->SrcRegister.Index;
 
                for (c = 0; c < 4; c++)
-                       src[i][c] = tgsi_src(pc, c, fs);
+                       src[i][c] = tgsi_src(pc, c, fs,
+                                            negate_supported(inst, i));
        }
 
        if (sat) {
@@ -1417,30 +1574,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
                }
                break;
        case TGSI_OPCODE_TEX:
+               emit_tex(pc, dst, mask, src[0], unit,
+                        inst->InstructionExtTexture.Texture, FALSE);
+               break;
        case TGSI_OPCODE_TXP:
-       {
-               struct nv50_reg *t[4];
-               struct nv50_program_exec *e;
-
-               alloc_temp4(pc, t, 0);
-               emit_mov(pc, t[0], src[0][0]);
-               emit_mov(pc, t[1], src[0][1]);
-
-               e = exec(pc);
-               e->inst[0] = 0xf6400000;
-               e->inst[0] |= (unit << 9);
-               set_long(pc, e);
-               e->inst[1] |= 0x0000c004;
-               set_dst(pc, t[0], e);
-               emit(pc, e);
-
-               if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
-               if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
-               if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
-               if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
-
-               free_temp4(pc, t);
-       }
+               emit_tex(pc, dst, mask, src[0], unit,
+                        inst->InstructionExtTexture.Texture, TRUE);
                break;
        case TGSI_OPCODE_XPD:
                temp = temp_temp(pc);
@@ -1468,21 +1607,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 
        if (sat) {
                for (c = 0; c < 4; c++) {
-                       struct nv50_program_exec *e;
-
                        if (!(mask & (1 << c)))
                                continue;
-                       e = exec(pc);
-
-                       e->inst[0] = 0xa0000000; /* cvt */
-                       set_long(pc, e);
-                       e->inst[1] |= (6 << 29); /* cvt */
-                       e->inst[1] |= 0x04000000; /* 32 bit */
-                       e->inst[1] |= (1 << 14); /* src .f32 */
-                       e->inst[1] |= ((1 << 5) << 14); /* .sat */
-                       set_dst(pc, rdst[c], e);
-                       set_src_0(pc, dst[c], e);
-                       emit(pc, e);
+                       emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT,
+                                CVT_F32_F32);
                }
        } else if (assimilate) {
                for (c = 0; c < 4; c++)
@@ -1496,6 +1624,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
                                continue;
                        if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
                                FREE(src[i][c]);
+                       else
+                       if (src[i][c]->acc == pc->insn_cur)
+                               release_hw(pc, src[i][c]);
                }
        }
 
@@ -1933,7 +2064,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
        }
 
        if (pc->immd_nr) {
-               int rid = pc->param_nr * 4;
+               int rid = 0;
 
                pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
                if (!pc->immd)
@@ -1962,8 +2093,6 @@ out_err:
 static void
 free_nv50_pc(struct nv50_pc *pc)
 {
-       unsigned i;
-
        if (pc->immd)
                FREE(pc->immd);
        if (pc->param)
@@ -1975,12 +2104,6 @@ free_nv50_pc(struct nv50_pc *pc)
        if (pc->temp)
                FREE(pc->temp);
 
-       for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
-               /* deallocate fragment program attributes */
-               if (pc->r_temp[i] && pc->r_temp[i]->index == -1)
-                       FREE(pc->r_temp[i]);
-       }
-
        FREE(pc);
 }
 
@@ -2090,7 +2213,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
 
 static void
 nv50_program_upload_data(struct nv50_context *nv50, float *map,
-                        unsigned start, unsigned count)
+                       unsigned start, unsigned count, unsigned cbuf)
 {
        struct nouveau_channel *chan = nv50->screen->nvws->channel;
        struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -2099,7 +2222,7 @@ nv50_program_upload_data(struct nv50_context *nv50, float *map,
                unsigned nr = count > 2047 ? 2047 : count;
 
                BEGIN_RING(chan, tesla, 0x00000f00, 1);
-               OUT_RING  (chan, (NV50_CB_PMISC << 0) | (start << 8));
+               OUT_RING  (chan, (cbuf << 0) | (start << 8));
                BEGIN_RING(chan, tesla, 0x40000f04, nr);
                OUT_RINGp (chan, map, nr);
 
@@ -2114,35 +2237,50 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 {
        struct nouveau_winsys *nvws = nv50->screen->nvws;
        struct pipe_winsys *ws = nv50->pipe.winsys;
-       unsigned nr = p->param_nr + p->immd_nr;
 
-       if (!p->data && nr) {
-               struct nouveau_resource *heap = nv50->screen->vp_data_heap;
+       if (!p->data[0] && p->immd_nr) {
+               struct nouveau_resource *heap = nv50->screen->immd_heap[0];
 
-               if (nvws->res_alloc(heap, nr, p, &p->data)) {
-                       while (heap->next && heap->size < nr) {
+               if (nvws->res_alloc(heap, p->immd_nr, p, &p->data[0])) {
+                       while (heap->next && heap->size < p->immd_nr) {
                                struct nv50_program *evict = heap->next->priv;
-                               nvws->res_free(&evict->data);
+                               nvws->res_free(&evict->data[0]);
                        }
 
-                       if (nvws->res_alloc(heap, nr, p, &p->data))
+                       if (nvws->res_alloc(heap, p->immd_nr, p, &p->data[0]))
+                               assert(0);
+               }
+
+               /* immediates only need to be uploaded again when freed */
+               nv50_program_upload_data(nv50, p->immd, p->data[0]->start,
+                                        p->immd_nr, NV50_CB_PMISC);
+       }
+
+       if (!p->data[1] && p->param_nr) {
+               struct nouveau_resource *heap =
+                       nv50->screen->parm_heap[p->type];
+
+               if (nvws->res_alloc(heap, p->param_nr, p, &p->data[1])) {
+                       while (heap->next && heap->size < p->param_nr) {
+                               struct nv50_program *evict = heap->next->priv;
+                               nvws->res_free(&evict->data[1]);
+                       }
+
+                       if (nvws->res_alloc(heap, p->param_nr, p, &p->data[1]))
                                assert(0);
                }
        }
 
        if (p->param_nr) {
+               unsigned cbuf = NV50_CB_PVP;
                float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
                                            PIPE_BUFFER_USAGE_CPU_READ);
-               nv50_program_upload_data(nv50, map, p->data->start,
-                                        p->param_nr);
+               if (p->type == PIPE_SHADER_FRAGMENT)
+                       cbuf = NV50_CB_PFP;
+               nv50_program_upload_data(nv50, map, p->data[1]->start,
+                                        p->param_nr, cbuf);
                ws->buffer_unmap(ws, nv50->constbuf[p->type]);
        }
-
-       if (p->immd_nr) {
-               nv50_program_upload_data(nv50, p->immd,
-                                        p->data->start + p->param_nr,
-                                        p->immd_nr);
-       }
 }
 
 static void
@@ -2162,20 +2300,27 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
                upload = TRUE;
        }
 
-       if (p->data && p->data->start != p->data_start) {
+       if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
+               (p->data[1] && p->data[1]->start != p->data_start[1])) {
                for (e = p->exec_head; e; e = e->next) {
-                       unsigned ei, ci;
+                       unsigned ei, ci, bs;
 
                        if (e->param.index < 0)
                                continue;
+                       bs = (e->inst[1] >> 22) & 0x07;
+                       assert(bs < 2);
                        ei = e->param.shift >> 5;
-                       ci = e->param.index + p->data->start;
+                       ci = e->param.index + p->data[bs]->start;
 
                        e->inst[ei] &= ~e->param.mask;
                        e->inst[ei] |= (ci << e->param.shift);
                }
 
-               p->data_start = p->data->start;
+               if (p->data[0])
+                       p->data_start[0] = p->data[0]->start;
+               if (p->data[1])
+                       p->data_start[1] = p->data[1]->start;
+
                upload = TRUE;
        }
 
@@ -2330,7 +2475,8 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
        if (p->buffer)
                pipe_buffer_reference(&p->buffer, NULL);
 
-       nv50->screen->nvws->res_free(&p->data);
+       nv50->screen->nvws->res_free(&p->data[0]);
+       nv50->screen->nvws->res_free(&p->data[1]);
 
        p->translated = 0;
 }