nv50: proper linkage between VP and FP
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Mon, 14 Sep 2009 18:23:39 +0000 (20:23 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Tue, 15 Sep 2009 10:13:23 +0000 (12:13 +0200)
This moves construction of the mapping between VP outputs
and FP inputs into validation.

The map also contains slots for special outputs like clip
distance and point size, so we need to at least merge the
VP related and FP related parts on validation if we want
to support those.

Now we match every single FP input component with results
from the VP: components the FP does not read are left out of
the map, and components the VP does not write are replaced
by 0 (xyz) or 1 (w).
The bitmap indicating linear interpolants is also filled,
and flat FP inputs are mapped in only after non-flat ones,
as is required.

Furthermore, we can save some space by only fetching VP
attrs we actually use, and avoid wasting any output regs
because of TGSI using less than 4 components.

src/gallium/drivers/nv50/nv50_context.h
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_program.h
src/gallium/drivers/nv50/nv50_state_validate.c

index 1e9e8e49bfbd2c174cec3d7c2e6f2d3d0320b826..3a5f990e97ef2eaa3e956f381fa35caf158facac 100644 (file)
@@ -116,6 +116,7 @@ struct nv50_state {
        unsigned miptree_nr;
        struct nouveau_stateobj *vertprog;
        struct nouveau_stateobj *fragprog;
+       struct nouveau_stateobj *programs;
        struct nouveau_stateobj *vtxfmt;
        struct nouveau_stateobj *vtxbuf;
        struct nouveau_stateobj *vtxattr;
@@ -190,6 +191,7 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 /* nv50_program.c */
 extern void nv50_vertprog_validate(struct nv50_context *nv50);
 extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_linkage_validate(struct nv50_context *nv50);
 extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
 
 /* nv50_state_validate.c */
index 7618ff33759338734adec67da22c51a44a1bb141..7bc8e13d2adf16d0f639c8dbcc61a4f2425f0628 100644 (file)
@@ -139,6 +139,14 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
        reg->acc = 0;
 }
 
+static INLINE unsigned
+popcnt4(uint32_t val)
+{      /* Return the number of set bits in the low 4 bits of val.
+        * Bits above the low nibble are ignored (val is masked with 0xf),
+        * so the result is always in the range 0..4.
+        */
+       static const unsigned cnt[16]
+       = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+       return cnt[val & 0xf];
+}
+
 static void
 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
 {
@@ -1975,59 +1983,48 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
        return TRUE;
 }
 
-static unsigned
-load_fp_attrib(struct nv50_pc *pc, int i, int *mid, int *aid, int *p_oid)
+static void
+load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
 {      /* Emit the interpolation instruction that loads one FP input
+        * component (reg), using the interpolation mode recorded in
+        * pc->interp_mode[] for reg->index. For perspective modes the
+        * helper value pc->iv_p / pc->iv_c is created on first use and
+        * then reused by later calls.
+        */
-       struct nv50_reg *iv;
-       int oid, c, n;
-       unsigned mask = 0;
+       struct nv50_reg *iv, **ppiv;
+       unsigned mode = pc->interp_mode[reg->index];
 
-       iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p;
+       ppiv = (mode & INTERP_CENTROID) ? &pc->iv_c : &pc->iv_p; /* centroid and non-centroid helpers are cached separately */
+       iv = *ppiv;
 
-       for (c = 0, n = i * 4; c < 4; c++, n++) {
-               oid = (*p_oid)++;
+       if ((mode & INTERP_PERSPECTIVE) && !iv) { /* first perspective load for this helper: build it now */
+               iv = *ppiv = alloc_temp(pc, NULL);
+               iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; /* index of the last component flagged in bits 24..27 of regs[1] */
 
-               if (!pc->attr[n].acc)
-                       continue;
-               mask |= (1 << c);
-
-               alloc_reg(pc, &pc->attr[n]);
-
-               pc->attr[n].rhw = (*aid)++;
-               emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]);
+               emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
+               emit_flop(pc, 0, iv, iv); /* NOTE(review): presumably flop 0 is the reciprocal (1/w) - confirm against emit_flop */
 
-               pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4));
-               (*mid)++;
-               pc->p->cfg.fp.regs[1] += 0x00010001;
+               /* XXX: when loading interpolants dynamically, move these
+                * to the program head, or make sure it can't be skipped.
+                */
        }
 
-       return mask;
+       emit_interp(pc, reg, iv, mode); /* iv stays NULL here for modes without INTERP_PERSPECTIVE when no helper was cached yet */
 }
 
 static boolean
 nv50_program_tx_prep(struct nv50_pc *pc)
 {
-       struct tgsi_parse_context p;
+       struct tgsi_parse_context tp;
+       struct nv50_program *p = pc->p;
        boolean ret = FALSE;
-       unsigned i, c;
-       unsigned fcol, bcol, fcrd;
-
-       /* count (centroid) perspective interpolations */
-       unsigned centroid_loads = 0;
-       unsigned perspect_loads = 0;
-
-       fcol = bcol = fcrd = ~0;
+       unsigned i, c, flat_nr = 0;
 
-       tgsi_parse_init(&p, pc->p->pipe.tokens);
-       while (!tgsi_parse_end_of_tokens(&p)) {
-               const union tgsi_full_token *tok = &p.FullToken;
+       tgsi_parse_init(&tp, pc->p->pipe.tokens);
+       while (!tgsi_parse_end_of_tokens(&tp)) {
+               const union tgsi_full_token *tok = &tp.FullToken;
 
-               tgsi_parse_token(&p);
+               tgsi_parse_token(&tp);
                switch (tok->Token.Type) {
                case TGSI_TOKEN_TYPE_IMMEDIATE:
                {
                        const struct tgsi_full_immediate *imm =
-                               &p.FullToken.FullImmediate;
+                               &tp.FullToken.FullImmediate;
 
                        ctor_immd(pc, imm->u[0].Float,
                                      imm->u[1].Float,
@@ -2038,9 +2035,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                case TGSI_TOKEN_TYPE_DECLARATION:
                {
                        const struct tgsi_full_declaration *d;
-                       unsigned last, first, mode;
+                       unsigned si, last, first, mode;
 
-                       d = &p.FullToken.FullDeclaration;
+                       d = &tp.FullToken.FullDeclaration;
                        first = d->DeclarationRange.First;
                        last = d->DeclarationRange.Last;
 
@@ -2048,43 +2045,41 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                        case TGSI_FILE_TEMPORARY:
                                break;
                        case TGSI_FILE_OUTPUT:
+                               if (!d->Declaration.Semantic ||
+                                   p->type == PIPE_SHADER_FRAGMENT)
+                                       break;
+
+                               si = d->Semantic.SemanticIndex;
+                               switch (d->Semantic.SemanticName) {
+                                       /*
+                               case TGSI_SEMANTIC_CLIP_DISTANCE:
+                                       p->cfg.clpd = MIN2(p->cfg.clpd, first);
+                                       break;
+                                       */
+                               default:
+                                       break;
+                               }
                                break;
                        case TGSI_FILE_INPUT:
                        {
-                               if (pc->p->type != PIPE_SHADER_FRAGMENT)
+                               if (p->type != PIPE_SHADER_FRAGMENT)
                                        break;
 
                                switch (d->Declaration.Interpolate) {
                                case TGSI_INTERPOLATE_CONSTANT:
                                        mode = INTERP_FLAT;
+                                       flat_nr++;
                                        break;
                                case TGSI_INTERPOLATE_PERSPECTIVE:
                                        mode = INTERP_PERSPECTIVE;
+                                       p->cfg.regs[1] |= 0x08 << 24;
                                        break;
                                default:
                                        mode = INTERP_LINEAR;
                                        break;
                                }
-
-                               if (d->Declaration.Semantic) {
-                                       switch (d->Semantic.SemanticName) {
-                                       case TGSI_SEMANTIC_POSITION:
-                                               fcrd = first;
-                                               break;
-                                       case TGSI_SEMANTIC_COLOR:
-                                               fcol = first;
-                                               mode = INTERP_PERSPECTIVE;
-                                               break;
-                                       }
-                               }
-
-                               if (d->Declaration.Centroid) {
+                               if (d->Declaration.Centroid)
                                        mode |= INTERP_CENTROID;
-                                       if (mode & INTERP_PERSPECTIVE)
-                                               centroid_loads++;
-                               } else
-                               if (mode & INTERP_PERSPECTIVE)
-                                       perspect_loads++;
 
                                assert(last < 32);
                                for (i = first; i <= last; i++)
@@ -2111,92 +2106,117 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                }
        }
 
-       if (pc->attr_nr) {
-               int oid = 4, mid = 4, aid = 0;
-               /* oid = VP output id
-                * aid = FP attribute/interpolant id
-                * mid = VP output mapping field ID
-                */
-               if (pc->p->type == PIPE_SHADER_FRAGMENT) {
-                       /* position should be loaded first */
-                       if (fcrd < 0x40) {
-                               unsigned mask;
-                               mid = 0;
-                               mask = load_fp_attrib(pc, fcrd, &mid, &aid,
-                                                     &oid);
-                               pc->p->cfg.fp.regs[1] |= (mask << 24);
-                               pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
-                       }
+       if (p->type == PIPE_SHADER_VERTEX) {
+               int rid = 0;
 
-                       /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */
-
-                       if (perspect_loads) {
-                               pc->iv_p = alloc_temp(pc, NULL);
-
-                               if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) {
-                                       pc->p->cfg.fp.regs[1] |= 0x08000000;
-                                       pc->iv_p->rhw = aid++;
-                                       emit_interp(pc, pc->iv_p, NULL,
-                                                   INTERP_LINEAR);
-                                       emit_flop(pc, 0, pc->iv_p, pc->iv_p);
-                               } else {
-                                       pc->iv_p->rhw = aid - 1;
-                                       emit_flop(pc, 0, pc->iv_p,
-                                                 &pc->attr[fcrd * 4 + 3]);
-                               }
+               for (i = 0; i < pc->attr_nr * 4; ++i) {
+                       if (pc->attr[i].acc) {
+                               pc->attr[i].hw = rid++;
+                               p->cfg.attr[i / 32] |= 1 << (i % 32);
                        }
+               }
+
+               for (i = 0, rid = 0; i < pc->result_nr; ++i) {
+                       p->cfg.io[i].hw = rid;
+                       p->cfg.io[i].id_vp = i;
 
-                       if (centroid_loads) {
-                               pc->iv_c = alloc_temp(pc, NULL);
-                               pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++;
-                               emit_interp(pc, pc->iv_c, NULL,
-                                           INTERP_CENTROID);
-                               emit_flop(pc, 0, pc->iv_c, pc->iv_c);
-                               pc->p->cfg.fp.regs[1] |= 0x08000000;
+                       for (c = 0; c < 4; ++c) {
+                               int n = i * 4 + c;
+                               if (!pc->result[n].acc)
+                                       continue;
+                               pc->result[n].hw = rid++;
+                               p->cfg.io[i].mask |= 1 << c;
                        }
+               }
+       } else
+       if (p->type == PIPE_SHADER_FRAGMENT) {
+               int rid, aid;
+               unsigned n = 0, m = pc->attr_nr - flat_nr;
+
+               int base = (TGSI_SEMANTIC_POSITION ==
+                           p->info.input_semantic_name[0]) ? 0 : 1;
 
-                       for (c = 0; c < 4; c++) {
-                               /* XXX: secondary colour, tbd */
-                               if (fcol < 0x40 && pc->attr[fcol * 4 + c].acc)
-                                       pc->p->cfg.fp.regs[0] += 0x00010000;
+               /* non-flat interpolants have to be mapped to
+                * the lower hardware IDs, so sort them:
+                */
+               for (i = 0; i < pc->attr_nr; i++) {
+                       if (pc->interp_mode[i] == INTERP_FLAT) {
+                               p->cfg.io[m].id_vp = i + base;
+                               p->cfg.io[m++].id_fp = i;
+                       } else {
+                               if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
+                                       p->cfg.io[n].linear = TRUE;
+                               p->cfg.io[n].id_vp = i + base;
+                               p->cfg.io[n++].id_fp = i;
                        }
+               }
 
-                       for (i = ((fcrd < 0x40) ? 1 : 0); i < pc->attr_nr; i++)
-                               load_fp_attrib(pc, i, &mid, &aid, &oid);
+               if (!base) /* set w-coordinate mask from perspective interp */
+                       p->cfg.io[0].mask |= p->cfg.regs[1] >> 24;
 
-                       if (pc->iv_p)
-                               free_temp(pc, pc->iv_p);
-                       if (pc->iv_c)
-                               free_temp(pc, pc->iv_c);
+               aid = popcnt4( /* if fcrd isn't contained in cfg.io */
+                       base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask);
 
-                       pc->p->cfg.fp.high_map = (mid / 4);
-                       pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0);
-               } else {
-                       /* vertex program */
-                       for (i = 0; i < pc->attr_nr * 4; i++) {
-                               pc->p->cfg.vp.attr[aid / 32] |=
-                                       (1 << (aid % 32));
-                               pc->attr[i].hw = aid++;
+               for (n = 0; n < pc->attr_nr; ++n) {
+                       p->cfg.io[n].hw = rid = aid;
+                       i = p->cfg.io[n].id_fp;
+
+                       for (c = 0; c < 4; ++c) {
+                               if (!pc->attr[i * 4 + c].acc)
+                                       continue;
+                               pc->attr[i * 4 + c].rhw = rid++;
+                               p->cfg.io[n].mask |= 1 << c;
+
+                               load_interpolant(pc, &pc->attr[i * 4 + c]);
                        }
+                       aid += popcnt4(p->cfg.io[n].mask);
                }
-       }
 
-       if (pc->result_nr) {
-               if (pc->p->type == PIPE_SHADER_VERTEX) {
-                       for (i = 0; i < pc->result_nr * 4; i++)
-                               pc->result[i].hw = i;
-               } else {
-                       /* type == PIPE_SHADER_FRAGMENT
-                        * FragDepth is always first TGSI and last HW output
-                        */
-                       int rid = 0;
-                       i = pc->p->info.writes_z ? 4 : 0;
+               if (!base)
+                       p->cfg.regs[1] |= p->cfg.io[0].mask << 24;
+
+               m = popcnt4(p->cfg.regs[1] >> 24);
+
+               /* set count of non-position inputs and of non-flat
+                * non-position inputs for FP_INTERPOLANT_CTRL
+                */
+               p->cfg.regs[1] |= aid - m;
+
+               if (flat_nr) {
+                       i = p->cfg.io[pc->attr_nr - flat_nr].hw;
+                       p->cfg.regs[1] |= (i - m) << 16;
+               } else
+                       p->cfg.regs[1] |= p->cfg.regs[1] << 16;
+
+               /* mark color semantic for light-twoside */
+               n = 0x40;
+               for (i = 0; i < pc->attr_nr; i++) {
+                       ubyte si, sn;
 
-                       for (; i < pc->result_nr * 4; i++)
-                               pc->result[i].rhw = rid++;
-                       if (pc->p->info.writes_z)
-                               pc->result[2].rhw = rid;
+                       sn = p->info.input_semantic_name[p->cfg.io[i].id_fp];
+                       si = p->info.input_semantic_index[p->cfg.io[i].id_fp];
+
+                       if (sn == TGSI_SEMANTIC_COLOR) {
+                               p->cfg.two_side[si] = p->cfg.io[i];
+
+                               /* increase colour count */
+                               p->cfg.regs[0] += popcnt4(
+                                       p->cfg.two_side[si].mask) << 16;
+
+                               n = MIN2(n, p->cfg.io[i].hw - m);
+                       }
                }
+               if (n < 0x40)
+                       p->cfg.regs[0] += n;
+
+               /* Initialize FP results:
+                * FragDepth is always first TGSI and last hw output
+                */
+               i = p->info.writes_z ? 4 : 0;
+               for (rid = 0; i < pc->result_nr * 4; i++)
+                       pc->result[i].rhw = rid++;
+               if (p->info.writes_z)
+                       pc->result[2].rhw = rid;
        }
 
        if (pc->immd_nr) {
@@ -2214,7 +2234,12 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
        ret = TRUE;
 out_err:
-       tgsi_parse_free(&p);
+       if (pc->iv_p)
+               free_temp(pc, pc->iv_p);
+       if (pc->iv_c)
+               free_temp(pc, pc->iv_c);
+
+       tgsi_parse_free(&tp);
        return ret;
 }
 
@@ -2249,24 +2274,26 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 
        p->cfg.high_temp = 4;
 
+       p->cfg.two_side[0].hw = 0x40;
+       p->cfg.two_side[1].hw = 0x40;
+
        switch (p->type) {
        case PIPE_SHADER_VERTEX:
+               p->cfg.clpd = 0x40;
+               p->cfg.io_nr = pc->result_nr;
                break;
        case PIPE_SHADER_FRAGMENT:
-               p->cfg.fp.regs[0] = 0x01000404;
-               p->cfg.fp.regs[1] = 0x00000400;
-
-               p->cfg.fp.map[0] = 0x03020100;
-               p->cfg.fp.high_map = 1;
-
                rtype[0] = rtype[1] = P_TEMP;
 
+               p->cfg.regs[0] = 0x01000004;
+               p->cfg.io_nr = pc->attr_nr;
+
                if (p->info.writes_z) {
-                       p->cfg.fp.regs[2] |= 0x00000100;
-                       p->cfg.fp.regs[3] |= 0x00000011;
+                       p->cfg.regs[2] |= 0x00000100;
+                       p->cfg.regs[3] |= 0x00000011;
                }
                if (p->info.uses_kill)
-                       p->cfg.fp.regs[2] |= 0x00100000;
+                       p->cfg.regs[2] |= 0x00100000;
                break;
        }
 
@@ -2609,8 +2636,8 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
                      NOUVEAU_BO_LOW, 0, 0);
        so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
-       so_data  (so, p->cfg.vp.attr[0]);
-       so_data  (so, p->cfg.vp.attr[1]);
+       so_data  (so, p->cfg.attr[0]);
+       so_data  (so, p->cfg.attr[1]);
        so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
        so_data  (so, p->cfg.high_result);
        so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2);
@@ -2628,7 +2655,6 @@ nv50_fragprog_validate(struct nv50_context *nv50)
        struct nouveau_grobj *tesla = nv50->screen->tesla;
        struct nv50_program *p = nv50->fragprog;
        struct nouveau_stateobj *so;
-       unsigned i;
 
        if (!p->translated) {
                nv50_program_validate(nv50, p);
@@ -2645,29 +2671,119 @@ nv50_fragprog_validate(struct nv50_context *nv50)
                      NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
                      NOUVEAU_BO_LOW, 0, 0);
-       so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
-       so_data  (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
-       so_data  (so, 0x00000004);
-       so_data  (so, 0x00000000);
-       so_data  (so, 0x00000000);
-       so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map);
-       for (i = 0; i < p->cfg.fp.high_map; i++)
-               so_data(so, p->cfg.fp.map[i]);
-       so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2);
-       so_data  (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
+       so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
        so_data  (so, p->cfg.high_temp);
        so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
        so_data  (so, p->cfg.high_result);
        so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
-       so_data  (so, p->cfg.fp.regs[2]);
+       so_data  (so, p->cfg.regs[2]);
        so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
-       so_data  (so, p->cfg.fp.regs[3]);
+       so_data  (so, p->cfg.regs[3]);
        so_method(so, tesla, NV50TCL_FP_START_ID, 1);
        so_data  (so, 0); /* program start offset */
        so_ref(so, &nv50->state.fragprog);
        so_ref(NULL, &so);
 }
 
+static int
+nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4],
+              struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
+{      /* Append map entries for one 4-component FP input.
+        *
+        * p_map: result map, written one byte per mapped component
+        * mid:   index of the next free byte in the map
+        * lin:   bitmap indexed by map id; a bit is set for every mapped
+        *        component of an input with fpi->linear set
+        * fpi:   FP input; only components in fpi->mask get a map entry
+        * vpo:   matching VP output; vpo->hw is the id of its first
+        *        written component, vpo->mask says which are written
+        *
+        * Returns the updated mid (next free map index).
+        */
+       int c;
+       uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw;
+       uint8_t *map = (uint8_t *)p_map; /* byte view: the resulting word layout depends on host endianness */
+
+       for (c = 0; c < 4; ++c) {
+               if (mf & 1) { /* FP reads component c */
+                       if (fpi->linear == TRUE)
+                               lin[mid / 32] |= 1 << (mid % 32); /* NOTE(review): 1 is a signed int, so a shift by 31 overflows int; 1u would avoid that */
+                       map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); /* not written by the VP: 0x41 for w, 0x40 for xyz (per the commit message these stand for 1 and 0) */
+               }
+
+               oid += mv & 1; /* VP output ids advance only over components the VP writes */
+               mf >>= 1;
+               mv >>= 1;
+       }
+
+       return mid;
+}
+
+void
+nv50_linkage_validate(struct nv50_context *nv50)
+{      /* Build the linkage between the current vertex program outputs
+        * and fragment program inputs and store it as a state object in
+        * nv50->state.programs. The object carries the VP result map and
+        * its size, the four MAP_SEMANTIC registers, FP_INTERPOLANT_CTRL
+        * and the bitmap of linear interpolants (method 0x1540).
+        */
+       struct nouveau_grobj *tesla = nv50->screen->tesla;
+       struct nv50_program *vp = nv50->vertprog;
+       struct nv50_program *fp = nv50->fragprog;
+       struct nouveau_stateobj *so;
+       struct nv50_sreg4 dummy, *vpo; /* NOTE(review): only dummy.linear and dummy.mask are ever assigned; nv50_sreg4_map() also reads vpo->hw, so dummy.hw is read uninitialised when dummy stands in for a VP output (the value is never stored because the mask is 0) */
+       int i, n, c, m = 0;
+       uint32_t map[16], lin[4], reg[5];
+
+       memset(map, 0, sizeof(map));
+       memset(lin, 0, sizeof(lin));
+
+       reg[1] = 0x00000004; /* low and high clip distance map ids */
+       reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */
+       reg[3] = 0x00000000; /* point size map id & enable */
+       reg[0] = fp->cfg.regs[0]; /* colour semantic reg */
+       reg[4] = fp->cfg.regs[1]; /* interpolant info */
+
+       dummy.linear = FALSE;
+       dummy.mask = 0xf; /* map all components of HPOS */
+       m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); /* here dummy plays the FP-input role; VP output 0 supplies the ids */
+
+       dummy.mask = 0x0; /* from now on dummy is a VP output that writes nothing */
+
+       if (vp->cfg.clpd < 0x40) { /* NOTE(review): in the visible code clpd is only ever initialised to 0x40, so this branch looks dormant - confirm */
+               for (c = 0; c < vp->cfg.clpd_nr; ++c)
+                       map[m++] = vp->cfg.clpd + c;
+               reg[1] = (m << 8);
+       }
+
+       reg[0] |= m << 8; /* adjust BFC0 id */
+       reg[0] += m - 4; /* adjust FFC0 id */
+       reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
+
+       i = 0;
+       if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION)
+               i = 1; /* skip io[0] when FP input 0 is the position */
+       for (; i < fp->cfg.io_nr; i++) {
+               ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp];
+               ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp];
+
+               n = fp->cfg.io[i].id_vp;
+               if (n >= vp->cfg.io_nr ||
+                   vp->info.output_semantic_name[n] != sn ||
+                   vp->info.output_semantic_index[n] != si)
+                       vpo = &dummy; /* no VP output with matching semantic at that slot: every read component gets the 0x40/0x41 constant id */
+               else
+                       vpo = &vp->cfg.io[n];
+
+               m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo);
+       }
+
+       /* now fill the stateobj */
+       so = so_new(64, 0);
+
+       n = (m + 3) / 4; /* number of 32-bit words needed for m one-byte map entries */
+       so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+       so_data  (so, m);
+       so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
+       so_datap (so, map, n);
+
+       so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
+       so_datap (so, reg, 4); /* reg[0..3]; reg[4] is sent separately below */
+
+       so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
+       so_data  (so, reg[4]);
+
+       so_method(so, tesla, 0x1540, 4); /* unnamed method: receives the four words of the linear-interpolant bitmap */
+       so_datap (so, lin, 4);
+
+        so_ref(so, &nv50->state.programs);
+        so_ref(NULL, &so);
+}
+
 void
 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
 {
index 096e0476aab64a32bbcb53d8854fc9324eaeb623..5745e0b1ee91b7c7f5bf0426159df0dd561adb1f 100644 (file)
@@ -15,6 +15,15 @@ struct nv50_program_exec {
        } param;
 };
 
+struct nv50_sreg4 { /* one 4-component shader input/output as seen by the VP/FP linkage code */
+       uint8_t hw; /* hardware id given to the first used component; later used components follow consecutively */
+       uint8_t id_vp; /* index of the corresponding VP output */
+       uint8_t id_fp; /* index of the corresponding FP input */
+
+       uint8_t mask; /* bit c is set when component c is accessed by the program */
+       boolean linear; /* set for FP inputs that are neither flat nor perspective-interpolated */
+};
+
 struct nv50_program {
        struct pipe_shader_state pipe;
        struct tgsi_shader_info info;
@@ -36,14 +45,19 @@ struct nv50_program {
        struct {
                unsigned high_temp;
                unsigned high_result;
-               struct {
-                       unsigned attr[2];
-               } vp;
-               struct {
-                       unsigned regs[4];
-                       unsigned map[5];
-                       unsigned high_map;
-               } fp;
+
+               uint32_t attr[2];
+               uint32_t regs[4];
+
+               /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */
+               unsigned io_nr;
+               struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS];
+
+               /* FP colour inputs, VP/GP back colour outputs */
+               struct nv50_sreg4 two_side[2];
+
+               /* VP only */
+               uint8_t clpd, clpd_nr;
        } cfg;
 };
 
index d294356f75d5561c925179db868cb8d3dbefa35c..d307a98745320c272ea6f0d560b265c585c32248 100644 (file)
@@ -189,6 +189,8 @@ nv50_state_emit(struct nv50_context *nv50)
                so_emit(chan, nv50->state.vertprog);
        if (nv50->state.dirty & NV50_NEW_FRAGPROG)
                so_emit(chan, nv50->state.fragprog);
+       if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+               so_emit(chan, nv50->state.programs);
        if (nv50->state.dirty & NV50_NEW_RASTERIZER)
                so_emit(chan, nv50->state.rast);
        if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
@@ -240,6 +242,9 @@ nv50_state_validate(struct nv50_context *nv50)
        if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
                nv50_fragprog_validate(nv50);
 
+       if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+               nv50_linkage_validate(nv50);
+
        if (nv50->dirty & NV50_NEW_RASTERIZER)
                so_ref(nv50->rasterizer->so, &nv50->state.rast);