nv50: start using interpreter for fragprog too, not hardcoded passthrough
authorBen Skeggs <skeggsb@gmail.com>
Tue, 3 Jun 2008 02:37:29 +0000 (12:37 +1000)
committerBen Skeggs <skeggsb@gmail.com>
Sun, 29 Jun 2008 05:46:14 +0000 (15:46 +1000)
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_state.c
src/gallium/drivers/nv50/nv50_state.h

index aa848faa49636eed912591acdf565725aad87945..9d89f6d0609ec5d738662206d137c707b24c4223 100644 (file)
@@ -11,7 +11,7 @@
 #include "nv50_state.h"
 
 #define NV50_SU_MAX_TEMP 64
-#define TX_FRAGPROG 0
+#define TX_FRAGPROG 1
 
 struct nv50_reg {
        enum {
@@ -52,15 +52,23 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
 {
        int i;
 
-       if (reg->type != P_TEMP || reg->hw >= 0)
+       if (reg->type != P_TEMP)
                return;
 
+       if (reg->hw >= 0) {
+               /*XXX: do this here too to catch FP temp-as-attr usage..
+                *     not clean, but works */
+               if (pc->p->cfg.high_temp < (reg->hw + 1))
+                       pc->p->cfg.high_temp = reg->hw + 1;
+               return;
+       }
+
        for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
                if (!(pc->r_temp[i])) {
                        pc->r_temp[i] = reg;
                        reg->hw = i;
-                       if (pc->p->cfg.vp.high_temp < (i + 1))
-                               pc->p->cfg.vp.high_temp = i + 1;
+                       if (pc->p->cfg.high_temp < (i + 1))
+                               pc->p->cfg.high_temp = i + 1;
                        return;
                }
        }
@@ -236,6 +244,24 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst)
        inst[1] |= (val >> 6) << 2;
 }
 
+static void
+emit_interp(struct nv50_pc *pc, struct nv50_reg *dst,
+           struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective)
+{
+       unsigned inst[2] = { 0, 0 };
+
+       inst[0] |= 0x80000000;
+       set_dst(pc, dst, inst);
+       alloc_reg(pc, iv);
+       inst[0] |= (iv->hw << 9);
+       alloc_reg(pc, src);
+       inst[0] |= (src->hw << 16);
+       if (noperspective)
+               inst[0] |= (1 << 25);
+
+       emit(pc, inst);
+}
+
 static void
 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
@@ -409,20 +435,57 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
        NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr);
        if (pc->attr_nr) {
+               struct nv50_reg *iv = NULL, *tmp = NULL;
                int aid = 0;
 
                pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg));
                if (!pc->attr)
                        goto out_err;
 
+               if (pc->p->type == NV50_PROG_FRAGMENT) {
+                       iv = alloc_temp(pc, NULL);
+                       aid++;
+               }
+
                for (i = 0; i < pc->attr_nr; i++) {
+                       struct nv50_reg *a = &pc->attr[i*4];
+
                        for (c = 0; c < 4; c++) {
-                               pc->p->cfg.vp.attr[aid/32] |= (1 << (aid % 32));
-                               pc->attr[i*4+c].type = P_ATTR;
-                               pc->attr[i*4+c].hw = aid++;
-                               pc->attr[i*4+c].index = i;
+                               if (pc->p->type == NV50_PROG_FRAGMENT) {
+                                       struct nv50_reg *at =
+                                               alloc_temp(pc, NULL);
+                                       pc->attr[i*4+c].type = at->type;
+                                       pc->attr[i*4+c].hw = at->hw;
+                                       pc->attr[i*4+c].index = at->index;
+                               } else {
+                                       pc->p->cfg.vp.attr[aid/32] |=
+                                               (1 << (aid % 32));
+                                       pc->attr[i*4+c].type = P_ATTR;
+                                       pc->attr[i*4+c].hw = aid++;
+                                       pc->attr[i*4+c].index = i;
+                               }
                        }
+
+                       if (pc->p->type != NV50_PROG_FRAGMENT)
+                               continue;
+
+                       emit_interp(pc, iv, iv, iv, FALSE);
+                       tmp = alloc_temp(pc, NULL);
+                       {
+                               unsigned inst[2] = { 0, 0 };
+                               inst[0]  = 0x90000000;
+                               inst[0] |= (tmp->hw << 2);
+                               emit(pc, inst);
+                       }
+                       emit_interp(pc, &a[0], &a[0], tmp, TRUE);
+                       emit_interp(pc, &a[1], &a[1], tmp, TRUE);
+                       emit_interp(pc, &a[2], &a[2], tmp, TRUE);
+                       emit_interp(pc, &a[3], &a[3], tmp, TRUE);
+                       free_temp(pc, tmp);
                }
+
+               if (iv)
+                       free_temp(pc, iv);
        }
 
        NOUVEAU_ERR("%d result regs\n", pc->result_nr);
@@ -435,7 +498,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
                for (i = 0; i < pc->result_nr; i++) {
                        for (c = 0; c < 4; c++) {
-                               pc->result[i*4+c].type = P_RESULT;
+                               if (pc->p->type == NV50_PROG_FRAGMENT)
+                                       pc->result[i*4+c].type = P_TEMP;
+                               else
+                                       pc->result[i*4+c].type = P_RESULT;
                                pc->result[i*4+c].hw = rid++;
                                pc->result[i*4+c].index = i;
                        }
@@ -492,7 +558,7 @@ nv50_program_tx(struct nv50_program *p)
        if (!pc)
                return FALSE;
        pc->p = p;
-       pc->p->cfg.vp.high_temp = 4;
+       pc->p->cfg.high_temp = 4;
 
        ret = nv50_program_tx_prep(pc);
        if (ret == FALSE)
@@ -551,6 +617,8 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
 
                if (nv50_program_tx(p) == FALSE)
                        assert(0);
+               /* *not* sufficient, it's fine if last inst is long and
+                * NOT immd - otherwise it's fucked fucked fucked */
                p->insns[p->insns_nr - 1] |= 0x00000001;
 
                for (i = 0; i < p->insns_nr; i++)
@@ -620,7 +688,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
        so_data  (so, p->cfg.vp.attr[1]);
        so_method(so, tesla, 0x16ac, 2);
        so_data  (so, 8);
-       so_data  (so, p->cfg.vp.high_temp);
+       so_data  (so, p->cfg.high_temp);
        so_method(so, tesla, 0x140c, 1);
        so_data  (so, 0); /* program start offset */
        so_emit(nv50->screen->nvws, so);
@@ -645,12 +713,16 @@ nv50_fragprog_validate(struct nv50_context *nv50)
        nv50_program_validate_data(nv50, p);
        nv50_program_validate_code(nv50, p);
 
-       so = so_new(3, 2);
+       so = so_new(7, 2);
        so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
        so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
                  NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
                  NOUVEAU_BO_LOW, 0, 0);
+       so_method(so, tesla, 0x198c, 1);
+       so_data  (so, p->cfg.high_temp);
+       so_method(so, tesla, 0x1414, 1);
+       so_data  (so, 0); /* program start offset */
        so_emit(nv50->screen->nvws, so);
        so_ref(NULL, &so);
 }
index ba3d04cede355f420707b2cbf6927699b67dae7c..48d09c296bb94f94ef6fc2ef6ef347937191c082 100644 (file)
@@ -331,6 +331,7 @@ nv50_vp_state_create(struct pipe_context *pipe,
        struct nv50_program *p = CALLOC_STRUCT(nv50_program);
 
        p->pipe = *cso;
+       p->type = NV50_PROG_VERTEX;
        tgsi_scan_shader(p->pipe.tokens, &p->info);
        return (void *)p;
 }
@@ -360,6 +361,7 @@ nv50_fp_state_create(struct pipe_context *pipe,
        struct nv50_program *p = CALLOC_STRUCT(nv50_program);
 
        p->pipe = *cso;
+       p->type = NV50_PROG_FRAGMENT;
        tgsi_scan_shader(p->pipe.tokens, &p->info);
        return (void *)p;
 }
index 9e3876871bd64b5a4eb24a237632c606dad4ceab..dd352b658567e5693a5c1e668a4bba64e3c3175c 100644 (file)
@@ -15,6 +15,10 @@ struct nv50_program {
        struct tgsi_shader_info info;
        boolean translated;
 
+       enum {
+               NV50_PROG_VERTEX,
+               NV50_PROG_FRAGMENT
+       } type;
        unsigned *insns;
        unsigned insns_nr;
 
@@ -25,9 +29,9 @@ struct nv50_program {
 
        struct nouveau_resource *data;
 
-       union {
+       struct {
+               unsigned high_temp;
                struct {
-                       unsigned high_temp;
                        unsigned attr[2];
                } vp;
        } cfg;