Merge commit 'origin/master' into gallium-0.2
[mesa.git] / src / gallium / drivers / nv30 / nv30_fragprog.c
index 09ad555c324505af9607e4792589ef5e6a83c534..320ba3f4bf42c0ba20b1f5538fbfa6cdd6aadcf9 100644 (file)
@@ -1,11 +1,10 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
 
 #include "nv30_context.h"
 
@@ -378,7 +377,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 {
        const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
        struct nv30_sreg src[3], dst, tmp;
-       int mask, sat, unit;
+       int mask, sat, unit = 0;
        int ai = -1, ci = -1;
        int i;
 
@@ -495,10 +494,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        case TGSI_OPCODE_FRC:
                arith(fpc, sat, FRC, dst, mask, src[0], none, none);
                break;
-       case TGSI_OPCODE_KIL:
+       case TGSI_OPCODE_KILP:
                arith(fpc, 0, KIL, none, 0, none, none, none);
                break;
-       case TGSI_OPCODE_KILP:
+       case TGSI_OPCODE_KIL:
                dst = nv30_sr(NV30SR_NONE, 0);
                dst.cc_update = 1;
                arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
@@ -510,9 +509,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                break;
 //     case TGSI_OPCODE_LIT:
        case TGSI_OPCODE_LRP:
-               tmp = temp(fpc);
-               arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
-               arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+               arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]);
                break;
        case TGSI_OPCODE_MAD:
                arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
@@ -529,14 +526,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        case TGSI_OPCODE_MUL:
                arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
                break;
+       case TGSI_OPCODE_NOISE1:
+       case TGSI_OPCODE_NOISE2:
+       case TGSI_OPCODE_NOISE3:
+       case TGSI_OPCODE_NOISE4:
+               arith(fpc, sat, SFL, dst, mask, none, none, none);
+               break;
        case TGSI_OPCODE_POW:
-               tmp = temp(fpc);
-               arith(fpc, 0, LG2, tmp, MASK_X,
-                     swz(src[0], X, X, X, X), none, none);
-               arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
-                     swz(src[1], X, X, X, X), none);
-               arith(fpc, sat, EX2, dst, mask,
-                     swz(tmp, X, X, X, X), none, none);
+               arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_RCP:
                arith(fpc, sat, RCP, dst, mask, src[0], none, none);
@@ -545,20 +542,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                assert(0);
                break;
        case TGSI_OPCODE_RFL:
-               tmp = temp(fpc);
-               arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
-               arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
-               arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
-                     swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
-               arith(fpc, sat, MAD, dst, mask,
-                     swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+               arith(fpc, 0, RFL, dst, mask, src[0], src[1], none);
                break;
        case TGSI_OPCODE_RSQ:
-               tmp = temp(fpc);
-               arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
-                     abs(swz(src[0], X, X, X, X)), none, none);
-               arith(fpc, sat, EX2, dst, mask,
-                     neg(swz(tmp, X, X, X, X)), none, none);
+               arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
                break;
        case TGSI_OPCODE_SCS:
                if (mask & MASK_X) {
@@ -576,6 +563,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
        case TGSI_OPCODE_SGE:
                arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
                break;
+       case TGSI_OPCODE_SGT:
+               arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
+               break;
        case TGSI_OPCODE_SLT:
                arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
                break;
@@ -583,15 +573,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
                arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
                break;
        case TGSI_OPCODE_TEX:
-               if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide ==
-                               TGSI_EXTSWIZZLE_W) {
-                       tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
-               } else
-                       tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+               tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
                break;
        case TGSI_OPCODE_TXB:
                tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
                break;
+       case TGSI_OPCODE_TXP:
+               tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+               break;
        case TGSI_OPCODE_XPD:
                tmp = temp(fpc);
                arith(fpc, 0, MUL, tmp, mask,
@@ -646,7 +635,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
                return FALSE;
        }
 
-       fpc->attrib_map[fdec->u.DeclarationRange.First] = hw;
+       fpc->attrib_map[fdec->DeclarationRange.First] = hw;
        return TRUE;
 }
 
@@ -656,10 +645,10 @@ nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
 {
        switch (fdec->Semantic.SemanticName) {
        case TGSI_SEMANTIC_POSITION:
-               fpc->depth_id = fdec->u.DeclarationRange.First;
+               fpc->depth_id = fdec->DeclarationRange.First;
                break;
        case TGSI_SEMANTIC_COLOR:
-               fpc->colour_id = fdec->u.DeclarationRange.First;
+               fpc->colour_id = fdec->DeclarationRange.First;
                break;
        default:
                NOUVEAU_ERR("bad output semantic\n");
@@ -669,30 +658,22 @@ nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
        return TRUE;
 }
 
-void
-nv30_fragprog_translate(struct nv30_context *nv30,
-                       struct nv30_fragment_program *fp)
+static boolean
+nv30_fragprog_prepare(struct nv30_fpc *fpc)
 {
-       struct tgsi_parse_context parse;
-       struct nv30_fpc *fpc = NULL;
+       struct tgsi_parse_context p;
+       /*int high_temp = -1, i;*/
 
-       fpc = CALLOC(1, sizeof(struct nv30_fpc));
-       if (!fpc)
-               return;
-       fpc->fp = fp;
-       fpc->high_temp = -1;
-       fpc->num_regs = 2;
+       tgsi_parse_init(&p, fpc->fp->pipe.tokens);
+       while (!tgsi_parse_end_of_tokens(&p)) {
+               const union tgsi_full_token *tok = &p.FullToken;
 
-       tgsi_parse_init(&parse, fp->pipe->tokens);
-
-       while (!tgsi_parse_end_of_tokens(&parse)) {
-               tgsi_parse_token(&parse);
-
-               switch (parse.FullToken.Token.Type) {
+               tgsi_parse_token(&p);
+               switch(tok->Token.Type) {
                case TGSI_TOKEN_TYPE_DECLARATION:
                {
                        const struct tgsi_full_declaration *fdec;
-                       fdec = &parse.FullToken.FullDeclaration;
+                       fdec = &p.FullToken.FullDeclaration;
                        switch (fdec->Declaration.File) {
                        case TGSI_FILE_INPUT:
                                if (!nv30_fragprog_parse_decl_attrib(fpc, fdec))
@@ -702,6 +683,12 @@ nv30_fragprog_translate(struct nv30_context *nv30,
                                if (!nv30_fragprog_parse_decl_output(fpc, fdec))
                                        goto out_err;
                                break;
+                       /*case TGSI_FILE_TEMPORARY:
+                               if (fdec->DeclarationRange.Last > high_temp) {
+                                       high_temp =
+                                               fdec->DeclarationRange.Last;
+                               }
+                               break;*/
                        default:
                                break;
                        }
@@ -711,17 +698,67 @@ nv30_fragprog_translate(struct nv30_context *nv30,
                {
                        struct tgsi_full_immediate *imm;
                        float vals[4];
-                       int i;
                        
-                       imm = &parse.FullToken.FullImmediate;
+                       imm = &p.FullToken.FullImmediate;
                        assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
                        assert(fpc->nr_imm < MAX_IMM);
 
-                       for (i = 0; i < 4; i++)
-                               vals[i] = imm->u.ImmediateFloat32[i].Float;
+                       vals[0] = imm->u.ImmediateFloat32[0].Float;
+                       vals[1] = imm->u.ImmediateFloat32[1].Float;
+                       vals[2] = imm->u.ImmediateFloat32[2].Float;
+                       vals[3] = imm->u.ImmediateFloat32[3].Float;
                        fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
                }
                        break;
+               default:
+                       break;
+               }
+       }
+       tgsi_parse_free(&p);
+
+       /*if (++high_temp) {
+               fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg));
+               for (i = 0; i < high_temp; i++)
+                       fpc->r_temp[i] = temp(fpc);
+               fpc->r_temps_discard = 0;
+       }*/
+
+       return TRUE;
+
+out_err:
+       /*if (fpc->r_temp)
+               FREE(fpc->r_temp);*/
+       tgsi_parse_free(&p);
+       return FALSE;
+}
+
+static void
+nv30_fragprog_translate(struct nv30_context *nv30,
+                       struct nv30_fragment_program *fp)
+{
+       struct tgsi_parse_context parse;
+       struct nv30_fpc *fpc = NULL;
+
+       tgsi_dump(fp->pipe.tokens,0);
+
+       fpc = CALLOC(1, sizeof(struct nv30_fpc));
+       if (!fpc)
+               return;
+       fpc->fp = fp;
+       fpc->high_temp = -1;
+       fpc->num_regs = 2;
+
+       if (!nv30_fragprog_prepare(fpc)) {
+               FREE(fpc);
+               return;
+       }
+
+       tgsi_parse_init(&parse, fp->pipe.tokens);
+
+       while (!tgsi_parse_end_of_tokens(&parse)) {
+               tgsi_parse_token(&parse);
+
+               switch (parse.FullToken.Token.Type) {
                case TGSI_TOKEN_TYPE_INSTRUCTION:
                {
                        const struct tgsi_full_instruction *finst;
@@ -754,24 +791,85 @@ nv30_fragprog_translate(struct nv30_context *nv30,
        fp->on_hw = FALSE;
 out_err:
        tgsi_parse_free(&parse);
-       free(fpc);
+       FREE(fpc);
 }
 
-void
-nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp)
+static void /* Copy the instruction words in fp->insn into fp->buffer via a CPU-write mapping. */
+nv30_fragprog_upload(struct nv30_context *nv30,
+                    struct nv30_fragment_program *fp)
+{
+       struct pipe_winsys *ws = nv30->pipe.winsys;
+       const uint32_t le = 1; /* endianness probe: its first byte is non-zero only on a little-endian host */
+       uint32_t *map;
+       int i;
+
+       map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); /* NOTE(review): result is used below without a NULL check */
+
+#if 0 /* compiled out: debug dump of every instruction word */
+       for (i = 0; i < fp->insn_len; i++) {
+               fflush(stdout); fflush(stderr);
+               NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+               fflush(stdout); fflush(stderr);
+       }
+#endif
+
+       if ((*(const uint8_t *)&le)) { /* little-endian host: copy each word unchanged */
+               for (i = 0; i < fp->insn_len; i++) {
+                       map[i] = fp->insn[i];
+               }
+       } else {
+               /* Weird swapping for big-endian chips */
+               for (i = 0; i < fp->insn_len; i++) { /* exchange the upper and lower 16-bit halves of each word */
+                       map[i] = ((fp->insn[i] & 0xffff) << 16) |
+                                 ((fp->insn[i] >> 16) & 0xffff);
+               }
+       }
+
+       ws->buffer_unmap(ws, fp->buffer);
+}
+
+static boolean
+nv30_fragprog_validate(struct nv30_context *nv30)
 {
+       struct nv30_fragment_program *fp = nv30->fragprog;
+       struct pipe_buffer *constbuf =
+               nv30->constbuf[PIPE_SHADER_FRAGMENT];
        struct pipe_winsys *ws = nv30->pipe.winsys;
+       struct nouveau_stateobj *so;
+       boolean new_consts = FALSE;
        int i;
 
+       if (fp->translated)
+               goto update_constants;
+
+       /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/
+       nv30_fragprog_translate(nv30, fp);
        if (!fp->translated) {
-               nv30_fragprog_translate(nv30, fp);
-               if (!fp->translated)
-                       assert(0);
+               /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/
+               return FALSE;
        }
 
+       fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4);
+       nv30_fragprog_upload(nv30, fp);
+
+       so = so_new(8, 1);
+       so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
+       so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+                 NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                 NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+       so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1);
+       so_data  (so, fp->fp_control);
+       so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1);
+       so_data  (so, fp->fp_reg_control);
+       so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1);
+       so_data  (so, fp->samplers);
+       so_ref(so, &fp->so);
+
+update_constants:
        if (fp->nr_consts) {
-               float *map = ws->buffer_map(ws, nv30->fragprog.constant_buf,
-                                           PIPE_BUFFER_USAGE_CPU_READ);
+               float *map;
+               
+               map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ);
                for (i = 0; i < fp->nr_consts; i++) {
                        struct nv30_fragment_program_data *fpd = &fp->consts[i];
                        uint32_t *p = &fp->insn[fpd->offset];
@@ -780,49 +878,20 @@ nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp)
                        if (!memcmp(p, cb, 4 * sizeof(float)))
                                continue;
                        memcpy(p, cb, 4 * sizeof(float));
-                       fp->on_hw = 0;
-               }
-               ws->buffer_unmap(ws, nv30->fragprog.constant_buf);
-       }
-
-       if (!fp->on_hw) {
-               const uint32_t le = 1;
-               uint32_t *map;
-
-               if (!fp->buffer)
-                       fp->buffer = ws->buffer_create(ws, 0x100, 0,
-                                                      fp->insn_len * 4);
-               map = ws->buffer_map(ws, fp->buffer,
-                                    PIPE_BUFFER_USAGE_CPU_WRITE);
-
-#if 0
-               for (i = 0; i < fp->insn_len; i++) {
-                       NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
+                       new_consts = TRUE;
                }
-#endif
+               ws->buffer_unmap(ws, constbuf);
 
-               if ((*(const uint8_t *)&le)) {
-                       for (i = 0; i < fp->insn_len; i++) {
-                               map[i] = fp->insn[i];
-                       }
-               } else {
-                       /* Weird swapping for big-endian chips */
-                       for (i = 0; i < fp->insn_len; i++) {
-                               map[i] = ((fp->insn[i] & 0xffff) << 16) |
-                                         ((fp->insn[i] >> 16) & 0xffff);
-                       }
-               }
-
-               ws->buffer_unmap(ws, fp->buffer);
-               fp->on_hw = TRUE;
+               if (new_consts)
+                       nv30_fragprog_upload(nv30, fp);
        }
 
-       BEGIN_RING(rankine, NV34TCL_FP_CONTROL, 1);
-       OUT_RING  (fp->fp_control);
-       BEGIN_RING(rankine, NV34TCL_FP_REG_CONTROL, 1);
-       OUT_RING  (fp->fp_reg_control);
+       if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) {
+               so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]);
+               return TRUE;
+       }
 
-       nv30->fragprog.active = fp;
+       return FALSE;
 }
 
 void
@@ -830,6 +899,13 @@ nv30_fragprog_destroy(struct nv30_context *nv30,
                      struct nv30_fragment_program *fp)
 {
        if (fp->insn_len)
-               free(fp->insn);
+               FREE(fp->insn);
 }
 
+struct nv30_state_entry nv30_state_fragprog = { /* state entry for the fragment program */
+       .validate = nv30_fragprog_validate, /* callback; returns TRUE when it updated nv30->state.hw[NV30_STATE_FRAGPROG] */
+       .dirty = {
+               .pipe = NV30_NEW_FRAGPROG, /* presumably the pipe-level dirty flag that triggers validate - TODO confirm */
+               .hw = NV30_STATE_FRAGPROG /* same index nv30_fragprog_validate uses into nv30->state.hw[] */
+       }
+};