nvfx: rewrite draw code and buffer code

[mesa.git] / src / gallium / drivers / nvfx / nvfx_vertprog.c
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c

index 2d243be16a35ff1ce245f0b0917292b2af308c4b..939d2b83aee4ab3feda1e00aadd5afaabb1c2b4c 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -10,6 +10,7 @@
  
  #include "nvfx_context.h"
  #include "nvfx_state.h"
+#include "nvfx_resource.h"
  
  /* TODO (at least...):
   *  1. Indexed consts  + ARL
@@ -299,7 +300,13 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
                   (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
  
         if(!nvfx->is_nv4x) {
-               hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+               if(slot == 0)
+                       hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+               else
+               {
+                       hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
+                       hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
+               }
  //             hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
  //             hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));
  
@@ -334,7 +341,7 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
  
  static INLINE struct nvfx_sreg
  tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
-       struct nvfx_sreg src;
+       struct nvfx_sreg src = { 0 };
  
         switch (fsrc->Register.File) {
         case TGSI_FILE_INPUT:
@@ -365,7 +372,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
  
  static INLINE struct nvfx_sreg
  tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
-       struct nvfx_sreg dst;
+       struct nvfx_sreg dst = { 0 };
  
         switch (fdst->Register.File) {
         case TGSI_FILE_OUTPUT:
@@ -479,6 +486,9 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
         case TGSI_OPCODE_ARL:
                 arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
                 break;
+       case TGSI_OPCODE_COS:
+               arith(vpc, SCA, COS, dst, mask, none, none, src[0]);
+               break;
         case TGSI_OPCODE_DP3:
                 arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
                 break;
@@ -512,6 +522,11 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
         case TGSI_OPCODE_LOG:
                 arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
                 break;
+       case TGSI_OPCODE_LRP:
+               tmp = temp(vpc);
+               arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+               arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp);
+               break;
         case TGSI_OPCODE_MAD:
                 arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
                 break;
@@ -544,15 +559,36 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
         case TGSI_OPCODE_RSQ:
                 arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));
                 break;
+       case TGSI_OPCODE_SEQ:
+               arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_SFL:
+               arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none);
+               break;
         case TGSI_OPCODE_SGE:
                 arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
                 break;
         case TGSI_OPCODE_SGT:
                 arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none);
                 break;
+       case TGSI_OPCODE_SIN:
+               arith(vpc, SCA, SIN, dst, mask, none, none, src[0]);
+               break;
+       case TGSI_OPCODE_SLE:
+               arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none);
+               break;
         case TGSI_OPCODE_SLT:
                 arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
                 break;
+       case TGSI_OPCODE_SNE:
+               arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_SSG:
+               arith(vpc, VEC, SSG, dst, mask, src[0], src[1], none);
+               break;
+       case TGSI_OPCODE_STR:
+               arith(vpc, VEC, STR, dst, mask, src[0], src[1], none);
+               break;
         case TGSI_OPCODE_SUB:
                 arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
                 break;
@@ -830,15 +866,15 @@ out_err:
         FREE(vpc);
  }
  
-static boolean
+boolean
  nvfx_vertprog_validate(struct nvfx_context *nvfx)
  {
-       struct pipe_screen *pscreen = nvfx->pipe.screen;
+       struct pipe_context *pipe = &nvfx->pipe;
         struct nvfx_screen *screen = nvfx->screen;
         struct nouveau_channel *chan = screen->base.channel;
         struct nouveau_grobj *eng3d = screen->eng3d;
         struct nvfx_vertex_program *vp;
-       struct pipe_buffer *constbuf;
+       struct pipe_resource *constbuf;
         boolean upload_code = FALSE, upload_data = FALSE;
         int i;
  
@@ -846,6 +882,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                 vp = nvfx->vertprog;
                 constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
  
+               // TODO: ouch! can't we just use constant slots for these?!
                 if ((nvfx->dirty & NVFX_NEW_UCP) ||
                     memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
                         nvfx_vertprog_destroy(nvfx, vp);
@@ -857,21 +894,19 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
         }
  
         /* Translate TGSI shader into hw bytecode */
-       if (vp->translated)
-               goto check_gpu_resources;
-
-       nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
+       if (!vp->translated)
+       {
+               nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
                 nvfx_vertprog_translate(nvfx, vp);
-       if (!vp->translated) {
-               nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
+               if (!vp->translated) {
+                       nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
                         return FALSE;
+               }
         }
  
-check_gpu_resources:
         /* Allocate hw vtxprog exec slots */
         if (!vp->exec) {
                 struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
-               struct nouveau_stateobj *so;
                 uint vplen = vp->nr_insns;
  
                 if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
@@ -886,19 +921,6 @@ check_gpu_resources:
                                 assert(0);
                 }
  
-               so = so_new(3, 4, 0);
-               so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1);
-               so_data  (so, vp->exec->start);
-               if(nvfx->is_nv4x) {
-                       so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2);
-                       so_data  (so, vp->ir);
-                       so_data  (so, vp->or);
-               }
-               so_method(so, eng3d,  NV34TCL_VP_CLIP_PLANES_ENABLE, 1);
-               so_data  (so, vp->clip_ctrl);
-               so_ref(so, &vp->so);
-               so_ref(NULL, &so);
-
                 upload_code = TRUE;
         }
  
@@ -961,10 +983,8 @@ check_gpu_resources:
         if (vp->nr_consts) {
                 float *map = NULL;
  
-               if (constbuf) {
-                       map = pipe_buffer_map(pscreen, constbuf,
-                                             PIPE_BUFFER_USAGE_CPU_READ);
-               }
+               if (constbuf)
+                       map = nvfx_buffer(constbuf)->data;
  
                 for (i = 0; i < vp->nr_consts; i++) {
                         struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -982,9 +1002,6 @@ check_gpu_resources:
                         OUT_RING  (chan, i + vp->data->start);
                         OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
                 }
-
-               if (constbuf)
-                       pipe_buffer_unmap(pscreen, constbuf);
         }
  
         /* Upload vtxprog */
@@ -1005,12 +1022,21 @@ check_gpu_resources:
                 }
         }
  
-       if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) {
-               so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]);
-               return TRUE;
+       if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+       {
+               WAIT_RING(chan, 7);
+               OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
+               OUT_RING(chan, vp->exec->start);
+               if(nvfx->is_nv4x) {
+                       OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2));
+                       OUT_RING(chan, vp->ir);
+                       OUT_RING(chan, vp->or);
+               }
+               OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+               OUT_RING(chan, vp->clip_ctrl);
         }
  
-       return FALSE;
+       return TRUE;
  }
  
  void
@@ -1037,13 +1063,4 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
         vp->data_start_min = 0;
  
         vp->ir = vp->or = vp->clip_ctrl = 0;
-       so_ref(NULL, &vp->so);
  }
-
-struct nvfx_state_entry nvfx_state_vertprog = {
-       .validate = nvfx_vertprog_validate,
-       .dirty = {
-               .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP,
-               .hw = NVFX_STATE_VERTPROG,
-       }
-};