nvfx: add rewritten swtnl support

author Luca Barbieri <luca@luca-barbieri.com>

Sun, 5 Sep 2010 03:42:59 +0000 (05:42 +0200)

committer Luca Barbieri <luca@luca-barbieri.com>

Sun, 5 Sep 2010 15:52:25 +0000 (17:52 +0200)
author Luca Barbieri <luca@luca-barbieri.com>
Sun, 5 Sep 2010 03:42:59 +0000 (05:42 +0200)
committer Luca Barbieri <luca@luca-barbieri.com>
Sun, 5 Sep 2010 15:52:25 +0000 (17:52 +0200)
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h

index b837437c58fd034928b065bdf253fc39c1d0ab5a..369c2163882a7f0947353a225d8626602a6f1838 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -151,14 +151,6 @@ struct nvfx_context {
  
         /* HW state derived from pipe states */
         struct nvfx_state state;
-       struct {
-               struct nvfx_vertex_program *vertprog;
-
-               unsigned nr_attribs;
-               unsigned hw[PIPE_MAX_SHADER_INPUTS];
-               unsigned draw[PIPE_MAX_SHADER_INPUTS];
-               unsigned emit[PIPE_MAX_SHADER_INPUTS];
-       } swtnl;
  
         enum {
                 HW, SWTNL, SWRAST
@@ -170,7 +162,7 @@ struct nvfx_context {
         struct pipe_scissor_state scissor;
         unsigned stipple[32];
         struct pipe_clip_state clip;
-       struct nvfx_vertex_program *vertprog;
+       struct nvfx_pipe_vertex_program *vertprog;
         struct nvfx_pipe_fragment_program *fragprog;
         struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
         unsigned constbuf_nr[PIPE_SHADER_TYPES];
@@ -208,6 +200,7 @@ struct nvfx_context {
         int hw_pointsprite_control;
         int hw_vp_output;
         struct nvfx_fragment_program* hw_fragprog;
+       struct nvfx_vertex_program* hw_vertprog;
  
         unsigned relocs_needed;
  };
@@ -326,6 +319,7 @@ extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
  
  /* nvfx_vbo.c */
  extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx);
+extern void nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx);
  extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);
  extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx);
  extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx);
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c

index 2601d5b8e2ecc7edcd97a0bb83cded1cd432111e..4bf38a9c181890f4465895e019762be97fdc1eb5 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -1,6 +1,5 @@
  #include "pipe/p_shader_tokens.h"
  #include "util/u_inlines.h"
-#include "tgsi/tgsi_ureg.h"
  
  #include "util/u_pack_color.h"
  
@@ -11,11 +10,6 @@
  #include "nvfx_context.h"
  #include "nvfx_resource.h"
  
-/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
- * often at all.  Uses "quadro style" vertex submission + a fixed vertex
- * layout to avoid the need to generate a vertex program or vtxfmt.
- */
-
  struct nvfx_render_stage {
         struct draw_stage stage;
         struct nvfx_context *nvfx;
@@ -28,58 +22,18 @@ nvfx_render_stage(struct draw_stage *stage)
         return (struct nvfx_render_stage *)stage;
  }
  
-static INLINE void
-nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
+static void
+nvfx_render_flush(struct draw_stage *stage, unsigned flags)
  {
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
-       unsigned i;
+       struct nvfx_render_stage *rs = nvfx_render_stage(stage);
+       struct nvfx_context *nvfx = rs->nvfx;
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
  
-       for (i = 0; i < nvfx->swtnl.nr_attribs; i++) {
-               unsigned idx = nvfx->swtnl.draw[i];
-               unsigned hw = nvfx->swtnl.hw[i];
-
-               WAIT_RING(chan, 5);
-               switch (nvfx->swtnl.emit[i]) {
-               case EMIT_OMIT:
-                       break;
-               case EMIT_1F:
-                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1);
-                       break;
-               case EMIT_2F:
-                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2);
-                       break;
-               case EMIT_3F:
-                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3);
-                       break;
-               case EMIT_4F:
-                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4);
-                       break;
-               case 0xff:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
-                       OUT_RING  (chan, fui(v->data[idx][0] / v->data[idx][3]));
-                       OUT_RING  (chan, fui(v->data[idx][1] / v->data[idx][3]));
-                       OUT_RING  (chan, fui(v->data[idx][2] / v->data[idx][3]));
-                       OUT_RING  (chan, fui(1.0f / v->data[idx][3]));
-                       break;
-               case EMIT_4UB:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1);
-                       OUT_RING  (chan, pack_ub4(float_to_ubyte(v->data[idx][0]),
-                                           float_to_ubyte(v->data[idx][1]),
-                                           float_to_ubyte(v->data[idx][2]),
-                                           float_to_ubyte(v->data[idx][3])));
-               case EMIT_4UB_BGRA:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1);
-                       OUT_RING  (chan, pack_ub4(float_to_ubyte(v->data[idx][2]),
-                                           float_to_ubyte(v->data[idx][1]),
-                                           float_to_ubyte(v->data[idx][0]),
-                                           float_to_ubyte(v->data[idx][3])));
-                       break;
-               default:
-                       assert(0);
-                       break;
-               }
+       if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
+               assert(AVAIL_RING(chan) >= 2);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+               OUT_RING(chan, NV34TCL_VERTEX_BEGIN_END_STOP);
+               rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
         }
  }
  
@@ -92,42 +46,61 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
  
         struct nvfx_screen *screen = nvfx->screen;
         struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
-       unsigned i;
+       boolean no_elements = nvfx->vertprog->draw_no_elements;
+       unsigned num_attribs = nvfx->vertprog->draw_elements;
  
-       /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
-       if (AVAIL_RING(chan) < ((count * 20) + 6)) {
-               if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
-                       NOUVEAU_ERR("AIII, missed flush\n");
-                       assert(0);
-               }
+       /* we need to account for the flush as well here, even if it is done after
+        * this function
+        */
+       if (AVAIL_RING(chan) < ((1 + count * num_attribs * 4) + 6 + 64)) {
+               nvfx_render_flush(stage, 0);
                 FIRE_RING(chan);
                 nvfx_state_emit(nvfx);
+
+               assert(AVAIL_RING(chan) >= ((1 + count * num_attribs * 4) + 6 + 64));
         }
  
         /* Switch primitive modes if necessary */
         if (rs->prim != mode) {
                 if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
-                       BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
-                       OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
+                       OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+                       OUT_RING(chan, NV34TCL_VERTEX_BEGIN_END_STOP);
+               }
+
+               /* XXX: emitting any command many times seems to (mostly) fix corruption that would otherwise happen */
+               /* this seems to cause issues on nv3x, and also be unneeded there */
+               if(nvfx->is_nv4x)
+               {
+                       int i;
+                       for(i = 0; i < 32; ++i)
+                       {
+                               OUT_RING(chan, RING_3D(0x1dac, 1));
+                               OUT_RING(chan, 0);
+                       }
                 }
  
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                 OUT_RING  (chan, mode);
                 rs->prim = mode;
         }
  
-       /* Emit vertex data */
-       for (i = 0; i < count; i++)
-               nvfx_render_vertex(nvfx, prim->v[i]);
-
-       /* If it's likely we'll need to empty the push buffer soon, finish
-        * off the primitive now.
-        */
-       if (AVAIL_RING(chan) < ((count * 20) + 6)) {
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
-               OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
-               rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
+       OUT_RING(chan, RING_3D_NI(NV34TCL_VERTEX_DATA, num_attribs * 4 * count));
+       if(no_elements) {
+               OUT_RING(chan, 0);
+               OUT_RING(chan, 0);
+               OUT_RING(chan, 0);
+               OUT_RING(chan, 0);
+       } else {
+               for (unsigned i = 0; i < count; ++i)
+               {
+                       struct vertex_header* v = prim->v[i];
+                       /* TODO: disable divide where it's causing the problem, and remove this hack */
+                       OUT_RING(chan, fui(v->data[0][0] / v->data[0][3]));
+                       OUT_RING(chan, fui(v->data[0][1] / v->data[0][3]));
+                       OUT_RING(chan, fui(v->data[0][2] / v->data[0][3]));
+                       OUT_RING(chan, fui(1.0f / v->data[0][3]));
+                       OUT_RINGp(chan, &v->data[1][0], 4 * (num_attribs - 1));
+               }
         }
  }
  
@@ -149,25 +122,11 @@ nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim)
         nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3);
  }
  
-static void
-nvfx_render_flush(struct draw_stage *draw, unsigned flags)
-{
-       struct nvfx_render_stage *rs = nvfx_render_stage(draw);
-       struct nvfx_context *nvfx = rs->nvfx;
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
-
-       if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) {
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
-               OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP);
-               rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;
-       }
-}
-
  static void
  nvfx_render_reset_stipple_counter(struct draw_stage *draw)
  {
+       /* this doesn't really seem to work, but it matters rather little */
+       nvfx_render_flush(draw, 0);
  }
  
  static void
@@ -176,40 +135,11 @@ nvfx_render_destroy(struct draw_stage *draw)
         FREE(draw);
  }
  
-static struct nvfx_vertex_program *
-nvfx_create_drawvp(struct nvfx_context *nvfx)
-{
-       struct ureg_program *ureg;
-       uint i;
-
-       ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
-       if (ureg == NULL)
-               return NULL;
-
-       ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0));
-       ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3));
-       ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4));
-       ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3));
-       ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4));
-       ureg_MOV(ureg,
-                  ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X),
-                  ureg_DECL_vs_input(ureg, 5));
-       for (i = 0; i < 8; ++i)
-               ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i));
-
-       ureg_END( ureg );
-
-       return ureg_create_shader_and_destroy( ureg, &nvfx->pipe );
-}
-
  struct draw_stage *
  nvfx_draw_render_stage(struct nvfx_context *nvfx)
  {
         struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage);
  
-       if (!nvfx->swtnl.vertprog)
-               nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx);
-
         render->nvfx = nvfx;
         render->stage.draw = nvfx->draw;
         render->stage.point = nvfx_render_point;
@@ -231,6 +161,7 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info
  
         if (!nvfx_state_validate_swtnl(nvfx))
                 return;
+
         nvfx_state_emit(nvfx);
  
         /* these must be passed without adding the offsets */
@@ -256,62 +187,3 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info
  
         draw_flush(nvfx->draw);
  }
-
-static INLINE void
-emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit,
-           unsigned semantic, unsigned index)
-{
-       unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index);
-       unsigned a = nvfx->swtnl.nr_attribs++;
-
-       nvfx->swtnl.hw[a] = hw;
-       nvfx->swtnl.emit[a] = emit;
-       nvfx->swtnl.draw[a] = draw_out;
-}
-
-void
-nvfx_vtxfmt_validate(struct nvfx_context *nvfx)
-{
-       struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog;
-       unsigned colour = 0, texcoords = 0, fog = 0, i;
-
-       /* Determine needed fragprog inputs */
-       for (i = 0; i < pfp->info.num_inputs; i++) {
-               switch (pfp->info.input_semantic_name[i]) {
-               case TGSI_SEMANTIC_POSITION:
-                       break;
-               case TGSI_SEMANTIC_COLOR:
-                       colour |= (1 << pfp->info.input_semantic_index[i]);
-                       break;
-               case TGSI_SEMANTIC_GENERIC:
-                       texcoords |= (1 << pfp->info.input_semantic_index[i]);
-                       break;
-               case TGSI_SEMANTIC_FOG:
-                       fog = 1;
-                       break;
-               default:
-                       assert(0);
-               }
-       }
-
-       nvfx->swtnl.nr_attribs = 0;
-
-       /* Map draw vtxprog output to hw attribute IDs */
-       for (i = 0; i < 2; i++) {
-               if (!(colour & (1 << i)))
-                       continue;
-               emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i);
-       }
-
-       for (i = 0; i < 8; i++) {
-               if (!(texcoords & (1 << i)))
-                       continue;
-               emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
-       }
-
-       if (fog) {
-               emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
-       }
-
-       emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0);
-}
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c

index 93ba5382419d037948e652d6417c90aead196bc8..86df7f004963e773334988a063bfd80d899de65e 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1263,7 +1263,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
                 pfp->fps[key] = fp;
         }
  
-       vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
+       vp = nvfx->hw_vertprog;
  
         if (fp->last_vp_id != vp->id || fp->last_sprite_coord_enable != sprite_coord_enable) {
                 int sprite_real_input = -1;
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c

index ac8053f26b3b1b6e567b7dfbd7a61c906c3753ae..3900821de4837a3cb23886b0ac7b6fe93413c181 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -432,7 +432,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                 return NULL;
         }
  
-       screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+       screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE);
         screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
  
         screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h

index 9200f78a54567d98140fb43f83fc164afc48995c..8fafca1950ccb5bc4116373dd881ef18899b7a87 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_state.h
+++ b/src/gallium/drivers/nvfx/nvfx_state.h
@@ -17,13 +17,8 @@ struct nvfx_vertex_program_data {
  };
  
  struct nvfx_vertex_program {
-       struct pipe_shader_state pipe;
         unsigned long long id;
  
-       struct draw_vertex_shader *draw;
-
-       boolean translated;
-
         struct nvfx_vertex_program_exec *insns;
         unsigned nr_insns;
         struct nvfx_vertex_program_data *consts;
@@ -46,6 +41,20 @@ struct nvfx_vertex_program {
         struct util_dynarray const_relocs;
  };
  
+#define NVFX_VP_FAILED ((struct nvfx_vertex_program*)-1) /* sentinel stored in ->vp when HW translation failed, so it is not retried */
+
+struct nvfx_pipe_vertex_program { /* per-CSO vertex shader state: the TGSI tokens plus lazily built translations */
+       struct pipe_shader_state pipe; /* tokens are a private copy made with tgsi_dup_tokens() in nvfx_vp_state_create */
+       struct tgsi_shader_info info; /* filled in by tgsi_scan_shader() in nvfx_vp_state_create */
+
+       unsigned draw_elements; /* MAX2(1, MIN2(info.num_outputs, 16)); used as the per-vertex attribute count in swtnl */
+       boolean draw_no_elements; /* TRUE when info.num_outputs == 0 */
+       struct draw_vertex_shader *draw_vs; /* draw-module shader; created lazily in nvfx_state_validate_swtnl, NULL until then */
+       struct nvfx_vertex_program* draw_vp; /* passthrough hw program for swtnl; translated on first swtnl validate, NULL until then */
+
+       struct nvfx_vertex_program* vp; /* hw translation of `pipe`: NULL until first HW validate, or NVFX_VP_FAILED */
+};
+
  struct nvfx_fragment_program_data {
         unsigned offset;
         unsigned index;
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c

index 308c25fbe1bff2688cb2efb5964ca1dd949fd235..30ef12a95b6a299a7317d16c5741152aa8c85586 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -145,7 +145,7 @@ nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
  {
         struct nouveau_channel* chan = nvfx->screen->base.channel;
         unsigned i;
-       struct nvfx_vertex_program* vp = nvfx->vertprog;
+       struct nvfx_vertex_program* vp = nvfx->hw_vertprog;
         if(nvfx->clip.nr != vp->clip_nr)
         {
                 unsigned idx;
@@ -230,7 +230,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
  
         if(nvfx->render_mode == HW)
         {
-               if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST | NVFX_NEW_UCP))
+               if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST))
                 {
                         if(!nvfx_vertprog_validate(nvfx))
                                 return FALSE;
@@ -252,12 +252,10 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
         }
         else
         {
-               /* TODO: this looks a bit misdesigned */
-               if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
-                       nvfx_vertprog_validate(nvfx);
-
-               if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG))
-                       nvfx_vtxfmt_validate(nvfx);
+               if(dirty & NVFX_NEW_VERTPROG) {
+                       assert(nvfx_vertprog_validate(nvfx));
+                       nvfx_vbo_swtnl_validate(nvfx);
+               }
         }
  
         if(dirty & NVFX_NEW_RAST)
@@ -284,7 +282,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
  
         if(nvfx->is_nv4x)
         {
-               unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or;
+               unsigned vp_output = nvfx->hw_vertprog->or | nvfx->hw_fragprog->or;
                 vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6);
  
                 if(vp_output != nvfx->hw_vp_output)
@@ -399,8 +397,6 @@ nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs)
  boolean
  nvfx_state_validate(struct nvfx_context *nvfx)
  {
-       boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE;
-
         if (nvfx->render_mode != HW) {
                 /* Don't even bother trying to go back to hw if none
                  * of the states that caused swtnl previously have changed.
@@ -429,7 +425,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
  
         /* Setup for swtnl */
         if (nvfx->render_mode == HW) {
-               NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
+               static boolean warned = FALSE;
+               if(!warned) {
+                       NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
+                       warned = TRUE;
+               }
                 nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
                 nvfx->dirty |= (NVFX_NEW_VIEWPORT |
                                 NVFX_NEW_VERTPROG |
@@ -437,8 +437,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
                 nvfx->render_mode = SWTNL;
         }
  
-       if (nvfx->draw_dirty & NVFX_NEW_VERTPROG)
-               draw_bind_vertex_shader(draw, nvfx->vertprog->draw);
+       if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) {
+               if(!nvfx->vertprog->draw_vs)
+                       nvfx->vertprog->draw_vs = draw_create_vertex_shader(draw, &nvfx->vertprog->pipe);
+               draw_bind_vertex_shader(draw, nvfx->vertprog->draw_vs);
+       }
  
         if (nvfx->draw_dirty & NVFX_NEW_RAST)
             draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe,
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c

index 611de808af5c2317891bd49fa8d804553f6b7be6..c35e926a7a14e794050303bc95043c96060a33a3 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -338,6 +338,44 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
         return TRUE;
  }
  
+void
+nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) /* emits vertex format/address state for the swtnl path */
+{
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned num_outputs = nvfx->vertprog->draw_elements;
+       int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); /* also covers slots recorded in hw_vtxelt_nr; NOTE(review): int compared with unsigned i below */
+
+       if (!elements)
+               return;
+
+       WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); /* VTXFMT header + 3 nv4x (header,value) pairs + VTXBUF header + 0x1710 pair, plus 2 dwords per element */
+
+       OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+       for(unsigned i = 0; i < num_outputs; ++i) /* slots in use: size 4, 32-bit float */
+               OUT_RING(chan, (4 << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT);
+       for(unsigned i = num_outputs; i < elements; ++i) /* remaining slots: type only, no size bits set */
+               OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);
+
+       if(nvfx->is_nv4x) {
+               unsigned i;
+               /* seems to be some kind of cache flushing */
+               for(i = 0; i < 3; ++i) {
+                       OUT_RING(chan, RING_3D(0x1718, 1));
+                       OUT_RING(chan, 0);
+               }
+       }
+
+       OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+       for (unsigned i = 0; i < elements; i++) /* every vertex buffer address is written as 0 */
+               OUT_RING(chan, 0);
+
+       OUT_RING(chan, RING_3D(0x1710, 1)); /* NOTE(review): purpose of method 0x1710 is not shown here - confirm */
+       OUT_RING(chan, 0);
+
+       nvfx->hw_vtxelt_nr = num_outputs; /* record how many slots are now programmed with a size */
+       nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; /* clear the vertex buffer relocation flag */
+}
+
  void
  nvfx_vbo_relocate(struct nvfx_context *nvfx)
  {
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c

index bc78ed400a95cd7360d1934e35555bf75c889fe0..30385b26f798ac5b6f275b207322105bec60375f 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -8,6 +8,7 @@
  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_dump.h"
  #include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_ureg.h"
  
  #include "draw/draw_context.h"
  
@@ -37,6 +38,7 @@ struct nvfx_loop_entry
  
  struct nvfx_vpc {
         struct nvfx_context* nvfx;
+       struct pipe_shader_state pipe;
         struct nvfx_vertex_program *vp;
  
         struct nvfx_vertex_program_exec *vpi;
@@ -813,7 +815,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
         unsigned num_outputs;
         unsigned num_texcoords = nvfx->is_nv4x ? 10 : 8;
  
-       num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT);
+       num_outputs = util_semantic_set_from_program_file(&set, vpc->pipe.tokens, TGSI_FILE_OUTPUT);
  
         if(num_outputs > num_texcoords) {
                 NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs);
@@ -840,7 +842,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
                 }
         }
  
-       tgsi_parse_init(&p, vpc->vp->pipe.tokens);
+       tgsi_parse_init(&p, vpc->pipe.tokens);
         while (!tgsi_parse_end_of_tokens(&p)) {
                 const union tgsi_full_token *tok = &p.FullToken;
  
@@ -917,21 +919,35 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
  
  DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)
  
-static void
-nvfx_vertprog_translate(struct nvfx_context *nvfx,
-                       struct nvfx_vertex_program *vp)
+static struct nvfx_vertex_program*
+nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps)
  {
         struct tgsi_parse_context parse;
+       struct nvfx_vertex_program* vp = NULL;
         struct nvfx_vpc *vpc = NULL;
         struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
         struct util_dynarray insns;
         int i;
  
-       vpc = CALLOC(1, sizeof(struct nvfx_vpc));
+       tgsi_parse_init(&parse, vps->tokens);
+
+       vp = CALLOC_STRUCT(nvfx_vertex_program);
+       if(!vp)
+               goto out_err;
+
+       vpc = CALLOC_STRUCT(nvfx_vpc);
         if (!vpc)
-               return;
+               goto out_err;
+
         vpc->nvfx = nvfx;
         vpc->vp = vp;
+       vpc->pipe = *vps;
+
+       {
+               // TODO: use a 64-bit atomic here!
+               static unsigned long long id = 0;
+               vp->id = ++id;
+       }
  
         /* reserve space for ucps */
         if(nvfx->use_vp_clipping)
@@ -942,7 +958,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
  
         if (!nvfx_vertprog_prepare(nvfx, vpc)) {
                 FREE(vpc);
-               return;
+               return NULL;
         }
  
         /* Redirect post-transform vertex position to a temp if user clip
@@ -955,8 +971,6 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
                 vpc->r_temps_discard = 0;
         }
  
-       tgsi_parse_init(&parse, vp->pipe.tokens);
-
         util_dynarray_init(&insns);
         while (!tgsi_parse_end_of_tokens(&parse)) {
                 tgsi_parse_token(&parse);
@@ -1058,7 +1072,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
         if(debug_get_option_nvfx_dump_vp())
         {
                 debug_printf("\n");
-               tgsi_dump(vp->pipe.tokens, 0);
+               tgsi_dump(vpc->pipe.tokens, 0);
  
                 debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");
                 for (i = 0; i < vp->nr_insns; i++)
@@ -1068,20 +1082,49 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,
  
         vp->clip_nr = -1;
         vp->exec_start = -1;
-       vp->translated = TRUE;
-out_err:
+
+out:
         tgsi_parse_free(&parse);
-       util_dynarray_fini(&vpc->label_relocs);
-       util_dynarray_fini(&vpc->loop_stack);
-       if (vpc->r_temp)
+       if(vpc) {
+               util_dynarray_fini(&vpc->label_relocs);
+               util_dynarray_fini(&vpc->loop_stack);
                 FREE(vpc->r_temp);
-       if (vpc->r_address)
                 FREE(vpc->r_address);
-       if (vpc->r_const)
                 FREE(vpc->r_const);
-       if (vpc->imm)
                 FREE(vpc->imm);
-       FREE(vpc);
+               FREE(vpc);
+       }
+       return vp;
+
+out_err:
+       FREE(vp);
+       vp = NULL;
+       goto out;
+}
+
+static struct nvfx_vertex_program* /* builds and translates a passthrough vertex program for swtnl; NULL on failure */
+nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vertex_program* pvp)
+{
+       struct nvfx_vertex_program* vp = NULL;
+       struct pipe_shader_state vps = {0}; /* zeroed: nvfx_vertprog_translate copies the whole struct, not just tokens */
+       struct ureg_program *ureg = NULL;
+       unsigned num_outputs = MIN2(pvp->info.num_outputs, 16); /* same cap of 16 as pvp->draw_elements */
+
+       ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
+       if(ureg == NULL)
+               return NULL;
+
+       for (unsigned i = 0; i < num_outputs; i++) /* input i is moved to an output carrying the i-th semantic of the original shader */
+               ureg_MOV(ureg, ureg_DECL_output(ureg, pvp->info.output_semantic_name[i], pvp->info.output_semantic_index[i]), ureg_DECL_vs_input(ureg, i));
+
+       ureg_END( ureg );
+
+       vps.tokens = ureg_get_tokens(ureg, 0);
+       vp = nvfx_vertprog_translate(nvfx, &vps);
+       ureg_free_tokens(vps.tokens); /* the tokens are only needed during translation */
+       ureg_destroy(ureg);
+
+       return vp;
+}
  
  boolean
@@ -1090,30 +1133,44 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
         struct nvfx_screen *screen = nvfx->screen;
         struct nouveau_channel *chan = screen->base.channel;
         struct nouveau_grobj *eng3d = screen->eng3d;
-       struct nvfx_vertex_program *vp;
+       struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog;
+       struct nvfx_vertex_program* vp;
         struct pipe_resource *constbuf;
         boolean upload_code = FALSE, upload_data = FALSE;
         int i;
  
         if (nvfx->render_mode == HW) {
-               vp = nvfx->vertprog;
-               constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
-       } else {
-               vp = nvfx->swtnl.vertprog;
-               constbuf = NULL;
-       }
-
-       /* Translate TGSI shader into hw bytecode */
-       if (!vp->translated)
-       {
                 nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
-               nvfx_vertprog_translate(nvfx, vp);
-               if (!vp->translated) {
+               vp = pvp->vp;
+
+               if(!vp) {
+                       vp = nvfx_vertprog_translate(nvfx, &pvp->pipe);
+                       if(!vp)
+                               vp = NVFX_VP_FAILED;
+                       pvp->vp = vp;
+               }
+
+               if(vp == NVFX_VP_FAILED) {
                         nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
                         return FALSE;
                 }
+
+               constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
+       } else {
+               vp = pvp->draw_vp;
+               if(!vp)
+               {
+                       pvp->draw_vp = vp = nvfx_vertprog_translate_draw_vp(nvfx, pvp);
+                       if(!vp) {
+                               _debug_printf("Error: unable to create a swtnl passthrough vertex shader: aborting.");
+                               abort();
+                       }
+               }
+               constbuf = NULL;
         }
  
+       nvfx->hw_vertprog = vp;
+
         /* Allocate hw vtxprog exec slots */
         if (!vp->exec) {
                 struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
@@ -1294,24 +1351,22 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
  
         util_dynarray_fini(&vp->branch_relocs);
         util_dynarray_fini(&vp->const_relocs);
+       FREE(vp);
  }
  
  static void *
-nvfx_vp_state_create(struct pipe_context *pipe,
-                     const struct pipe_shader_state *cso)
+nvfx_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso)
  {
          struct nvfx_context *nvfx = nvfx_context(pipe);
-        struct nvfx_vertex_program *vp;
-
-        // TODO: use a 64-bit atomic here!
-        static unsigned long long id = 0;
+        struct nvfx_pipe_vertex_program *pvp;
  
-        vp = CALLOC(1, sizeof(struct nvfx_vertex_program));
-        vp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
-        vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);
-        vp->id = ++id;
+        pvp = CALLOC(1, sizeof(struct nvfx_pipe_vertex_program));
+        pvp->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+        tgsi_scan_shader(pvp->pipe.tokens, &pvp->info);
+        pvp->draw_elements = MAX2(1, MIN2(pvp->info.num_outputs, 16));
+        pvp->draw_no_elements = pvp->info.num_outputs == 0;
  
-        return (void *)vp;
+        return (void *)pvp;
  }
  
  static void
@@ -1327,13 +1382,17 @@ nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)
  static void
  nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)
  {
-        struct nvfx_context *nvfx = nvfx_context(pipe);
-        struct nvfx_vertex_program *vp = hwcso;
-
-        draw_delete_vertex_shader(nvfx->draw, vp->draw);
-        nvfx_vertprog_destroy(nvfx, vp);
-        FREE((void*)vp->pipe.tokens);
-        FREE(vp);
+       struct nvfx_context *nvfx = nvfx_context(pipe);
+       struct nvfx_pipe_vertex_program *pvp = hwcso;
+
+       if(pvp->draw_vs)
+               draw_delete_vertex_shader(nvfx->draw, pvp->draw_vs);
+       if(pvp->vp && pvp->vp != NVFX_VP_FAILED)
+               nvfx_vertprog_destroy(nvfx, pvp->vp);
+       if(pvp->draw_vp)
+               nvfx_vertprog_destroy(nvfx, pvp->draw_vp);
+       FREE((void*)pvp->pipe.tokens);
+       FREE(pvp);
  }
  
  void
author	Luca Barbieri <luca@luca-barbieri.com>
	Sun, 5 Sep 2010 03:42:59 +0000 (05:42 +0200)
committer	Luca Barbieri <luca@luca-barbieri.com>
	Sun, 5 Sep 2010 15:52:25 +0000 (17:52 +0200)
src/gallium/drivers/nvfx/nvfx_context.h		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_draw.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_fragprog.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_screen.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_state.h		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_state_emit.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_vbo.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_vertprog.c		patch \| blob \| history