nvfx: rewrite draw code and buffer code

author Luca Barbieri <luca@luca-barbieri.com>

Sat, 7 Aug 2010 03:39:18 +0000 (05:39 +0200)

committer Luca Barbieri <luca@luca-barbieri.com>

Sat, 21 Aug 2010 18:42:14 +0000 (20:42 +0200)
author Luca Barbieri <luca@luca-barbieri.com>
Sat, 7 Aug 2010 03:39:18 +0000 (05:39 +0200)
committer Luca Barbieri <luca@luca-barbieri.com>
Sat, 21 Aug 2010 18:42:14 +0000 (20:42 +0200)
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h

index 685fa00b4556a04c60584e70d108a49afbfaa1da..14c11b278ad5539418c6ccdc9af1fa9728bd7d22 100644 (file)
--- a/src/gallium/drivers/nouveau/nouveau_class.h
+++ b/src/gallium/drivers/nouveau/nouveau_class.h
@@ -6149,6 +6149,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #define   NV34TCL_FP_REG_CONTROL_UNK1_MASK                                             0xffff0000
  #define   NV34TCL_FP_REG_CONTROL_UNK0_SHIFT                                            0
  #define   NV34TCL_FP_REG_CONTROL_UNK0_MASK                                             0x0000ffff
+#define  NV34TCL_EDGEFLAG_ENABLE                                                       0x0000145c
  #define  NV34TCL_VP_CLIP_PLANES_ENABLE                                                 0x00001478
  #define   NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0                                         (1 <<  1)
  #define   NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1                                         (1 <<  5)
@@ -6182,10 +6183,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #define  NV34TCL_VTXFMT__SIZE                                                          0x00000010
  #define   NV34TCL_VTXFMT_TYPE_SHIFT                                                    0
  #define   NV34TCL_VTXFMT_TYPE_MASK                                                     0x0000000f
-#define    NV34TCL_VTXFMT_TYPE_FLOAT                                                   0x00000002
-#define    NV34TCL_VTXFMT_TYPE_HALF                                                    0x00000003
-#define    NV34TCL_VTXFMT_TYPE_UBYTE                                                   0x00000004
-#define    NV34TCL_VTXFMT_TYPE_USHORT                                                  0x00000005
+#define    NV34TCL_VTXFMT_TYPE_16_SNORM                                                        0x00000001
+#define    NV34TCL_VTXFMT_TYPE_32_FLOAT                                                        0x00000002
+#define    NV34TCL_VTXFMT_TYPE_16_FLOAT                                                        0x00000003
+#define    NV34TCL_VTXFMT_TYPE_8_UNORM                                                 0x00000004
+#define    NV34TCL_VTXFMT_TYPE_16_SSCALED                                                      0x00000005
+#define    NV34TCL_VTXFMT_TYPE_11_11_10_SNORM                                                  0x00000006
+#define    NV34TCL_VTXFMT_TYPE_8_USCALED                                                       0x00000007
  #define   NV34TCL_VTXFMT_SIZE_SHIFT                                                    4
  #define   NV34TCL_VTXFMT_SIZE_MASK                                                     0x000000f0
  #define   NV34TCL_VTXFMT_STRIDE_SHIFT                                                  8
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h

deleted file mode 100644 (file)

index b165f7a..0000000
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef __NOUVEAU_UTIL_H__
-#define __NOUVEAU_UTIL_H__
-
-/* Determine how many vertices can be pushed into the command stream.
- * Where the remaining space isn't large enough to represent all verices,
- * split the buffer at primitive boundaries.
- *
- * Returns a count of vertices that can be rendered, and an index to
- * restart drawing at after a flush.
- */
-static INLINE unsigned
-nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
-                  unsigned mode, unsigned start, unsigned count,
-                  unsigned *restart)
-{
-       int max, adj = 0;
-
-       max  = remaining - overhead;
-       if (max < 0)
-               return 0;
-
-       max *= vpp;
-       if (max >= count)
-               return count;
-
-       switch (mode) {
-       case PIPE_PRIM_POINTS:
-               break;
-       case PIPE_PRIM_LINES:
-               max = max & 1;
-               break;
-       case PIPE_PRIM_TRIANGLES:
-               max = max - (max % 3);
-               break;
-       case PIPE_PRIM_QUADS:
-               max = max & ~3;
-               break;
-       case PIPE_PRIM_LINE_LOOP:
-       case PIPE_PRIM_LINE_STRIP:
-               if (max < 2)
-                       max = 0;
-               adj = 1;
-               break;
-       case PIPE_PRIM_POLYGON:
-       case PIPE_PRIM_TRIANGLE_STRIP:
-       case PIPE_PRIM_TRIANGLE_FAN:
-               if (max < 3)
-                       max = 0;
-               adj = 2;
-               break;
-       case PIPE_PRIM_QUAD_STRIP:
-               if (max < 4)
-                       max = 0;
-               adj = 3;
-               break;
-       default:
-               assert(0);
-       }
-
-       *restart = start + max - adj;
-       return max;
-}
-
-/* Integer base-2 logarithm, rounded towards zero. */
-static INLINE unsigned log2i(unsigned i)
-{
-       unsigned r = 0;
-
-       if (i & 0xffff0000) {
-               i >>= 16;
-               r += 16;
-       }
-       if (i & 0x0000ff00) {
-               i >>= 8;
-               r += 8;
-       }
-       if (i & 0x000000f0) {
-               i >>= 4;
-               r += 4;
-       }
-       if (i & 0x0000000c) {
-               i >>= 2;
-               r += 2;
-       }
-       if (i & 0x00000002) {
-               r += 1;
-       }
-       return r;
-}
-
-#endif
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile

index 2834f8984c79f1c09b41f19942bc4dadfbc50b9a..6cbbad699eb5eb728a74fcffd300bf00302b2fab 100644 (file)
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -14,6 +14,7 @@ C_SOURCES = \
         nv30_fragtex.c \
         nv40_fragtex.c \
         nvfx_miptree.c \
+       nvfx_push.c \
         nvfx_query.c \
         nvfx_resource.c \
         nvfx_screen.c \
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c

index 63c578a0ce1509b3a360a3fa32993cfe67a0ef6c..db8a8fc4b08db64a80c6d0a264ede4359b4577df 100644 (file)
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -1,7 +1,6 @@
  #include "util/u_format.h"
  
  #include "nvfx_context.h"
-#include "nouveau/nouveau_util.h"
  #include "nvfx_tex.h"
  #include "nvfx_resource.h"
  
@@ -44,9 +43,9 @@ nv30_sampler_view_init(struct pipe_context *pipe,
  
         txf = sv->u.init_fmt;
         txf |= (level != sv->base.last_level ? NV34TCL_TX_FORMAT_MIPMAP : 0);
-       txf |= log2i(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
-       txf |= log2i(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
-       txf |= log2i(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+       txf |= util_logbase2(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+       txf |= util_logbase2(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+       txf |= util_logbase2(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
         txf |=  0x10000;
  
         sv->u.nv30.fmt[0] = tf->fmt[0] | txf;
diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c

index 44680e519596524d296098469502a5a1271ac25e..89bb8570efd650b659e093c0334b8aa80f5c5759 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_buffer.c
+++ b/src/gallium/drivers/nvfx/nvfx_buffer.c
@@ -6,13 +6,16 @@
  #include "nouveau/nouveau_screen.h"
  #include "nouveau/nouveau_winsys.h"
  #include "nvfx_resource.h"
+#include "nvfx_screen.h"
  
  void nvfx_buffer_destroy(struct pipe_screen *pscreen,
                                 struct pipe_resource *presource)
  {
-       struct nvfx_resource *buffer = nvfx_resource(presource);
+       struct nvfx_buffer *buffer = nvfx_buffer(presource); /* teardown order: CPU copy (if owned), then the bo, then the struct itself */
  
-       nouveau_screen_bo_release(pscreen, buffer->bo);
+       if(!(buffer->base.base.flags & NVFX_RESOURCE_FLAG_USER)) /* user buffers point data at caller memory; free only storage from align_malloc */
+               align_free(buffer->data);
+       nouveau_screen_bo_release(pscreen, buffer->base.bo); /* NOTE(review): bo is only created by nvfx_buffer_upload and may still be NULL here - confirm release tolerates NULL */
         FREE(buffer);
  }
  
@@ -20,31 +23,28 @@ struct pipe_resource *
  nvfx_buffer_create(struct pipe_screen *pscreen,
                    const struct pipe_resource *template)
  {
-       struct nvfx_resource *buffer;
+       struct nvfx_screen* screen = nvfx_screen(pscreen);
+       struct nvfx_buffer* buffer;
  
-       buffer = CALLOC_STRUCT(nvfx_resource);
+       buffer = CALLOC_STRUCT(nvfx_buffer);
         if (!buffer)
                 return NULL;
  
-       buffer->base = *template;
-       buffer->base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-       pipe_reference_init(&buffer->base.reference, 1);
-       buffer->base.screen = pscreen;
+       buffer->base.base = *template;
+       buffer->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+       pipe_reference_init(&buffer->base.base.reference, 1);
+       buffer->base.base.screen = pscreen;
+       buffer->size = util_format_get_stride(template->format, template->width0);
+       buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+       /* Contents live in a 16-byte-aligned CPU copy; the GPU bo is created on demand by nvfx_buffer_upload(). */
+       buffer->data = align_malloc(buffer->size, 16);
+       if (!buffer->data) {
+               /* out of memory: never hand out a buffer whose data pointer is NULL */
+               FREE(buffer);
+               return NULL;
+       }
  
-       buffer->bo = nouveau_screen_bo_new(pscreen,
-                                          16,
-                                          buffer->base.usage,
-                                          buffer->base.bind,
-                                          buffer->base.width0);
-
-       if (buffer->bo == NULL)
-               goto fail;
-
-       return &buffer->base;
-
-fail:
-       FREE(buffer);
-       return NULL;
+       return &buffer->base.base;
  }
  
  
@@ -54,29 +48,49 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen,
                         unsigned bytes,
                         unsigned usage)
  {
-       struct nvfx_resource *buffer;
+       struct nvfx_screen* screen = nvfx_screen(pscreen);
+       struct nvfx_buffer* buffer;
  
-       buffer = CALLOC_STRUCT(nvfx_resource);
+       buffer = CALLOC_STRUCT(nvfx_buffer);
         if (!buffer)
                 return NULL;
  
-       pipe_reference_init(&buffer->base.reference, 1);
-       buffer->base.flags = NVFX_RESOURCE_FLAG_LINEAR;
-       buffer->base.screen = pscreen;
-       buffer->base.format = PIPE_FORMAT_R8_UNORM;
-       buffer->base.usage = PIPE_USAGE_IMMUTABLE;
-       buffer->base.bind = usage;
-       buffer->base.width0 = bytes;
-       buffer->base.height0 = 1;
-       buffer->base.depth0 = 1;
-
-       buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
-       if (!buffer->bo)
-               goto fail;
-
-       return &buffer->base;
+       pipe_reference_init(&buffer->base.base.reference, 1);
+       buffer->base.base.flags = NVFX_RESOURCE_FLAG_LINEAR | NVFX_RESOURCE_FLAG_USER;
+       buffer->base.base.screen = pscreen;
+       buffer->base.base.format = PIPE_FORMAT_R8_UNORM;
+       buffer->base.base.usage = PIPE_USAGE_IMMUTABLE;
+       buffer->base.base.bind = usage;
+       buffer->base.base.width0 = bytes;
+       buffer->base.base.height0 = 1;
+       buffer->base.base.depth0 = 1;
+       buffer->data = ptr;
+       buffer->size = bytes;
+       buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold;
+       buffer->dirty_end = bytes;
+
+       return &buffer->base.base;
+}
  
-fail:
-       FREE(buffer);
-       return NULL;
+void nvfx_buffer_upload(struct nvfx_buffer* buffer)
+{
+       /* Create the GPU bo on first use, then copy the dirty byte range
+        * [dirty_begin, dirty_end) of the CPU copy into it. */
+       unsigned dirty = buffer->dirty_end - buffer->dirty_begin;
+       if(!buffer->base.bo) {
+               buffer->base.bo = nouveau_screen_bo_new(buffer->base.base.screen, 16,
+                               buffer->base.base.usage, buffer->base.base.bind,
+                               buffer->base.base.width0);
+               if(!buffer->base.bo)
+                       return; /* allocation failed: keep the dirty range so a later call can retry */
+       }
+       if(!dirty)
+               return;
+       // TODO: may want to use a temporary in some cases
+       if(nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR
+                       | (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0)))
+               return; /* map failed: bo->map is not usable, so do not copy or clear the dirty range */
+       memcpy(buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty);
+       nouveau_bo_unmap(buffer->base.bo);
+       buffer->dirty_begin = buffer->dirty_end = 0;
  }
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c

index 1980176b23ec0789deb3a5c5ac04776b040e9935..94c854b22b8c07908b89a2a6c89c69d7408d89af 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -76,7 +76,9 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
         nvfx_init_surface_functions(nvfx);
         nvfx_init_state_functions(nvfx);
         nvfx_init_sampling_functions(nvfx);
+       nvfx_init_vbo_functions(nvfx);
         nvfx_init_resource_functions(&nvfx->pipe);
+       nvfx_init_transfer_functions(&nvfx->pipe);
  
         /* Create, configure, and install fallback swtnl path */
         nvfx->draw = draw_create(&nvfx->pipe);
@@ -89,6 +91,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
         /* set these to that we init them on first validation */
         nvfx->state.scissor_enabled = ~0;
         nvfx->state.stipple_enabled = ~0;
+       nvfx->use_vertex_buffers = -1;
  
         LIST_INITHEAD(&nvfx->render_cache);
  
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h

index bce19df044d46e331f17f79d4cfd3eeea2f66f07..8899bf991e123b8045dbde95acc306b461d5fa60 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -44,6 +44,7 @@
  #define NVFX_NEW_SR            (1 << 13)
  #define NVFX_NEW_VERTCONST     (1 << 14)
  #define NVFX_NEW_FRAGCONST     (1 << 15)
+#define NVFX_NEW_INDEX (1 << 16)
  
  struct nvfx_rasterizer_state {
         struct pipe_rasterizer_state pipe;
@@ -71,9 +72,53 @@ struct nvfx_state {
         unsigned render_temps;
  };
  
+struct nvfx_per_vertex_element { /* entry type of nvfx_vtxelt_state.per_vertex[] */
+       unsigned idx; /* NOTE(review): presumably the vertex-element slot this entry describes - confirm */
+        unsigned vertex_buffer_index; /* NOTE(review): presumably an index into nvfx_context.vtxbuf[] - confirm */
+        unsigned src_offset; /* NOTE(review): presumably a byte offset inside that buffer - confirm */
+};
+
+struct nvfx_low_frequency_element { /* entry type of nvfx_vtxelt_state.constant[]; also embedded as nvfx_per_instance_element.base */
+       unsigned idx; /* NOTE(review): presumably the vertex-element slot this entry describes - confirm */
+       unsigned vertex_buffer_index; /* NOTE(review): presumably an index into nvfx_context.vtxbuf[] - confirm */
+       unsigned src_offset; /* NOTE(review): presumably a byte offset inside that buffer - confirm */
+        void (*fetch_rgba_float)(float *dst, const uint8_t *src, unsigned i, unsigned j); /* converts raw bytes at src into floats at dst */
+        unsigned ncomp; /* NOTE(review): presumably how many of the fetched floats are emitted - confirm */
+};
+
+struct nvfx_per_instance_element { /* entry type of nvfx_vtxelt_state.per_instance[] */
+       struct nvfx_low_frequency_element base; /* shared description: slot, source location, fetch function, component count */
+       unsigned instance_divisor; /* NOTE(review): presumably instances drawn per advance of this element - confirm */
+};
+
+struct nvfx_per_vertex_buffer_info /* entry type of nvfx_vtxelt_state.per_vertex_buffer_info[] */
+{
+       unsigned vertex_buffer_index; /* NOTE(review): presumably an index into nvfx_context.vtxbuf[] - confirm */
+       unsigned per_vertex_size; /* NOTE(review): presumably bytes consumed from that buffer per vertex - confirm */
+};
+
  struct nvfx_vtxelt_state {
         struct pipe_vertex_element pipe[16];
         unsigned num_elements;
+       unsigned vtxfmt[16];
+       /* table of per-vertex buffer descriptors; the num_* field presumably counts valid entries - confirm */
+       unsigned num_per_vertex_buffer_infos;
+       struct nvfx_per_vertex_buffer_info per_vertex_buffer_info[16];
+       /* elements kept in the per-vertex list */
+       unsigned num_per_vertex;
+       struct nvfx_per_vertex_element per_vertex[16];
+       /* elements kept in the per-instance list (each entry carries an instance_divisor) */
+       unsigned num_per_instance;
+       struct nvfx_per_instance_element per_instance[16];
+       /* elements kept in the constant list */
+       unsigned num_constant;
+       struct nvfx_low_frequency_element constant[16];
+       /* NOTE(review): who creates and owns the translate object is not visible here - confirm */
+       boolean needs_translate;
+       struct translate* translate;
+       /* same-named fields exist in nvfx_push.c's struct push_context; presumably copied there per draw - confirm */
+       unsigned vertex_length;
+       unsigned max_vertices_per_packet;
  };
  
  struct nvfx_render_target {
@@ -127,8 +172,6 @@ struct nvfx_context {
         struct pipe_viewport_state viewport;
         struct pipe_framebuffer_state framebuffer;
         struct pipe_index_buffer idxbuf;
-       struct pipe_resource *idxbuf_buffer;
-       unsigned idxbuf_format;
         struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
         struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
         unsigned nr_samplers;
@@ -137,8 +180,14 @@ struct nvfx_context {
         struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
         unsigned vtxbuf_nr;
         struct nvfx_vtxelt_state *vtxelt;
+       int base_vertex;
+       boolean use_index_buffer;
+       /* -1 = hardware input setup is outdated
+        * 0 = hardware input setup is for inline vertices
+        * 1 = hardware input setup is for hardware vertices
+        */
+       int use_vertex_buffers;
  
-       unsigned vbo_bo;
         unsigned hw_vtxelt_nr;
         uint8_t hw_samplers;
         uint32_t hw_txf[8];
@@ -180,11 +229,7 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers,
  
  /* nvfx_draw.c */
  extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx);
-extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
-                                     struct pipe_resource *idxbuf,
-                                     unsigned ib_size, int ib_bias,
-                                     unsigned mode,
-                                     unsigned start, unsigned count);
+extern void nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info);
  extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
  
  /* nvfx_fb.c */
@@ -245,17 +290,53 @@ extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
  extern void nvfx_state_emit(struct nvfx_context *nvfx);
  
  /* nvfx_transfer.c */
-extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx);
+extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
  
  /* nvfx_vbo.c */
  extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx);
  extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);
+extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx);
+extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx);
  extern void nvfx_draw_vbo(struct pipe_context *pipe,
                            const struct pipe_draw_info *info);
+extern void nvfx_init_vbo_functions(struct nvfx_context *nvfx);
+extern unsigned nvfx_vertex_formats[];
  
  /* nvfx_vertprog.c */
  extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx);
  extern void nvfx_vertprog_destroy(struct nvfx_context *,
                                   struct nvfx_vertex_program *);
  
+/* nvfx_push.c */
+extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
+
+/* Emit one float vertex attribute as an immediate NV34TCL_VTX_ATTR_* method:
+ * a RING_3D header followed by the first ncomp floats of v.
+ *
+ * chan:   channel whose ring receives the words
+ * attrib: attribute slot handed to the NV34TCL_VTX_ATTR_* macros
+ * v:      at least ncomp floats; only read, hence const, so callers holding
+ *         const vertex data (e.g. nvfx_render_vertex) need no cast
+ * ncomp:  1..4; any other value emits nothing
+ *
+ * must WAIT_RING(chan, ncomp + 1) or equivalent beforehand!
+ */
+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, const float* v, unsigned ncomp)
+{
+       unsigned method, i;
+
+       switch (ncomp) {
+       case 4: method = NV34TCL_VTX_ATTR_4F_X(attrib); break;
+       case 3: method = NV34TCL_VTX_ATTR_3F_X(attrib); break;
+       case 2: method = NV34TCL_VTX_ATTR_2F_X(attrib); break;
+       case 1: method = NV34TCL_VTX_ATTR_1F(attrib); break;
+       default: return; /* unsupported component count: emit nothing, as before */
+       }
+
+       /* header first, then the components in order */
+       OUT_RING(chan, RING_3D(method, ncomp));
+       for (i = 0; i < ncomp; i++)
+               OUT_RING(chan, fui(v[i]));
+}
+
  #endif
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c

index 22cff370b77e44a3851a64704a68a6e04f9f19a3..331e28418adc681d1a029a445b85b9ad575c8c61 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -9,6 +9,7 @@
  #include "draw/draw_pipe.h"
  
  #include "nvfx_context.h"
+#include "nvfx_resource.h"
  
  /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
   * often at all.  Uses "quadro style" vertex submission + a fixed vertex
@@ -39,30 +40,21 @@ nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
                 unsigned idx = nvfx->swtnl.draw[i];
                 unsigned hw = nvfx->swtnl.hw[i];
  
+               WAIT_RING(chan, 5);
                 switch (nvfx->swtnl.emit[i]) {
                 case EMIT_OMIT:
                         break;
                 case EMIT_1F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1);
                         break;
                 case EMIT_2F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
-                       OUT_RING  (chan, fui(v->data[idx][1]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2);
                         break;
                 case EMIT_3F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
-                       OUT_RING  (chan, fui(v->data[idx][1]));
-                       OUT_RING  (chan, fui(v->data[idx][2]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3);
                         break;
                 case EMIT_4F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
-                       OUT_RING  (chan, fui(v->data[idx][1]));
-                       OUT_RING  (chan, fui(v->data[idx][2]));
-                       OUT_RING  (chan, fui(v->data[idx][3]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4);
                         break;
                 case 0xff:
                         BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
@@ -231,15 +223,9 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx)
  }
  
  void
-nvfx_draw_elements_swtnl(struct pipe_context *pipe,
-                        struct pipe_resource *idxbuf,
-                        unsigned idxbuf_size, int idxbuf_bias,
-                        unsigned mode, unsigned start, unsigned count)
+nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info)
  {
         struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
-       struct pipe_transfer *ib_transfer = NULL;
-       struct pipe_transfer *cb_transfer = NULL;
         unsigned i;
         void *map;
  
@@ -247,18 +233,15 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
                 return;
         nvfx_state_emit(nvfx);
  
+       /* these must be passed without adding the offsets */
         for (i = 0; i < nvfx->vtxbuf_nr; i++) {
-               map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer,
-                                      PIPE_TRANSFER_READ,
-                                     &vb_transfer[i]);
+               map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data;
                 draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
         }
  
-       if (idxbuf) {
-               map = pipe_buffer_map(pipe, idxbuf,
-                                     PIPE_TRANSFER_READ,
-                                     &ib_transfer);
-               draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map);
+       if (info->indexed) {
+               map = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+               draw_set_mapped_element_buffer_range(nvfx->draw, nvfx->idxbuf.index_size, info->index_bias, info->min_index, info->max_index, map);
         } else {
                 draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL);
         }
@@ -266,28 +249,14 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
         if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
                 const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
  
-               map = pipe_buffer_map(pipe,
-                                     nvfx->constbuf[PIPE_SHADER_VERTEX],
-                                     PIPE_TRANSFER_READ,
-                                     &cb_transfer);
+               map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data;
                 draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
                                                  map, nr);
         }
  
-       draw_arrays(nvfx->draw, mode, start, count);
-
-       for (i = 0; i < nvfx->vtxbuf_nr; i++)
-               pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]);
-
-       if (idxbuf)
-               pipe_buffer_unmap(pipe, idxbuf, ib_transfer);
-
-       if (nvfx->constbuf[PIPE_SHADER_VERTEX])
-               pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX],
-                                 cb_transfer);
+       draw_arrays_instanced(nvfx->draw, info->mode, info->start, info->count, info->start_instance, info->instance_count);
  
         draw_flush(nvfx->draw);
-       pipe->flush(pipe, 0, NULL);
  }
  
  static INLINE void
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c

index ee41f03b9b8fd7064f64ff75efeb63f266c427c5..ae4fe3aa262cf4ebb867335a04ca1d96130a9d93 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -9,6 +9,7 @@
  
  #include "nvfx_context.h"
  #include "nvfx_shader.h"
+#include "nvfx_resource.h"
  
  #define MAX_CONSTS 128
  #define MAX_IMM 32
@@ -925,10 +926,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
  
                 if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
                         struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
-                       // TODO: avoid using transfers, just directly the buffer
-                       struct pipe_transfer* transfer;
-                       // TODO: does this check make any sense, or should we do this unconditionally?
-                       uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer);
+                       uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
                         uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
                         uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
                         int i;
@@ -942,7 +940,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
                                         nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
                                 }
                         }
-                       pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer);
                 }
         }
  
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c

new file mode 100644 (file)

index 0000000..52e891c
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_push.c
@@ -0,0 +1,402 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_split_prim.h"
+#include "translate/translate.h"
+
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+
+/* State shared by the emit callbacks in this file; nvfx_push_vbo fills it in
+ * and hands it to util_split_prim as the opaque priv pointer.
+ */
+struct push_context {
+       /* channel whose ring the callbacks write into */
+       struct nouveau_channel* chan;
+
+       /* CPU pointer to the index data; stays NULL unless indices are read on the CPU */
+       void *idxbuf;
+       /* value added to indices by the emit_elt* callbacks */
+       int32_t idxbias;
+
+       /* edge flag state: nvfx_push_vbo sets these to 0.5f and 0xff and tests
+        * edgeflag_attr < 16 when sizing a vertex; edgeflag is not read in this file
+        */
+       float edgeflag;
+       int edgeflag_attr;
+
+       /* how far chan->cur advances for one translated vertex */
+       unsigned vertex_length;
+       /* upper bound on the vertices placed in one NV34TCL_VERTEX_DATA packet */
+       unsigned max_vertices_per_packet;
+
+       /* translate object that writes the converted vertices into the ring */
+       struct translate* translate;
+};
+
+/* util_split_prim edge callback: write the requested edge flag enable state
+ * to the ring.
+ */
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+       struct push_context *push_ctx = priv;
+       struct nouveau_channel *ring = push_ctx->chan;
+       unsigned value = enabled ? 1 : 0;
+
+       OUT_RING(ring, RING_3D(NV34TCL_EDGEFLAG_ENABLE, 1));
+       OUT_RING(ring, value);
+}
+
+/* Push vertices selected by 8-bit indices: translate looks each index up and
+ * writes the converted vertex straight into the ring, one packet per batch.
+ */
+static void
+emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
+{
+       struct push_context *ctx = priv;
+       uint8_t *indices = (uint8_t *)ctx->idxbuf + start;
+       unsigned remaining;
+       unsigned batch;
+
+       for (remaining = count; remaining; remaining -= batch, indices += batch) {
+               unsigned words;
+
+               batch = MIN2(remaining, ctx->max_vertices_per_packet);
+               words = batch * ctx->vertex_length;
+
+               /* header first, then translate fills the payload in place */
+               OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, words));
+               ctx->translate->run_elts8(ctx->translate, indices, batch, 0, ctx->chan->cur);
+               ctx->chan->cur += words;
+       }
+}
+
+/* Push vertices selected by 16-bit indices: translate looks each index up and
+ * writes the converted vertex straight into the ring, one packet per batch.
+ */
+static void
+emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
+{
+       struct push_context *ctx = priv;
+       uint16_t *indices = (uint16_t *)ctx->idxbuf + start;
+       unsigned remaining;
+       unsigned batch;
+
+       for (remaining = count; remaining; remaining -= batch, indices += batch) {
+               unsigned words;
+
+               batch = MIN2(remaining, ctx->max_vertices_per_packet);
+               words = batch * ctx->vertex_length;
+
+               /* header first, then translate fills the payload in place */
+               OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, words));
+               ctx->translate->run_elts16(ctx->translate, indices, batch, 0, ctx->chan->cur);
+               ctx->chan->cur += words;
+       }
+}
+
+/* Push vertices selected by 32-bit indices: translate looks each index up and
+ * writes the converted vertex straight into the ring, one packet per batch.
+ */
+static void
+emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
+{
+       struct push_context *ctx = priv;
+       uint32_t *indices = (uint32_t *)ctx->idxbuf + start;
+       unsigned remaining;
+       unsigned batch;
+
+       for (remaining = count; remaining; remaining -= batch, indices += batch) {
+               unsigned words;
+
+               batch = MIN2(remaining, ctx->max_vertices_per_packet);
+               words = batch * ctx->vertex_length;
+
+               /* header first, then translate fills the payload in place */
+               OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, words));
+               ctx->translate->run_elts(ctx->translate, indices, batch, 0, ctx->chan->cur);
+               ctx->chan->cur += words;
+       }
+}
+
+/* Push a run of consecutive vertices: translate converts vertices
+ * [start, start + count) and writes them straight into the ring.
+ */
+static void
+emit_vertices(void *priv, unsigned start, unsigned count)
+{
+        struct push_context *ctx = priv;
+        unsigned first = start;
+        unsigned remaining;
+        unsigned batch;
+
+        for (remaining = count; remaining; remaining -= batch, first += batch) {
+               unsigned words;
+
+               batch = MIN2(remaining, ctx->max_vertices_per_packet);
+               words = batch * ctx->vertex_length;
+
+               /* header first, then translate fills the payload in place */
+               OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, words));
+               ctx->translate->run(ctx->translate, first, batch, 0, ctx->chan->cur);
+               ctx->chan->cur += words;
+        }
+}
+
+/* Emit vc consecutive elements beginning at start as batch words written to
+ * method reg. Each word holds (batch size - 1) in its top byte and the first
+ * element below it: the vc % 256 remainder goes out first, followed by
+ * full 256-element batches in packets of at most 2047 words.
+ */
+static void
+emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       unsigned remainder = vc & 0xff;
+       unsigned full_batches = vc >> 8;
+
+       if (remainder) {
+               OUT_RING(chan, RING_3D(reg, 1));
+               OUT_RING  (chan, ((remainder - 1) << 24) | start);
+               start += remainder;
+       }
+
+       while (full_batches) {
+               unsigned words = full_batches;
+               unsigned k;
+
+               if (words > 2047)
+                       words = 2047;
+               full_batches -= words;
+
+               OUT_RING(chan, RING_3D_NI(reg, words));
+               for (k = 0; k < words; ++k) {
+                       OUT_RING(chan, ((0x100 - 1) << 24) | start);
+                       start += 0x100;
+               }
+       }
+}
+
+/* emit_ranges bound to the index-buffer batch method */
+static void
+emit_ib_ranges(void* priv, unsigned start, unsigned vc)
+{
+       const unsigned reg = NV34TCL_VB_INDEX_BATCH;
+
+       emit_ranges(priv, start, vc, reg);
+}
+
+/* emit_ranges bound to the vertex-buffer batch method */
+static void
+emit_vb_ranges(void* priv, unsigned start, unsigned vc)
+{
+       const unsigned reg = NV34TCL_VB_VERTEX_BATCH;
+
+       emit_ranges(priv, start, vc, reg);
+}
+
+/* Push vc 8-bit indices, starting at element start of ctx->idxbuf, inline on
+ * the ring with ctx->idxbias added to every index. An odd leading index is
+ * sent alone through VB_ELEMENT_U32; the rest are packed two per word (first
+ * index in the low half) through VB_ELEMENT_U16, at most 2047 words per packet.
+ */
+static INLINE void
+emit_elt8(void* priv, unsigned start, unsigned vc)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
+       int idxbias = ctx->idxbias;
+
+       if (vc & 1) {
+               OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+               /* the unpaired index must be biased like the packed ones below */
+               OUT_RING  (chan, elts[0] + idxbias);
+               elts++; vc--;
+       }
+
+       while (vc) {
+               unsigned i;
+               unsigned push = MIN2(vc, 2047 * 2);
+
+               OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+               /* NOTE(review): assumes each biased index fits in 16 bits - confirm */
+               for (i = 0; i < push; i+=2)
+                       OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+               vc -= push;
+               elts += push;
+       }
+}
+
+/* Push vc 16-bit indices, starting at element start of ctx->idxbuf, inline on
+ * the ring with ctx->idxbias added to every index. An odd leading index is
+ * sent alone through VB_ELEMENT_U32; the rest are packed two per word (first
+ * index in the low half) through VB_ELEMENT_U16, at most 2047 words per packet.
+ */
+static INLINE void
+emit_elt16(void* priv, unsigned start, unsigned vc)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
+       int idxbias = ctx->idxbias;
+
+       if (vc & 1) {
+               OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+               /* the unpaired index must be biased like the packed ones below */
+               OUT_RING  (chan, elts[0] + idxbias);
+               elts++; vc--;
+       }
+
+       while (vc) {
+               unsigned i;
+               unsigned push = MIN2(vc, 2047 * 2);
+
+               OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+               /* NOTE(review): assumes each biased index fits in 16 bits - confirm */
+               for (i = 0; i < push; i+=2)
+                       OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+               vc -= push;
+               elts += push;
+       }
+}
+
+/* Push vc 32-bit indices, starting at element start of ctx->idxbuf, inline on
+ * the ring through VB_ELEMENT_U32, at most 2047 per packet. Indices are
+ * copied in bulk when there is no bias, otherwise biased one by one.
+ */
+static INLINE void
+emit_elt32(void* priv, unsigned start, unsigned vc)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       uint32_t *indices = (uint32_t *)ctx->idxbuf + start;
+       int bias = ctx->idxbias;
+       unsigned remaining = vc;
+
+       while (remaining) {
+               unsigned batch = MIN2(remaining, 2047);
+               unsigned k;
+
+               OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, batch));
+               assert(AVAIL_RING(chan) >= batch);
+               if (!bias) {
+                       OUT_RINGp(chan, indices, batch);
+               } else {
+                       for (k = 0; k < batch; ++k)
+                               OUT_RING(chan, indices[k] + bias);
+               }
+
+               remaining -= batch;
+               indices += batch;
+       }
+}
+
+/* Draw the primitives described by info by pushing commands on the channel's ring.
+ *
+ * Depending on nvfx->use_vertex_buffers and nvfx->use_index_buffer this
+ * selects one of the emit callbacks above (inline vertex data, vertex/index
+ * batch ranges, or inline indices) and drives it through util_split_prim so
+ * that each BEGIN/END section fits in the ring space currently available.
+ * Per-instance attributes are emitted as constant vertex attributes and
+ * refreshed between instances.
+ */
+void
+nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+       struct nvfx_context *nvfx = nvfx_context(pipe);
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
+       struct push_context ctx;
+       struct util_split_prim s;
+       unsigned instances_left = info->instance_count;
+       int vtx_value;
+       unsigned hw_mode = nvgl_primitive(info->mode);
+       int i;
+       /* per-instance element cursor: current source pointer and the number of
+        * instances drawn since it last advanced
+        */
+       struct
+       {
+               uint8_t* map;
+               unsigned step;
+       } per_instance[16];
+       /* ring words reserved per BEGIN/END section in addition to the vertices */
+       unsigned p_overhead = 0
+                       + 4 /* begin/end */
+                       + 4; /* potential edgeflag enable/disable */
+
+       ctx.chan = nvfx->screen->base.channel;
+       ctx.translate = nvfx->vtxelt->translate;
+       ctx.idxbuf = NULL;
+       ctx.vertex_length = nvfx->vtxelt->vertex_length;
+       ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
+       ctx.edgeflag = 0.5f;
+       // TODO: figure out if we really want to handle this, and do so in that case
+       ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;
+
+       if(!nvfx->use_vertex_buffers)
+       {
+               /* inline vertex data: point translate at the CPU copy of each vertex buffer */
+               for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+               {
+                       struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                       uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
+                       /* fold the index bias into the base pointer so lookups need no bias */
+                       if(info->indexed)
+                               data += info->index_bias * vb->stride;
+                       ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+               }
+
+               if(ctx.edgeflag_attr < 16)
+                       vtx_value = -(ctx.vertex_length + 3);  /* vertex data and edgeflag header and value */
+               else
+               {
+                       p_overhead += 1; /* initial vertex_data header */
+                       vtx_value = -ctx.vertex_length;  /* vertex data only; the header is counted in p_overhead */
+               }
+
+               if (info->indexed) {
+                       // XXX: this case is broken and probably needs a new VTX_ATTR push path
+                       if (nvfx->idxbuf.index_size == 1)
+                               s.emit = emit_vertices_lookup8;
+                       else if (nvfx->idxbuf.index_size == 2)
+                               s.emit = emit_vertices_lookup16;
+                       else
+                               s.emit = emit_vertices_lookup32;
+               } else
+                       s.emit = emit_vertices;
+       }
+       else
+       {
+               /* vertex data is fetched from buffers; only ranges or indices are pushed */
+               if(!info->indexed || nvfx->use_index_buffer)
+               {
+                       s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
+                       p_overhead += 3;
+                       vtx_value = 0;
+               }
+               else if (nvfx->idxbuf.index_size == 4)
+               {
+                       s.emit = emit_elt32;
+                       p_overhead += 1;
+                       vtx_value = 8;
+               }
+               else
+               {
+                       s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
+                       p_overhead += 3;
+                       vtx_value = 7;
+               }
+       }
+
+       ctx.idxbias = info->index_bias;
+       if(nvfx->use_vertex_buffers)
+               ctx.idxbias -= nvfx->base_vertex;
+
+       /* map index buffer, if present */
+       if (info->indexed && !nvfx->use_index_buffer)
+               ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+
+       s.priv = &ctx;
+       s.edge = emit_edgeflag;
+
+       /* emit the initial value of every per-instance attribute */
+       for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+       {
+               struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+               struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+               float v[4];
+               /* NOTE(review): step accounts for start_instance % divisor, but map is not
+                * advanced by (start_instance / divisor) strides - confirm this is intended
+                */
+               per_instance[i].step = info->start_instance % ve->instance_divisor;
+               per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;
+
+               nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+
+               WAIT_RING(chan, 5);
+               nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+       }
+
+       /* per-instance loop */
+       while (instances_left--) {
+               int max_verts;
+               boolean done;
+
+               util_split_prim_init(&s, info->mode, info->start, info->count);
+               nvfx_state_emit(nvfx);
+               /* emit BEGIN/END sections until the splitter reports the primitive is done */
+               for(;;) {
+                       max_verts  = AVAIL_RING(chan);
+                       max_verts -= p_overhead;
+
+                       /* if vtx_value < 0, each vertex is -vtx_value words long
+                        * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
+                        */
+                       if(vtx_value < 0)
+                       {
+                               max_verts /= -vtx_value;
+                               max_verts -= (max_verts >> 10); /* vertex data headers */
+                       }
+                       else
+                       {
+                               if(max_verts >= (1 << 23)) /* avoid overflow here */
+                                       max_verts = (1 << 23);
+                               max_verts = (max_verts * 255) >> vtx_value;
+                       }
+
+                       //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);
+
+                       /* with room for fewer than 16 vertices, flush and retry instead */
+                       if(max_verts >= 16)
+                       {
+                               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+                               OUT_RING(chan, hw_mode);
+                               done = util_split_prim_next(&s, max_verts);
+                               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+                               OUT_RING(chan, 0);
+
+                               if(done)
+                                       break;
+                       }
+
+                       /* submit what was queued and re-emit state before continuing */
+                       FIRE_RING(chan);
+                       nvfx_state_emit(nvfx);
+               }
+
+               /* set data for the next instance, if any changed */
+               for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+               {
+                       struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+
+                       /* advance to the next element once instance_divisor instances used this one */
+                       if(++per_instance[i].step == ve->instance_divisor)
+                       {
+                               float v[4];
+                               per_instance[i].map += vb->stride;
+                               per_instance[i].step = 0;
+
+                               nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+                               WAIT_RING(chan, 5);
+                               nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+                       }
+               }
+       }
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c

index 1c921b471004649995d39e25c13d4b44c1e38b88..3a46e0a7a5788f0890f3ffe15b20700e6aa86c91 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_resource.c
+++ b/src/gallium/drivers/nvfx/nvfx_resource.c
@@ -59,12 +59,6 @@ nvfx_resource_get_handle(struct pipe_screen *pscreen,
  void
  nvfx_init_resource_functions(struct pipe_context *pipe)
  {
-       pipe->get_transfer = nvfx_transfer_new;
-       pipe->transfer_map = nvfx_transfer_map;
-       pipe->transfer_flush_region = u_default_transfer_flush_region;
-       pipe->transfer_unmap = nvfx_transfer_unmap;
-       pipe->transfer_destroy = util_staging_transfer_destroy;
-       pipe->transfer_inline_write = u_default_transfer_inline_write;
         pipe->is_resource_referenced = nvfx_resource_is_referenced;
  }
  
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h

index ff86f6d9cb6809582fa3ea8c4f2b55f6c46e33d0..583be4de2ae2596bca94c091fd1163fd701254b3 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_resource.h
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -17,8 +17,23 @@ struct nvfx_resource {
         struct nouveau_bo *bo;
  };
  
+/* View a generic pipe_resource pointer as the driver's nvfx_resource. */
+static INLINE struct nvfx_resource *
+nvfx_resource(struct pipe_resource *resource)
+{
+       struct nvfx_resource *res = (struct nvfx_resource *)resource;
+
+       return res;
+}
+
  #define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+#define NVFX_RESOURCE_FLAG_USER (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ?
+ * Reports whether the resource's buffer object has a nonzero handle.
+ */
+static INLINE boolean
+nvfx_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+   /* compare explicitly: returning the raw handle would convert it to
+    * boolean, and a narrow boolean type could turn a nonzero handle into 0
+    */
+   return nvfx_resource(resource)->bo->handle != 0;
+}
  
+/* is resource in VRAM? */
  static inline int
  nvfx_resource_on_gpu(struct pipe_resource* pr)
  {
@@ -63,12 +78,6 @@ struct nvfx_surface {
         struct nvfx_miptree* temp;
  };
  
-static INLINE 
-struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
-{
-       return (struct nvfx_resource *)resource;
-}
-
  static INLINE struct nouveau_bo *
  nvfx_surface_buffer(struct pipe_surface *surf)
  {
@@ -106,22 +115,6 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen,
                          const struct pipe_resource *template,
                          struct winsys_handle *whandle);
  
-struct pipe_resource *
-nvfx_buffer_create(struct pipe_screen *pscreen,
-                  const struct pipe_resource *template);
-
-void
-nvfx_buffer_destroy(struct pipe_screen *pscreen,
-                    struct pipe_resource *presource);
-
-struct pipe_resource *
-nvfx_user_buffer_create(struct pipe_screen *screen,
-                       void *ptr,
-                       unsigned bytes,
-                       unsigned usage);
-
-
-
  void
  nvfx_miptree_surface_del(struct pipe_surface *ps);
  
@@ -173,4 +166,58 @@ nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf);
  void
  nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf);
  
+/* Buffer resource: an nvfx_resource extended with a CPU-visible data pointer
+ * and the bookkeeping used to decide when to upload it to the GPU.
+ */
+struct nvfx_buffer
+{
+       /* must stay first: pipe_resource pointers are cast to nvfx_buffer */
+       struct nvfx_resource base;
+       /* CPU pointer to the buffer contents; the push and fragprog code read through it */
+       uint8_t* data;
+       /* presumably the size of data in bytes - TODO confirm */
+       unsigned size;
+
+       /* the range of data not yet uploaded to the GPU bo */
+       unsigned dirty_begin;
+       unsigned dirty_end;
+
+       /* whether all transfers were unsynchronized */
+       boolean dirty_unsynchronized;
+
+       /* whether it would have been profitable to upload
+        * the latest updated data to the GPU immediately */
+       boolean last_update_static;
+
+       /* how many bytes we need to draw before we deem
+        * the buffer to be static; a negative value means the
+        * buffer is already considered static
+        */
+       long long bytes_to_draw_until_static;
+};
+
+/* View a generic pipe_resource pointer as the driver's nvfx_buffer. */
+static inline struct nvfx_buffer* nvfx_buffer(struct pipe_resource* pr)
+{
+       struct nvfx_buffer* buf = (struct nvfx_buffer*)pr;
+
+       return buf;
+}
+
+/* Heuristic: decide whether uploading the buffer to the GPU is likely to pay
+ * off compared with continuing to push its contents on the FIFO.
+ * True when the last update was flagged static or the draw budget ran out.
+ */
+static inline boolean nvfx_buffer_seems_static(struct nvfx_buffer* buffer)
+{
+       if (!buffer->last_update_static)
+               return buffer->bytes_to_draw_until_static < 0;
+
+       return 1;
+}
+
+struct pipe_resource *
+nvfx_buffer_create(struct pipe_screen *pscreen,
+                  const struct pipe_resource *template);
+
+void
+nvfx_buffer_destroy(struct pipe_screen *pscreen,
+                    struct pipe_resource *presource);
+
+struct pipe_resource *
+nvfx_user_buffer_create(struct pipe_screen *screen,
+                       void *ptr,
+                       unsigned bytes,
+                       unsigned usage);
+
+void
+nvfx_buffer_upload(struct nvfx_buffer* buffer);
+
  #endif
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c

index a1b8361a9a47595f35e6d44d22b022cb3e76a6d9..7e3caf8d2e372fcfe38f2ce799c7f6023a3d47a4 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -163,11 +163,11 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
  }
  
  static boolean
-nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
+nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
                                      enum pipe_format format,
                                      enum pipe_texture_target target,
                                      unsigned sample_count,
-                                    unsigned tex_usage, unsigned geom_flags)
+                                    unsigned bind, unsigned geom_flags)
  {
         struct nvfx_screen *screen = nvfx_screen(pscreen);
         struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
@@ -175,7 +175,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
          if (sample_count > 1)
                 return FALSE;
  
-       if (tex_usage & PIPE_BIND_RENDER_TARGET) {
+       if (bind & PIPE_BIND_RENDER_TARGET) {
                 switch (format) {
                 case PIPE_FORMAT_B8G8R8A8_UNORM:
                 case PIPE_FORMAT_B8G8R8X8_UNORM:
@@ -186,7 +186,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
                 }
         }
  
-       if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
+       if (bind & PIPE_BIND_DEPTH_STENCIL) {
                 switch (format) {
                 case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
                 case PIPE_FORMAT_X8Z24_UNORM:
@@ -201,7 +201,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
                 }
         }
  
-       if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
+       if (bind & PIPE_BIND_SAMPLER_VIEW) {
                 struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
                 if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
                         return FALSE;
@@ -218,6 +218,22 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
                 }
         }
  
+       // note that we do actually support everything through translate
+       if (bind & PIPE_BIND_VERTEX_BUFFER) {
+               unsigned type = nvfx_vertex_formats[format];
+               if(!type)
+                       return FALSE;
+       }
+
+       if (bind & PIPE_BIND_INDEX_BUFFER) {
+               // 8-bit indices supported, but not in hardware index buffer
+               if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED)
+                       return FALSE;
+       }
+
+       if(bind & PIPE_BIND_STREAM_OUTPUT)
+               return FALSE;
+
         return TRUE;
  }
  
@@ -387,7 +403,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
         pscreen->destroy = nvfx_screen_destroy;
         pscreen->get_param = nvfx_screen_get_param;
         pscreen->get_paramf = nvfx_screen_get_paramf;
-       pscreen->is_format_supported = nvfx_screen_surface_format_supported;
+       pscreen->is_format_supported = nvfx_screen_is_format_supported;
         pscreen->context_create = nvfx_create;
  
         switch (dev->chipset & 0xf0) {
@@ -419,6 +435,11 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
         }
  
         screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+       screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
+
+       screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
+       screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
+       screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
  
         screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
  
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h

index 4dedbe9cb40ee0ff7e61a551006f9c325295a76e..473a1127752d440035716a929a6169b8c1c2a2fc 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_screen.h
+++ b/src/gallium/drivers/nvfx/nvfx_screen.h
@@ -16,6 +16,7 @@ struct nvfx_screen {
  
         unsigned is_nv4x; /* either 0 or ~0 */
         boolean force_swtnl;
+       boolean trace_draw;
         unsigned vertex_buffer_reloc_flags;
         unsigned index_buffer_reloc_flags;
  
@@ -33,6 +34,18 @@ struct nvfx_screen {
         struct nouveau_resource *vp_data_heap;
  
         struct nv04_2d_context* eng2d;
+
+       /* Once the number of bytes drawn from the buffer reaches the updated size times this value,
+        * we will assume that the buffer will be drawn a huge number of times before the
+        * next modification
+        */
+       float static_reuse_threshold;
+
+       /* Cost of allocating a buffer in terms of the cost of copying a byte to a hardware buffer */
+       unsigned buffer_allocation_cost;
+
+       /* inline_cost/hardware_cost conversion ratio */
+       float inline_cost_per_hardware_cost;
  };
  
  static INLINE struct nvfx_screen *
diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c

index d459f9a88013ccabc43404275abe71e4951f71cb..25d29720a853c140b5ed8537b2f4b3c30bfec078 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_state.c
+++ b/src/gallium/drivers/nvfx/nvfx_state.c
@@ -441,83 +441,6 @@ nvfx_set_viewport_state(struct pipe_context *pipe,
         nvfx->draw_dirty |= NVFX_NEW_VIEWPORT;
  }
  
-static void
-nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
-                       const struct pipe_vertex_buffer *vb)
-{
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-
-       for(unsigned i = 0; i < count; ++i)
-       {
-               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
-               nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
-               nvfx->vtxbuf[i].max_index = vb[i].max_index;
-               nvfx->vtxbuf[i].stride = vb[i].stride;
-       }
-
-       for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
-               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
-
-       nvfx->vtxbuf_nr = count;
-
-       nvfx->dirty |= NVFX_NEW_ARRAYS;
-       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
-}
-
-static void
-nvfx_set_index_buffer(struct pipe_context *pipe,
-                     const struct pipe_index_buffer *ib)
-{
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-
-       /* TODO make this more like a state */
-
-       if(ib)
-       {
-               pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
-               nvfx->idxbuf.index_size = ib->index_size;
-               nvfx->idxbuf.offset = ib->offset;
-       }
-       else
-       {
-               pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
-               nvfx->idxbuf.index_size = 0;
-               nvfx->idxbuf.offset = 0;
-       }
-}
-
-static void *
-nvfx_vtxelts_state_create(struct pipe_context *pipe,
-                         unsigned num_elements,
-                         const struct pipe_vertex_element *elements)
-{
-       struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
-
-       assert(num_elements < 16); /* not doing fallbacks yet */
-       cso->num_elements = num_elements;
-       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
-
-/*     nvfx_vtxelt_construct(cso);*/
-
-       return (void *)cso;
-}
-
-static void
-nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
-{
-       FREE(hwcso);
-}
-
-static void
-nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
-{
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-
-       nvfx->vtxelt = hwcso;
-       nvfx->dirty |= NVFX_NEW_ARRAYS;
-       /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/
-}
-
  void
  nvfx_init_state_functions(struct nvfx_context *nvfx)
  {
@@ -553,11 +476,4 @@ nvfx_init_state_functions(struct nvfx_context *nvfx)
         nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple;
         nvfx->pipe.set_scissor_state = nvfx_set_scissor_state;
         nvfx->pipe.set_viewport_state = nvfx_set_viewport_state;
-
-       nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
-       nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
-       nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
-
-       nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
-       nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
  }
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c

index dc70f3de87020acf01b9a7ee6451a228ecf0f5fa..b9d189779197b57ac5d496b57a1311629419ba50 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -8,6 +8,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
  {
         struct nouveau_channel* chan = nvfx->screen->base.channel;
         unsigned dirty;
+       unsigned still_dirty = 0;
         int all_swizzled = -1;
         boolean flush_tex_cache = FALSE;
  
@@ -52,11 +53,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                                 return FALSE;
                 }
  
-               if(dirty & (NVFX_NEW_ARRAYS))
+               if(dirty & NVFX_NEW_ARRAYS)
                 {
                         if(!nvfx_vbo_validate(nvfx))
                                 return FALSE;
                 }
+
+               if(dirty & NVFX_NEW_INDEX)
+               {
+                       if(nvfx->use_index_buffer)
+                               nvfx_idxbuf_validate(nvfx);
+                       else
+                               still_dirty = NVFX_NEW_INDEX;
+               }
         }
         else
         {
@@ -64,7 +73,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                 if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
                         nvfx_vertprog_validate(nvfx);
  
-               if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG))
+               if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG))
                         nvfx_vtxfmt_validate(nvfx);
         }
  
@@ -118,7 +127,24 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                         OUT_RING(chan, 1);
                 }
         }
-       nvfx->dirty = 0;
+
+       nvfx->dirty = dirty & still_dirty;
+
+       unsigned render_temps = nvfx->state.render_temps;
+       if(render_temps)
+       {
+               for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+               {
+                       if(render_temps & (1 << i))
+                               util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+                                               (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+               }
+
+               if(render_temps & 0x80)
+                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+                                       (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+       }
+
         return TRUE;
  }
  
@@ -137,21 +163,6 @@ nvfx_state_emit(struct nvfx_context *nvfx)
               ;
         MARK_RING(chan, max_relocs * 2, max_relocs * 2);
         nvfx_state_relocate(nvfx);
-
-       unsigned render_temps = nvfx->state.render_temps;
-       if(render_temps)
-       {
-               for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
-               {
-                       if(render_temps & (1 << i))
-                               util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
-                                               (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
-               }
-
-               if(render_temps & 0x80)
-                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
-                                       (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
-       }
  }
  
  void
@@ -161,7 +172,11 @@ nvfx_state_relocate(struct nvfx_context *nvfx)
         nvfx_fragtex_relocate(nvfx);
         nvfx_fragprog_relocate(nvfx);
         if (nvfx->render_mode == HW)
+       {
                 nvfx_vbo_relocate(nvfx);
+               if(nvfx->use_index_buffer)
+                       nvfx_idxbuf_relocate(nvfx);
+       }
  }
  
  boolean
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c

index 80b0f21575fc7280ebf67a3f0fb22c02d6e786dd..28bbd36c2e81ae0d0936b6f3014ea85f6360e51f 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -1,6 +1,5 @@
  #include "nvfx_context.h"
  #include "nvfx_resource.h"
-#include "nouveau/nouveau_util.h"
  #include "util/u_format.h"
  
  static inline boolean
@@ -125,8 +124,8 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
                 assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
  
                 rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
-                       (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
-                       (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+                       (util_logbase2(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+                       (util_logbase2(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
         } else
                 rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
  
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c

index 7efdd954b4b4183db7fcc1df1f5a4944174658f7..135978ad2748b9737a85fe5779ac70f4b057fc78 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -36,7 +36,6 @@
  #include "util/u_blitter.h"
  
  #include "nouveau/nouveau_winsys.h"
-#include "nouveau/nouveau_util.h"
  #include "nouveau/nouveau_screen.h"
  #include "nvfx_context.h"
  #include "nvfx_screen.h"
@@ -62,7 +61,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
                 break;
         default:
                 assert(util_is_pot(bits));
-               int shift = log2i(bits) - 3;
+               int shift = util_logbase2(bits) - 3;
                 assert(shift >= 2);
                 rgn->bpps = 2;
                 shift -= 2;
@@ -365,25 +364,29 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int
  {
         struct nvfx_surface* ns = (struct nvfx_surface*)surf;
         struct pipe_subresource tempsr, surfsr;
-       struct pipe_resource *idxbuf_buffer;
-       unsigned idxbuf_format;
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+
+       // TODO: we really should do this validation before setting these variables in draw calls
+       unsigned use_vertex_buffers = nvfx->use_vertex_buffers;
+       boolean use_index_buffer = nvfx->use_index_buffer;
+       unsigned base_vertex = nvfx->base_vertex;
  
         tempsr.face = 0;
         tempsr.level = 0;
         surfsr.face = surf->face;
         surfsr.level = surf->level;
  
-       // TODO: do this properly, in blitter save
-       idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer;
-       idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format;
-
         if(to_temp)
                 nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
         else
                 nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
  
-       ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer;
-       ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format;
+       nvfx->use_vertex_buffers = use_vertex_buffers;
+       nvfx->use_index_buffer = use_index_buffer;
+        nvfx->base_vertex = base_vertex;
+
+       nvfx->dirty |= NVFX_NEW_ARRAYS;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
  }
  
  void
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c

index e9c3dd7e551332c09c9c9a5eee8ec1441160d925..ca4462ef9dcf296b273a024b30f89974891202fd 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -26,25 +26,44 @@ nvfx_transfer_new(struct pipe_context *pipe,
                           unsigned usage,
                           const struct pipe_box *box)
  {
-       struct nvfx_staging_transfer* tx;
-       bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
-
-       tx = CALLOC_STRUCT(nvfx_staging_transfer);
-       if(!tx)
-               return NULL;
-
-       util_staging_transfer_init(pipe, pt, sr, usage, box, direct, tx);
+        if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK)
+        {
+                struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo;
+                if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR))
+                        return NULL;
+        }
  
         if(pt->target == PIPE_BUFFER)
         {
-               tx->base.base.slice_stride = tx->base.base.stride = ((struct nvfx_resource*)tx->base.staging_resource)->bo->size;
-               if(direct)
-                       tx->offset = util_format_get_stride(pt->format, box->x);
-               else
-                       tx->offset = 0;
+               // it would be nice if we could avoid all this ridiculous overhead...
+               struct pipe_transfer* tx;
+               struct nvfx_buffer* buffer = nvfx_buffer(pt);
+
+               tx = CALLOC_STRUCT(pipe_transfer);
+               if (!tx)
+                       return NULL;
+
+               pipe_resource_reference(&tx->resource, pt);
+               tx->sr = sr;
+               tx->usage = usage;
+               tx->box = *box;
+
+               tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width);
+               tx->data = buffer->data + util_format_get_stride(pt->format, box->x);
+
+               return tx;
         }
         else
         {
+               struct nvfx_staging_transfer* tx;
+               bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
+
+               tx = CALLOC_STRUCT(nvfx_staging_transfer);
+               if(!tx)
+                       return NULL;
+
+               util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base);
+
                 if(direct)
                 {
                         tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level);
@@ -66,26 +85,132 @@ nvfx_transfer_new(struct pipe_context *pipe,
         }
  }
  
+/*
+ * Record that bytes [begin, begin + size) of the buffer have been modified.
+ *
+ * The buffer tracks a single dirty interval [dirty_begin, dirty_end); the new
+ * range either becomes that interval (when it is currently empty) or widens
+ * it so that it covers both.  The bytes_to_draw_until_static counter is
+ * updated as well.
+ *
+ * buffer:          buffer whose contents were written
+ * begin:           start of the written range, in bytes
+ * size:            length of the written range, in bytes
+ * unsynchronized:  TRUE if the write was flagged as unsynchronized
+ */
+static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized)
+{
+       struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen);
+       /* remember whether the counter was already negative before this update */
+       buffer->last_update_static = buffer->bytes_to_draw_until_static < 0;
+       /* dirty_begin == dirty_end means the interval is empty: start a new one */
+       if(buffer->dirty_begin == buffer->dirty_end)
+       {
+               buffer->dirty_begin = begin;
+               buffer->dirty_end = begin + size;
+               buffer->dirty_unsynchronized = unsynchronized;
+       }
+       else
+       {
+               /* grow the interval to the smallest range covering old and new */
+               buffer->dirty_begin = MIN2(buffer->dirty_begin, begin);
+               buffer->dirty_end = MAX2(buffer->dirty_end, begin + size);
+               /* stays set only if every merged write was unsynchronized */
+               buffer->dirty_unsynchronized &= unsynchronized;
+       }
+
+       if(unsynchronized)
+       {
+               // TODO: revisit this, it doesn't seem quite right
+               //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size);
+               /* unsynchronized writes add to the counter in proportion to their size */
+               buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold;
+       }
+       else
+               /* any other write resets the counter from the whole buffer size */
+               buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+}
+
+/*
+ * transfer_flush_region hook.
+ *
+ * Only acts on buffer transfers created with PIPE_TRANSFER_FLUSH_EXPLICIT:
+ * the flushed sub-range is added to the buffer's dirty interval.  Every other
+ * transfer is left untouched.  `pipe` is not used.
+ */
+static void nvfx_transfer_flush_region( struct pipe_context *pipe,
+                                     struct pipe_transfer *ptx,
+                                     const struct pipe_box *box)
+{
+       if(ptx->resource->target == PIPE_BUFFER && (ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+       {
+               struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+               /* start = byte offset of the transfer inside the buffer
+                * (ptx->data - buffer->data) plus the byte offset of box->x;
+                * length = byte size of box->width elements */
+               nvfx_buffer_dirty_interval(buffer,
+                               (uint8_t*)ptx->data - buffer->data + util_format_get_stride(buffer->base.base.format, box->x),
+                               util_format_get_stride(buffer->base.base.format, box->width),
+                               !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+       }
+}
+
+/*
+ * transfer_destroy hook.
+ *
+ * Buffer transfers: a write transfer that did not request
+ * PIPE_TRANSFER_FLUSH_EXPLICIT marks its whole range dirty here; then the
+ * resource reference is dropped and the transfer is freed.
+ * All other transfers are handed to util_staging_transfer_destroy().
+ */
+static void
+nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+       if(ptx->resource->target == PIPE_BUFFER)
+       {
+               struct nvfx_buffer* buffer = nvfx_buffer(ptx->resource);
+               /* true only when WRITE is set and FLUSH_EXPLICIT is clear */
+               if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE)
+                       /* ptx->stride is passed as the byte length of the range */
+                       nvfx_buffer_dirty_interval(buffer,
+                               (uint8_t*)ptx->data - buffer->data,
+                               ptx->stride,
+                               !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+               pipe_resource_reference(&ptx->resource, 0);
+               FREE(ptx);
+       }
+       else
+               util_staging_transfer_destroy(pipe, ptx);
+}
+
  void *
  nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
  {
+       /* Returns the CPU pointer for a transfer.  Buffer transfers simply hand
+        * back ptx->data.  Other transfers map the staging resource's bo the
+        * first time (ptx->data still NULL) and count nested maps in map_count. */
-       struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
-       if(!ptx->data)
+       if(ptx->resource->target == PIPE_BUFFER)
+               return ptx->data;
+       else
         {
-               struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
-               uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
-               ptx->data = map + tx->offset;
+               struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+               if(!ptx->data)
+               {
+                       struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+                       uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
+                       /* tx->offset is added to the start of the bo mapping */
+                       ptx->data = map + tx->offset;
+               }
+
+               ++tx->map_count;
+               return ptx->data;
         }
-       ++tx->map_count;
-       return ptx->data;
  }
  
  void
  nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
  {
-       struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
-       struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+       /* Buffer transfers need no work on unmap.  For other transfers the bo
+        * is unmapped only when the last nested map is released. */
+       if(ptx->resource->target != PIPE_BUFFER)
+       {
+               struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+               struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+
+               if(!--tx->map_count)
+               {
+                       nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+                       /* clearing data makes the next nvfx_transfer_map() map again */
+                       ptx->data = 0;
+               }
+       }
+}
+
+/*
+ * transfer_inline_write hook.
+ *
+ * Non-buffer resources go through u_default_transfer_inline_write().
+ * For buffers the data is copied straight into the buffer's CPU-side storage
+ * (buffer->data) and the written byte range is marked dirty.
+ *
+ * pr:            destination resource
+ * sr:            subresource (only forwarded for non-buffer resources)
+ * usage:         PIPE_TRANSFER_* flags describing the write
+ * box:           region to write; for buffers only x and width are used
+ * data:          source bytes
+ * stride, slice_stride: only forwarded for non-buffer resources
+ */
+static void nvfx_transfer_inline_write( struct pipe_context *pipe,
+                                     struct pipe_resource *pr,
+                                     struct pipe_subresource sr,
+                                     unsigned usage,
+                                     const struct pipe_box *box,
+                                     const void *data,
+                                     unsigned stride,
+                                     unsigned slice_stride)
+{
+       if(pr->target != PIPE_BUFFER)
+       {
+               u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride);
+       }
+       else
+       {
+               struct nvfx_buffer* buffer = nvfx_buffer(pr);
+               unsigned begin = util_format_get_stride(pr->format, box->x);
+               unsigned size = util_format_get_stride(pr->format, box->width);
+               memcpy(buffer->data + begin, data, size);
+               /* PIPE_TRANSFER_UNSYNCHRONIZED is a transfer usage bit, so it must be
+                * tested against `usage`; pr->flags holds resource flags, which are
+                * a different bit space. */
+               nvfx_buffer_dirty_interval(buffer, begin, size,
+                               !!(usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+       }
+}
  
-       if(!--tx->map_count)
-               nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+/* Install the transfer callbacks on the given context. */
+void
+nvfx_init_transfer_functions(struct pipe_context *pipe)
+{
+       pipe->get_transfer = nvfx_transfer_new;
+       pipe->transfer_map = nvfx_transfer_map;
+       pipe->transfer_flush_region = nvfx_transfer_flush_region;
+       pipe->transfer_unmap = nvfx_transfer_unmap;
+       pipe->transfer_destroy = nvfx_transfer_destroy;
+       pipe->transfer_inline_write = nvfx_transfer_inline_write;
  }
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c

index 4aa37938425c0600a820a84c2a259133521b4ee7..a6cd12563507ea871aa91e2295bd7afdde90e070 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -2,6 +2,7 @@
  #include "pipe/p_state.h"
  #include "util/u_inlines.h"
  #include "util/u_format.h"
+#include "translate/translate.h"
  
  #include "nvfx_context.h"
  #include "nvfx_state.h"
@@ -10,646 +11,583 @@
  #include "nouveau/nouveau_channel.h"
  #include "nouveau/nouveau_class.h"
  #include "nouveau/nouveau_pushbuf.h"
-#include "nouveau/nouveau_util.h"
  
-static INLINE int
-nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+/*
+ * Guess how many distinct vertices an indexed draw references, from the
+ * primitive type and the number of indices alone.  The index data itself is
+ * never inspected, so the result is only an estimate.
+ *
+ * mode:     PIPE_PRIM_* primitive type
+ * indices:  number of indices in the draw
+ */
+static inline unsigned
+util_guess_unique_indices_count(unsigned mode, unsigned indices)
  {
-       switch (pipe) {
-       case PIPE_FORMAT_R32_FLOAT:
-       case PIPE_FORMAT_R32G32_FLOAT:
-       case PIPE_FORMAT_R32G32B32_FLOAT:
-       case PIPE_FORMAT_R32G32B32A32_FLOAT:
-               *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
-               break;
-       case PIPE_FORMAT_R16_FLOAT:
-       case PIPE_FORMAT_R16G16_FLOAT:
-       case PIPE_FORMAT_R16G16B16_FLOAT:
-       case PIPE_FORMAT_R16G16B16A16_FLOAT:
-               *fmt = NV34TCL_VTXFMT_TYPE_HALF;
-               break;
-       case PIPE_FORMAT_R8_UNORM:
-       case PIPE_FORMAT_R8G8_UNORM:
-       case PIPE_FORMAT_R8G8B8_UNORM:
-       case PIPE_FORMAT_R8G8B8A8_UNORM:
-               *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
-               break;
-       case PIPE_FORMAT_R16_SSCALED:
-       case PIPE_FORMAT_R16G16_SSCALED:
-       case PIPE_FORMAT_R16G16B16_SSCALED:
-       case PIPE_FORMAT_R16G16B16A16_SSCALED:
-               *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
-               break;
-       default:
-               NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
-               return 1;
+       /* Euler's formula gives V =
+        * = E - F + 2 =
+        * = F * (polygon_edges / 2 - 1) + 2 =
+        * =  F * (polygon_edges - 2) / 2 + 2 =
+        * =  indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
+        * =  indices * (1 / 2 - 1 / polygon_edges) + 2
+        */
+       switch(mode)
+       {
+       case PIPE_PRIM_LINES:
+               return indices >> 1;
+       case PIPE_PRIM_TRIANGLES:
+       {
+               /* NOTE(review): the formula above yields indices / 6 + 2 for a closed
+                * triangle mesh, yet the value returned here is indices / 2 + 2.
+                * A division by 3 via the multiplicative inverse mod 2^32 was computed
+                * at this point but its result was never used, so it has been removed;
+                * the returned value is unchanged.  Confirm which estimate is wanted.
+                */
+               return (indices >> 1) + 2;
         }
-
-       switch (pipe) {
-       case PIPE_FORMAT_R8_UNORM:
-       case PIPE_FORMAT_R32_FLOAT:
-       case PIPE_FORMAT_R16_FLOAT:
-       case PIPE_FORMAT_R16_SSCALED:
-               *ncomp = 1;
-               break;
-       case PIPE_FORMAT_R8G8_UNORM:
-       case PIPE_FORMAT_R32G32_FLOAT:
-       case PIPE_FORMAT_R16G16_FLOAT:
-       case PIPE_FORMAT_R16G16_SSCALED:
-               *ncomp = 2;
-               break;
-       case PIPE_FORMAT_R8G8B8_UNORM:
-       case PIPE_FORMAT_R32G32B32_FLOAT:
-       case PIPE_FORMAT_R16G16B16_FLOAT:
-       case PIPE_FORMAT_R16G16B16_SSCALED:
-               *ncomp = 3;
-               break;
-       case PIPE_FORMAT_R8G8B8A8_UNORM:
-       case PIPE_FORMAT_R32G32B32A32_FLOAT:
-       case PIPE_FORMAT_R16G16B16A16_FLOAT:
-       case PIPE_FORMAT_R16G16B16A16_SSCALED:
-               *ncomp = 4;
-               break;
+       // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
+       case PIPE_PRIM_QUADS:
+               return (indices >> 1) + 2;
+       //      return (indices >> 2) + 2; // if it is a closed mesh
         default:
-               NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
-               return 1;
+               return indices;
         }
-
-       return 0;
  }
  
-static boolean
-nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
-                   unsigned ib_size)
+/*
+ * Choose how the vertex (and index) data of a draw is fed to the hardware.
+ *
+ * Returns:
+ *   0  vertex data is written inline
+ *   1  vertex buffers are used; indices, if any, are written inline
+ *   2  vertex buffers and the index buffer are both used
+ *
+ * Side effect: bytes_to_draw_until_static of every per-vertex buffer (and of
+ * the index buffer, when hardware indices are possible) is decremented by the
+ * amount of data this draw is estimated to consume.
+ */
+static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
  {
-       unsigned type;
-
-       if (!ib) {
-               nvfx->idxbuf_buffer = NULL;
-               nvfx->idxbuf_format = 0xdeadbeef;
-               return FALSE;
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+       unsigned hardware_cost = 0;
+       unsigned inline_cost = 0;
+       unsigned unique_vertices;
+       unsigned upload_mode;
+       if (info->indexed)
+               unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
+       else
+               unique_vertices = info->count;
+
+       /* Here we try to figure out if we are better off writing vertex data directly on the FIFO,
+        * or create hardware buffer objects and pointing the hardware to them.
+        *
+        * This is done by computing the total memcpy cost of each option, ignoring uploads
+        * if we think that the buffer is static and thus the upload cost will be amortized over
+        * future draw calls.
+        *
+        * For instance, if everything looks static, we will always create buffer objects, while if
+        * everything is a user buffer and we are not doing indexed drawing, we never do.
+        *
+        * Other interesting cases are a small user vertex buffer with a huge user index buffer,
+        * where we will upload the vertex buffer so that we can use hardware index lookup, and
+        * the opposite case, where we instead do index lookup in software to avoid uploading
+        * a huge amount of vertex data that is not going to be used.
+        *
+        * Otherwise, we generally move a buffer to the GPU after it has been pushed
+        * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having
+        * been updated with a transfer (or just the buffer having been destroyed).
+        *
+        * There is no special handling for user buffers, since applications can use
+        * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this
+        * by the way.
+        *
+        * Note that currently we don't support only putting some data on the FIFO, and
+        * some on vertex buffers (constant and instanced data is independent from this).
+        *
+        * nVidia doesn't seem to do this either, even though it should be at least
+        * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed.
+        */
+
+       for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+       {
+               struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+               struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+               struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+               buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
+               if (!nvfx_buffer_seems_static(buffer))
+               {
+                       /* a non-static buffer costs its pending dirty range, plus an
+                        * allocation if it has no bo yet */
+                       hardware_cost += buffer->dirty_end - buffer->dirty_begin;
+                       if (!buffer->base.bo)
+                               hardware_cost += nvfx->screen->buffer_allocation_cost;
+               }
+               inline_cost += vbi->per_vertex_size * info->count;
         }
  
-       if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
-               return FALSE;
+       float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
+       boolean prefer_hardware_indices = FALSE;
+       unsigned index_inline_cost = 0;
+       unsigned index_hardware_cost = 0;
  
-       switch (ib_size) {
-       case 2:
-               type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
-               break;
-       case 4:
-               type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
-               break;
-       default:
-               return FALSE;
-       }
+       if (info->indexed)
+       {
+               index_inline_cost = nvfx->idxbuf.index_size * info->count;
+               /* hardware indices need reloc flags, 2- or 4-byte indices and an
+                * offset aligned to the index size */
+               if (nvfx->screen->index_buffer_reloc_flags
+                       && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
+                       && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
+               {
+                       struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
+                       buffer->bytes_to_draw_until_static -= index_inline_cost;
  
-       if (ib != nvfx->idxbuf_buffer ||
-           type != nvfx->idxbuf_format) {
-               nvfx->dirty |= NVFX_NEW_ARRAYS;
-               nvfx->idxbuf_buffer = ib;
-               nvfx->idxbuf_format = type;
-       }
+                       /* NOTE(review): this unconditional assignment makes the
+                        * assignment in the else branch below redundant, so hardware
+                        * indices are preferred whenever they are possible, even when
+                        * inline indices are estimated to be cheaper.  Confirm that
+                        * this is intended. */
+                       prefer_hardware_indices = TRUE;
  
-       return TRUE;
-}
+                       if (!nvfx_buffer_seems_static(buffer))
+                       {
+                               index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
+                               if (!buffer->base.bo)
+                                       index_hardware_cost += nvfx->screen->buffer_allocation_cost;
+                       }
  
-// type must be floating point
-static inline void
-nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
-                      int attrib, struct pipe_vertex_element *ve,
-                      struct pipe_vertex_buffer *vb, unsigned ncomp)
-{
-       struct pipe_transfer *transfer;
-       struct nouveau_channel* chan = nvfx->screen->base.channel;
-       void *map;
-       float *v;
-
-       map  = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
-       map = (uint8_t *) map + vb->buffer_offset + ve->src_offset;
-
-       v = map;
-
-       switch (ncomp) {
-       case 4:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
-               OUT_RING(chan, fui(v[0]));
-               OUT_RING(chan, fui(v[1]));
-               OUT_RING(chan,  fui(v[2]));
-               OUT_RING(chan,  fui(v[3]));
-               break;
-       case 3:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
-               OUT_RING(chan,  fui(v[0]));
-               OUT_RING(chan,  fui(v[1]));
-               OUT_RING(chan,  fui(v[2]));
-               break;
-       case 2:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
-               OUT_RING(chan,  fui(v[0]));
-               OUT_RING(chan,  fui(v[1]));
-               break;
-       case 1:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
-               OUT_RING(chan,  fui(v[0]));
-               break;
+                       if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
+                       {
+                               best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
+                       }
+                       else
+                       {
+                               best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
+                               prefer_hardware_indices = TRUE;
+                       }
+               }
         }
  
-       pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
+       /* let's finally figure out which of the 3 paths we want to take */
+       if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
+               upload_mode = 1 + prefer_hardware_indices;
+       else
+               upload_mode = 0;
+
+#ifdef DEBUG
+        if (unlikely(nvfx->screen->trace_draw))
+          {
+                  fprintf(stderr, "DRAW");
+                  if (info->indexed)
+                  {
+                          fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
+                          /* index_bias is signed, so print it with a signed conversion */
+                          if (info->index_bias)
+                                  fprintf(stderr, " biased %i", info->index_bias);
+                          fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
+                  }
+                  /* "from" is the first instance, not the indexed flag */
+                  if (info->instance_count > 1)
+                          fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
+                  fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
+                  if (!upload_mode)
+                          fprintf(stderr, " -> inline vertex data");
+                  else if (upload_mode == 2 || !info->indexed)
+                          fprintf(stderr, " -> buffer range");
+                  else
+                          fprintf(stderr, " -> inline indices");
+                  fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
+                  for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+                  {
+                          struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                          struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                          struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+                          if (i)
+                                  fprintf(stderr, ", ");
+                          /* %Li is not a standard conversion; cast and use %lli. %p wants a void pointer. */
+                          fprintf(stderr, "%p%s left %lli", (void*)buffer, buffer->last_update_static ? " static" : "", (long long)buffer->bytes_to_draw_until_static);
+                  }
+                  fprintf(stderr, ">\n");
+          }
+#endif
+
+       return upload_mode;
  }
  
-static void
-nvfx_draw_arrays(struct pipe_context *pipe,
-                unsigned mode, unsigned start, unsigned count)
+/*
+ * Draw entry point.
+ *
+ * Picks an upload mode (it stays 0 when the vertex elements need translation,
+ * otherwise nvfx_decide_upload_mode() chooses), records the choice in
+ * use_vertex_buffers / use_index_buffer, uploads the buffers the chosen mode
+ * needs, keeps base_vertex up to date, and finally draws either through
+ * nvfx_draw_vbo_swtnl() or nvfx_push_vbo().
+ */
+void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
  {
         struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       unsigned restart = 0;
-
-       nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
-       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
-               nvfx_draw_elements_swtnl(pipe, NULL, 0, 0,
-                                           mode, start, count);
-                return;
-       }
+       unsigned upload_mode = 0;
  
-       while (count) {
-               unsigned vc, nr, avail;
+       if (!nvfx->vtxelt->needs_translate)
+               upload_mode = nvfx_decide_upload_mode(pipe, info);
  
-               nvfx_state_emit(nvfx);
+       /* only upload mode 2 uses the index buffer */
+       nvfx->use_index_buffer = upload_mode > 1;
  
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+       /* flag the arrays dirty only when the vertex-buffer choice changes */
+       if ((upload_mode > 0) != nvfx->use_vertex_buffers)
+       {
+               nvfx->use_vertex_buffers = (upload_mode > 0);
+               nvfx->dirty |= NVFX_NEW_ARRAYS;
+               nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+       }
  
-               vc = nouveau_vbuf_split(avail, 6, 256,
-                                       mode, start, count, &restart);
-               if (!vc) {
-                       FIRE_RING(chan);
-                       continue;
+       if (upload_mode > 0)
+       {
+               for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+               {
+                       struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                       nvfx_buffer_upload(nvfx_buffer(vb->buffer));
                 }
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+               if (upload_mode > 1)
+               {
+                       nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));
  
-               nr = (vc & 0xff);
-               if (nr) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
-                       OUT_RING  (chan, ((nr - 1) << 24) | start);
-                       start += nr;
+                       /* with the index buffer in use, base_vertex follows index_bias */
+                       if (unlikely(info->index_bias != nvfx->base_vertex))
+                       {
+                               nvfx->base_vertex = info->index_bias;
+                               nvfx->dirty |= NVFX_NEW_ARRAYS;
+                       }
                 }
-
-               nr = vc >> 8;
-               while (nr) {
-                       unsigned push = nr > 2047 ? 2047 : nr;
-
-                       nr -= push;
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
-                       while (push--) {
-                               OUT_RING(chan, ((0x100 - 1) << 24) | start);
-                               start += 0x100;
+               else
+               {
+                       /* without it, base_vertex is reset to 0 when the draw starts
+                        * below the current base_vertex */
+                       if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
+                       {
+                               nvfx->base_vertex = 0;
+                               nvfx->dirty |= NVFX_NEW_ARRAYS;
                         }
                 }
-
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
-
-               count -= vc;
-               start = restart;
         }
  
-       pipe->flush(pipe, 0, NULL);
+       /* use the swtnl path when it is forced or when state validation fails */
+       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
+               nvfx_draw_vbo_swtnl(pipe, info);
+       else
+               nvfx_push_vbo(pipe, info);
  }
  
-static INLINE void
-nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
-                      unsigned mode, unsigned start, unsigned count)
+/*
+ * Emit the vertex format (NV34TCL_VTXFMT) and vertex buffer address
+ * (NV34TCL_VTXBUF_ADDRESS) state for the currently bound vertex elements.
+ *
+ * The number of slots written is the larger of the current element count
+ * and nvfx->hw_vtxelt_nr; slots past the current element count are written
+ * as NV34TCL_VTXFMT_TYPE_32_FLOAT with a zero address.
+ *
+ * Every return visible in this function yields TRUE.
+ */
+boolean
+nvfx_vbo_validate(struct nvfx_context *nvfx)
  {
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       int i;
+       int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
  
-       while (count) {
-               uint8_t *elts = (uint8_t *)ib + start;
-               unsigned vc, push, restart = 0, avail;
+       /* nothing bound now and nothing previously emitted: no state to write */
+       if (!elements)
+               return TRUE;
  
-               nvfx_state_emit(nvfx);
+       MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+       /* constant elements: read one value from the buffer's data pointer,
+        * convert it to float[4] and emit it as an immediate attribute */
+       for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
+       {
+               struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
+               struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+               struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+               float v[4];
+               ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
+               nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+       }
  
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
  
-               vc = nouveau_vbuf_split(avail, 6, 2,
-                                       mode, start, count, &restart);
-               if (vc == 0) {
-                       FIRE_RING(chan);
-                       continue;
-               }
-               count -= vc;
+       /* vertex formats: per-vertex slots get the buffer stride OR'ed into the
+        * precomputed vtxfmt word; slots in between are copied verbatim */
+       OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+       if(nvfx->use_vertex_buffers)
+       {
+               unsigned idx = 0;
+               for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                       struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+                       if(idx != ve->idx)
+                       {
+                               assert(idx < ve->idx);
+                               OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
+                               idx = ve->idx;
+                       }
  
-               if (vc & 1) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
-                       OUT_RING  (chan, elts[0]);
-                       elts++; vc--;
+                       OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT));
+                       ++idx;
                 }
+               /* remaining slots after the last per-vertex element */
+               if(idx != nvfx->vtxelt->num_elements)
+                       OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
+       }
+       else
+               OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);
  
-               while (vc) {
-                       unsigned i;
-
-                       push = MIN2(vc, 2047 * 2);
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
-                       for (i = 0; i < push; i+=2)
-                               OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+       /* pad slots that were emitted previously but are no longer in use */
+       for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
+               OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);
  
-                       vc -= push;
-                       elts += push;
+       if(nvfx->is_nv4x) {
+               unsigned i;
+               /* seems to be some kind of cache flushing */
+               for(i = 0; i < 3; ++i) {
+                       OUT_RING(chan, RING_3D(0x1718, 1));
+                       OUT_RING(chan, 0);
                 }
-
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
-
-               start = restart;
         }
-}
-
-static INLINE void
-nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
-                      unsigned mode, unsigned start, unsigned count)
-{
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-
-       while (count) {
-               uint16_t *elts = (uint16_t *)ib + start;
-               unsigned vc, push, restart = 0, avail;
  
-               nvfx_state_emit(nvfx);
-
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
-               vc = nouveau_vbuf_split(avail, 6, 2,
-                                       mode, start, count, &restart);
-               if (vc == 0) {
-                       FIRE_RING(chan);
-                       continue;
-               }
-               count -= vc;
+       /* buffer addresses: a relocation for each per-vertex element, zero for
+        * every other slot */
+       OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+       if(nvfx->use_vertex_buffers)
+       {
+               unsigned idx = 0;
+               for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                       struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+                       for(; idx < ve->idx; ++idx)
+                               OUT_RING(chan, 0);
  
-               if (vc & 1) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
-                       OUT_RING  (chan, elts[0]);
-                       elts++; vc--;
+                       /* the address is offset by base_vertex whole vertices */
+                       OUT_RELOC(chan, bo,
+                                       vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+                                       vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                                       0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+                       ++idx;
                 }
  
-               while (vc) {
-                       unsigned i;
-
-                       push = MIN2(vc, 2047 * 2);
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
-                       for (i = 0; i < push; i+=2)
-                               OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
-
-                       vc -= push;
-                       elts += push;
-               }
+               for(; idx < elements; ++idx)
+                       OUT_RING(chan, 0);
+       }
+       else
+       {
+               for (i = 0; i < elements; i++)
+                       OUT_RING(chan, 0);
+       }
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
+       OUT_RING(chan, RING_3D(0x1710, 1));
+       OUT_RING(chan, 0);
  
-               start = restart;
-       }
+       /* remember how many slots the hardware now holds, for the next call */
+       nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+       return TRUE;
  }
  
-static INLINE void
-nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
-                      unsigned mode, unsigned start, unsigned count)
+/*
+ * Emit one VTXBUF_ADDRESS method plus address relocation per per-vertex
+ * element, with NOUVEAU_BO_DUMMY set in the relocation flags.
+ *
+ * Returns without emitting anything when nvfx->use_vertex_buffers is zero.
+ */
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
  {
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-
-       while (count) {
-               uint32_t *elts = (uint32_t *)ib + start;
-               unsigned vc, push, restart = 0, avail;
-
-               nvfx_state_emit(nvfx);
-
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
-               vc = nouveau_vbuf_split(avail, 5, 1,
-                                       mode, start, count, &restart);
-               if (vc == 0) {
-                       FIRE_RING(chan);
-                       continue;
-               }
-               count -= vc;
-
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
-
-               while (vc) {
-                       push = MIN2(vc, 2047);
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
-                       OUT_RINGp    (chan, elts, push);
-
-                       vc -= push;
-                       elts += push;
-               }
+        if(!nvfx->use_vertex_buffers)
+                return;
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+       int i;
  
-               start = restart;
+       MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+                struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+
+                /* method header for this element's slot, then the same address
+                 * expression that nvfx_vbo_validate() emits */
+                OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1),
+                               vb_flags, 0, 0);
+                OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+                               vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                               0, NV34TCL_VTXBUF_ADDRESS_DMA1);
         }
  }
  
  static void
-nvfx_draw_elements_inline(struct pipe_context *pipe,
-                         struct pipe_resource *ib,
-                         unsigned ib_size, int ib_bias,
-                         unsigned mode, unsigned start, unsigned count)
+nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
  {
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct pipe_transfer *transfer;
-       void *map;
-
-       map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
-       if (!ib) {
-               NOUVEAU_ERR("failed mapping ib\n");
-               return;
-       }
+       /*
+        * Emit the NV34TCL_IDXBUF_ADDRESS method (address + format words) for
+        * the bound index buffer.
+        *
+        * ib_flags: extra relocation flags OR'ed with the screen's index buffer
+        * relocation flags and NOUVEAU_BO_RD. When NOUVEAU_BO_DUMMY is set the
+        * method header is emitted as a relocation, otherwise as a plain word.
+        *
+        * The format word is U16 when the index size is 2 and U32 for any other
+        * index size.
+        */
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+       struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
+       ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
  
-       assert(ib_bias == 0);
-
-       switch (ib_size) {
-       case 1:
-               nvfx_draw_elements_u08(nvfx, map, mode, start, count);
-               break;
-       case 2:
-               nvfx_draw_elements_u16(nvfx, map, mode, start, count);
-               break;
-       case 4:
-               nvfx_draw_elements_u32(nvfx, map, mode, start, count);
-               break;
-       default:
-               NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
-               break;
-       }
+       assert(nvfx->screen->index_buffer_reloc_flags);
  
-       pipe_buffer_unmap(pipe, ib, transfer);
+       MARK_RING(chan, 3, 3);
+       if(ib_flags & NOUVEAU_BO_DUMMY)
+               OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0);
+       else
+               OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+       /* NOTE(review): the "+ 1" added to the offset is not explained anywhere
+        * in this patch -- confirm it is intentional */
+       OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+       OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
+                       0, NV34TCL_IDXBUF_FORMAT_DMA1);
  }
  
-static void
-nvfx_draw_elements_vbo(struct pipe_context *pipe,
-                      unsigned mode, unsigned start, unsigned count)
+/* Emit the index buffer state with no extra relocation flags. */
+void
+nvfx_idxbuf_validate(struct nvfx_context* nvfx)
  {
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       unsigned restart = 0;
-
-       while (count) {
-               unsigned nr, vc, avail;
-
-               nvfx_state_emit(nvfx);
+       nvfx_idxbuf_emit(nvfx, 0);
+}
  
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+/* Emit the index buffer state with NOUVEAU_BO_DUMMY relocation flags. */
+void
+nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
+{
+       const unsigned reloc_flags = NOUVEAU_BO_DUMMY;
+
+       nvfx_idxbuf_emit(nvfx, reloc_flags);
+}
  
-               vc = nouveau_vbuf_split(avail, 6, 256,
-                                       mode, start, count, &restart);
-               if (!vc) {
-                       FIRE_RING(chan);
-                       continue;
-               }
+/*
+ * Hardware vertex format type for each pipe format, indexed by pipe format.
+ * Formats not listed here are left at zero.
+ */
+unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
+{
+       /* 32-bit float */
+       [PIPE_FORMAT_R32_FLOAT]            = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R32G32_FLOAT]         = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R32G32B32_FLOAT]      = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R32G32B32A32_FLOAT]   = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+
+       /* 16-bit float */
+       [PIPE_FORMAT_R16_FLOAT]            = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R16G16_FLOAT]         = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R16G16B16_FLOAT]      = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R16G16B16A16_FLOAT]   = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+
+       /* 8-bit unsigned, normalized and scaled */
+       [PIPE_FORMAT_R8_UNORM]             = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8_UNORM]           = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8B8_UNORM]         = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8B8A8_UNORM]       = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8B8A8_USCALED]     = NV34TCL_VTXFMT_TYPE_8_USCALED,
+
+       /* 16-bit signed, normalized */
+       [PIPE_FORMAT_R16_SNORM]            = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16G16_SNORM]         = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16G16B16_SNORM]      = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16G16B16A16_SNORM]   = NV34TCL_VTXFMT_TYPE_16_SNORM,
+
+       /* 16-bit signed, scaled */
+       [PIPE_FORMAT_R16_SSCALED]          = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+       [PIPE_FORMAT_R16G16_SSCALED]       = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+       [PIPE_FORMAT_R16G16B16_SSCALED]    = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+       [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+};
+
+/*
+ * Build the vertex elements CSO from the gallium element descriptions.
+ *
+ * For each element with a non-zero instance_divisor a per-instance entry is
+ * recorded and its hardware format is set to 32-bit float. Every other
+ * element becomes a per-vertex entry and is added to the translate key; if
+ * nvfx_vertex_formats has no hardware type for its source format, the
+ * translate output is a 32-bit float format with the same component count
+ * and needs_translate is set.
+ *
+ * Returns the new CSO, or NULL if it could not be allocated.
+ */
+static void *
+nvfx_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+       struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
+        struct translate_key transkey;
+        unsigned per_vertex_size[16];
+        memset(per_vertex_size, 0, sizeof(per_vertex_size));
+
+        unsigned vb_compacted_index[16];
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+
+       /* allocation failure: report it to the caller instead of writing
+        * through a NULL pointer below */
+       if(!cso)
+               return NULL;
+
+       memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
+       cso->num_elements = num_elements;
+       cso->needs_translate = FALSE;
+
+       transkey.nr_elements = 0;
+       transkey.output_stride = 0;
+
+       /* total bytes of per-vertex data read from each vertex buffer */
+       for(unsigned i = 0; i < num_elements; ++i)
+        {
+               const struct pipe_vertex_element* ve = &elements[i];
+               if(!ve->instance_divisor)
+                        per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
+        }
+
+        /* compact the buffers that supply per-vertex data into a dense list;
+         * vb_compacted_index maps a vertex buffer index to its position there */
+        for(unsigned i = 0; i < 16; ++i)
+        {
+                if(per_vertex_size[i])
+                {
+                        unsigned idx = cso->num_per_vertex_buffer_infos++;
+                        cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
+                        cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
+                        vb_compacted_index[i] = idx;
+                }
+        }
+
+       for(unsigned i = 0; i < num_elements; ++i)
+       {
+               const struct pipe_vertex_element* ve = &elements[i];
+               unsigned type = nvfx_vertex_formats[ve->src_format];
+               unsigned ncomp = util_format_get_nr_components(ve->src_format);
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+               //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
+               if(ve->instance_divisor)
+               {
+                       struct nvfx_low_frequency_element* lfve;
+                       cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT;
+
+                       //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
+                       if(0)
+                               lfve = &cso->constant[cso->num_constant++];
+                       else
+                       {
+                               lfve = &cso->per_instance[cso->num_per_instance++].base;
+                               ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
+                       }
  
-               nr = (vc & 0xff);
-               if (nr) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
-                       OUT_RING  (chan, ((nr - 1) << 24) | start);
-                       start += nr;
+                        lfve->idx = i;
+                        lfve->vertex_buffer_index = ve->vertex_buffer_index;
+                        lfve->src_offset = ve->src_offset;
+                        lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
+                        lfve->ncomp = ncomp;
                 }
-
-               nr = vc >> 8;
-               while (nr) {
-                       unsigned push = nr > 2047 ? 2047 : nr;
-
-                       nr -= push;
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
-                       while (push--) {
-                               OUT_RING(chan, ((0x100 - 1) << 24) | start);
-                               start += 0x100;
+               else
+               {
+                       unsigned idx;
+
+                       idx = cso->num_per_vertex++;
+                       cso->per_vertex[idx].idx = i;
+                       cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
+                       cso->per_vertex[idx].src_offset = ve->src_offset;
+
+                       idx = transkey.nr_elements++;
+                       transkey.element[idx].input_format = ve->src_format;
+                       transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
+                       transkey.element[idx].input_offset = ve->src_offset;
+                       transkey.element[idx].instance_divisor = 0;
+                       transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
+                       if(type)
+                       {
+                               /* the source format has a hardware type: pass through */
+                               transkey.element[idx].output_format = ve->src_format;
+                               cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type;
+                       }
+                       else
+                       {
+                               /* no hardware type: translate to 32-bit floats */
+                               unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
+                               transkey.element[idx].output_format = float32[ncomp - 1];
+                               cso->needs_translate = TRUE;
+                               cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT;
                         }
+                       /* each translated element is padded to a multiple of 4 bytes */
+                       transkey.element[idx].output_offset = transkey.output_stride;
+                       transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
                 }
+       }
  
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
+       cso->translate = translate_generic_create(&transkey);
+       /* translated vertex size in 32-bit words */
+       cso->vertex_length = transkey.output_stride >> 2;
+       /* vertex_length is 0 when there are no per-vertex elements (no
+        * elements at all, or only instanced ones); clamp the divisor so
+        * this does not divide by zero */
+       cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1);
  
-               count -= vc;
-               start = restart;
-       }
+       return (void *)cso;
  }
  
  static void
-nvfx_draw_elements(struct pipe_context *pipe,
-                  struct pipe_resource *indexBuffer,
-                  unsigned indexSize, int indexBias,
-                  unsigned mode, unsigned start, unsigned count)
+nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
  {
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-       boolean idxbuf;
-
-       idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
-       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
-               nvfx_draw_elements_swtnl(pipe,
-                                        indexBuffer, indexSize, indexBias,
-                                        mode, start, count);
-               return;
-       }
-
-       if (idxbuf) {
-               nvfx_draw_elements_vbo(pipe, mode, start, count);
-       } else {
-               nvfx_draw_elements_inline(pipe,
-                                         indexBuffer, indexSize, indexBias,
-                                         mode, start, count);
-       }
-
-       pipe->flush(pipe, 0, NULL);
+       FREE(hwcso);
  }
  
-void
-nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+/*
+ * Make hwcso the current vertex elements state and mark the vertex arrays
+ * dirty in draw_dirty.
+ */
+static void
+nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
  {
         struct nvfx_context *nvfx = nvfx_context(pipe);
  
-       if (info->indexed && nvfx->idxbuf.buffer) {
-               unsigned offset;
-
-               assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0);
-               offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size;
-
-               nvfx_draw_elements(pipe,
-                                  nvfx->idxbuf.buffer,
-                                  nvfx->idxbuf.index_size,
-                                  info->index_bias,
-                                  info->mode,
-                                  info->start + offset,
-                                  info->count);
-       }
-       else {
-               nvfx_draw_arrays(pipe,
-                               info->mode,
-                               info->start,
-                               info->count);
-       }
+       nvfx->vtxelt = hwcso;
+       /* NOTE(review): -1 presumably means "not decided yet" -- confirm
+        * against the code that consumes use_vertex_buffers */
+       nvfx->use_vertex_buffers = -1;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
  }
  
-boolean
-nvfx_vbo_validate(struct nvfx_context *nvfx)
+/*
+ * Copy the first `count` vertex buffer descriptions from vb into the
+ * context, taking a reference on each buffer, and drop the references held
+ * in slots from `count` up to the previous buffer count.
+ */
+static void
+nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+                       const struct pipe_vertex_buffer *vb)
  {
-       struct nouveau_channel* chan = nvfx->screen->base.channel;
-       struct pipe_resource *ib = nvfx->idxbuf_buffer;
-       unsigned ib_format = nvfx->idxbuf_format;
-       int i;
-       int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
-       uint32_t vtxfmt[16];
-       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
-
-       if (!elements)
-               return TRUE;
-
-       nvfx->vbo_bo = 0;
-
-       MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
-       for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
-               unsigned type, ncomp;
-
-               ve = &nvfx->vtxelt->pipe[i];
-               vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-
-               if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
-                       MARK_UNDO(chan);
-                       nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
-                       return FALSE;
-               }
+       struct nvfx_context *nvfx = nvfx_context(pipe);
  
-               if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
-                       nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
-                       vtxfmt[i] = type;
-               } else {
-                       vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
-                               (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
-                       nvfx->vbo_bo |= (1 << i);
-               }
+       for(unsigned i = 0; i < count; ++i)
+       {
+               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
+               nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+               nvfx->vtxbuf[i].max_index = vb[i].max_index;
+               nvfx->vtxbuf[i].stride = vb[i].stride;
         }
  
-       for(; i < elements; ++i)
-               vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
-
-       OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
-       OUT_RINGp(chan, vtxfmt, elements);
-
-       if(nvfx->is_nv4x) {
-               unsigned i;
-               /* seems to be some kind of cache flushing */
-               for(i = 0; i < 3; ++i) {
-                       OUT_RING(chan, RING_3D(0x1718, 1));
-                       OUT_RING(chan, 0);
-               }
-       }
+       /* slots that were in use before but are not any more */
+       for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
+               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
  
-       OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
-       for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
+       nvfx->vtxbuf_nr = count;
+       /* NOTE(review): -1 presumably means "not decided yet" -- confirm
+        * against the code that consumes use_vertex_buffers */
+       nvfx->use_vertex_buffers = -1;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
  
-               ve = &nvfx->vtxelt->pipe[i];
-               vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+/*
+ * Record the index buffer binding in the context. A NULL ib drops the
+ * buffer reference and zeroes the index size and offset. Either way
+ * NVFX_NEW_INDEX is set in both dirty and draw_dirty.
+ */
+static void
+nvfx_set_index_buffer(struct pipe_context *pipe,
+                     const struct pipe_index_buffer *ib)
+{
+       struct nvfx_context *nvfx = nvfx_context(pipe);
  
-               if (!(nvfx->vbo_bo & (1 << i)))
-                       OUT_RING(chan, 0);
-               else
-               {
-                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
-                       OUT_RELOC(chan, bo,
-                                vb->buffer_offset + ve->src_offset,
-                                vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-                                0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-               }
+       if(ib)
+       {
+               pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
+               nvfx->idxbuf.index_size = ib->index_size;
+               nvfx->idxbuf.offset = ib->offset;
         }
-
-        for (; i < elements; i++)
-               OUT_RING(chan, 0);
-
-       OUT_RING(chan, RING_3D(0x1710, 1));
-       OUT_RING(chan, 0);
-
-       if (ib) {
-               unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
-               struct nouveau_bo* bo = nvfx_resource(ib)->bo;
-
-               assert(nvfx->screen->index_buffer_reloc_flags);
-
-               OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
-               OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
-               OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
-                                 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+       else
+       {
+               pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
+               nvfx->idxbuf.index_size = 0;
+               nvfx->idxbuf.offset = 0;
         }
  
-       nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
-       return TRUE;
+       nvfx->dirty |= NVFX_NEW_INDEX;
+       nvfx->draw_dirty |= NVFX_NEW_INDEX;
  }
  
  void
-nvfx_vbo_relocate(struct nvfx_context *nvfx)
+nvfx_init_vbo_functions(struct nvfx_context *nvfx)
  {
-       struct nouveau_channel* chan = nvfx->screen->base.channel;
-       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-       int i;
+       /* Install this file's vertex buffer, index buffer and vertex elements
+        * callbacks into the context's pipe function table. */
+       nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
+       nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
  
-       MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
-       for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
-               if(nvfx->vbo_bo & (1 << i)) {
-                       struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
-                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
-                       OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
-                                       vb_flags, 0, 0);
-                       OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
-                                       vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-                                       0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-               }
-       }
-
-       if(nvfx->idxbuf_buffer)
-       {
-               unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-               struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo;
-
-               assert(nvfx->screen->index_buffer_reloc_flags);
-
-               OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
-                               ib_flags, 0, 0);
-               OUT_RELOC(chan, bo, 0,
-                               ib_flags | NOUVEAU_BO_LOW, 0, 0);
-               OUT_RELOC(chan, bo, nvfx->idxbuf_format,
-                               ib_flags | NOUVEAU_BO_OR,
-                               0, NV34TCL_IDXBUF_FORMAT_DMA1);
-       }
+       nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
+       nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
+       nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
  }
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c

index 24d9846310e001413d4519b98add36f7aaccf9dc..939d2b83aee4ab3feda1e00aadd5afaabb1c2b4c 100644 (file)
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -10,6 +10,7 @@
  
  #include "nvfx_context.h"
  #include "nvfx_state.h"
+#include "nvfx_resource.h"
  
  /* TODO (at least...):
   *  1. Indexed consts  + ARL
@@ -874,7 +875,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
         struct nouveau_grobj *eng3d = screen->eng3d;
         struct nvfx_vertex_program *vp;
         struct pipe_resource *constbuf;
-       struct pipe_transfer *transfer = NULL;
         boolean upload_code = FALSE, upload_data = FALSE;
         int i;
  
@@ -983,11 +983,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
         if (vp->nr_consts) {
                 float *map = NULL;
  
-               if (constbuf) {
-                       map = pipe_buffer_map(pipe, constbuf,
-                                             PIPE_TRANSFER_READ,
-                                             &transfer);
-               }
+               if (constbuf)
+                       map = nvfx_buffer(constbuf)->data;
  
                 for (i = 0; i < vp->nr_consts; i++) {
                         struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -1005,9 +1002,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                         OUT_RING  (chan, i + vp->data->start);
                         OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
                 }
-
-               if (constbuf)
-                       pipe_buffer_unmap(pipe, constbuf, transfer);
         }
  
         /* Upload vtxprog */
author	Luca Barbieri <luca@luca-barbieri.com>
	Sat, 7 Aug 2010 03:39:18 +0000 (05:39 +0200)
committer	Luca Barbieri <luca@luca-barbieri.com>
	Sat, 21 Aug 2010 18:42:14 +0000 (20:42 +0200)
src/gallium/drivers/nouveau/nouveau_class.h		patch \| blob \| history
src/gallium/drivers/nouveau/nouveau_util.h	[deleted file]	patch \| blob \| history
src/gallium/drivers/nvfx/Makefile		patch \| blob \| history
src/gallium/drivers/nvfx/nv30_fragtex.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_buffer.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_context.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_context.h		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_draw.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_fragprog.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_push.c	[new file with mode: 0644]	patch \| blob
src/gallium/drivers/nvfx/nvfx_resource.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_resource.h		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_screen.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_screen.h		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_state.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_state_emit.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_state_fb.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_surface.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_transfer.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_vbo.c		patch \| blob \| history
src/gallium/drivers/nvfx/nvfx_vertprog.c		patch \| blob \| history