nvfx: rewrite draw code and buffer code
author Luca Barbieri <luca@luca-barbieri.com>
Sat, 7 Aug 2010 03:39:18 +0000 (05:39 +0200)
committer Luca Barbieri <luca@luca-barbieri.com>
Sat, 21 Aug 2010 18:42:14 +0000 (20:42 +0200)
This is a full rewrite of the drawing and buffer management logic.

It offers a lot of improvements:
1. A copy of buffers is now always kept in system memory. This
   allows software processing of them, which is either required
   or improves performance in many cases.
2. Support for pushing vertices on the FIFO, with index lookup if necessary.
3. "Smart" draw code that tries to intelligently choose the cheapest
  way to draw something: whether to use inline vertices or hardware
  vertex buffer, and whether to use hardware index buffers
4. Support for all vertex formats supported by the hardware
5. Usage of translate to push vertices, supporting all formats that are
   sensible to use as vertex formats
6. Support for base vertex
7. Usage of Ben Skeggs' primitive splitter originally for nv50, allowing
   correct splitting of line loops, triangle fans, etc.
8. Support for instancing
9. Precomputation using the vertex elements CSO

Thanks to Ben Skeggs for his primitive splitter originally for nv50.

Thanks to Christoph Bumiller for his nv50 push code, that was the basis
of this work, even though I changed his code dramatically, in particular
to replace his ad-hoc vertex data emitter with translate.

The changes could go into nv50 too, but there are substantial
differences due to the additional nv50 hardware features.

21 files changed:
src/gallium/drivers/nouveau/nouveau_class.h
src/gallium/drivers/nouveau/nouveau_util.h [deleted file]
src/gallium/drivers/nvfx/Makefile
src/gallium/drivers/nvfx/nv30_fragtex.c
src/gallium/drivers/nvfx/nvfx_buffer.c
src/gallium/drivers/nvfx/nvfx_context.c
src/gallium/drivers/nvfx/nvfx_context.h
src/gallium/drivers/nvfx/nvfx_draw.c
src/gallium/drivers/nvfx/nvfx_fragprog.c
src/gallium/drivers/nvfx/nvfx_push.c [new file with mode: 0644]
src/gallium/drivers/nvfx/nvfx_resource.c
src/gallium/drivers/nvfx/nvfx_resource.h
src/gallium/drivers/nvfx/nvfx_screen.c
src/gallium/drivers/nvfx/nvfx_screen.h
src/gallium/drivers/nvfx/nvfx_state.c
src/gallium/drivers/nvfx/nvfx_state_emit.c
src/gallium/drivers/nvfx/nvfx_state_fb.c
src/gallium/drivers/nvfx/nvfx_surface.c
src/gallium/drivers/nvfx/nvfx_transfer.c
src/gallium/drivers/nvfx/nvfx_vbo.c
src/gallium/drivers/nvfx/nvfx_vertprog.c

index 685fa00b4556a04c60584e70d108a49afbfaa1da..14c11b278ad5539418c6ccdc9af1fa9728bd7d22 100644 (file)
@@ -6149,6 +6149,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV34TCL_FP_REG_CONTROL_UNK1_MASK                                             0xffff0000
 #define   NV34TCL_FP_REG_CONTROL_UNK0_SHIFT                                            0
 #define   NV34TCL_FP_REG_CONTROL_UNK0_MASK                                             0x0000ffff
+#define  NV34TCL_EDGEFLAG_ENABLE                                                       0x0000145c
 #define  NV34TCL_VP_CLIP_PLANES_ENABLE                                                 0x00001478
 #define   NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0                                         (1 <<  1)
 #define   NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1                                         (1 <<  5)
@@ -6182,10 +6183,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV34TCL_VTXFMT__SIZE                                                          0x00000010
 #define   NV34TCL_VTXFMT_TYPE_SHIFT                                                    0
 #define   NV34TCL_VTXFMT_TYPE_MASK                                                     0x0000000f
-#define    NV34TCL_VTXFMT_TYPE_FLOAT                                                   0x00000002
-#define    NV34TCL_VTXFMT_TYPE_HALF                                                    0x00000003
-#define    NV34TCL_VTXFMT_TYPE_UBYTE                                                   0x00000004
-#define    NV34TCL_VTXFMT_TYPE_USHORT                                                  0x00000005
+#define    NV34TCL_VTXFMT_TYPE_16_SNORM                                                        0x00000001
+#define    NV34TCL_VTXFMT_TYPE_32_FLOAT                                                        0x00000002
+#define    NV34TCL_VTXFMT_TYPE_16_FLOAT                                                        0x00000003
+#define    NV34TCL_VTXFMT_TYPE_8_UNORM                                                 0x00000004
+#define    NV34TCL_VTXFMT_TYPE_16_SSCALED                                                      0x00000005
+#define    NV34TCL_VTXFMT_TYPE_11_11_10_SNORM                                                  0x00000006
+#define    NV34TCL_VTXFMT_TYPE_8_USCALED                                                       0x00000007
 #define   NV34TCL_VTXFMT_SIZE_SHIFT                                                    4
 #define   NV34TCL_VTXFMT_SIZE_MASK                                                     0x000000f0
 #define   NV34TCL_VTXFMT_STRIDE_SHIFT                                                  8
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
deleted file mode 100644 (file)
index b165f7a..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef __NOUVEAU_UTIL_H__
-#define __NOUVEAU_UTIL_H__
-
-/* Determine how many vertices can be pushed into the command stream.
- * Where the remaining space isn't large enough to represent all verices,
- * split the buffer at primitive boundaries.
- *
- * Returns a count of vertices that can be rendered, and an index to
- * restart drawing at after a flush.
- */
-static INLINE unsigned
-nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
-                  unsigned mode, unsigned start, unsigned count,
-                  unsigned *restart)
-{
-       int max, adj = 0;
-
-       max  = remaining - overhead;
-       if (max < 0)
-               return 0;
-
-       max *= vpp;
-       if (max >= count)
-               return count;
-
-       switch (mode) {
-       case PIPE_PRIM_POINTS:
-               break;
-       case PIPE_PRIM_LINES:
-               max = max & 1;
-               break;
-       case PIPE_PRIM_TRIANGLES:
-               max = max - (max % 3);
-               break;
-       case PIPE_PRIM_QUADS:
-               max = max & ~3;
-               break;
-       case PIPE_PRIM_LINE_LOOP:
-       case PIPE_PRIM_LINE_STRIP:
-               if (max < 2)
-                       max = 0;
-               adj = 1;
-               break;
-       case PIPE_PRIM_POLYGON:
-       case PIPE_PRIM_TRIANGLE_STRIP:
-       case PIPE_PRIM_TRIANGLE_FAN:
-               if (max < 3)
-                       max = 0;
-               adj = 2;
-               break;
-       case PIPE_PRIM_QUAD_STRIP:
-               if (max < 4)
-                       max = 0;
-               adj = 3;
-               break;
-       default:
-               assert(0);
-       }
-
-       *restart = start + max - adj;
-       return max;
-}
-
-/* Integer base-2 logarithm, rounded towards zero. */
-static INLINE unsigned log2i(unsigned i)
-{
-       unsigned r = 0;
-
-       if (i & 0xffff0000) {
-               i >>= 16;
-               r += 16;
-       }
-       if (i & 0x0000ff00) {
-               i >>= 8;
-               r += 8;
-       }
-       if (i & 0x000000f0) {
-               i >>= 4;
-               r += 4;
-       }
-       if (i & 0x0000000c) {
-               i >>= 2;
-               r += 2;
-       }
-       if (i & 0x00000002) {
-               r += 1;
-       }
-       return r;
-}
-
-#endif
index 2834f8984c79f1c09b41f19942bc4dadfbc50b9a..6cbbad699eb5eb728a74fcffd300bf00302b2fab 100644 (file)
@@ -14,6 +14,7 @@ C_SOURCES = \
        nv30_fragtex.c \
        nv40_fragtex.c \
        nvfx_miptree.c \
+       nvfx_push.c \
        nvfx_query.c \
        nvfx_resource.c \
        nvfx_screen.c \
index 63c578a0ce1509b3a360a3fa32993cfe67a0ef6c..db8a8fc4b08db64a80c6d0a264ede4359b4577df 100644 (file)
@@ -1,7 +1,6 @@
 #include "util/u_format.h"
 
 #include "nvfx_context.h"
-#include "nouveau/nouveau_util.h"
 #include "nvfx_tex.h"
 #include "nvfx_resource.h"
 
@@ -44,9 +43,9 @@ nv30_sampler_view_init(struct pipe_context *pipe,
 
        txf = sv->u.init_fmt;
        txf |= (level != sv->base.last_level ? NV34TCL_TX_FORMAT_MIPMAP : 0);
-       txf |= log2i(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
-       txf |= log2i(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
-       txf |= log2i(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+       txf |= util_logbase2(u_minify(pt->width0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+       txf |= util_logbase2(u_minify(pt->height0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+       txf |= util_logbase2(u_minify(pt->depth0, level)) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
        txf |=  0x10000;
 
        sv->u.nv30.fmt[0] = tf->fmt[0] | txf;
index 44680e519596524d296098469502a5a1271ac25e..89bb8570efd650b659e093c0334b8aa80f5c5759 100644 (file)
@@ -6,13 +6,16 @@
 #include "nouveau/nouveau_screen.h"
 #include "nouveau/nouveau_winsys.h"
 #include "nvfx_resource.h"
+#include "nvfx_screen.h"
 
 void nvfx_buffer_destroy(struct pipe_screen *pscreen,
                                struct pipe_resource *presource)
 {
-       struct nvfx_resource *buffer = nvfx_resource(presource);
+       struct nvfx_buffer *buffer = nvfx_buffer(presource);
 
-       nouveau_screen_bo_release(pscreen, buffer->bo);
+       if(!(buffer->base.base.flags & NVFX_RESOURCE_FLAG_USER))
+               align_free(buffer->data);
+       nouveau_screen_bo_release(pscreen, buffer->base.bo);
        FREE(buffer);
 }
 
@@ -20,31 +23,22 @@ struct pipe_resource *
 nvfx_buffer_create(struct pipe_screen *pscreen,
                   const struct pipe_resource *template)
 {
-       struct nvfx_resource *buffer;
+       struct nvfx_screen* screen = nvfx_screen(pscreen);
+       struct nvfx_buffer* buffer;
 
-       buffer = CALLOC_STRUCT(nvfx_resource);
+       buffer = CALLOC_STRUCT(nvfx_buffer);
        if (!buffer)
                return NULL;
 
-       buffer->base = *template;
-       buffer->base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-       pipe_reference_init(&buffer->base.reference, 1);
-       buffer->base.screen = pscreen;
+       buffer->base.base = *template;
+       buffer->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+       pipe_reference_init(&buffer->base.base.reference, 1);
+       buffer->base.base.screen = pscreen;
+       buffer->size = util_format_get_stride(template->format, template->width0);
+       buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold;
+       buffer->data = align_malloc(buffer->size, 16);
 
-       buffer->bo = nouveau_screen_bo_new(pscreen,
-                                          16,
-                                          buffer->base.usage,
-                                          buffer->base.bind,
-                                          buffer->base.width0);
-
-       if (buffer->bo == NULL)
-               goto fail;
-
-       return &buffer->base;
-
-fail:
-       FREE(buffer);
-       return NULL;
+       return &buffer->base.base;
 }
 
 
@@ -54,29 +48,49 @@ nvfx_user_buffer_create(struct pipe_screen *pscreen,
                        unsigned bytes,
                        unsigned usage)
 {
-       struct nvfx_resource *buffer;
+       struct nvfx_screen* screen = nvfx_screen(pscreen);
+       struct nvfx_buffer* buffer;
 
-       buffer = CALLOC_STRUCT(nvfx_resource);
+       buffer = CALLOC_STRUCT(nvfx_buffer);
        if (!buffer)
                return NULL;
 
-       pipe_reference_init(&buffer->base.reference, 1);
-       buffer->base.flags = NVFX_RESOURCE_FLAG_LINEAR;
-       buffer->base.screen = pscreen;
-       buffer->base.format = PIPE_FORMAT_R8_UNORM;
-       buffer->base.usage = PIPE_USAGE_IMMUTABLE;
-       buffer->base.bind = usage;
-       buffer->base.width0 = bytes;
-       buffer->base.height0 = 1;
-       buffer->base.depth0 = 1;
-
-       buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
-       if (!buffer->bo)
-               goto fail;
-
-       return &buffer->base;
+       pipe_reference_init(&buffer->base.base.reference, 1);
+       buffer->base.base.flags = NVFX_RESOURCE_FLAG_LINEAR | NVFX_RESOURCE_FLAG_USER;
+       buffer->base.base.screen = pscreen;
+       buffer->base.base.format = PIPE_FORMAT_R8_UNORM;
+       buffer->base.base.usage = PIPE_USAGE_IMMUTABLE;
+       buffer->base.base.bind = usage;
+       buffer->base.base.width0 = bytes;
+       buffer->base.base.height0 = 1;
+       buffer->base.base.depth0 = 1;
+       buffer->data = ptr;
+       buffer->size = bytes;
+       buffer->bytes_to_draw_until_static = bytes * screen->static_reuse_threshold;
+       buffer->dirty_end = bytes;
+
+       return &buffer->base.base;
+}
 
-fail:
-       FREE(buffer);
-       return NULL;
+/* Copy the dirty byte range of buffer->data into the hardware bo, allocating the bo on first use. */
+void nvfx_buffer_upload(struct nvfx_buffer* buffer)
+{
+       unsigned dirty = buffer->dirty_end - buffer->dirty_begin;
+       if(!buffer->base.bo)
+       {
+               buffer->base.bo = nouveau_screen_bo_new(buffer->base.base.screen, 16,
+                                          buffer->base.base.usage, buffer->base.base.bind,
+                                          buffer->base.base.width0);
+               if(!buffer->base.bo)
+                       return; /* allocation failed; the dirty range is kept for a later attempt */
+       }
+       // TODO: may want to use a temporary in some cases
+       if(dirty && !nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR
+                               | (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0)))
+       {
+               /* reached only when the map call reported success (0); otherwise the range stays dirty */
+               memcpy(buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty);
+               nouveau_bo_unmap(buffer->base.bo);
+               buffer->dirty_begin = buffer->dirty_end = 0;
+       }
 }
index 1980176b23ec0789deb3a5c5ac04776b040e9935..94c854b22b8c07908b89a2a6c89c69d7408d89af 100644 (file)
@@ -76,7 +76,9 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
        nvfx_init_surface_functions(nvfx);
        nvfx_init_state_functions(nvfx);
        nvfx_init_sampling_functions(nvfx);
+       nvfx_init_vbo_functions(nvfx);
        nvfx_init_resource_functions(&nvfx->pipe);
+       nvfx_init_transfer_functions(&nvfx->pipe);
 
        /* Create, configure, and install fallback swtnl path */
        nvfx->draw = draw_create(&nvfx->pipe);
@@ -89,6 +91,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
        /* set these to that we init them on first validation */
        nvfx->state.scissor_enabled = ~0;
        nvfx->state.stipple_enabled = ~0;
+       nvfx->use_vertex_buffers = -1;
 
        LIST_INITHEAD(&nvfx->render_cache);
 
index bce19df044d46e331f17f79d4cfd3eeea2f66f07..8899bf991e123b8045dbde95acc306b461d5fa60 100644 (file)
@@ -44,6 +44,7 @@
 #define NVFX_NEW_SR            (1 << 13)
 #define NVFX_NEW_VERTCONST     (1 << 14)
 #define NVFX_NEW_FRAGCONST     (1 << 15)
+#define NVFX_NEW_INDEX (1 << 16)
 
 struct nvfx_rasterizer_state {
        struct pipe_rasterizer_state pipe;
@@ -71,9 +72,53 @@ struct nvfx_state {
        unsigned render_temps;
 };
 
+struct nvfx_per_vertex_element { /* element type of nvfx_vtxelt_state.per_vertex[] */
+       unsigned idx; /* presumably the vertex element / attribute slot -- TODO confirm */
+        unsigned vertex_buffer_index; /* presumably selects an entry of the context's vtxbuf[] -- TODO confirm */
+        unsigned src_offset; /* presumably an offset inside that vertex buffer -- TODO confirm units */
+};
+
+struct nvfx_low_frequency_element { /* element type of nvfx_vtxelt_state.constant[]; also the base of nvfx_per_instance_element */
+       unsigned idx; /* presumably the vertex element / attribute slot -- TODO confirm */
+       unsigned vertex_buffer_index; /* presumably selects an entry of the context's vtxbuf[] -- TODO confirm */
+       unsigned src_offset; /* presumably an offset inside that vertex buffer -- TODO confirm units */
+        void (*fetch_rgba_float)(float *dst, const uint8_t *src, unsigned i, unsigned j); /* callback that writes floats to dst from raw bytes at src */
+        unsigned ncomp; /* presumably the number of components to emit for this element -- TODO confirm */
+};
+
+struct nvfx_per_instance_element { /* element type of nvfx_vtxelt_state.per_instance[] */
+       struct nvfx_low_frequency_element base; /* shared fields; first member, so a pointer to this struct also points at base */
+       unsigned instance_divisor; /* presumably mirrors pipe_vertex_element.instance_divisor -- TODO confirm */
+};
+
+struct nvfx_per_vertex_buffer_info /* element type of nvfx_vtxelt_state.per_vertex_buffer_info[] */
+{
+       unsigned vertex_buffer_index; /* presumably selects an entry of the context's vtxbuf[] -- TODO confirm */
+       unsigned per_vertex_size; /* NOTE(review): looks like the amount of data read from that buffer per vertex -- confirm units */
+};
+
 struct nvfx_vtxelt_state {
        struct pipe_vertex_element pipe[16];
        unsigned num_elements;
+       unsigned vtxfmt[16];
+
+       unsigned num_per_vertex_buffer_infos;
+       struct nvfx_per_vertex_buffer_info per_vertex_buffer_info[16];
+
+       unsigned num_per_vertex;
+       struct nvfx_per_vertex_element per_vertex[16];
+
+       unsigned num_per_instance;
+       struct nvfx_per_instance_element per_instance[16];
+
+       unsigned num_constant;
+       struct nvfx_low_frequency_element constant[16];
+
+       boolean needs_translate;
+       struct translate* translate;
+
+       unsigned vertex_length;
+       unsigned max_vertices_per_packet;
 };
 
 struct nvfx_render_target {
@@ -127,8 +172,6 @@ struct nvfx_context {
        struct pipe_viewport_state viewport;
        struct pipe_framebuffer_state framebuffer;
        struct pipe_index_buffer idxbuf;
-       struct pipe_resource *idxbuf_buffer;
-       unsigned idxbuf_format;
        struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
        struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
        unsigned nr_samplers;
@@ -137,8 +180,14 @@ struct nvfx_context {
        struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
        unsigned vtxbuf_nr;
        struct nvfx_vtxelt_state *vtxelt;
+       int base_vertex;
+       boolean use_index_buffer;
+       /* -1 = hardware input setup is outdated
+        * 0 = hardware input setup is for inline vertices
+        * 1 = hardware input setup is for hardware vertices
+        */
+       int use_vertex_buffers;
 
-       unsigned vbo_bo;
        unsigned hw_vtxelt_nr;
        uint8_t hw_samplers;
        uint32_t hw_txf[8];
@@ -180,11 +229,7 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers,
 
 /* nvfx_draw.c */
 extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx);
-extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,
-                                     struct pipe_resource *idxbuf,
-                                     unsigned ib_size, int ib_bias,
-                                     unsigned mode,
-                                     unsigned start, unsigned count);
+extern void nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info);
 extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);
 
 /* nvfx_fb.c */
@@ -245,17 +290,53 @@ extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
 extern void nvfx_state_emit(struct nvfx_context *nvfx);
 
 /* nvfx_transfer.c */
-extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx);
+extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
 
 /* nvfx_vbo.c */
 extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx);
 extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);
+extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx);
+extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx);
 extern void nvfx_draw_vbo(struct pipe_context *pipe,
                           const struct pipe_draw_info *info);
+extern void nvfx_init_vbo_functions(struct nvfx_context *nvfx);
+extern unsigned nvfx_vertex_formats[];
 
 /* nvfx_vertprog.c */
 extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx);
 extern void nvfx_vertprog_destroy(struct nvfx_context *,
                                  struct nvfx_vertex_program *);
 
+/* nvfx_push.c */
+extern void nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
+
+/* Emit vertex attribute `attrib` as 1..4 floats taken from v; any other ncomp emits nothing.
+ * The caller must WAIT_RING(chan, ncomp + 1) or equivalent beforehand! */
+static inline void nvfx_emit_vtx_attr(struct nouveau_channel* chan, unsigned attrib, float* v, unsigned ncomp)
+{
+       unsigned method, c;
+
+       /* pick the VTX_ATTR method whose width matches the component count */
+       switch (ncomp) {
+       case 1:
+               method = NV34TCL_VTX_ATTR_1F(attrib);
+               break;
+       case 2:
+               method = NV34TCL_VTX_ATTR_2F_X(attrib);
+               break;
+       case 3:
+               method = NV34TCL_VTX_ATTR_3F_X(attrib);
+               break;
+       case 4:
+               method = NV34TCL_VTX_ATTR_4F_X(attrib);
+               break;
+       default:
+               return;
+       }
+
+       OUT_RING(chan, RING_3D(method, ncomp));
+       for (c = 0; c < ncomp; ++c)
+               OUT_RING(chan, fui(v[c]));
+}
+
 #endif
index 22cff370b77e44a3851a64704a68a6e04f9f19a3..331e28418adc681d1a029a445b85b9ad575c8c61 100644 (file)
@@ -9,6 +9,7 @@
 #include "draw/draw_pipe.h"
 
 #include "nvfx_context.h"
+#include "nvfx_resource.h"
 
 /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
  * often at all.  Uses "quadro style" vertex submission + a fixed vertex
@@ -39,30 +40,21 @@ nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v)
                unsigned idx = nvfx->swtnl.draw[i];
                unsigned hw = nvfx->swtnl.hw[i];
 
+               WAIT_RING(chan, 5);
                switch (nvfx->swtnl.emit[i]) {
                case EMIT_OMIT:
                        break;
                case EMIT_1F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1);
                        break;
                case EMIT_2F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
-                       OUT_RING  (chan, fui(v->data[idx][1]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2);
                        break;
                case EMIT_3F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
-                       OUT_RING  (chan, fui(v->data[idx][1]));
-                       OUT_RING  (chan, fui(v->data[idx][2]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3);
                        break;
                case EMIT_4F:
-                       BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
-                       OUT_RING  (chan, fui(v->data[idx][0]));
-                       OUT_RING  (chan, fui(v->data[idx][1]));
-                       OUT_RING  (chan, fui(v->data[idx][2]));
-                       OUT_RING  (chan, fui(v->data[idx][3]));
+                       nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4);
                        break;
                case 0xff:
                        BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4);
@@ -231,15 +223,9 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx)
 }
 
 void
-nvfx_draw_elements_swtnl(struct pipe_context *pipe,
-                        struct pipe_resource *idxbuf,
-                        unsigned idxbuf_size, int idxbuf_bias,
-                        unsigned mode, unsigned start, unsigned count)
+nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info)
 {
        struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
-       struct pipe_transfer *ib_transfer = NULL;
-       struct pipe_transfer *cb_transfer = NULL;
        unsigned i;
        void *map;
 
@@ -247,18 +233,15 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
                return;
        nvfx_state_emit(nvfx);
 
+       /* these must be passed without adding the offsets */
        for (i = 0; i < nvfx->vtxbuf_nr; i++) {
-               map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer,
-                                      PIPE_TRANSFER_READ,
-                                     &vb_transfer[i]);
+               map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data;
                draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
        }
 
-       if (idxbuf) {
-               map = pipe_buffer_map(pipe, idxbuf,
-                                     PIPE_TRANSFER_READ,
-                                     &ib_transfer);
-               draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, idxbuf_bias, map);
+       if (info->indexed) {
+               map = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+               draw_set_mapped_element_buffer_range(nvfx->draw, nvfx->idxbuf.index_size, info->index_bias, info->min_index, info->max_index, map);
        } else {
                draw_set_mapped_element_buffer(nvfx->draw, 0, 0, NULL);
        }
@@ -266,28 +249,14 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe,
        if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
                const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
 
-               map = pipe_buffer_map(pipe,
-                                     nvfx->constbuf[PIPE_SHADER_VERTEX],
-                                     PIPE_TRANSFER_READ,
-                                     &cb_transfer);
+               map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data;
                draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
                                                 map, nr);
        }
 
-       draw_arrays(nvfx->draw, mode, start, count);
-
-       for (i = 0; i < nvfx->vtxbuf_nr; i++)
-               pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]);
-
-       if (idxbuf)
-               pipe_buffer_unmap(pipe, idxbuf, ib_transfer);
-
-       if (nvfx->constbuf[PIPE_SHADER_VERTEX])
-               pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX],
-                                 cb_transfer);
+       draw_arrays_instanced(nvfx->draw, info->mode, info->start, info->count, info->start_instance, info->instance_count);
 
        draw_flush(nvfx->draw);
-       pipe->flush(pipe, 0, NULL);
 }
 
 static INLINE void
index ee41f03b9b8fd7064f64ff75efeb63f266c427c5..ae4fe3aa262cf4ebb867335a04ca1d96130a9d93 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "nvfx_context.h"
 #include "nvfx_shader.h"
+#include "nvfx_resource.h"
 
 #define MAX_CONSTS 128
 #define MAX_IMM 32
@@ -925,10 +926,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
 
                if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
                        struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
-                       // TODO: avoid using transfers, just directly the buffer
-                       struct pipe_transfer* transfer;
-                       // TODO: does this check make any sense, or should we do this unconditionally?
-                       uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer);
+                       uint32_t* map = (uint32_t*)nvfx_buffer(constbuf)->data;
                        uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
                        uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset);
                        int i;
@@ -942,7 +940,6 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
                                        nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t));
                                }
                        }
-                       pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer);
                }
        }
 
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
new file mode 100644 (file)
index 0000000..52e891c
--- /dev/null
@@ -0,0 +1,402 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_split_prim.h"
+#include "translate/translate.h"
+
+#include "nvfx_context.h"
+#include "nvfx_resource.h"
+
+struct push_context { /* state shared by the emit_* callbacks; filled in by nvfx_push_vbo */
+       struct nouveau_channel* chan; /* channel all OUT_RING commands are written to */
+
+       void *idxbuf; /* CPU pointer to the index data, or NULL when no CPU-side index lookup is done */
+       int32_t idxbias; /* value added to indices by the emit_elt* callbacks */
+
+       float edgeflag; /* initialised to 0.5f by nvfx_push_vbo; not read by the callbacks in this file */
+       int edgeflag_attr; /* values < 16 make nvfx_push_vbo reserve extra per-vertex ring space; currently always 0xff */
+
+       unsigned vertex_length; /* size of one translated vertex, in units of chan->cur increments */
+       unsigned max_vertices_per_packet; /* upper bound on vertices put in a single VERTEX_DATA packet */
+
+       struct translate* translate; /* translate object that writes vertex data directly at chan->cur */
+};
+
+static void /* util_split_prim "edge" callback: emits EDGEFLAG_ENABLE with 1 or 0 */
+emit_edgeflag(void *priv, boolean enabled)
+{
+       struct push_context* ctx = priv; /* priv is the push_context installed as s.priv */
+       struct nouveau_channel *chan = ctx->chan;
+
+       OUT_RING(chan, RING_3D(NV34TCL_EDGEFLAG_ENABLE, 1));
+       OUT_RING(chan, enabled ? 1 : 0); /* normalise the boolean to exactly 0 or 1 */
+}
+
+static void /* emit callback: push `count` vertices inline, looked up through 8-bit indices starting at index `start` */
+emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
+{
+        struct push_context *ctx = priv;
+        uint8_t* elts = (uint8_t*)ctx->idxbuf + start; /* first index to fetch */
+
+        while(count)
+        {
+                unsigned push = MIN2(count, ctx->max_vertices_per_packet); /* vertices in this packet */
+                unsigned length = push * ctx->vertex_length; /* payload size of this packet */
+
+                OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+                ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur); /* translate writes straight into the ring */
+                ctx->chan->cur += length; /* advance the ring cursor past the data translate wrote */
+
+                count -= push;
+                elts += push;
+        }
+}
+
+static void /* emit callback: push `count` vertices inline, looked up through 16-bit indices starting at index `start` */
+emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
+{
+       struct push_context *ctx = priv;
+        uint16_t* elts = (uint16_t*)ctx->idxbuf + start; /* first index to fetch */
+
+        while(count)
+        {
+                unsigned push = MIN2(count, ctx->max_vertices_per_packet); /* vertices in this packet */
+                unsigned length = push * ctx->vertex_length; /* payload size of this packet */
+
+                OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+                ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur); /* translate writes straight into the ring */
+                ctx->chan->cur += length; /* advance the ring cursor past the data translate wrote */
+
+                count -= push;
+                elts += push;
+        }
+}
+
+static void /* emit callback: push `count` vertices inline, looked up through 32-bit indices starting at index `start` */
+emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
+{
+        struct push_context *ctx = priv;
+        uint32_t* elts = (uint32_t*)ctx->idxbuf + start; /* first index to fetch */
+
+        while(count)
+        {
+                unsigned push = MIN2(count, ctx->max_vertices_per_packet); /* vertices in this packet */
+                unsigned length = push * ctx->vertex_length; /* payload size of this packet */
+
+                OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+                ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur); /* translate writes straight into the ring */
+                ctx->chan->cur += length; /* advance the ring cursor past the data translate wrote */
+
+                count -= push;
+                elts += push;
+        }
+}
+
+static void /* emit callback: push `count` consecutive vertices inline, beginning at vertex `start` (no index lookup) */
+emit_vertices(void *priv, unsigned start, unsigned count)
+{
+        struct push_context *ctx = priv;
+
+        while(count)
+        {
+               unsigned push = MIN2(count, ctx->max_vertices_per_packet); /* vertices in this packet */
+               unsigned length = push * ctx->vertex_length; /* payload size of this packet */
+
+               OUT_RING(ctx->chan, RING_3D_NI(NV34TCL_VERTEX_DATA, length));
+               ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur); /* translate writes straight into the ring */
+               ctx->chan->cur += length; /* advance the ring cursor past the data translate wrote */
+
+               count -= push;
+               start += push;
+        }
+}
+
+static void /* emit `vc` consecutive elements starting at `start` as (count - 1) << 24 | first words written to method `reg` */
+emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       unsigned nr = (vc & 0xff); /* remainder that does not fill a whole 256-element batch */
+       if (nr) {
+               OUT_RING(chan, RING_3D(reg, 1));
+               OUT_RING  (chan, ((nr - 1) << 24) | start); /* partial batch goes first */
+               start += nr;
+       }
+
+       nr = vc >> 8; /* number of full 256-element batches left */
+       while (nr) {
+               unsigned push = nr > 2047 ? 2047 : nr; /* at most 2047 batch words per packet */
+
+               nr -= push;
+
+               OUT_RING(chan, RING_3D_NI(reg, push));
+               while (push--) {
+                       OUT_RING(chan, ((0x100 - 1) << 24) | start); /* one full batch of 0x100 elements */
+                       start += 0x100;
+               }
+       }
+}
+
+static void /* emit callback: emit_ranges() targeting NV34TCL_VB_INDEX_BATCH */
+emit_ib_ranges(void* priv, unsigned start, unsigned vc)
+{
+       emit_ranges(priv, start, vc, NV34TCL_VB_INDEX_BATCH);
+}
+
+static void /* emit callback: emit_ranges() targeting NV34TCL_VB_VERTEX_BATCH */
+emit_vb_ranges(void* priv, unsigned start, unsigned vc)
+{
+       emit_ranges(priv, start, vc, NV34TCL_VB_VERTEX_BATCH);
+}
+
+static INLINE void /* emit callback: send `vc` 8-bit indices, beginning at index `start`, as inline element words */
+emit_elt8(void* priv, unsigned start, unsigned vc)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
+       int idxbias = ctx->idxbias; /* added to every index that is emitted */
+
+       if (vc & 1) { /* odd count: send one index alone so the rest can be packed in pairs */
+               OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+               OUT_RING  (chan, elts[0] + idxbias); /* bias applies here too, as for every paired index below */
+               elts++; vc--;
+       }
+
+       while (vc) {
+               unsigned i;
+               unsigned push = MIN2(vc, 2047 * 2); /* two indices per word, at most 2047 words per packet */
+
+               OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+               for (i = 0; i < push; i+=2)
+                       OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+               vc -= push;
+               elts += push;
+       }
+}
+
+static INLINE void /* emit callback: send `vc` 16-bit indices, beginning at index `start`, as inline element words */
+emit_elt16(void* priv, unsigned start, unsigned vc)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
+       int idxbias = ctx->idxbias; /* added to every index that is emitted */
+
+       if (vc & 1) { /* odd count: send one index alone so the rest can be packed in pairs */
+               OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
+               OUT_RING  (chan, elts[0] + idxbias); /* bias applies here too, as for every paired index below */
+               elts++; vc--;
+       }
+
+       while (vc) {
+               unsigned i;
+               unsigned push = MIN2(vc, 2047 * 2); /* two indices per word, at most 2047 words per packet */
+
+               OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
+               for (i = 0; i < push; i+=2)
+                       OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
+
+               vc -= push;
+               elts += push;
+       }
+}
+
+static INLINE void /* emit callback: send `vc` 32-bit indices, beginning at index `start`, one index per word */
+emit_elt32(void* priv, unsigned start, unsigned vc)
+{
+       struct push_context* ctx = priv;
+       struct nouveau_channel *chan = ctx->chan;
+       uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
+       int idxbias = ctx->idxbias; /* added to every index when non-zero */
+
+       while (vc) {
+               unsigned push = MIN2(vc, 2047); /* at most 2047 index words per packet */
+
+               OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
+               assert(AVAIL_RING(chan) >= push); /* caller is expected to have sized the batch to the free ring space */
+               if(idxbias)
+               {
+                       for(unsigned i = 0; i < push; ++i)
+                               OUT_RING(chan, elts[i] + idxbias);
+               }
+               else
+                       OUT_RINGp(chan, elts, push); /* no bias: hand the index array over in one call */
+
+               vc -= push;
+               elts += push;
+       }
+}
+
+void /* draw `info` by pushing vertex data, indices or ranges on the channel, once per instance */
+nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+       struct nvfx_context *nvfx = nvfx_context(pipe);
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
+       struct push_context ctx;
+       struct util_split_prim s;
+       unsigned instances_left = info->instance_count;
+       int vtx_value; /* per-vertex ring cost code; interpreted by the max_verts computation below */
+       unsigned hw_mode = nvgl_primitive(info->mode);
+       int i;
+       struct
+       {
+               uint8_t* map; /* pointer to the current value of this per-instance element */
+               unsigned step; /* instances drawn since the value last advanced */
+       } per_instance[16]; /* assumes num_per_instance <= 16 -- TODO confirm against the vertex elements CSO */
+       unsigned p_overhead = 0
+                       + 4 /* begin/end */
+                       + 4; /* potential edgeflag enable/disable */
+
+       ctx.chan = nvfx->screen->base.channel;
+       ctx.translate = nvfx->vtxelt->translate;
+       ctx.idxbuf = NULL;
+       ctx.vertex_length = nvfx->vtxelt->vertex_length;
+       ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
+       ctx.edgeflag = 0.5f;
+       // TODO: figure out if we really want to handle this, and do so in that case
+       ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;
+
+       if(!nvfx->use_vertex_buffers) /* inline path: vertex data is translated directly into the ring */
+       {
+               for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+               {
+                       struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                       uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
+                       if(info->indexed)
+                               data += info->index_bias * vb->stride; /* fold index_bias into the base pointer handed to translate */
+                       ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+               }
+
+               if(ctx.edgeflag_attr < 16)
+                       vtx_value = -(ctx.vertex_length + 3);  /* vertex data and edgeflag header and value */
+               else
+               {
+                       p_overhead += 1; /* initial vertex_data header */
+                       vtx_value = -ctx.vertex_length;  /* vertex data only, no edgeflag words */
+               }
+
+               if (info->indexed) {
+                       // XXX: this case is broken and probably needs a new VTX_ATTR push path
+                       if (nvfx->idxbuf.index_size == 1)
+                               s.emit = emit_vertices_lookup8;
+                       else if (nvfx->idxbuf.index_size == 2)
+                               s.emit = emit_vertices_lookup16;
+                       else
+                               s.emit = emit_vertices_lookup32;
+               } else
+                       s.emit = emit_vertices;
+       }
+       else /* vertex data comes from vertex buffers; only ranges or indices are pushed */
+       {
+               if(!info->indexed || nvfx->use_index_buffer)
+               {
+                       s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
+                       p_overhead += 3;
+                       vtx_value = 0;
+               }
+               else if (nvfx->idxbuf.index_size == 4)
+               {
+                       s.emit = emit_elt32;
+                       p_overhead += 1;
+                       vtx_value = 8;
+               }
+               else
+               {
+                       s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
+                       p_overhead += 3;
+                       vtx_value = 7;
+               }
+       }
+
+       ctx.idxbias = info->index_bias;
+       if(nvfx->use_vertex_buffers)
+               ctx.idxbias -= nvfx->base_vertex; /* bias is taken relative to nvfx->base_vertex on this path */
+
+       /* map index buffer, if present */
+       if (info->indexed && !nvfx->use_index_buffer)
+               ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;
+
+       s.priv = &ctx;
+       s.edge = emit_edgeflag;
+
+       for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i) /* emit the initial value of every per-instance attribute */
+       {
+               struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+               struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+               float v[4];
+               per_instance[i].step = info->start_instance % ve->instance_divisor; /* assumes instance_divisor != 0 for per-instance elements -- TODO confirm */
+               per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset; /* NOTE(review): start_instance / instance_divisor is not applied to this pointer -- confirm intended */
+
+               nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+
+               WAIT_RING(chan, 5);
+               nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+       }
+
+       /* per-instance loop */
+       while (instances_left--) {
+               int max_verts;
+               boolean done;
+
+               util_split_prim_init(&s, info->mode, info->start, info->count);
+               nvfx_state_emit(nvfx);
+               for(;;) {
+                       max_verts  = AVAIL_RING(chan);
+                       max_verts -= p_overhead;
+
+                       /* if vtx_value < 0, each vertex is -vtx_value words long
+                        * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
+                        */
+                       if(vtx_value < 0)
+                       {
+                               max_verts /= -vtx_value;
+                               max_verts -= (max_verts >> 10); /* vertex data headers */
+                       }
+                       else
+                       {
+                               if(max_verts >= (1 << 23)) /* avoid overflow here */
+                                       max_verts = (1 << 23);
+                               max_verts = (max_verts * 255) >> vtx_value;
+                       }
+
+                       //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);
+
+                       if(max_verts >= 16) /* only open a primitive when at least 16 vertices fit; otherwise fire the ring first */
+                       {
+                               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+                               OUT_RING(chan, hw_mode);
+                               done = util_split_prim_next(&s, max_verts);
+                               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
+                               OUT_RING(chan, 0);
+
+                               if(done)
+                                       break;
+                       }
+
+                       FIRE_RING(chan); /* presumably submits the queued commands to free ring space -- confirm */
+                       nvfx_state_emit(nvfx); /* state is emitted again before continuing the draw */
+               }
+
+               /* set data for the next instance, if any changed */
+               for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
+               {
+                       struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
+
+                       if(++per_instance[i].step == ve->instance_divisor) /* divisor reached: advance to the next stored value */
+                       {
+                               float v[4];
+                               per_instance[i].map += vb->stride;
+                               per_instance[i].step = 0;
+
+                               nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);
+                               WAIT_RING(chan, 5);
+                               nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
+                       }
+               }
+       }
+}
index 1c921b471004649995d39e25c13d4b44c1e38b88..3a46e0a7a5788f0890f3ffe15b20700e6aa86c91 100644 (file)
@@ -59,12 +59,6 @@ nvfx_resource_get_handle(struct pipe_screen *pscreen,
 void
 nvfx_init_resource_functions(struct pipe_context *pipe)
 {
-       pipe->get_transfer = nvfx_transfer_new;
-       pipe->transfer_map = nvfx_transfer_map;
-       pipe->transfer_flush_region = u_default_transfer_flush_region;
-       pipe->transfer_unmap = nvfx_transfer_unmap;
-       pipe->transfer_destroy = util_staging_transfer_destroy;
-       pipe->transfer_inline_write = u_default_transfer_inline_write;
        pipe->is_resource_referenced = nvfx_resource_is_referenced;
 }
 
index ff86f6d9cb6809582fa3ea8c4f2b55f6c46e33d0..583be4de2ae2596bca94c091fd1163fd701254b3 100644 (file)
@@ -17,8 +17,23 @@ struct nvfx_resource {
        struct nouveau_bo *bo;
 };
 
+static INLINE /* cast helper: view a pipe_resource pointer as the driver's nvfx_resource */
+struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
+{
+       return (struct nvfx_resource *)resource; /* plain pointer cast; no checking is performed */
+}
+
 #define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+#define NVFX_RESOURCE_FLAG_USER (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? Returns TRUE iff its bo has a non-zero handle. */
+static INLINE boolean
+nvfx_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+   return nvfx_resource(resource)->bo->handle != 0; /* explicit test: converting a wide handle to a narrower boolean could drop set bits */
+}
 
+/* is resource in VRAM? */
 static inline int
 nvfx_resource_on_gpu(struct pipe_resource* pr)
 {
@@ -63,12 +78,6 @@ struct nvfx_surface {
        struct nvfx_miptree* temp;
 };
 
-static INLINE 
-struct nvfx_resource *nvfx_resource(struct pipe_resource *resource)
-{
-       return (struct nvfx_resource *)resource;
-}
-
 static INLINE struct nouveau_bo *
 nvfx_surface_buffer(struct pipe_surface *surf)
 {
@@ -106,22 +115,6 @@ nvfx_miptree_from_handle(struct pipe_screen *pscreen,
                         const struct pipe_resource *template,
                         struct winsys_handle *whandle);
 
-struct pipe_resource *
-nvfx_buffer_create(struct pipe_screen *pscreen,
-                  const struct pipe_resource *template);
-
-void
-nvfx_buffer_destroy(struct pipe_screen *pscreen,
-                    struct pipe_resource *presource);
-
-struct pipe_resource *
-nvfx_user_buffer_create(struct pipe_screen *screen,
-                       void *ptr,
-                       unsigned bytes,
-                       unsigned usage);
-
-
-
 void
 nvfx_miptree_surface_del(struct pipe_surface *ps);
 
@@ -173,4 +166,58 @@ nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf);
 void
 nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf);
 
+struct nvfx_buffer /* buffer resource whose contents are reachable by the CPU through `data` */
+{
+       struct nvfx_resource base; /* must stay first: nvfx_buffer() casts a pipe_resource pointer to this struct */
+       uint8_t* data; /* CPU pointer to the buffer contents; read directly by the draw and constant-upload code */
+       unsigned size; /* presumably the size of `data` in bytes -- TODO confirm */
+
+       /* the range of data not yet uploaded to the GPU bo */
+       unsigned dirty_begin;
+       unsigned dirty_end;
+
+       /* whether all transfers were unsynchronized */
+       boolean dirty_unsynchronized;
+
+       /* whether it would have been profitable to upload
+        * the latest updated data to the GPU immediately */
+       boolean last_update_static;
+
+       /* how many bytes we need to draw before we deem
+        * the buffer to be static
+        */
+       long long bytes_to_draw_until_static; /* a negative value means the threshold has been passed, see nvfx_buffer_seems_static() */
+};
+
+static inline struct nvfx_buffer* nvfx_buffer(struct pipe_resource* pr) /* cast helper: view a pipe_resource pointer as an nvfx_buffer */
+{
+       return (struct nvfx_buffer*)pr; /* plain pointer cast; the caller must know pr really is a buffer */
+}
+
+/* this is a heuristic to determine whether we are better off uploading the
+ * buffer to the GPU, or just continuing pushing it on the FIFO
+ */
+static inline boolean nvfx_buffer_seems_static(struct nvfx_buffer* buffer)
+{
+       return buffer->last_update_static /* the last update was already judged worth uploading */
+               || buffer->bytes_to_draw_until_static < 0; /* or the drawn-bytes budget has gone negative */
+}
+
+struct pipe_resource *
+nvfx_buffer_create(struct pipe_screen *pscreen,
+                  const struct pipe_resource *template);
+
+void
+nvfx_buffer_destroy(struct pipe_screen *pscreen,
+                    struct pipe_resource *presource);
+
+struct pipe_resource *
+nvfx_user_buffer_create(struct pipe_screen *screen,
+                       void *ptr,
+                       unsigned bytes,
+                       unsigned usage);
+
+void
+nvfx_buffer_upload(struct nvfx_buffer* buffer);
+
 #endif
index a1b8361a9a47595f35e6d44d22b022cb3e76a6d9..7e3caf8d2e372fcfe38f2ce799c7f6023a3d47a4 100644 (file)
@@ -163,11 +163,11 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
 }
 
 static boolean
-nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
+nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
                                     enum pipe_format format,
                                     enum pipe_texture_target target,
                                     unsigned sample_count,
-                                    unsigned tex_usage, unsigned geom_flags)
+                                    unsigned bind, unsigned geom_flags)
 {
        struct nvfx_screen *screen = nvfx_screen(pscreen);
        struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
@@ -175,7 +175,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
         if (sample_count > 1)
                return FALSE;
 
-       if (tex_usage & PIPE_BIND_RENDER_TARGET) {
+       if (bind & PIPE_BIND_RENDER_TARGET) {
                switch (format) {
                case PIPE_FORMAT_B8G8R8A8_UNORM:
                case PIPE_FORMAT_B8G8R8X8_UNORM:
@@ -186,7 +186,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
                }
        }
 
-       if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
+       if (bind & PIPE_BIND_DEPTH_STENCIL) {
                switch (format) {
                case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
                case PIPE_FORMAT_X8Z24_UNORM:
@@ -201,7 +201,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
                }
        }
 
-       if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
+       if (bind & PIPE_BIND_SAMPLER_VIEW) {
                struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
                if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
                        return FALSE;
@@ -218,6 +218,22 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
                }
        }
 
+       // note that we do actually support everything through translate
+       if (bind & PIPE_BIND_VERTEX_BUFFER) {
+               unsigned type = nvfx_vertex_formats[format];
+               if(!type)
+                       return FALSE;
+       }
+
+       if (bind & PIPE_BIND_INDEX_BUFFER) {
+               // 8-bit indices supported, but not in hardware index buffer
+               if(format != PIPE_FORMAT_R16_USCALED && format != PIPE_FORMAT_R32_USCALED)
+                       return FALSE;
+       }
+
+       if(bind & PIPE_BIND_STREAM_OUTPUT)
+               return FALSE;
+
        return TRUE;
 }
 
@@ -387,7 +403,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        pscreen->destroy = nvfx_screen_destroy;
        pscreen->get_param = nvfx_screen_get_param;
        pscreen->get_paramf = nvfx_screen_get_paramf;
-       pscreen->is_format_supported = nvfx_screen_surface_format_supported;
+       pscreen->is_format_supported = nvfx_screen_is_format_supported;
        pscreen->context_create = nvfx_create;
 
        switch (dev->chipset & 0xf0) {
@@ -419,6 +435,11 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        }
 
        screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+       screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
+
+       screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
+       screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
+       screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
 
        screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
 
index 4dedbe9cb40ee0ff7e61a551006f9c325295a76e..473a1127752d440035716a929a6169b8c1c2a2fc 100644 (file)
@@ -16,6 +16,7 @@ struct nvfx_screen {
 
        unsigned is_nv4x; /* either 0 or ~0 */
        boolean force_swtnl;
+       boolean trace_draw;
        unsigned vertex_buffer_reloc_flags;
        unsigned index_buffer_reloc_flags;
 
@@ -33,6 +34,18 @@ struct nvfx_screen {
        struct nouveau_resource *vp_data_heap;
 
        struct nv04_2d_context* eng2d;
+
+       /* Once the amount of bytes drawn from the buffer reaches the updated size times this value,
+        * we will assume that the buffer will be drawn a huge number of times before the
+        * next modification
+        */
+       float static_reuse_threshold;
+
+       /* Cost of allocating a buffer in terms of the cost of copying a byte to a hardware buffer */
+       unsigned buffer_allocation_cost;
+
+       /* inline_cost/hardware_cost conversion ratio */
+       float inline_cost_per_hardware_cost;
 };
 
 static INLINE struct nvfx_screen *
index d459f9a88013ccabc43404275abe71e4951f71cb..25d29720a853c140b5ed8537b2f4b3c30bfec078 100644 (file)
@@ -441,83 +441,6 @@ nvfx_set_viewport_state(struct pipe_context *pipe,
        nvfx->draw_dirty |= NVFX_NEW_VIEWPORT;
 }
 
-static void
-nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
-                       const struct pipe_vertex_buffer *vb)
-{
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-
-       for(unsigned i = 0; i < count; ++i)
-       {
-               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
-               nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
-               nvfx->vtxbuf[i].max_index = vb[i].max_index;
-               nvfx->vtxbuf[i].stride = vb[i].stride;
-       }
-
-       for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
-               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
-
-       nvfx->vtxbuf_nr = count;
-
-       nvfx->dirty |= NVFX_NEW_ARRAYS;
-       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
-}
-
-static void
-nvfx_set_index_buffer(struct pipe_context *pipe,
-                     const struct pipe_index_buffer *ib)
-{
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-
-       /* TODO make this more like a state */
-
-       if(ib)
-       {
-               pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
-               nvfx->idxbuf.index_size = ib->index_size;
-               nvfx->idxbuf.offset = ib->offset;
-       }
-       else
-       {
-               pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
-               nvfx->idxbuf.index_size = 0;
-               nvfx->idxbuf.offset = 0;
-       }
-}
-
-static void *
-nvfx_vtxelts_state_create(struct pipe_context *pipe,
-                         unsigned num_elements,
-                         const struct pipe_vertex_element *elements)
-{
-       struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
-
-       assert(num_elements < 16); /* not doing fallbacks yet */
-       cso->num_elements = num_elements;
-       memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
-
-/*     nvfx_vtxelt_construct(cso);*/
-
-       return (void *)cso;
-}
-
-static void
-nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
-{
-       FREE(hwcso);
-}
-
-static void
-nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
-{
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-
-       nvfx->vtxelt = hwcso;
-       nvfx->dirty |= NVFX_NEW_ARRAYS;
-       /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/
-}
-
 void
 nvfx_init_state_functions(struct nvfx_context *nvfx)
 {
@@ -553,11 +476,4 @@ nvfx_init_state_functions(struct nvfx_context *nvfx)
        nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple;
        nvfx->pipe.set_scissor_state = nvfx_set_scissor_state;
        nvfx->pipe.set_viewport_state = nvfx_set_viewport_state;
-
-       nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
-       nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
-       nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
-
-       nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
-       nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
 }
index dc70f3de87020acf01b9a7ee6451a228ecf0f5fa..b9d189779197b57ac5d496b57a1311629419ba50 100644 (file)
@@ -8,6 +8,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
 {
        struct nouveau_channel* chan = nvfx->screen->base.channel;
        unsigned dirty;
+       unsigned still_dirty = 0;
        int all_swizzled = -1;
        boolean flush_tex_cache = FALSE;
 
@@ -52,11 +53,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                                return FALSE;
                }
 
-               if(dirty & (NVFX_NEW_ARRAYS))
+               if(dirty & NVFX_NEW_ARRAYS)
                {
                        if(!nvfx_vbo_validate(nvfx))
                                return FALSE;
                }
+
+               if(dirty & NVFX_NEW_INDEX)
+               {
+                       if(nvfx->use_index_buffer)
+                               nvfx_idxbuf_validate(nvfx);
+                       else
+                               still_dirty = NVFX_NEW_INDEX;
+               }
        }
        else
        {
@@ -64,7 +73,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
                        nvfx_vertprog_validate(nvfx);
 
-               if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG))
+               if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG))
                        nvfx_vtxfmt_validate(nvfx);
        }
 
@@ -118,7 +127,24 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
                        OUT_RING(chan, 1);
                }
        }
-       nvfx->dirty = 0;
+
+       nvfx->dirty = dirty & still_dirty;
+
+       unsigned render_temps = nvfx->state.render_temps;
+       if(render_temps)
+       {
+               for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+               {
+                       if(render_temps & (1 << i))
+                               util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+                                               (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+               }
+
+               if(render_temps & 0x80)
+                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+                                       (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+       }
+
        return TRUE;
 }
 
@@ -137,21 +163,6 @@ nvfx_state_emit(struct nvfx_context *nvfx)
              ;
        MARK_RING(chan, max_relocs * 2, max_relocs * 2);
        nvfx_state_relocate(nvfx);
-
-       unsigned render_temps = nvfx->state.render_temps;
-       if(render_temps)
-       {
-               for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
-               {
-                       if(render_temps & (1 << i))
-                               util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
-                                               (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
-               }
-
-               if(render_temps & 0x80)
-                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
-                                       (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
-       }
 }
 
 void
@@ -161,7 +172,11 @@ nvfx_state_relocate(struct nvfx_context *nvfx)
        nvfx_fragtex_relocate(nvfx);
        nvfx_fragprog_relocate(nvfx);
        if (nvfx->render_mode == HW)
+       {
                nvfx_vbo_relocate(nvfx);
+               if(nvfx->use_index_buffer)
+                       nvfx_idxbuf_relocate(nvfx);
+       }
 }
 
 boolean
index 80b0f21575fc7280ebf67a3f0fb22c02d6e786dd..28bbd36c2e81ae0d0936b6f3014ea85f6360e51f 100644 (file)
@@ -1,6 +1,5 @@
 #include "nvfx_context.h"
 #include "nvfx_resource.h"
-#include "nouveau/nouveau_util.h"
 #include "util/u_format.h"
 
 static inline boolean
@@ -125,8 +124,8 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
                assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
 
                rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
-                       (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
-                       (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+                       (util_logbase2(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+                       (util_logbase2(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
        } else
                rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
 
index 7efdd954b4b4183db7fcc1df1f5a4944174658f7..135978ad2748b9737a85fe5779ac70f4b057fc78 100644 (file)
@@ -36,7 +36,6 @@
 #include "util/u_blitter.h"
 
 #include "nouveau/nouveau_winsys.h"
-#include "nouveau/nouveau_util.h"
 #include "nouveau/nouveau_screen.h"
 #include "nvfx_context.h"
 #include "nvfx_screen.h"
@@ -62,7 +61,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
                break;
        default:
                assert(util_is_pot(bits));
-               int shift = log2i(bits) - 3;
+               int shift = util_logbase2(bits) - 3;
                assert(shift >= 2);
                rgn->bpps = 2;
                shift -= 2;
@@ -365,25 +364,29 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int
 {
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
        struct pipe_subresource tempsr, surfsr;
-       struct pipe_resource *idxbuf_buffer;
-       unsigned idxbuf_format;
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+
+       // TODO: we really should do this validation before setting these variables in draw calls
+       unsigned use_vertex_buffers = nvfx->use_vertex_buffers;
+       boolean use_index_buffer = nvfx->use_index_buffer;
+       unsigned base_vertex = nvfx->base_vertex;
 
        tempsr.face = 0;
        tempsr.level = 0;
        surfsr.face = surf->face;
        surfsr.level = surf->level;
 
-       // TODO: do this properly, in blitter save
-       idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer;
-       idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format;
-
        if(to_temp)
                nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
        else
                nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
 
-       ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer;
-       ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format;
+       nvfx->use_vertex_buffers = use_vertex_buffers;
+       nvfx->use_index_buffer = use_index_buffer;
+        nvfx->base_vertex = base_vertex;
+
+       nvfx->dirty |= NVFX_NEW_ARRAYS;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
 }
 
 void
index e9c3dd7e551332c09c9c9a5eee8ec1441160d925..ca4462ef9dcf296b273a024b30f89974891202fd 100644 (file)
@@ -26,25 +26,44 @@ nvfx_transfer_new(struct pipe_context *pipe,
                          unsigned usage,
                          const struct pipe_box *box)
 {
-       struct nvfx_staging_transfer* tx;
-       bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
-
-       tx = CALLOC_STRUCT(nvfx_staging_transfer);
-       if(!tx)
-               return NULL;
-
-       util_staging_transfer_init(pipe, pt, sr, usage, box, direct, tx);
+        if((usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DONTBLOCK)) == PIPE_TRANSFER_DONTBLOCK)
+        {
+                struct nouveau_bo* bo = ((struct nvfx_resource*)pt)->bo;
+                if(bo && nouveau_bo_busy(bo, NOUVEAU_BO_WR))
+                        return NULL;
+        }
 
        if(pt->target == PIPE_BUFFER)
        {
-               tx->base.base.slice_stride = tx->base.base.stride = ((struct nvfx_resource*)tx->base.staging_resource)->bo->size;
-               if(direct)
-                       tx->offset = util_format_get_stride(pt->format, box->x);
-               else
-                       tx->offset = 0;
+               // it would be nice if we could avoid all this ridiculous overhead...
+               struct pipe_transfer* tx;
+               struct nvfx_buffer* buffer = nvfx_buffer(pt);
+
+               tx = CALLOC_STRUCT(pipe_transfer);
+               if (!tx)
+                       return NULL;
+
+               pipe_resource_reference(&tx->resource, pt);
+               tx->sr = sr;
+               tx->usage = usage;
+               tx->box = *box;
+
+               tx->slice_stride = tx->stride = util_format_get_stride(pt->format, box->width);
+               tx->data = buffer->data + util_format_get_stride(pt->format, box->x);
+
+               return tx;
        }
        else
        {
+               struct nvfx_staging_transfer* tx;
+               bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR;
+
+               tx = CALLOC_STRUCT(nvfx_staging_transfer);
+               if(!tx)
+                       return NULL;
+
+               util_staging_transfer_init(pipe, pt, sr, usage, box, direct, &tx->base);
+
                if(direct)
                {
                        tx->base.base.stride = nvfx_subresource_pitch(pt, sr.level);
@@ -66,26 +85,132 @@ nvfx_transfer_new(struct pipe_context *pipe,
        }
 }
 
+static void nvfx_buffer_dirty_interval(struct nvfx_buffer* buffer, unsigned begin, unsigned size, boolean unsynchronized)
+{ /* Extend the buffer's pending dirty byte range [dirty_begin, dirty_end) so
+   * that it also covers [begin, begin + size), and update the counters that
+   * are presumably consumed by nvfx_buffer_seems_static() when choosing an
+   * upload mode.  `unsynchronized` tells whether the write that dirtied the
+   * range was an unsynchronized one.
+   */
+       struct nvfx_screen* screen = nvfx_screen(buffer->base.base.screen);
+       buffer->last_update_static = buffer->bytes_to_draw_until_static < 0; // snapshot taken before the counter is touched below
+       if(buffer->dirty_begin == buffer->dirty_end) // begin == end is treated as "no dirty range yet"
+       {
+               buffer->dirty_begin = begin;
+               buffer->dirty_end = begin + size;
+               buffer->dirty_unsynchronized = unsynchronized;
+       }
+       else
+       {
+               buffer->dirty_begin = MIN2(buffer->dirty_begin, begin);
+               buffer->dirty_end = MAX2(buffer->dirty_end, begin + size);
+               buffer->dirty_unsynchronized &= unsynchronized; // stays set only while every merged write was unsynchronized
+       }
+
+       if(unsynchronized)
+       {
+               // TODO: revisit this, it doesn't seem quite right
+               //printf("UNSYNC UPDATE %p %u %u\n", buffer, begin, size);
+               buffer->bytes_to_draw_until_static += size * screen->static_reuse_threshold; // only the written bytes are added to the counter
+       }
+       else
+               buffer->bytes_to_draw_until_static = buffer->size * screen->static_reuse_threshold; // synchronized write: counter restarts from the whole buffer size
+}
+
+/*
+ * transfer_flush_region hook: for buffer transfers mapped with
+ * PIPE_TRANSFER_FLUSH_EXPLICIT, record the flushed sub-range as dirty.
+ * `box` is relative to the start of the transfer; its x/width are converted
+ * to bytes with the resource format.  Any other transfer is left untouched.
+ */
+static void nvfx_transfer_flush_region( struct pipe_context *pipe,
+                                     struct pipe_transfer *ptx,
+                                     const struct pipe_box *box)
+{
+       struct nvfx_buffer* buf;
+       unsigned first_byte;
+       unsigned byte_count;
+
+       if(ptx->resource->target != PIPE_BUFFER)
+               return;
+       if(!(ptx->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+               return;
+
+       buf = nvfx_buffer(ptx->resource);
+
+       /* ptx->data points into buf->data, so the pointer difference is the
+        * byte offset at which the transfer itself starts. */
+       first_byte = (uint8_t*)ptx->data - buf->data + util_format_get_stride(buf->base.base.format, box->x);
+       byte_count = util_format_get_stride(buf->base.base.format, box->width);
+
+       nvfx_buffer_dirty_interval(buf, first_byte, byte_count,
+                       !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+}
+
+/*
+ * transfer_destroy hook.
+ *
+ * Non-buffer transfers are handed to util_staging_transfer_destroy().
+ * Buffer transfers drop their resource reference and are freed; if the
+ * transfer was a write without PIPE_TRANSFER_FLUSH_EXPLICIT, its whole byte
+ * range (ptx->stride bytes starting at ptx->data) is marked dirty first.
+ */
+static void
+nvfx_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx)
+{
+       struct nvfx_buffer* buf;
+
+       if(ptx->resource->target != PIPE_BUFFER)
+       {
+               util_staging_transfer_destroy(pipe, ptx);
+               return;
+       }
+
+       buf = nvfx_buffer(ptx->resource);
+
+       /* Explicitly flushed transfers are expected to have reported their
+        * ranges through transfer_flush_region, so they are skipped here. */
+       if((ptx->usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == PIPE_TRANSFER_WRITE)
+       {
+               unsigned first_byte = (uint8_t*)ptx->data - buf->data;
+               boolean unsync = !!(ptx->usage & PIPE_TRANSFER_UNSYNCHRONIZED);
+
+               nvfx_buffer_dirty_interval(buf, first_byte, ptx->stride, unsync);
+       }
+
+       pipe_resource_reference(&ptx->resource, NULL);
+       FREE(ptx);
+}
+
 void *
 nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
 {
-       struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
-       if(!ptx->data)
+       if(ptx->resource->target == PIPE_BUFFER)
+               return ptx->data;
+       else
        {
-               struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
-               uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
-               ptx->data = map + tx->offset;
+               struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+               if(!ptx->data)
+               {
+                       struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+                       uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo, nouveau_screen_transfer_flags(ptx->usage));
+                       ptx->data = map + tx->offset;
+               }
+
+               ++tx->map_count;
+               return ptx->data;
        }
-       ++tx->map_count;
-       return ptx->data;
 }
 
 void
 nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
 {
-       struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
-       struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+       if(ptx->resource->target != PIPE_BUFFER)
+       {
+               struct nvfx_staging_transfer *tx = (struct nvfx_staging_transfer *)ptx;
+               struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->base.staging_resource;
+
+               if(!--tx->map_count)
+               {
+                       nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+                       ptx->data = 0;
+               }
+       }
+}
+
+/*
+ * transfer_inline_write hook.
+ *
+ * Non-buffer resources go through u_default_transfer_inline_write().
+ * For buffers the data is copied straight into the system-memory copy
+ * (buffer->data) at the byte offset given by box->x, and the written range is
+ * marked dirty.  `stride` and `slice_stride` are not used for buffers.
+ *
+ * The unsynchronized hint is taken from `usage` (the PIPE_TRANSFER_* flags of
+ * this write), not from pr->flags, which holds resource flags and shares no
+ * bit meaning with PIPE_TRANSFER_UNSYNCHRONIZED.
+ */
+static void nvfx_transfer_inline_write( struct pipe_context *pipe,
+                                     struct pipe_resource *pr,
+                                     struct pipe_subresource sr,
+                                     unsigned usage,
+                                     const struct pipe_box *box,
+                                     const void *data,
+                                     unsigned stride,
+                                     unsigned slice_stride)
+{
+       if(pr->target != PIPE_BUFFER)
+       {
+               u_default_transfer_inline_write(pipe, pr, sr, usage, box, data, stride, slice_stride);
+       }
+       else
+       {
+               struct nvfx_buffer* buffer = nvfx_buffer(pr);
+               /* box->x / box->width are in elements of pr->format; convert to bytes */
+               unsigned begin = util_format_get_stride(pr->format, box->x);
+               unsigned size = util_format_get_stride(pr->format, box->width);
+               memcpy(buffer->data + begin, data, size);
+               nvfx_buffer_dirty_interval(buffer, begin, size,
+                               !!(usage & PIPE_TRANSFER_UNSYNCHRONIZED));
+       }
+}
 
-       if(!--tx->map_count)
-               nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
+/*
+ * Install the nvfx transfer entry points on a freshly created context.
+ */
+void
+nvfx_init_transfer_functions(struct pipe_context *pipe)
+{
+       /* transfer object lifetime */
+       pipe->get_transfer = nvfx_transfer_new;
+       pipe->transfer_destroy = nvfx_transfer_destroy;
+
+       /* CPU mapping */
+       pipe->transfer_map = nvfx_transfer_map;
+       pipe->transfer_unmap = nvfx_transfer_unmap;
+
+       /* write notification and direct writes */
+       pipe->transfer_flush_region = nvfx_transfer_flush_region;
+       pipe->transfer_inline_write = nvfx_transfer_inline_write;
 }
index 4aa37938425c0600a820a84c2a259133521b4ee7..a6cd12563507ea871aa91e2295bd7afdde90e070 100644 (file)
@@ -2,6 +2,7 @@
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "translate/translate.h"
 
 #include "nvfx_context.h"
 #include "nvfx_state.h"
 #include "nouveau/nouveau_channel.h"
 #include "nouveau/nouveau_class.h"
 #include "nouveau/nouveau_pushbuf.h"
-#include "nouveau/nouveau_util.h"
 
-static INLINE int
-nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
+/*
+ * Guess how many distinct vertices an indexed draw references.
+ *
+ * mode:    PIPE_PRIM_* primitive type of the draw
+ * indices: number of indices in the draw
+ *
+ * Returns an estimate only; primitive types without a special case are
+ * assumed to reference one distinct vertex per index.
+ */
+static inline unsigned
+util_guess_unique_indices_count(unsigned mode, unsigned indices)
 {
-       switch (pipe) {
-       case PIPE_FORMAT_R32_FLOAT:
-       case PIPE_FORMAT_R32G32_FLOAT:
-       case PIPE_FORMAT_R32G32B32_FLOAT:
-       case PIPE_FORMAT_R32G32B32A32_FLOAT:
-               *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
-               break;
-       case PIPE_FORMAT_R16_FLOAT:
-       case PIPE_FORMAT_R16G16_FLOAT:
-       case PIPE_FORMAT_R16G16B16_FLOAT:
-       case PIPE_FORMAT_R16G16B16A16_FLOAT:
-               *fmt = NV34TCL_VTXFMT_TYPE_HALF;
-               break;
-       case PIPE_FORMAT_R8_UNORM:
-       case PIPE_FORMAT_R8G8_UNORM:
-       case PIPE_FORMAT_R8G8B8_UNORM:
-       case PIPE_FORMAT_R8G8B8A8_UNORM:
-               *fmt = NV34TCL_VTXFMT_TYPE_UBYTE;
-               break;
-       case PIPE_FORMAT_R16_SSCALED:
-       case PIPE_FORMAT_R16G16_SSCALED:
-       case PIPE_FORMAT_R16G16B16_SSCALED:
-       case PIPE_FORMAT_R16G16B16A16_SSCALED:
-               *fmt = NV34TCL_VTXFMT_TYPE_USHORT;
-               break;
-       default:
-               NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
-               return 1;
+       /* For a closed mesh, Euler's formula gives
+        * V = E - F + 2
+        *   = F * (polygon_edges / 2 - 1) + 2
+        *   = F * (polygon_edges - 2) / 2 + 2
+        *   = indices * (polygon_edges - 2) / (2 * indices_per_face) + 2
+        *   = indices * (1 / 2 - 1 / polygon_edges) + 2
+        */
+       switch(mode)
+       {
+       case PIPE_PRIM_LINES:
+               return indices >> 1;
+       case PIPE_PRIM_TRIANGLES:
+       {
+               /* indices * (1/2 - 1/3) + 2 = indices / 6 + 2.
+                * Halve first, then divide by 3 rounding up.  The quotient
+                * must actually be used: returning the halved count itself
+                * overestimates about threefold.  A plain division by a
+                * constant also stays well defined for fewer than 2 indices,
+                * where a hand-rolled multiplicative-inverse division would
+                * wrap around.
+                */
+               unsigned half = indices >> 1;
+               return (half + 2) / 3 + 2;
        }
-
-       switch (pipe) {
-       case PIPE_FORMAT_R8_UNORM:
-       case PIPE_FORMAT_R32_FLOAT:
-       case PIPE_FORMAT_R16_FLOAT:
-       case PIPE_FORMAT_R16_SSCALED:
-               *ncomp = 1;
-               break;
-       case PIPE_FORMAT_R8G8_UNORM:
-       case PIPE_FORMAT_R32G32_FLOAT:
-       case PIPE_FORMAT_R16G16_FLOAT:
-       case PIPE_FORMAT_R16G16_SSCALED:
-               *ncomp = 2;
-               break;
-       case PIPE_FORMAT_R8G8B8_UNORM:
-       case PIPE_FORMAT_R32G32B32_FLOAT:
-       case PIPE_FORMAT_R16G16B16_FLOAT:
-       case PIPE_FORMAT_R16G16B16_SSCALED:
-               *ncomp = 3;
-               break;
-       case PIPE_FORMAT_R8G8B8A8_UNORM:
-       case PIPE_FORMAT_R32G32B32A32_FLOAT:
-       case PIPE_FORMAT_R16G16B16A16_FLOAT:
-       case PIPE_FORMAT_R16G16B16A16_SSCALED:
-               *ncomp = 4;
-               break;
+       // guess that indexed quads are created by successive connections, since a closed mesh seems unlikely
+       case PIPE_PRIM_QUADS:
+               return (indices >> 1) + 2;
+       //      return (indices >> 2) + 2; // if it is a closed mesh
        default:
-               NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe));
-               return 1;
+               return indices;
        }
-
-       return 0;
 }
 
-static boolean
-nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
-                   unsigned ib_size)
+static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
-       unsigned type;
-
-       if (!ib) {
-               nvfx->idxbuf_buffer = NULL;
-               nvfx->idxbuf_format = 0xdeadbeef;
-               return FALSE;
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+       unsigned hardware_cost = 0;
+       unsigned inline_cost = 0;
+       unsigned unique_vertices;
+       unsigned upload_mode;
+       if (info->indexed)
+               unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
+       else
+               unique_vertices = info->count;
+
+       /* Here we try to figure out if we are better off writing vertex data directly on the FIFO,
+        * or creating hardware buffer objects and pointing the hardware to them.
+        *
+        * This is done by computing the total memcpy cost of each option, ignoring uploads
+        * if we think that the buffer is static and thus the upload cost will be amortized over
+        * future draw calls.
+        *
+        * For instance, if everything looks static, we will always create buffer objects, while if
+        * everything is a user buffer and we are not doing indexed drawing, we never do.
+        *
+        * Other interesting cases are a small user vertex buffer combined with a huge user index
+        * buffer, where we upload the vertex buffer so that we can use hardware index lookup, and
+        * the opposite case, where we instead do index lookup in software to avoid uploading
+        * a huge amount of vertex data that is not going to be used.
+        *
+        * Otherwise, we generally move a buffer to the GPU after it has been pushed
+        * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having
+        * been updated with a transfer (or just the buffer having been destroyed).
+        *
+        * There is no special handling for user buffers, since applications can use
+        * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this
+        * by the way.
+        *
+        * Note that currently we don't support only putting some data on the FIFO, and
+        * some on vertex buffers (constant and instanced data is independent from this).
+        *
+        * nVidia doesn't seem to do this either, even though it should be at least
+        * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed.
+        */
+
+       for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+       {
+               struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+               struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+               struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+               buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
+               if (!nvfx_buffer_seems_static(buffer))
+               {
+                       hardware_cost += buffer->dirty_end - buffer->dirty_begin;
+                       if (!buffer->base.bo)
+                               hardware_cost += nvfx->screen->buffer_allocation_cost;
+               }
+               inline_cost += vbi->per_vertex_size * info->count;
        }
 
-       if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
-               return FALSE;
+       float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
+       boolean prefer_hardware_indices = FALSE;
+       unsigned index_inline_cost = 0;
+       unsigned index_hardware_cost = 0;
 
-       switch (ib_size) {
-       case 2:
-               type = NV34TCL_IDXBUF_FORMAT_TYPE_U16;
-               break;
-       case 4:
-               type = NV34TCL_IDXBUF_FORMAT_TYPE_U32;
-               break;
-       default:
-               return FALSE;
-       }
+       if (info->indexed)
+       {
+               index_inline_cost = nvfx->idxbuf.index_size * info->count;
+               if (nvfx->screen->index_buffer_reloc_flags
+                       && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
+                       && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
+               {
+                       struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
+                       buffer->bytes_to_draw_until_static -= index_inline_cost;
 
-       if (ib != nvfx->idxbuf_buffer ||
-           type != nvfx->idxbuf_format) {
-               nvfx->dirty |= NVFX_NEW_ARRAYS;
-               nvfx->idxbuf_buffer = ib;
-               nvfx->idxbuf_format = type;
-       }
+                       prefer_hardware_indices = TRUE;
 
-       return TRUE;
-}
+                       if (!nvfx_buffer_seems_static(buffer))
+                       {
+                               index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
+                               if (!buffer->base.bo)
+                                       index_hardware_cost += nvfx->screen->buffer_allocation_cost;
+                       }
 
-// type must be floating point
-static inline void
-nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
-                      int attrib, struct pipe_vertex_element *ve,
-                      struct pipe_vertex_buffer *vb, unsigned ncomp)
-{
-       struct pipe_transfer *transfer;
-       struct nouveau_channel* chan = nvfx->screen->base.channel;
-       void *map;
-       float *v;
-
-       map  = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
-       map = (uint8_t *) map + vb->buffer_offset + ve->src_offset;
-
-       v = map;
-
-       switch (ncomp) {
-       case 4:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
-               OUT_RING(chan, fui(v[0]));
-               OUT_RING(chan, fui(v[1]));
-               OUT_RING(chan,  fui(v[2]));
-               OUT_RING(chan,  fui(v[3]));
-               break;
-       case 3:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
-               OUT_RING(chan,  fui(v[0]));
-               OUT_RING(chan,  fui(v[1]));
-               OUT_RING(chan,  fui(v[2]));
-               break;
-       case 2:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
-               OUT_RING(chan,  fui(v[0]));
-               OUT_RING(chan,  fui(v[1]));
-               break;
-       case 1:
-               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
-               OUT_RING(chan,  fui(v[0]));
-               break;
+                       if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
+                       {
+                               best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
+                       }
+                       else
+                       {
+                               best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
+                               prefer_hardware_indices = TRUE;
+                       }
+               }
        }
 
-       pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
+       /* let's finally figure out which of the 3 paths we want to take */
+       if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
+               upload_mode = 1 + prefer_hardware_indices;
+       else
+               upload_mode = 0;
+
+#ifdef DEBUG
+        if (unlikely(nvfx->screen->trace_draw))
+          {
+                  fprintf(stderr, "DRAW");
+                  if (info->indexed)
+                  {
+                          fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
+                          if (info->index_bias)
+                                  fprintf(stderr, " biased %u", info->index_bias);
+                          fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
+                  }
+                  if (info->instance_count > 1)
+                          fprintf(stderr, " %u instances from %u", info->instance_count, info->indexed);
+                  fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
+                  if (!upload_mode)
+                          fprintf(stderr, " -> inline vertex data");
+                  else if (upload_mode == 2 || !info->indexed)
+                          fprintf(stderr, " -> buffer range");
+                  else
+                          fprintf(stderr, " -> inline indices");
+                  fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
+                  for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
+                  {
+                          struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                          struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                          struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+                          if (i)
+                                  fprintf(stderr, ", ");
+                          fprintf(stderr, "%p%s left %Li", buffer, buffer->last_update_static ? " static" : "", buffer->bytes_to_draw_until_static);
+                  }
+                  fprintf(stderr, ">\n");
+          }
+#endif
+
+       return upload_mode;
 }
 
-static void
-nvfx_draw_arrays(struct pipe_context *pipe,
-                unsigned mode, unsigned start, unsigned count)
+void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
        struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       unsigned restart = 0;
-
-       nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
-       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
-               nvfx_draw_elements_swtnl(pipe, NULL, 0, 0,
-                                           mode, start, count);
-                return;
-       }
+       unsigned upload_mode = 0;
 
-       while (count) {
-               unsigned vc, nr, avail;
+       if (!nvfx->vtxelt->needs_translate)
+               upload_mode = nvfx_decide_upload_mode(pipe, info);
 
-               nvfx_state_emit(nvfx);
+       nvfx->use_index_buffer = upload_mode > 1;
 
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+       if ((upload_mode > 0) != nvfx->use_vertex_buffers)
+       {
+               nvfx->use_vertex_buffers = (upload_mode > 0);
+               nvfx->dirty |= NVFX_NEW_ARRAYS;
+               nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+       }
 
-               vc = nouveau_vbuf_split(avail, 6, 256,
-                                       mode, start, count, &restart);
-               if (!vc) {
-                       FIRE_RING(chan);
-                       continue;
+       if (upload_mode > 0)
+       {
+               for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
+               {
+                       struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
+                       nvfx_buffer_upload(nvfx_buffer(vb->buffer));
                }
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+               if (upload_mode > 1)
+               {
+                       nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));
 
-               nr = (vc & 0xff);
-               if (nr) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
-                       OUT_RING  (chan, ((nr - 1) << 24) | start);
-                       start += nr;
+                       if (unlikely(info->index_bias != nvfx->base_vertex))
+                       {
+                               nvfx->base_vertex = info->index_bias;
+                               nvfx->dirty |= NVFX_NEW_ARRAYS;
+                       }
                }
-
-               nr = vc >> 8;
-               while (nr) {
-                       unsigned push = nr > 2047 ? 2047 : nr;
-
-                       nr -= push;
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
-                       while (push--) {
-                               OUT_RING(chan, ((0x100 - 1) << 24) | start);
-                               start += 0x100;
+               else
+               {
+                       if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
+                       {
+                               nvfx->base_vertex = 0;
+                               nvfx->dirty |= NVFX_NEW_ARRAYS;
                        }
                }
-
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
-
-               count -= vc;
-               start = restart;
        }
 
-       pipe->flush(pipe, 0, NULL);
+       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
+               nvfx_draw_vbo_swtnl(pipe, info);
+       else
+               nvfx_push_vbo(pipe, info);
 }
 
-static INLINE void
-nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
-                      unsigned mode, unsigned start, unsigned count)
+boolean
+nvfx_vbo_validate(struct nvfx_context *nvfx)
 {
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       int i;
+       int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
 
-       while (count) {
-               uint8_t *elts = (uint8_t *)ib + start;
-               unsigned vc, push, restart = 0, avail;
+       if (!elements)
+               return TRUE;
 
-               nvfx_state_emit(nvfx);
+       MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+       for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
+       {
+               struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
+               struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+               struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
+               float v[4];
+               ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
+               nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
+       }
 
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
 
-               vc = nouveau_vbuf_split(avail, 6, 2,
-                                       mode, start, count, &restart);
-               if (vc == 0) {
-                       FIRE_RING(chan);
-                       continue;
-               }
-               count -= vc;
+       OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+       if(nvfx->use_vertex_buffers)
+       {
+               unsigned idx = 0;
+               for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                       struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+                       if(idx != ve->idx)
+                       {
+                               assert(idx < ve->idx);
+                               OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], ve->idx - idx);
+                               idx = ve->idx;
+                       }
 
-               if (vc & 1) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
-                       OUT_RING  (chan, elts[0]);
-                       elts++; vc--;
+                       OUT_RING(chan, nvfx->vtxelt->vtxfmt[idx] | (vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT));
+                       ++idx;
                }
+               if(idx != nvfx->vtxelt->num_elements)
+                       OUT_RINGp(chan, &nvfx->vtxelt->vtxfmt[idx], nvfx->vtxelt->num_elements - idx);
+       }
+       else
+               OUT_RINGp(chan, nvfx->vtxelt->vtxfmt, nvfx->vtxelt->num_elements);
 
-               while (vc) {
-                       unsigned i;
-
-                       push = MIN2(vc, 2047 * 2);
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
-                       for (i = 0; i < push; i+=2)
-                               OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
+       for(i = nvfx->vtxelt->num_elements; i < elements; ++i)
+               OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT);
 
-                       vc -= push;
-                       elts += push;
+       if(nvfx->is_nv4x) {
+               unsigned i;
+               /* seems to be some kind of cache flushing */
+               for(i = 0; i < 3; ++i) {
+                       OUT_RING(chan, RING_3D(0x1718, 1));
+                       OUT_RING(chan, 0);
                }
-
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
-
-               start = restart;
        }
-}
-
-static INLINE void
-nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
-                      unsigned mode, unsigned start, unsigned count)
-{
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-
-       while (count) {
-               uint16_t *elts = (uint16_t *)ib + start;
-               unsigned vc, push, restart = 0, avail;
 
-               nvfx_state_emit(nvfx);
-
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
-               vc = nouveau_vbuf_split(avail, 6, 2,
-                                       mode, start, count, &restart);
-               if (vc == 0) {
-                       FIRE_RING(chan);
-                       continue;
-               }
-               count -= vc;
+       OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+       if(nvfx->use_vertex_buffers)
+       {
+               unsigned idx = 0;
+               for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                       struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+                       for(; idx < ve->idx; ++idx)
+                               OUT_RING(chan, 0);
 
-               if (vc & 1) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
-                       OUT_RING  (chan, elts[0]);
-                       elts++; vc--;
+                       OUT_RELOC(chan, bo,
+                                       vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+                                       vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                                       0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+                       ++idx;
                }
 
-               while (vc) {
-                       unsigned i;
-
-                       push = MIN2(vc, 2047 * 2);
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
-                       for (i = 0; i < push; i+=2)
-                               OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
-
-                       vc -= push;
-                       elts += push;
-               }
+               for(; idx < elements; ++idx)
+                       OUT_RING(chan, 0);
+       }
+       else
+       {
+               for (i = 0; i < elements; i++)
+                       OUT_RING(chan, 0);
+       }
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
+       OUT_RING(chan, RING_3D(0x1710, 1));
+       OUT_RING(chan, 0);
 
-               start = restart;
-       }
+       nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+       return TRUE;
 }
 
-static INLINE void
-nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
-                      unsigned mode, unsigned start, unsigned count)
+/*
+ * Re-emit relocations for every per-vertex element of the bound
+ * vertex-elements state.
+ *
+ * Returns immediately when nvfx->use_vertex_buffers is zero. Otherwise, for
+ * each per-vertex element two OUT_RELOCs are written against the buffer
+ * object backing its vertex buffer: one carrying the
+ * NV34TCL_VTXBUF_ADDRESS(ve->idx) method header and one carrying the element
+ * address (buffer offset + element offset + base_vertex * stride).
+ * All relocations carry NOUVEAU_BO_DUMMY.
+ */
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
 {
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-
-       while (count) {
-               uint32_t *elts = (uint32_t *)ib + start;
-               unsigned vc, push, restart = 0, avail;
-
-               nvfx_state_emit(nvfx);
-
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
-
-               vc = nouveau_vbuf_split(avail, 5, 1,
-                                       mode, start, count, &restart);
-               if (vc == 0) {
-                       FIRE_RING(chan);
-                       continue;
-               }
-               count -= vc;
-
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
-
-               while (vc) {
-                       push = MIN2(vc, 2047);
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
-                       OUT_RINGp    (chan, elts, push);
-
-                       vc -= push;
-                       elts += push;
-               }
+        if(!nvfx->use_vertex_buffers)
+                return;
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+       int i;
 
-               start = restart;
+       /* NOTE(review): the reservation is sized for 16 elements plus 3 extra
+        * entries; nothing in this function emits those 3 extra - confirm
+        * whether they are still needed. */
+       MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+        for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
+                struct nvfx_per_vertex_element *ve = &nvfx->vtxelt->per_vertex[i];
+                struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+                struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+
+                /* method header for this element's address slot */
+                OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(ve->idx), 1),
+                               vb_flags, 0, 0);
+                /* address of the first vertex of this element */
+                OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset + nvfx->base_vertex * vb->stride,
+                               vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                               0, NV34TCL_VTXBUF_ADDRESS_DMA1);
         }
 }
 
+/*
+ * Emit the NV34TCL_IDXBUF_ADDRESS method (two data words: index buffer
+ * address and index format) for the index buffer currently stored in
+ * nvfx->idxbuf.
+ *
+ * ib_flags supplies extra relocation flags; the screen's
+ * index_buffer_reloc_flags and NOUVEAU_BO_RD are always OR-ed in. When
+ * ib_flags contains NOUVEAU_BO_DUMMY the method header is written with
+ * OUT_RELOC, otherwise with a plain OUT_RING.
+ *
+ * The index format is U16 when idxbuf.index_size == 2 and U32 for every
+ * other size.
+ */
 static void
-nvfx_draw_elements_inline(struct pipe_context *pipe,
-                         struct pipe_resource *ib,
-                         unsigned ib_size, int ib_bias,
-                         unsigned mode, unsigned start, unsigned count)
+nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
 {
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct pipe_transfer *transfer;
-       void *map;
-
-       map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
-       if (!ib) {
-               NOUVEAU_ERR("failed mapping ib\n");
-               return;
-       }
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned ib_format = (nvfx->idxbuf.index_size == 2) ? NV34TCL_IDXBUF_FORMAT_TYPE_U16 : NV34TCL_IDXBUF_FORMAT_TYPE_U32;
+       struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf.buffer)->bo;
+       ib_flags |= nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
 
-       assert(ib_bias == 0);
-
-       switch (ib_size) {
-       case 1:
-               nvfx_draw_elements_u08(nvfx, map, mode, start, count);
-               break;
-       case 2:
-               nvfx_draw_elements_u16(nvfx, map, mode, start, count);
-               break;
-       case 4:
-               nvfx_draw_elements_u32(nvfx, map, mode, start, count);
-               break;
-       default:
-               NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
-               break;
-       }
+       /* hardware index buffers must be enabled on this screen */
+       assert(nvfx->screen->index_buffer_reloc_flags);
 
-       pipe_buffer_unmap(pipe, ib, transfer);
+       MARK_RING(chan, 3, 3);
+       if(ib_flags & NOUVEAU_BO_DUMMY)
+               OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), ib_flags, 0, 0);
+       else
+               OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+       /* NOTE(review): the "+ 1" added to the offset is not explained by
+        * anything visible here - confirm against the hardware documentation. */
+       OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+       OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
+                       0, NV34TCL_IDXBUF_FORMAT_DMA1);
 }
 
-static void
-nvfx_draw_elements_vbo(struct pipe_context *pipe,
-                      unsigned mode, unsigned start, unsigned count)
+/* Emit the index buffer state with no extra relocation flags. */
+void
+nvfx_idxbuf_validate(struct nvfx_context* nvfx)
 {
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       unsigned restart = 0;
-
-       while (count) {
-               unsigned nr, vc, avail;
-
-               nvfx_state_emit(nvfx);
+       nvfx_idxbuf_emit(nvfx, 0);
+}
 
-               avail = AVAIL_RING(chan);
-               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+/* Emit the index buffer state with NOUVEAU_BO_DUMMY set on every relocation. */
+void
+nvfx_idxbuf_relocate(struct nvfx_context* nvfx)
+{
+       nvfx_idxbuf_emit(nvfx, NOUVEAU_BO_DUMMY);
+}
 
-               vc = nouveau_vbuf_split(avail, 6, 256,
-                                       mode, start, count, &restart);
-               if (!vc) {
-                       FIRE_RING(chan);
-                       continue;
-               }
+/*
+ * Map from PIPE_FORMAT_* to the NV34TCL_VTXFMT_TYPE_* value used for it.
+ *
+ * Only the type is stored; the component count is added separately by the
+ * vertex-elements CSO code. Formats not listed are left zero-initialized, and
+ * nvfx_vtxelts_state_create() treats a zero entry as "no direct hardware
+ * type" and converts such elements to 32-bit floats instead.
+ */
+unsigned nvfx_vertex_formats[PIPE_FORMAT_COUNT] =
+{
+       [PIPE_FORMAT_R32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R32G32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R32G32B32_FLOAT] = NV34TCL_VTXFMT_TYPE_32_FLOAT,
+       [PIPE_FORMAT_R16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R16G16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R16G16B16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV34TCL_VTXFMT_TYPE_16_FLOAT,
+       [PIPE_FORMAT_R8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8B8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       [PIPE_FORMAT_R8G8B8A8_UNORM] = NV34TCL_VTXFMT_TYPE_8_UNORM,
+       /* only the four-component USCALED byte format is listed */
+       [PIPE_FORMAT_R8G8B8A8_USCALED] = NV34TCL_VTXFMT_TYPE_8_USCALED,
+       [PIPE_FORMAT_R16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16G16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16G16B16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16G16B16A16_SNORM] = NV34TCL_VTXFMT_TYPE_16_SNORM,
+       [PIPE_FORMAT_R16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+       [PIPE_FORMAT_R16G16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+       [PIPE_FORMAT_R16G16B16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+       [PIPE_FORMAT_R16G16B16A16_SSCALED] = NV34TCL_VTXFMT_TYPE_16_SSCALED,
+};
+
+/*
+ * Create the vertex-elements CSO.
+ *
+ * Precomputes, from the pipe_vertex_element array:
+ *  - the list of vertex buffers that feed at least one per-vertex element,
+ *    with the number of bytes each contributes per vertex;
+ *  - the hardware vertex format word for every element;
+ *  - separate lists of per-vertex and per-instance elements;
+ *  - a translate key (and translate object) that converts the per-vertex
+ *    elements into a packed stream, each element padded to 4 bytes.
+ *
+ * Elements whose format has no entry in nvfx_vertex_formats are converted
+ * to 32-bit floats, and needs_translate is set in that case.
+ *
+ * Returns the new CSO, or NULL if it could not be allocated.
+ */
+static void *
+nvfx_vtxelts_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+       struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
+        struct translate_key transkey;
+        unsigned per_vertex_size[16];
+        memset(per_vertex_size, 0, sizeof(per_vertex_size));
+
+        unsigned vb_compacted_index[16];
+
+       /* allocation failure: report it instead of dereferencing NULL below */
+       if(!cso)
+               return NULL;
+
+       assert(num_elements < 16); /* not doing fallbacks yet */
+
+       memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
+       cso->num_elements = num_elements;
+       cso->needs_translate = FALSE;
+
+       transkey.nr_elements = 0;
+       transkey.output_stride = 0;
+
+       /* sum the bytes each vertex buffer supplies per vertex (instanced elements excluded) */
+       for(unsigned i = 0; i < num_elements; ++i)
+        {
+               const struct pipe_vertex_element* ve = &elements[i];
+               if(!ve->instance_divisor)
+                        per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1);
+        }
+
+        /* compact the used vertex buffers into a dense list; vb_compacted_index
+         * is only written (and later only read) for buffers that are used */
+        for(unsigned i = 0; i < 16; ++i)
+        {
+                if(per_vertex_size[i])
+                {
+                        unsigned idx = cso->num_per_vertex_buffer_infos++;
+                        cso->per_vertex_buffer_info[idx].vertex_buffer_index = i;
+                        cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i];
+                        vb_compacted_index[i] = idx;
+                }
+        }
+
+       for(unsigned i = 0; i < num_elements; ++i)
+       {
+               const struct pipe_vertex_element* ve = &elements[i];
+               unsigned type = nvfx_vertex_formats[ve->src_format];
+               unsigned ncomp = util_format_get_nr_components(ve->src_format);
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, nvgl_primitive(mode));
+               //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX)
+               if(ve->instance_divisor)
+               {
+                       /* instanced element: record how to fetch it as floats */
+                       struct nvfx_low_frequency_element* lfve;
+                       cso->vtxfmt[i] = NV34TCL_VTXFMT_TYPE_32_FLOAT;
+
+                       //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT)
+                       if(0)
+                               lfve = &cso->constant[cso->num_constant++];
+                       else
+                       {
+                               lfve = &cso->per_instance[cso->num_per_instance++].base;
+                               ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor;
+                       }
 
-               nr = (vc & 0xff);
-               if (nr) {
-                       OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
-                       OUT_RING  (chan, ((nr - 1) << 24) | start);
-                       start += nr;
+                        lfve->idx = i;
+                        lfve->vertex_buffer_index = ve->vertex_buffer_index;
+                        lfve->src_offset = ve->src_offset;
+                        lfve->fetch_rgba_float = util_format_description(ve->src_format)->fetch_rgba_float;
+                        lfve->ncomp = ncomp;
                }
-
-               nr = vc >> 8;
-               while (nr) {
-                       unsigned push = nr > 2047 ? 2047 : nr;
-
-                       nr -= push;
-
-                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
-                       while (push--) {
-                               OUT_RING(chan, ((0x100 - 1) << 24) | start);
-                               start += 0x100;
+               else
+               {
+                       unsigned idx;
+
+                       idx = cso->num_per_vertex++;
+                       cso->per_vertex[idx].idx = i;
+                       cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index;
+                       cso->per_vertex[idx].src_offset = ve->src_offset;
+
+                       idx = transkey.nr_elements++;
+                       transkey.element[idx].input_format = ve->src_format;
+                       transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index];
+                       transkey.element[idx].input_offset = ve->src_offset;
+                       transkey.element[idx].instance_divisor = 0;
+                       transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL;
+                       if(type)
+                       {
+                               /* format has a hardware type: pass it through unchanged */
+                               transkey.element[idx].output_format = ve->src_format;
+                               cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type;
+                       }
+                       else
+                       {
+                               /* no hardware type: convert to floats with the same component count */
+                               unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT};
+                               transkey.element[idx].output_format = float32[ncomp - 1];
+                               cso->needs_translate = TRUE;
+                               cso->vtxfmt[i] = (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT;
                        }
+                       /* each output element is padded to a multiple of 4 bytes */
+                       transkey.element[idx].output_offset = transkey.output_stride;
+                       transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3;
                }
+       }
 
-               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
-               OUT_RING  (chan, 0);
+       cso->translate = translate_generic_create(&transkey);
+       /* vertex length in 32-bit words */
+       cso->vertex_length = transkey.output_stride >> 2;
+       /* vertex_length is 0 when there are no per-vertex elements; avoid dividing by zero */
+       cso->max_vertices_per_packet = cso->vertex_length ? 2047 / cso->vertex_length : 2047;
 
-               count -= vc;
-               start = restart;
-       }
+       return (void *)cso;
 }
 
+/*
+ * Destroy a vertex-elements CSO created by nvfx_vtxelts_state_create().
+ *
+ * Releases the translate object owned by the CSO before freeing the CSO
+ * itself, so the translate object is not leaked.
+ */
 static void
-nvfx_draw_elements(struct pipe_context *pipe,
-                  struct pipe_resource *indexBuffer,
-                  unsigned indexSize, int indexBias,
-                  unsigned mode, unsigned start, unsigned count)
+nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
 {
-       struct nvfx_context *nvfx = nvfx_context(pipe);
-       boolean idxbuf;
-
-       idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
-       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
-               nvfx_draw_elements_swtnl(pipe,
-                                        indexBuffer, indexSize, indexBias,
-                                        mode, start, count);
-               return;
-       }
-
-       if (idxbuf) {
-               nvfx_draw_elements_vbo(pipe, mode, start, count);
-       } else {
-               nvfx_draw_elements_inline(pipe,
-                                         indexBuffer, indexSize, indexBias,
-                                         mode, start, count);
-       }
-
-       pipe->flush(pipe, 0, NULL);
+       struct nvfx_vtxelt_state *cso = hwcso;
+
+       /* the translate object was created by the CSO and is owned by it */
+       if(cso && cso->translate)
+               cso->translate->release(cso->translate);
+       FREE(hwcso);
 }
 
-void
-nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+/*
+ * Bind a vertex-elements CSO.
+ *
+ * Stores the CSO pointer in the context, resets use_vertex_buffers to -1 and
+ * marks the arrays state dirty for the draw path.
+ * NOTE(review): -1 presumably means "not decided yet" - confirm against the
+ * draw code that consumes use_vertex_buffers.
+ */
+static void
+nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
 {
        struct nvfx_context *nvfx = nvfx_context(pipe);
 
-       if (info->indexed && nvfx->idxbuf.buffer) {
-               unsigned offset;
-
-               assert(nvfx->idxbuf.offset % nvfx->idxbuf.index_size == 0);
-               offset = nvfx->idxbuf.offset / nvfx->idxbuf.index_size;
-
-               nvfx_draw_elements(pipe,
-                                  nvfx->idxbuf.buffer,
-                                  nvfx->idxbuf.index_size,
-                                  info->index_bias,
-                                  info->mode,
-                                  info->start + offset,
-                                  info->count);
-       }
-       else {
-               nvfx_draw_arrays(pipe,
-                               info->mode,
-                               info->start,
-                               info->count);
-       }
+       nvfx->vtxelt = hwcso;
+       nvfx->use_vertex_buffers = -1;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
 }
 
-boolean
-nvfx_vbo_validate(struct nvfx_context *nvfx)
+/*
+ * Set the context's vertex buffers.
+ *
+ * Copies buffer, buffer_offset, max_index and stride of the first `count`
+ * entries of `vb` into nvfx->vtxbuf (the buffer through
+ * pipe_resource_reference), drops the buffer references held in slots
+ * [count, previous vtxbuf_nr), then records the new count, resets
+ * use_vertex_buffers to -1 and marks the arrays state dirty for the draw path.
+ *
+ * NOTE(review): `count` is not checked against the size of nvfx->vtxbuf here;
+ * presumably the caller guarantees it fits - confirm.
+ */
+static void
+nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
+                       const struct pipe_vertex_buffer *vb)
 {
-       struct nouveau_channel* chan = nvfx->screen->base.channel;
-       struct pipe_resource *ib = nvfx->idxbuf_buffer;
-       unsigned ib_format = nvfx->idxbuf_format;
-       int i;
-       int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
-       uint32_t vtxfmt[16];
-       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
-
-       if (!elements)
-               return TRUE;
-
-       nvfx->vbo_bo = 0;
-
-       MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
-       for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
-               unsigned type, ncomp;
-
-               ve = &nvfx->vtxelt->pipe[i];
-               vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-
-               if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
-                       MARK_UNDO(chan);
-                       nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
-                       return FALSE;
-               }
+       struct nvfx_context *nvfx = nvfx_context(pipe);
 
-               if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
-                       nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
-                       vtxfmt[i] = type;
-               } else {
-                       vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
-                               (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
-                       nvfx->vbo_bo |= (1 << i);
-               }
+       for(unsigned i = 0; i < count; ++i)
+       {
+               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
+               nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+               nvfx->vtxbuf[i].max_index = vb[i].max_index;
+               nvfx->vtxbuf[i].stride = vb[i].stride;
        }
 
-       for(; i < elements; ++i)
-               vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
-
-       OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
-       OUT_RINGp(chan, vtxfmt, elements);
-
-       if(nvfx->is_nv4x) {
-               unsigned i;
-               /* seems to be some kind of cache flushing */
-               for(i = 0; i < 3; ++i) {
-                       OUT_RING(chan, RING_3D(0x1718, 1));
-                       OUT_RING(chan, 0);
-               }
-       }
+       /* unreference buffers in slots that are no longer in use */
+       for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
+               pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);
 
-       OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
-       for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
+       nvfx->vtxbuf_nr = count;
+       nvfx->use_vertex_buffers = -1;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
+}
 
-               ve = &nvfx->vtxelt->pipe[i];
-               vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+/*
+ * Set or clear the context's index buffer.
+ *
+ * With a non-NULL `ib`, its buffer (through pipe_resource_reference),
+ * index_size and offset are copied into nvfx->idxbuf. With a NULL `ib` the
+ * buffer reference is dropped and index_size and offset are zeroed. In both
+ * cases NVFX_NEW_INDEX is set in both dirty and draw_dirty.
+ */
+static void
+nvfx_set_index_buffer(struct pipe_context *pipe,
+                     const struct pipe_index_buffer *ib)
+{
+       struct nvfx_context *nvfx = nvfx_context(pipe);
 
-               if (!(nvfx->vbo_bo & (1 << i)))
-                       OUT_RING(chan, 0);
-               else
-               {
-                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
-                       OUT_RELOC(chan, bo,
-                                vb->buffer_offset + ve->src_offset,
-                                vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-                                0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-               }
+       if(ib)
+       {
+               pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
+               nvfx->idxbuf.index_size = ib->index_size;
+               nvfx->idxbuf.offset = ib->offset;
        }
-
-        for (; i < elements; i++)
-               OUT_RING(chan, 0);
-
-       OUT_RING(chan, RING_3D(0x1710, 1));
-       OUT_RING(chan, 0);
-
-       if (ib) {
-               unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
-               struct nouveau_bo* bo = nvfx_resource(ib)->bo;
-
-               assert(nvfx->screen->index_buffer_reloc_flags);
-
-               OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
-               OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
-               OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
-                                 0, NV34TCL_IDXBUF_FORMAT_DMA1);
+       else
+       {
+               pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
+               nvfx->idxbuf.index_size = 0;
+               nvfx->idxbuf.offset = 0;
        }
 
-       nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
-       return TRUE;
+       nvfx->dirty |= NVFX_NEW_INDEX;
+       nvfx->draw_dirty |= NVFX_NEW_INDEX;
 }
 
+/*
+ * Install this file's vertex-buffer, index-buffer and vertex-elements CSO
+ * callbacks into the context's pipe_context function table.
+ */
 void
-nvfx_vbo_relocate(struct nvfx_context *nvfx)
+nvfx_init_vbo_functions(struct nvfx_context *nvfx)
 {
-       struct nouveau_channel* chan = nvfx->screen->base.channel;
-       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-       int i;
+       nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers;
+       nvfx->pipe.set_index_buffer = nvfx_set_index_buffer;
 
-       MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
-       for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
-               if(nvfx->vbo_bo & (1 << i)) {
-                       struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
-                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
-                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
-                       OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
-                                       vb_flags, 0, 0);
-                       OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
-                                       vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
-                                       0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-               }
-       }
-
-       if(nvfx->idxbuf_buffer)
-       {
-               unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
-               struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf_buffer)->bo;
-
-               assert(nvfx->screen->index_buffer_reloc_flags);
-
-               OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
-                               ib_flags, 0, 0);
-               OUT_RELOC(chan, bo, 0,
-                               ib_flags | NOUVEAU_BO_LOW, 0, 0);
-               OUT_RELOC(chan, bo, nvfx->idxbuf_format,
-                               ib_flags | NOUVEAU_BO_OR,
-                               0, NV34TCL_IDXBUF_FORMAT_DMA1);
-       }
+       nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create;
+       nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete;
+       nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind;
 }
index 24d9846310e001413d4519b98add36f7aaccf9dc..939d2b83aee4ab3feda1e00aadd5afaabb1c2b4c 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "nvfx_context.h"
 #include "nvfx_state.h"
+#include "nvfx_resource.h"
 
 /* TODO (at least...):
  *  1. Indexed consts  + ARL
@@ -874,7 +875,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
        struct nouveau_grobj *eng3d = screen->eng3d;
        struct nvfx_vertex_program *vp;
        struct pipe_resource *constbuf;
-       struct pipe_transfer *transfer = NULL;
        boolean upload_code = FALSE, upload_data = FALSE;
        int i;
 
@@ -983,11 +983,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
        if (vp->nr_consts) {
                float *map = NULL;
 
-               if (constbuf) {
-                       map = pipe_buffer_map(pipe, constbuf,
-                                             PIPE_TRANSFER_READ,
-                                             &transfer);
-               }
+               if (constbuf)
+                       map = nvfx_buffer(constbuf)->data;
 
                for (i = 0; i < vp->nr_consts; i++) {
                        struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -1005,9 +1002,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
                        OUT_RING  (chan, i + vp->data->start);
                        OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
                }
-
-               if (constbuf)
-                       pipe_buffer_unmap(pipe, constbuf, transfer);
        }
 
        /* Upload vtxprog */