nouveau: replace vtxbuf/idxbuf caps with BO_ flags in nouveau_screen and fix uncached...
[mesa.git] / src / gallium / drivers / nvfx / nvfx_vbo.c
index 257087f8f636e7b507ce1eda5c5167cedc5f147b..bc87fe275c0c20b4c9e32b6358efc0b2fc2ac2cf 100644 (file)
@@ -5,20 +5,12 @@
 
 #include "nvfx_context.h"
 #include "nvfx_state.h"
+#include "nvfx_resource.h"
 
 #include "nouveau/nouveau_channel.h"
 #include "nouveau/nouveau_pushbuf.h"
 #include "nouveau/nouveau_util.h"
 
-static boolean
-nvfx_force_swtnl(struct nvfx_context *nvfx)
-{
-       static int force_swtnl = -1;
-       if(force_swtnl < 0)
-               force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", 0);
-       return force_swtnl;
-}
-
 static INLINE int
 nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
 {
@@ -29,6 +21,12 @@ nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
        case PIPE_FORMAT_R32G32B32A32_FLOAT:
                *fmt = NV34TCL_VTXFMT_TYPE_FLOAT;
                break;
+       case PIPE_FORMAT_R16_FLOAT:
+       case PIPE_FORMAT_R16G16_FLOAT:
+       case PIPE_FORMAT_R16G16B16_FLOAT:
+       case PIPE_FORMAT_R16G16B16A16_FLOAT:
+               *fmt = NV34TCL_VTXFMT_TYPE_HALF;
+               break;
        case PIPE_FORMAT_R8_UNORM:
        case PIPE_FORMAT_R8G8_UNORM:
        case PIPE_FORMAT_R8G8B8_UNORM:
@@ -49,21 +47,25 @@ nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
        switch (pipe) {
        case PIPE_FORMAT_R8_UNORM:
        case PIPE_FORMAT_R32_FLOAT:
+       case PIPE_FORMAT_R16_FLOAT:
        case PIPE_FORMAT_R16_SSCALED:
                *ncomp = 1;
                break;
        case PIPE_FORMAT_R8G8_UNORM:
        case PIPE_FORMAT_R32G32_FLOAT:
+       case PIPE_FORMAT_R16G16_FLOAT:
        case PIPE_FORMAT_R16G16_SSCALED:
                *ncomp = 2;
                break;
        case PIPE_FORMAT_R8G8B8_UNORM:
        case PIPE_FORMAT_R32G32B32_FLOAT:
+       case PIPE_FORMAT_R16G16B16_FLOAT:
        case PIPE_FORMAT_R16G16B16_SSCALED:
                *ncomp = 3;
                break;
        case PIPE_FORMAT_R8G8B8A8_UNORM:
        case PIPE_FORMAT_R32G32B32A32_FLOAT:
+       case PIPE_FORMAT_R16G16B16A16_FLOAT:
        case PIPE_FORMAT_R16G16B16A16_SSCALED:
                *ncomp = 4;
                break;
@@ -76,7 +78,7 @@ nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
 }
 
 static boolean
-nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib,
+nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib,
                    unsigned ib_size)
 {
        struct pipe_screen *pscreen = &nvfx->screen->base.base;
@@ -88,7 +90,7 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib,
                return FALSE;
        }
 
-       if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1)
+       if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1)
                return FALSE;
 
        switch (ib_size) {
@@ -112,63 +114,47 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib,
        return TRUE;
 }
 
-static boolean
-nvfx_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so,
+// Pushes one attribute value inline: reads ncomp floats at vb->buffer_offset + ve->src_offset and emits VTX_ATTR_{1..4}F(attrib); the element type must be floating point (ncomp outside 1..4 emits nothing)
+static inline void
+nvfx_vbo_static_attrib(struct nvfx_context *nvfx,
                        int attrib, struct pipe_vertex_element *ve,
-                      struct pipe_vertex_buffer *vb)
+                      struct pipe_vertex_buffer *vb, unsigned ncomp)
 {
-       struct pipe_screen *pscreen = nvfx->pipe.screen;
-       struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
-       unsigned type, ncomp;
+       struct pipe_transfer *transfer;
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
        void *map;
 
-       if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp))
-               return FALSE;
-
-       map  = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+       map  = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer);
        map += vb->buffer_offset + ve->src_offset;
 
-       switch (type) {
-       case NV34TCL_VTXFMT_TYPE_FLOAT:
-       {
-               float *v = map;
-
-               switch (ncomp) {
-               case 4:
-                       so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4);
-                       so_data  (so, fui(v[0]));
-                       so_data  (so, fui(v[1]));
-                       so_data  (so, fui(v[2]));
-                       so_data  (so, fui(v[3]));
-                       break;
-               case 3:
-                       so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3);
-                       so_data  (so, fui(v[0]));
-                       so_data  (so, fui(v[1]));
-                       so_data  (so, fui(v[2]));
-                       break;
-               case 2:
-                       so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2);
-                       so_data  (so, fui(v[0]));
-                       so_data  (so, fui(v[1]));
-                       break;
-               case 1:
-                       so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1);
-                       so_data  (so, fui(v[0]));
-                       break;
-               default:
-                       pipe_buffer_unmap(pscreen, vb->buffer);
-                       return FALSE;
-               }
-       }
+       float *v = map;
+
+       switch (ncomp) {
+       case 4:
+               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4));
+               OUT_RING(chan, fui(v[0]));
+               OUT_RING(chan, fui(v[1]));
+               OUT_RING(chan,  fui(v[2]));
+               OUT_RING(chan,  fui(v[3]));
+               break;
+       case 3:
+               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3));
+               OUT_RING(chan,  fui(v[0]));
+               OUT_RING(chan,  fui(v[1]));
+               OUT_RING(chan,  fui(v[2]));
+               break;
+       case 2:
+               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2));
+               OUT_RING(chan,  fui(v[0]));
+               OUT_RING(chan,  fui(v[1]));
+               break;
+       case 1:
+               OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1));
+               OUT_RING(chan,  fui(v[0]));
                break;
-       default:
-               pipe_buffer_unmap(pscreen, vb->buffer);
-               return FALSE;
        }
 
-       pipe_buffer_unmap(pscreen, vb->buffer);
-       return TRUE;
+       pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer);
 }
 
 void
@@ -178,11 +164,10 @@ nvfx_draw_arrays(struct pipe_context *pipe,
        struct nvfx_context *nvfx = nvfx_context(pipe);
        struct nvfx_screen *screen = nvfx->screen;
        struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
        unsigned restart = 0;
 
        nvfx_vbo_set_idxbuf(nvfx, NULL, 0);
-       if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) {
+       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
                nvfx_draw_elements_swtnl(pipe, NULL, 0,
                                            mode, start, count);
                 return;
@@ -193,19 +178,22 @@ nvfx_draw_arrays(struct pipe_context *pipe,
 
                nvfx_state_emit(nvfx);
 
-               vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256,
+               unsigned avail = AVAIL_RING(chan);
+               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+               vc = nouveau_vbuf_split(avail, 6, 256,
                                        mode, start, count, &restart);
                if (!vc) {
                        FIRE_RING(chan);
                        continue;
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, nvgl_primitive(mode));
 
                nr = (vc & 0xff);
                if (nr) {
-                       BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1);
+                       OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1));
                        OUT_RING  (chan, ((nr - 1) << 24) | start);
                        start += nr;
                }
@@ -216,14 +204,14 @@ nvfx_draw_arrays(struct pipe_context *pipe,
 
                        nr -= push;
 
-                       BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push);
+                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push));
                        while (push--) {
                                OUT_RING(chan, ((0x100 - 1) << 24) | start);
                                start += 0x100;
                        }
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, 0);
 
                count -= vc;
@@ -239,7 +227,6 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
 {
        struct nvfx_screen *screen = nvfx->screen;
        struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
 
        while (count) {
                uint8_t *elts = (uint8_t *)ib + start;
@@ -247,7 +234,10 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
 
                nvfx_state_emit(nvfx);
 
-               vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2,
+               unsigned avail = AVAIL_RING(chan);
+               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+               vc = nouveau_vbuf_split(avail, 6, 2,
                                        mode, start, count, &restart);
                if (vc == 0) {
                        FIRE_RING(chan);
@@ -255,11 +245,11 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
                }
                count -= vc;
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, nvgl_primitive(mode));
 
                if (vc & 1) {
-                       BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1);
+                       OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
                        OUT_RING  (chan, elts[0]);
                        elts++; vc--;
                }
@@ -269,7 +259,7 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
 
                        push = MIN2(vc, 2047 * 2);
 
-                       BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1);
+                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
                        for (i = 0; i < push; i+=2)
                                OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
 
@@ -277,7 +267,7 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib,
                        elts += push;
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, 0);
 
                start = restart;
@@ -290,7 +280,6 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
 {
        struct nvfx_screen *screen = nvfx->screen;
        struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
 
        while (count) {
                uint16_t *elts = (uint16_t *)ib + start;
@@ -298,7 +287,10 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
 
                nvfx_state_emit(nvfx);
 
-               vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2,
+               unsigned avail = AVAIL_RING(chan);
+               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+               vc = nouveau_vbuf_split(avail, 6, 2,
                                        mode, start, count, &restart);
                if (vc == 0) {
                        FIRE_RING(chan);
@@ -306,11 +298,11 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
                }
                count -= vc;
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, nvgl_primitive(mode));
 
                if (vc & 1) {
-                       BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1);
+                       OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1));
                        OUT_RING  (chan, elts[0]);
                        elts++; vc--;
                }
@@ -320,7 +312,7 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
 
                        push = MIN2(vc, 2047 * 2);
 
-                       BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1);
+                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1));
                        for (i = 0; i < push; i+=2)
                                OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
 
@@ -328,7 +320,7 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib,
                        elts += push;
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, 0);
 
                start = restart;
@@ -341,7 +333,6 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
 {
        struct nvfx_screen *screen = nvfx->screen;
        struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
 
        while (count) {
                uint32_t *elts = (uint32_t *)ib + start;
@@ -349,7 +340,10 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
 
                nvfx_state_emit(nvfx);
 
-               vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1,
+               unsigned avail = AVAIL_RING(chan);
+               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+               vc = nouveau_vbuf_split(avail, 5, 1,
                                        mode, start, count, &restart);
                if (vc == 0) {
                        FIRE_RING(chan);
@@ -357,20 +351,20 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
                }
                count -= vc;
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, nvgl_primitive(mode));
 
                while (vc) {
                        push = MIN2(vc, 2047);
 
-                       BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push);
+                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push));
                        OUT_RINGp    (chan, elts, push);
 
                        vc -= push;
                        elts += push;
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, 0);
 
                start = restart;
@@ -379,14 +373,14 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib,
 
 static void
 nvfx_draw_elements_inline(struct pipe_context *pipe,
-                         struct pipe_buffer *ib, unsigned ib_size,
+                         struct pipe_resource *ib, unsigned ib_size,
                          unsigned mode, unsigned start, unsigned count)
 {
        struct nvfx_context *nvfx = nvfx_context(pipe);
-       struct pipe_screen *pscreen = pipe->screen;
+       struct pipe_transfer *transfer;
        void *map;
 
-       map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ);
+       map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer);
        if (!ib) {
                NOUVEAU_ERR("failed mapping ib\n");
                return;
@@ -407,7 +401,7 @@ nvfx_draw_elements_inline(struct pipe_context *pipe,
                break;
        }
 
-       pipe_buffer_unmap(pscreen, ib);
+       pipe_buffer_unmap(pipe, ib, transfer);
 }
 
 static void
@@ -417,7 +411,6 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe,
        struct nvfx_context *nvfx = nvfx_context(pipe);
        struct nvfx_screen *screen = nvfx->screen;
        struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
        unsigned restart = 0;
 
        while (count) {
@@ -425,19 +418,22 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe,
 
                nvfx_state_emit(nvfx);
 
-               vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256,
+               unsigned avail = AVAIL_RING(chan);
+               avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */
+
+               vc = nouveau_vbuf_split(avail, 6, 256,
                                        mode, start, count, &restart);
                if (!vc) {
                        FIRE_RING(chan);
                        continue;
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, nvgl_primitive(mode));
 
                nr = (vc & 0xff);
                if (nr) {
-                       BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1);
+                       OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1));
                        OUT_RING  (chan, ((nr - 1) << 24) | start);
                        start += nr;
                }
@@ -448,14 +444,14 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe,
 
                        nr -= push;
 
-                       BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push);
+                       OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push));
                        while (push--) {
                                OUT_RING(chan, ((0x100 - 1) << 24) | start);
                                start += 0x100;
                        }
                }
 
-               BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1);
+               OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));
                OUT_RING  (chan, 0);
 
                count -= vc;
@@ -465,14 +461,14 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe,
 
 void
 nvfx_draw_elements(struct pipe_context *pipe,
-                  struct pipe_buffer *indexBuffer, unsigned indexSize,
+                  struct pipe_resource *indexBuffer, unsigned indexSize,
                   unsigned mode, unsigned start, unsigned count)
 {
        struct nvfx_context *nvfx = nvfx_context(pipe);
        boolean idxbuf;
 
        idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize);
-       if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) {
+       if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) {
                nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize,
                                            mode, start, count);
                return;
@@ -488,83 +484,138 @@ nvfx_draw_elements(struct pipe_context *pipe,
        pipe->flush(pipe, 0, NULL);
 }
 
-static boolean
+boolean
 nvfx_vbo_validate(struct nvfx_context *nvfx)
 {
-       struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
-       struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
-       struct pipe_buffer *ib = nvfx->idxbuf;
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       struct pipe_resource *ib = nvfx->idxbuf;
        unsigned ib_format = nvfx->idxbuf_format;
-       unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-       int hw;
+       int i;
+       int elements = MAX2(nvfx->vtxelt->num_elements, nvfx->hw_vtxelt_nr);
+       uint32_t vtxfmt[16];
+       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD;
+
+       if (!elements)
+               return TRUE;
 
-       vtxbuf = so_new(3, 17, 18);
-       so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements);
-       vtxfmt = so_new(1, 16, 0);
-       so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements);
+       nvfx->vbo_bo = 0;
 
-       for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) {
+       MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
+       for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
                struct pipe_vertex_element *ve;
                struct pipe_vertex_buffer *vb;
                unsigned type, ncomp;
 
-               ve = &nvfx->vtxelt->pipe[hw];
+               ve = &nvfx->vtxelt->pipe[i];
                vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
 
-               if (!vb->stride) {
-                       if (!sattr)
-                               sattr = so_new(16, 16 * 4, 0);
-
-                       if (nvfx_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) {
-                               so_data(vtxbuf, 0);
-                               so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT);
-                               continue;
-                       }
-               }
-
                if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) {
+                       MARK_UNDO(chan);
                        nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;
-                       so_ref(NULL, &vtxbuf);
-                       so_ref(NULL, &vtxfmt);
                        return FALSE;
                }
 
-               so_reloc(vtxbuf, nouveau_bo(vb->buffer),
+               if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) {
+                       nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp);
+                       vtxfmt[i] = type;
+               } else {
+                       vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
+                               (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type);
+                       nvfx->vbo_bo |= (1 << i);
+               }
+       }
+
+       for(; i < elements; ++i)
+               vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT;
+
+       OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements));
+       OUT_RINGp(chan, vtxfmt, elements);
+
+       if(nvfx->is_nv4x) {
+               unsigned i;
+               /* writes 0 to the unnamed method 0x1718 three times; seems to be some kind of cache flushing */
+               for(i = 0; i < 3; ++i) {
+                       OUT_RING(chan, RING_3D(0x1718, 1));
+                       OUT_RING(chan, 0);
+               }
+       }
+
+       OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements));
+       for (i = 0; i < nvfx->vtxelt->num_elements; i++) {
+               struct pipe_vertex_element *ve;
+               struct pipe_vertex_buffer *vb;
+
+               ve = &nvfx->vtxelt->pipe[i];
+               vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+
+               if (!(nvfx->vbo_bo & (1 << i)))
+                       OUT_RING(chan, 0);
+               else
+               {
+                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+                       OUT_RELOC(chan, bo,
                                 vb->buffer_offset + ve->src_offset,
                                 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
                                 0, NV34TCL_VTXBUF_ADDRESS_DMA1);
-               so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) |
-                                 (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type));
+               }
        }
 
+        for (; i < elements; i++)
+               OUT_RING(chan, 0);
+
+       OUT_RING(chan, RING_3D(0x1710, 1));
+       OUT_RING(chan, 0);
+
        if (ib) {
-               struct nouveau_bo *bo = nouveau_bo(ib);
+               unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD;
+               struct nouveau_bo* bo = nvfx_resource(ib)->bo;
+
+               assert(nvfx->screen->index_buffer_reloc_flags);
 
-               so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2);
-               so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
-               so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR,
+               OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2));
+               OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0);
+               OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
                                  0, NV34TCL_IDXBUF_FORMAT_DMA1);
        }
 
-       so_method(vtxbuf, eng3d, 0x1710, 1);
-       so_data  (vtxbuf, 0);
-
-       so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]);
-       so_ref(NULL, &vtxbuf);
-       nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF);
-       so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]);
-       so_ref(NULL, &vtxfmt);
-       nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT);
-       so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]);
-       so_ref(NULL, &sattr);
-       nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR);
-       return FALSE;
+       nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+       return TRUE;
 }
 
-struct nvfx_state_entry nvfx_state_vbo = {
-       .validate = nvfx_vbo_validate,
-       .dirty = {
-               .pipe = NVFX_NEW_ARRAYS,
-               .hw = 0,
+void
+nvfx_vbo_relocate(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+       int i;
+
+       MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
+       for(i = 0; i < nvfx->vtxelt->num_elements; ++i) {
+               if(nvfx->vbo_bo & (1 << i)) {
+                       struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i];
+                       struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index];
+                       struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo;
+                       OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1),
+                                       vb_flags, 0, 0);
+                       OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset,
+                                       vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                                       0, NV34TCL_VTXBUF_ADDRESS_DMA1);
+               }
        }
-};
+
+       if(nvfx->idxbuf)
+       {
+               unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
+               struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo;
+
+               assert(nvfx->screen->index_buffer_reloc_flags);
+
+               OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2),
+                               ib_flags, 0, 0);
+               OUT_RELOC(chan, bo, 0,
+                               ib_flags | NOUVEAU_BO_LOW, 0, 0);
+               OUT_RELOC(chan, bo, nvfx->idxbuf_format,
+                               ib_flags | NOUVEAU_BO_OR,
+                               0, NV34TCL_IDXBUF_FORMAT_DMA1);
+       }
+}