Merge branch 'upstream-gallium-0.1' into nouveau-gallium-0.1
[mesa.git] / src / mesa / pipe / nv40 / nv40_vbo.c
index 2da0001b2886d2b87fb6fe5361223a538a656e9c..6b1ac65b49e7040dd7c8d640a9323c9426c38d82 100644 (file)
 #include "pipe/p_util.h"
 
 #include "nv40_context.h"
-#include "nv40_dma.h"
 #include "nv40_state.h"
-#include "nvgl_pipe.h"
+
+#include "pipe/nouveau/nouveau_channel.h"
+#include "pipe/nouveau/nouveau_pushbuf.h"
+
+static INLINE int
+nv40_vbo_ncomp(uint format)
+{
+       /* Count how many of the x, y, z and w channels have a non-zero
+        * size in the given format.
+        */
+       const int has_x = pf_size_x(format) ? 1 : 0;
+       const int has_y = pf_size_y(format) ? 1 : 0;
+       const int has_z = pf_size_z(format) ? 1 : 0;
+       const int has_w = pf_size_w(format) ? 1 : 0;
+
+       return has_x + has_y + has_z + has_w;
+}
+
+static INLINE int
+nv40_vbo_type(uint format)
+{
+       const uint fmt_type = pf_type(format);
+
+       /* Map the type field of the format onto a hardware vertex
+        * format type.
+        */
+       if (fmt_type == PIPE_FORMAT_TYPE_FLOAT)
+               return NV40TCL_VTXFMT_TYPE_FLOAT;
+       if (fmt_type == PIPE_FORMAT_TYPE_UNORM)
+               return NV40TCL_VTXFMT_TYPE_UBYTE;
+
+       /* Any other type is reported and then treated as float. */
+       NOUVEAU_ERR("Unknown format 0x%08x\n", format);
+       return NV40TCL_VTXFMT_TYPE_FLOAT;
+}
+
+/**
+ * Emit a vertex attribute as an immediate 4-component float value.
+ *
+ * Maps \p vb, reads one element at vb->buffer_offset + ve->src_offset and
+ * pushes it through NV40TCL_VTX_ATTR_4F_X(attrib).  Components that the
+ * source format does not provide are filled with 0, 0, 0, 1.
+ *
+ * \return TRUE when the attribute was emitted.  FALSE when the format is
+ *         not a float format, has no components, or the buffer could not
+ *         be mapped; nothing is written to the ring in that case, so the
+ *         caller can fall back to another path.
+ */
+static boolean
+nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib,
+                      struct pipe_vertex_element *ve,
+                      struct pipe_vertex_buffer *vb)
+{
+       static const float fill[4] = { 0.0, 0.0, 0.0, 1.0 };
+       struct pipe_winsys *ws = nv40->pipe.winsys;
+       int ncomp, i;
+       char *map;
+       float *v;
+
+       /* Validate everything before BEGIN_RING: once the 4-dword packet
+        * has been opened it must be filled, so no failure path may come
+        * after it.
+        */
+       if (nv40_vbo_type(ve->src_format) != NV40TCL_VTXFMT_TYPE_FLOAT)
+               return FALSE;
+
+       ncomp = nv40_vbo_ncomp(ve->src_format);
+       if (ncomp < 1 || ncomp > 4)
+               return FALSE;
+
+       map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+       if (!map)
+               return FALSE;
+
+       /* Byte offset applied on a char pointer (no void* arithmetic). */
+       v = (float *)(map + vb->buffer_offset + ve->src_offset);
+
+       BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4);
+       for (i = 0; i < 4; i++)
+               OUT_RINGf((i < ncomp) ? v[i] : fill[i]);
+
+       ws->buffer_unmap(ws, vb->buffer);
+
+       return TRUE;
+}
+
+/**
+ * Build and emit the vertex buffer address and vertex format state
+ * objects, and optionally the index buffer address/format.
+ *
+ * vp->ir is used here as a bitmask of enabled hardware inputs (bits 0..15).
+ * One entry is emitted for every slot up to and including the highest
+ * enabled one; disabled slots get address 0 and a float format.
+ *
+ * \param nv40       context whose vtxelt/vtxbuf arrays are read
+ * \param ib         index buffer to emit relocations for, or NULL
+ * \param ib_format  value OR'd into the index buffer format relocation
+ */
+static void
+nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib,
+                      unsigned ib_format)
+{
+       struct nv40_vertex_program *vp = nv40->vertprog.active;
+       struct nouveau_stateobj *vtxbuf, *vtxfmt;
+       /* num_hw must start at a defined value: the scan below never
+        * assigns it when vp->ir has no bits set, and it is incremented
+        * unconditionally afterwards.
+        */
+       unsigned inputs, hw, num_hw = 0;
+       unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+       /* Find the highest enabled input; num_hw becomes that index + 1
+        * (or 1 when no input is enabled).
+        */
+       inputs = vp->ir;
+       for (hw = 0; hw < 16 && inputs; hw++) {
+               if (inputs & (1 << hw)) {
+                       num_hw = hw;
+                       inputs &= ~(1 << hw);
+               }
+       }
+       num_hw++;
+
+       vtxbuf = so_new(20, 18);
+       so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw);
+       vtxfmt = so_new(17, 0);
+       so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw);
+
+       inputs = vp->ir;
+       for (hw = 0; hw < num_hw; hw++) {
+               struct pipe_vertex_element *ve;
+               struct pipe_vertex_buffer *vb;
+
+               /* Disabled slot: placeholder address and format. */
+               if (!(inputs & (1 << hw))) {
+                       so_data(vtxbuf, 0);
+                       so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
+                       continue;
+               }
+
+               ve = &nv40->vtxelt[hw];
+               vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+
+               /* Zero-pitch buffers are pushed as an immediate attribute
+                * when possible; the array slot is then left empty.
+                */
+               if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) {
+                       so_data(vtxbuf, 0);
+                       so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT);
+                       continue;
+               }
+
+               so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset,
+                        vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+                        0, NV40TCL_VTXBUF_ADDRESS_DMA1);
+               so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
+                                 (nv40_vbo_ncomp(ve->src_format) <<
+                                  NV40TCL_VTXFMT_SIZE_SHIFT) |
+                                 nv40_vbo_type(ve->src_format)));
+       }
+
+       if (ib) {
+               so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2);
+               so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0);
+               so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR,
+                         0, NV40TCL_IDXBUF_FORMAT_DMA1);
+       }
+
+       so_emit(nv40->nvws, vtxfmt);
+       so_emit(nv40->nvws, vtxbuf);
+       /* Keep a reference to vtxbuf in the context, then drop the local
+        * references.
+        */
+       so_ref (vtxbuf, &nv40->so_vtxbuf);
+       so_ref (NULL, &vtxbuf);
+       so_ref (NULL, &vtxfmt);
+}
+
+static boolean
+nv40_vbo_validate_state(struct nv40_context *nv40,
+                       struct pipe_buffer *ib, unsigned ib_format)
+{
+       /* Sample the arrays-dirty bit before the hardware state is
+        * emitted.
+        */
+       const unsigned arrays_dirty = nv40->dirty & NV40_NEW_ARRAYS;
+
+       nv40_emit_hw_state(nv40);
+
+       /* Re-emit the vertex array state when it was dirty or when an
+        * index buffer is supplied.
+        */
+       if (arrays_dirty != 0 || ib != NULL) {
+               nv40_vbo_arrays_update(nv40, ib, ib_format);
+               nv40->dirty &= ~NV40_NEW_ARRAYS;
+       }
+
+       so_emit_reloc_markers(nv40->nvws, nv40->so_vtxbuf);
+
+       BEGIN_RING(curie, 0x1710, 1);
+       OUT_RING  (0); /* vtx cache flush */
+
+       return TRUE;
+}
 
 boolean
 nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
                 unsigned count)
 {
-       struct nv40_context *nv40 = (struct nv40_context *)pipe;
+       struct nv40_context *nv40 = nv40_context(pipe);
        unsigned nr;
+       boolean ret;
 
-       if (nv40->dirty)
-               nv40_emit_hw_state(nv40);
+       ret = nv40_vbo_validate_state(nv40, NULL, 0);
+       if (!ret) {
+               NOUVEAU_ERR("state validate failed\n");
+               return FALSE;
+       }
 
        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING  (nvgl_primitive(mode));
@@ -27,13 +207,14 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
                start += nr;
        }
 
-       /*XXX: large arrays (nr>2047) will blow up */
        nr = count >> 8;
-       if (nr) {
-               assert (nr <= 2047);
+       while (nr) {
+               unsigned push = nr > 2047 ? 2047 : nr;
+
+               nr -= push;
 
-               BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, nr);
-               while (nr--) {
+               BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+               while (push--) {
                        OUT_RING(((0x100 - 1) << 24) | start);
                        start += 0x100;
                }
@@ -42,7 +223,7 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING  (0);
 
-       pipe->flush(pipe, PIPE_FLUSH_WAIT);
+       pipe->flush(pipe, 0);
        return TRUE;
 }
 
@@ -60,9 +241,9 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
        }
 
        while (count) {
-               push = MIN2(count, 2046);
+               push = MIN2(count, 2047 * 2);
 
-               BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push);
+               BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
                for (i = 0; i < push; i+=2)
                        OUT_RING((elts[i+1] << 16) | elts[i]);
 
@@ -85,9 +266,9 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
        }
 
        while (count) {
-               push = MIN2(count, 2046);
+               push = MIN2(count, 2047 * 2);
 
-               BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push);
+               BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
                for (i = 0; i < push; i+=2)
                        OUT_RING((elts[i+1] << 16) | elts[i]);
 
@@ -114,109 +295,130 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
        }
 }
 
-boolean
-nv40_draw_elements(struct pipe_context *pipe,
-                  struct pipe_buffer_handle *indexBuffer, unsigned indexSize,
-                  unsigned mode, unsigned start, unsigned count)
+/* Draw indexed primitives by mapping the index buffer and pushing the
+ * indices through the ring with the nv40_draw_elements_u08/u16/u32
+ * helpers.  Returns FALSE when state validation or mapping the index
+ * buffer fails, TRUE otherwise.
+ */
+static boolean
+nv40_draw_elements_inline(struct pipe_context *pipe,
+                         struct pipe_buffer *ib, unsigned ib_size,
+                         unsigned mode, unsigned start, unsigned count)
 {
-       struct nv40_context *nv40 = (struct nv40_context *)pipe;
-       void *ib;
-
-       if (nv40->dirty)
-               nv40_emit_hw_state(nv40);
+       struct nv40_context *nv40 = nv40_context(pipe);
+       struct pipe_winsys *ws = pipe->winsys;
+       boolean ret;
+       void *map;
+
+       ret = nv40_vbo_validate_state(nv40, NULL, 0);
+       if (!ret) {
+               NOUVEAU_ERR("state validate failed\n");
+               return FALSE;
+       }
 
-       ib = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
-                                     PIPE_BUFFER_FLAG_READ);
+       map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
-       if (!ib) {
+       /* Test the mapping result, not the buffer handle. */
+       if (!map) {
-               NOUVEAU_ERR("Couldn't map index buffer!!\n");
+               NOUVEAU_ERR("failed mapping ib\n");
                return FALSE;
        }
 
        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING  (nvgl_primitive(mode));
 
-       switch (indexSize) {
+       switch (ib_size) {
        case 1:
-               nv40_draw_elements_u08(nv40, ib, start, count);
+               nv40_draw_elements_u08(nv40, map, start, count);
                break;
        case 2:
-               nv40_draw_elements_u16(nv40, ib, start, count);
+               nv40_draw_elements_u16(nv40, map, start, count);
                break;
        case 4:
-               nv40_draw_elements_u32(nv40, ib, start, count);
+               nv40_draw_elements_u32(nv40, map, start, count);
                break;
        default:
-               NOUVEAU_ERR("unsupported elt size %d\n", indexSize);
+               NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
                break;
        }
 
        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
        OUT_RING  (0);
 
-       pipe->winsys->buffer_unmap(pipe->winsys, ib);
-       pipe->flush(pipe, PIPE_FLUSH_WAIT);
+       ws->buffer_unmap(ws, ib);
+
        return TRUE;
 }
 
-static INLINE int
-nv40_vbo_format_to_ncomp(uint format)
+/* Draw indexed primitives from an index buffer bound through
+ * nv40_vbo_validate_state().  Only 2- and 4-byte indices are accepted;
+ * any other size is reported and FALSE is returned.  Also returns FALSE
+ * when state validation fails, TRUE otherwise.
+ */
+static boolean
+nv40_draw_elements_vbo(struct pipe_context *pipe,
+                      struct pipe_buffer *ib, unsigned ib_size,
+                      unsigned mode, unsigned start, unsigned count)
 {
-       switch (format) {
-       case PIPE_FORMAT_R32G32B32A32_FLOAT: return 4;
-       case PIPE_FORMAT_R32G32B32_FLOAT: return 3;
-       case PIPE_FORMAT_R32G32_FLOAT: return 2;
-       case PIPE_FORMAT_R32_FLOAT: return 1;
+       struct nv40_context *nv40 = nv40_context(pipe);
+       unsigned nr, type;
+       boolean ret;
+
+       /* Translate the index size in bytes into the index buffer
+        * format type passed to state validation.
+        */
+       switch (ib_size) {
+       case 2:
+               type = NV40TCL_IDXBUF_FORMAT_TYPE_U16;
+               break;
+       case 4:
+               type = NV40TCL_IDXBUF_FORMAT_TYPE_U32;
+               break;
        default:
-              NOUVEAU_ERR("AII, unknown vbo format %d\n", format);
-              return 1;
+               NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+               return FALSE;
        }
-}
 
-void
-nv40_vbo_arrays_update(struct nv40_context *nv40)
-{
-       struct nouveau_winsys *nvws = nv40->nvws;
-       struct nv40_vertex_program *vp = nv40->vertprog.active;
-       uint32_t inputs, vtxfmt[16];
-       int hw, num_hw;
+       ret = nv40_vbo_validate_state(nv40, ib, type);
+       if (!ret) {
+               NOUVEAU_ERR("failed state validation\n");
+               return FALSE;
+       }
 
-       inputs = vp->ir;
-       for (hw = 0; hw < 16 && inputs; hw++) {
-               if (inputs & (1 << hw)) {
-                       num_hw = hw;
-                       inputs &= ~(1 << hw);
-               }
+       BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+       OUT_RING  (nvgl_primitive(mode));
+
+       /* First emit the remainder (count modulo 0x100) as one batch
+        * word of the form ((n - 1) << 24) | start.
+        */
+       nr = (count & 0xff);
+       if (nr) {
+               BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
+               OUT_RING  (((nr - 1) << 24) | start);
+               start += nr;
        }
-       num_hw++;
 
-       inputs = vp->ir;
-       BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw);
-       for (hw = 0; hw < num_hw; hw++) {
-               struct pipe_vertex_element *ve;
-               struct pipe_vertex_buffer *vb;
+       /* Then emit the remaining full batches of 0x100 indices, at
+        * most 2047 batch words per packet.
+        */
+       nr = count >> 8;
+       while (nr) {
+               unsigned push = nr > 2047 ? 2047 : nr;
 
-               if (!(inputs & (1 << hw))) {
-                       OUT_RING(0);
-                       vtxfmt[hw] = NV40TCL_VTXFMT_TYPE_FLOAT;
-                       continue;
+               nr -= push;
+
+               BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+               while (push--) {
+                       OUT_RING(((0x100 - 1) << 24) | start);
+                       start += 0x100;
                }
+       }
 
-               ve = &nv40->vtxelt[hw];
-               vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+       BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+       OUT_RING  (0);
+
+       return TRUE;
+}
 
-               OUT_RELOC(vb->buffer, vb->buffer_offset + ve->src_offset,
-                         NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
-                         NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
-                         NV40TCL_VTXBUF_ADDRESS_DMA1);
-               vtxfmt[hw] = ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
-                             (nv40_vbo_format_to_ncomp(ve->src_format) <<
-                              NV40TCL_VTXFMT_SIZE_SHIFT) |
-                             NV40TCL_VTXFMT_TYPE_FLOAT);
+/* Draw indexed primitives, choosing between pushing the indices inline
+ * and using a hardware index buffer.  The pipe is flushed in either
+ * case.  Returns the result of the chosen draw path, so a failed state
+ * validation or buffer mapping is reported to the caller as FALSE.
+ */
+boolean
+nv40_draw_elements(struct pipe_context *pipe,
+                  struct pipe_buffer *indexBuffer, unsigned indexSize,
+                  unsigned mode, unsigned start, unsigned count)
+{
+       struct nv40_context *nv40 = nv40_context(pipe);
+       boolean ret;
+
+       /* 0x4497 doesn't support real index buffers, and there doesn't appear
+        * to be support on any chipset for 8-bit indices.
+        */
+       if (nv40->hw->curie->grclass == NV44TCL || indexSize == 1) {
+               ret = nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
+                                               mode, start, count);
+       } else {
+               ret = nv40_draw_elements_vbo(pipe, indexBuffer, indexSize,
+                                            mode, start, count);
        }
 
-       BEGIN_RING(curie, 0x1710, 1);
-       OUT_RING  (0); /* vtx cache flush */
-       BEGIN_RING(curie, NV40TCL_VTXFMT(0), num_hw);
-       OUT_RINGp (vtxfmt, num_hw);
+       pipe->flush(pipe, 0);
+       return ret;
 }
 
+