}
static INLINE void
-nv50_user_vbuf_range(struct nv50_context *nv50, int vbi,
+nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi,
uint32_t *base, uint32_t *size)
{
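+ /* vbi indexes the PIPE_MAX_ATTRIBS-sized vtxbuf[] array and feeds the
+ * (1 << vbi) shift below, so it must stay in range. */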
+ assert(vbi < PIPE_MAX_ATTRIBS);
if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) {
/* TODO: use min and max instance divisor to get a proper range */
*base = 0;
{
unsigned b;
+ assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (b = 0; b < nv50->num_vtxbufs; ++b) {
struct nouveau_bo *bo;
const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
for (i = 0; i < nv50->vertex->num_elements; ++i) {
struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;
const unsigned b = ve->vertex_buffer_index;
- struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
+ struct pipe_vertex_buffer *vb;
uint32_t base, size;
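+ /* check the index before creating a pointer into vtxbuf[] */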
+ assert(b < PIPE_MAX_ATTRIBS);
+ vb = &nv50->vtxbuf[b];
+
if (!(nv50->vbo_user & (1 << b)))
continue;
if (!nv50->vbo_fifo) {
/* if a vertex buffer was written by the GPU, flush the VBO cache */
+ assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
}
for (i = 0; i < vertex->num_elements; ++i) {
const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
+
+ assert(b < PIPE_MAX_ATTRIBS);
ve = &vertex->element[i];
vb = &nv50->vtxbuf[b];
for (i = 0; i < vertex->num_elements; ++i) {
uint64_t address, limit;
const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
+
+ assert(b < PIPE_MAX_ATTRIBS);
ve = &vertex->element[i];
vb = &nv50->vtxbuf[b];
assert(nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer));
+ /* This shouldn't have to be here. The going theory is that the buffer
+ * is being filled in by PGRAPH, and it's not done yet by the time it
+ * gets submitted to PFIFO, which in turn starts immediately prefetching
+ * the not-yet-written data. Ideally this wait would happen only on
+ * pushbuf submit, but it probably makes little difference to performance.
+ */
+ if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
+ nouveau_fence_wait(buf->fence_wr);
+
while (instance_count--) {
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, prim);
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ int i;
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
nv50->vb_elt_first = info->min_index + info->index_bias;
PUSH_DATA (push, info->start_instance);
}
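+ /* coherently mapped buffers can be written by the CPU at any time, so any
+ * bound one makes the vertex data potentially dirty on every draw */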
+ for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) {
+ if (!nv50->vtxbuf[i].buffer)
+ continue;
+ if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nv50->base.vbo_dirty = TRUE;
+ }
+
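+ /* the index buffer can be coherently mapped as well */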
+ if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
+ nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nv50->base.vbo_dirty = TRUE;
+
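+ /* dirty vertex data means the vertex array cache must be flushed before drawing */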
if (nv50->base.vbo_dirty) {
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
PUSH_DATA (push, 0);