gallium: add PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS

[mesa.git] / src / gallium / drivers / nouveau / nv50 / nv50_vbo.c
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c

index 5a4a4578d51e73a25f52d91101d4506a19f5a150..227038e95a5c68ee68763c3400bf13b8b9dbc38e 100644 (file)
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -27,6 +27,7 @@
  #include "translate/translate.h"
  
  #include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw.h"
  #include "nv50/nv50_resource.h"
  
  #include "nv50/nv50_3d.xml.h"
@@ -58,7 +59,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
      so->num_elements = num_elements;
      so->instance_elts = 0;
      so->instance_bufs = 0;
-    so->need_conversion = FALSE;
+    so->need_conversion = false;
  
      memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
  
@@ -75,7 +76,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
          enum pipe_format fmt = ve->src_format;
  
          so->element[i].pipe = elements[i];
-        so->element[i].state = nv50_format_table[fmt].vtx;
+        so->element[i].state = nv50_vertex_format[fmt].vtx;
  
          if (!so->element[i].state) {
              switch (util_format_get_nr_components(fmt)) {
@@ -88,8 +89,11 @@ nv50_vertex_state_create(struct pipe_context *pipe,
                  FREE(so);
                  return NULL;
              }
-            so->element[i].state = nv50_format_table[fmt].vtx;
-            so->need_conversion = TRUE;
+            so->element[i].state = nv50_vertex_format[fmt].vtx;
+            so->need_conversion = true;
+            pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
+                               "Converting vertex element %d, no hw format %s",
+                               i, util_format_name(ve->src_format));
          }
          so->element[i].state |= i;
  
@@ -188,7 +192,7 @@ nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
     }
  }
  
-static INLINE void
+static inline void
  nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi,
                       uint32_t *base, uint32_t *size)
  {
@@ -226,10 +230,10 @@ nv50_upload_user_buffers(struct nv50_context *nv50,
        addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size,
                                        &bo);
        if (addrs[b])
-         BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
+         BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, NOUVEAU_BO_GART |
                        NOUVEAU_BO_RD, bo);
     }
-   nv50->base.vbo_dirty = TRUE;
+   nv50->base.vbo_dirty = true;
  }
  
  static void
@@ -265,7 +269,7 @@ nv50_update_user_vbufs(struct nv50_context *nv50)
           address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
                                             base, size, &bo);
           if (address[b])
-            BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
+            BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, bo_flags, bo);
        }
  
        BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
@@ -275,14 +279,14 @@ nv50_update_user_vbufs(struct nv50_context *nv50)
        PUSH_DATAh(push, address[b] + ve->src_offset);
        PUSH_DATA (push, address[b] + ve->src_offset);
     }
-   nv50->base.vbo_dirty = TRUE;
+   nv50->base.vbo_dirty = true;
  }
  
-static INLINE void
+static inline void
  nv50_release_user_vbufs(struct nv50_context *nv50)
  {
     if (nv50->vbo_user) {
-      nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
+      nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX_TMP);
        nouveau_scratch_done(&nv50->base);
     }
  }
@@ -316,8 +320,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
           struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
           if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
              buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
-            nv50->base.vbo_dirty = TRUE;
-            break;
+            nv50->base.vbo_dirty = true;
           }
        }
     }
@@ -382,11 +385,16 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
        if (nv50->vbo_user & (1 << b)) {
           address = addrs[b] + ve->pipe.src_offset;
           limit = addrs[b] + limits[b];
+      } else
+      if (!vb->buffer) {
+         BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+         PUSH_DATA (push, 0);
+         continue;
        } else {
           struct nv04_resource *buf = nv04_resource(vb->buffer);
           if (!(refd & (1 << b))) {
              refd |= 1 << b;
-            BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+            BCTX_REFN(nv50->bufctx_3d, 3D_VERTEX, buf, RD);
           }
           address = buf->address + vb->buffer_offset + ve->pipe.src_offset;
           limit = buf->address + buf->base.width0 - 1;
@@ -418,7 +426,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
  #define NV50_PRIM_GL_CASE(n) \
     case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
  
-static INLINE unsigned
+static inline unsigned
  nv50_prim_gl(unsigned prim)
  {
     switch (prim) {
@@ -472,6 +480,10 @@ nv50_draw_arrays(struct nv50_context *nv50,
     if (nv50->state.index_bias) {
        BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
        PUSH_DATA (push, 0);
+      if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
+         BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
+         PUSH_DATA (push, 0);
+      }
        nv50->state.index_bias = 0;
     }
  
@@ -581,7 +593,7 @@ nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
  }
  
  static void
-nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+nv50_draw_elements(struct nv50_context *nv50, bool shorten,
                     unsigned mode, unsigned start, unsigned count,
                     unsigned instance_count, int32_t index_bias)
  {
@@ -594,6 +606,10 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
     if (index_bias != nv50->state.index_bias) {
        BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
        PUSH_DATA (push, index_bias);
+      if (nv50->screen->base.class_3d >= NV84_3D_CLASS) {
+         BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
+         PUSH_DATA (push, index_bias);
+      }
        nv50->state.index_bias = index_bias;
     }
  
@@ -614,13 +630,14 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
         * pushbuf submit, but it's probably not a big performance difference.
         */
        if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
-         nouveau_fence_wait(buf->fence_wr);
+         nouveau_fence_wait(buf->fence_wr, &nv50->base.debug);
  
        while (instance_count--) {
           BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
           PUSH_DATA (push, prim);
  
-         nouveau_pushbuf_space(push, 8, 0, 1);
+         nouveau_pushbuf_space(push, 16, 0, 1);
+         PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
  
           switch (index_size) {
           case 4:
@@ -685,6 +702,7 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
           prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
        }
     }
+   NOUVEAU_DRV_STAT(&nv50->screen->base, draw_calls_indexed, 1);
  }
  
  static void
@@ -722,9 +740,9 @@ nva0_draw_stream_output(struct nv50_context *nv50,
        BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
        PUSH_DATA (push, 0);
        BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
-      PUSH_DATA (push, 0);
-      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
-      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      PUSH_DATA (push, so->stride);
+      nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES,
+                                   nv50_query(so->pq), 0x4);
        BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
        PUSH_DATA (push, 0);
  
@@ -737,9 +755,9 @@ nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
  {
     struct nv50_screen *screen = chan->user_priv;
  
-   nouveau_fence_update(&screen->base, TRUE);
+   nouveau_fence_update(&screen->base, true);
  
-   nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
+   nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, true);
  }
  
  void
@@ -747,7 +765,8 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
  {
     struct nv50_context *nv50 = nv50_context(pipe);
     struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   int i, s;
+   bool tex_dirty = false;
+   int s;
  
     /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
     nv50->vb_elt_first = info->min_index + info->index_bias;
@@ -761,9 +780,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
     nv50->vbo_push_hint = /* the 64 is heuristic */
        !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count));
  
-   if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) {
+   if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) {
        if (!!nv50->vbo_fifo != nv50->vbo_push_hint)
-         nv50->dirty |= NV50_NEW_ARRAYS;
+         nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
        else
        if (!nv50->vbo_fifo)
           nv50_update_user_vbufs(nv50);
@@ -772,35 +791,43 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
     if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
        nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
  
-   nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
+   nv50_state_validate_3d(nv50, ~0);
  
     push->kick_notify = nv50_draw_vbo_kick_notify;
  
     for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
-      uint32_t valid = nv50->constbuf_valid[s];
-
-      while (valid && !nv50->cb_dirty) {
-         const unsigned i = ffs(valid) - 1;
-         struct pipe_resource *res;
-
-         valid &= ~(1 << i);
-         if (nv50->constbuf[s][i].user)
-            continue;
-
-         res = nv50->constbuf[s][i].u.buf;
-         if (!res)
-            continue;
-
-         if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-            nv50->cb_dirty = TRUE;
-      }
+      if (nv50->constbuf_coherent[s])
+         nv50->cb_dirty = true;
     }
  
     /* If there are any coherent constbufs, flush the cache */
     if (nv50->cb_dirty) {
        BEGIN_NV04(push, NV50_3D(CODE_CB_FLUSH), 1);
        PUSH_DATA (push, 0);
-      nv50->cb_dirty = FALSE;
+      nv50->cb_dirty = false;
+   }
+
+   for (s = 0; s < 3 && !tex_dirty; ++s) {
+      if (nv50->textures_coherent[s])
+         tex_dirty = true;
+   }
+
+   if (tex_dirty) {
+      BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+      PUSH_DATA (push, 0x20);
+   }
+
+   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS &&
+       nv50->seamless_cube_map != nv50->state.seamless_cube_map) {
+      nv50->state.seamless_cube_map = nv50->seamless_cube_map;
+      BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
+      PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
+   }
+
+   if (nv50->vertprog->mul_zero_wins != nv50->state.mul_zero_wins) {
+      nv50->state.mul_zero_wins = nv50->vertprog->mul_zero_wins;
+      BEGIN_NV04(push, NV50_3D(UNK1690), 1);
+      PUSH_DATA (push, 0x00010000 * !!nv50->state.mul_zero_wins);
     }
  
     if (nv50->vbo_fifo) {
@@ -817,25 +844,16 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
        PUSH_DATA (push, info->start_instance);
     }
  
-   for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) {
-      if (!nv50->vtxbuf[i].buffer)
-         continue;
-      if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-         nv50->base.vbo_dirty = TRUE;
-   }
-
-   if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
-       nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv50->base.vbo_dirty = TRUE;
+   nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent;
  
     if (nv50->base.vbo_dirty) {
        BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
        PUSH_DATA (push, 0);
-      nv50->base.vbo_dirty = FALSE;
+      nv50->base.vbo_dirty = false;
     }
  
     if (info->indexed) {
-      boolean shorten = info->max_index <= 65535;
+      bool shorten = info->max_index <= 65535;
  
        if (info->primitive_restart != nv50->state.prim_restart) {
           if (info->primitive_restart) {
@@ -844,7 +862,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
              PUSH_DATA (push, info->restart_index);
  
              if (info->restart_index > 65535)
-               shorten = FALSE;
+               shorten = false;
           } else {
              BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);
              PUSH_DATA (push, 0);
@@ -856,7 +874,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
           PUSH_DATA (push, info->restart_index);
  
           if (info->restart_index > 65535)
-            shorten = FALSE;
+            shorten = false;
        }
  
        nv50_draw_elements(nv50, shorten,