noop: implement stream output
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_query.c
index ead015b6b84e7f4aa03a236c6c600c88f68b15b7..be363a21e6dbcc61ce84f1b070f966050bbdd0de 100644 (file)
 #include "nvc0_context.h"
 #include "nouveau/nv_object.xml.h"
 
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
 struct nvc0_query {
    uint32_t *data;
    uint32_t type;
    uint32_t sequence;
    struct nouveau_bo *bo;
    uint32_t base;
-   uint32_t offset; /* base + i * 16 */
+   uint32_t offset; /* base + i * rotate */
    boolean ready;
+   boolean active;
    boolean is64bit;
+   uint8_t rotate;
+   int nesting; /* only used for occlusion queries */
    struct nouveau_mm_allocation *mm;
 };
 
-#define NVC0_QUERY_ALLOC_SPACE 128
+#define NVC0_QUERY_ALLOC_SPACE 256
 
 static INLINE struct nvc0_query *
 nvc0_query(struct pipe_query *pipe)
@@ -64,7 +60,8 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
          if (q->ready)
             nouveau_mm_free(q->mm);
          else
-            nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
+            nouveau_fence_work(screen->base.fence.current,
+                               nouveau_mm_free_work, q->mm);
       }
    }
    if (size) {
@@ -97,24 +94,53 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nvc0_query *q;
+   unsigned space = NVC0_QUERY_ALLOC_SPACE;
 
    q = CALLOC_STRUCT(nvc0_query);
    if (!q)
       return NULL;
 
-   if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+   switch (type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      q->rotate = 32;
+      space = NVC0_QUERY_ALLOC_SPACE;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      q->is64bit = TRUE;
+      space = 512;
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+      q->is64bit = TRUE;
+      space = 64;
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      space = 32;
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      space = 16;
+      break;
+   default:
+      FREE(q);
+      return NULL;
+   }
+   if (!nvc0_query_allocate(nvc0, q, space)) {
       FREE(q);
       return NULL;
    }
 
-   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
-                 type == PIPE_QUERY_PRIMITIVES_EMITTED ||
-                 type == PIPE_QUERY_SO_STATISTICS);
    q->type = type;
 
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset -= 16;
-      q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+   if (q->rotate) {
+      /* we advance before query_begin ! */
+      q->offset -= q->rotate;
+      q->data -= q->rotate / sizeof(*q->data);
    }
 
    return (struct pipe_query *)q;
@@ -134,6 +160,15 @@ nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
    OUT_RING  (chan, get);
 }
 
+static void
+nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+   q->offset += q->rotate;
+   q->data += q->rotate / sizeof(*q->data);
+   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+}
+
 static void
 nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 {
@@ -141,50 +176,68 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
    struct nouveau_channel *chan = nvc0->screen->base.channel;
    struct nvc0_query *q = nvc0_query(pq);
 
+   const int index = 0; /* vertex stream */
+
    /* For occlusion queries we have to change the storage, because a previous
     * query might set the initial render conition to FALSE even *after* we re-
     * initialized it to TRUE.
     */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset += 16;
-      q->data += 16 / sizeof(*q->data);
-      if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
-         nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+   if (q->rotate) {
+      nvc0_query_rotate(nvc0, q);
 
       /* XXX: can we do this with the GPU, and sync with respect to a previous
        *  query ?
        */
       q->data[1] = 1; /* initial render condition = TRUE */
+      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
+      q->data[5] = 0;
    }
    if (!q->is64bit)
       q->data[0] = q->sequence++; /* the previously used one */
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
-      IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      q->nesting = nvc0->screen->num_occlusion_queries_active++;
+      if (q->nesting) {
+         nvc0_query_get(chan, q, 0x10, 0x0100f002);
+      } else {
+         BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+         OUT_RING  (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+      }
       break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
-      IMMED_RING(chan, RING_3D(COUNTER_RESET),
-                 NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
       break;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
-      IMMED_RING(chan, RING_3D(COUNTER_RESET),
-                 NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+      nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
       break;
    case PIPE_QUERY_SO_STATISTICS:
-      BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
-      OUT_RING  (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
-      OUT_RING  (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+      nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
+      nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
    case PIPE_QUERY_TIME_ELAPSED:
       nvc0_query_get(chan, q, 0x10, 0x00005002);
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+      nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+      nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+      nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+      nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      break;
    default:
       break;
    }
    q->ready = FALSE;
+   q->active = TRUE;
 }
 
 static void
@@ -196,22 +249,38 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
 
    const int index = 0; /* for multiple vertex streams */
 
+   if (!q->active) {
+      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+      if (q->rotate)
+         nvc0_query_rotate(nvc0, q);
+      else
+      if (!q->is64bit)
+         q->data[0] = q->sequence++;
+   }
+   q->ready = FALSE;
+   q->active = FALSE;
+
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       nvc0_query_get(chan, q, 0, 0x0100f002);
-      BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
-      OUT_RING  (chan, 0);
+      if (--nvc0->screen->num_occlusion_queries_active == 0)
+         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
-      nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+      nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
       break;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
       nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
       break;
    case PIPE_QUERY_SO_STATISTICS:
       nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
-      nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
       break;
+   case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
    case PIPE_QUERY_TIME_ELAPSED:
       nvc0_query_get(chan, q, 0, 0x00005002);
@@ -219,6 +288,24 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
    case PIPE_QUERY_GPU_FINISHED:
       nvc0_query_get(chan, q, 0, 0x1000f010);
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+      nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+      nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+      nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+      nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
+      nvc0_query_get(chan, q, 0x04, 0x1d005022);
+      nvc0_query_get(chan, q, 0x08, 0x1d005042);
+      nvc0_query_get(chan, q, 0x0c, 0x1d005062);
+      break;
    default:
       assert(0);
       break;
@@ -250,11 +337,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    uint32_t *res32 = result;
    boolean *res8 = result;
    uint64_t *data64 = (uint64_t *)q->data;
-
-   if (q->type == PIPE_QUERY_GPU_FINISHED) {
-      res8[0] = nvc0_query_ready(q);
-      return TRUE;
-   }
+   unsigned i;
 
    if (!q->ready) /* update ? */
       q->ready = nvc0_query_ready(q);
@@ -271,24 +354,50 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    q->ready = TRUE;
 
    switch (q->type) {
+   case PIPE_QUERY_GPU_FINISHED:
+      res32[0] = 0;
+      res8[0] = TRUE;
+      break;
    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
-      res32[0] = q->data[1];
+      res64[0] = q->data[1] - q->data[5];
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      res32[0] = 0;
+      res8[0] = q->data[1] != q->data[5];
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
-      res64[0] = data64[0];
+      res64[0] = data64[0] - data64[2];
       break;
    case PIPE_QUERY_SO_STATISTICS:
-      res64[0] = data64[0];
-      res64[1] = data64[1];
+      res64[0] = data64[0] - data64[4];
+      res64[1] = data64[2] - data64[6];
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      res32[0] = 0;
+      res8[0] = !q->data[1];
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      res64[0] = data64[1];
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
       res64[0] = 1000000000;
-      res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+      res32[2] = 0;
+      res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
       break;
    case PIPE_QUERY_TIME_ELAPSED:
       res64[0] = data64[1] - data64[3];
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      for (i = 0; i < 10; ++i)
+         res64[i] = data64[i * 2] - data64[24 + i * 2];
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      res32[0] = q->data[0];
+      res32[1] = q->data[1];
+      res32[2] = q->data[2];
+      res32[3] = q->data[3];
+      break;
    default:
       return FALSE;
    }
@@ -303,6 +412,11 @@ nvc0_render_condition(struct pipe_context *pipe,
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nouveau_channel *chan = nvc0->screen->base.channel;
    struct nvc0_query *q;
+   uint32_t cond;
+   boolean negated = FALSE;
+   boolean wait =
+      mode != PIPE_RENDER_COND_NO_WAIT &&
+      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
 
    if (!pq) {
       IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
@@ -310,8 +424,33 @@ nvc0_render_condition(struct pipe_context *pipe,
    }
    q = nvc0_query(pq);
 
-   if (mode == PIPE_RENDER_COND_WAIT ||
-       mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+   /* NOTE: comparison of 2 queries only works if both have completed */
+   switch (q->type) {
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      /* query writes 1 if there was no overflow */
+      cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
+                       NVC0_3D_COND_MODE_EQUAL;
+      wait = TRUE;
+      break;
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      if (likely(!negated)) {
+         if (unlikely(q->nesting))
+            cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+                          NVC0_3D_COND_MODE_ALWAYS;
+         else
+            cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+      } else {
+         cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+      }
+      break;
+   default:
+      assert(!"render condition query not a predicate");
+      mode = NVC0_3D_COND_MODE_ALWAYS;
+      break;
+   }
+
+   if (wait) {
       MARK_RING (chan, 5, 2);
       BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
       OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -324,7 +463,17 @@ nvc0_render_condition(struct pipe_context *pipe,
    BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
    OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
    OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-   OUT_RING  (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+   OUT_RING  (chan, cond);
+}
+
+void
+nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+                          struct pipe_query *pq, unsigned result_offset)
+{
+   struct nvc0_query *q = nvc0_query(pq);
+
+   nouveau_pushbuf_submit(nvc0->screen->base.channel,
+                          q->bo, q->offset + result_offset, 4);
 }
 
 void