#include "nvc0_context.h"
#include "nouveau/nv_object.xml.h"
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
struct nvc0_query {
uint32_t *data;
uint32_t type;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
- uint32_t offset; /* base + i * 16 */
+ uint32_t offset; /* base + i * rotate */
boolean ready;
+ boolean active;
boolean is64bit;
+ uint8_t rotate;
+ int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
};
-#define NVC0_QUERY_ALLOC_SPACE 128
+#define NVC0_QUERY_ALLOC_SPACE 256
static INLINE struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
if (q->ready)
nouveau_mm_free(q->mm);
else
- nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
+ nouveau_fence_work(screen->base.fence.current,
+ nouveau_mm_free_work, q->mm);
}
}
if (size) {
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nvc0_query *q;
+ unsigned space = NVC0_QUERY_ALLOC_SPACE;
q = CALLOC_STRUCT(nvc0_query);
if (!q)
return NULL;
- if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+ switch (type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ q->rotate = 32;
+ space = NVC0_QUERY_ALLOC_SPACE;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ q->is64bit = TRUE;
+ space = 512;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ q->is64bit = TRUE;
+ space = 64;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_GPU_FINISHED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ space = 32;
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+ space = 16;
+ break;
+ default:
+ FREE(q);
+ return NULL;
+ }
+ if (!nvc0_query_allocate(nvc0, q, space)) {
FREE(q);
return NULL;
}
- q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS);
q->type = type;
- if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
- q->offset -= 16;
- q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+ if (q->rotate) {
+ /* we advance before query_begin ! */
+ q->offset -= q->rotate;
+ q->data -= q->rotate / sizeof(*q->data);
}
return (struct pipe_query *)q;
OUT_RING (chan, get);
}
+static void
+nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ q->offset += q->rotate;
+ q->data += q->rotate / sizeof(*q->data);
+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+}
+
static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q = nvc0_query(pq);
+ const int index = 0; /* vertex stream */
+
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render conition to FALSE even *after* we re-
* initialized it to TRUE.
*/
- if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
- q->offset += 16;
- q->data += 16 / sizeof(*q->data);
- if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
- nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+ if (q->rotate) {
+ nvc0_query_rotate(nvc0, q);
/* XXX: can we do this with the GPU, and sync with respect to a previous
* query ?
*/
q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
+ q->data[5] = 0;
}
if (!q->is64bit)
q->data[0] = q->sequence++; /* the previously used one */
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
- IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ q->nesting = nvc0->screen->num_occlusion_queries_active++;
+ if (q->nesting) {
+ nvc0_query_get(chan, q, 0x10, 0x0100f002);
+ } else {
+ BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+ OUT_RING (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+ }
break;
- case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
- IMMED_RING(chan, RING_3D(COUNTER_RESET),
- NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- IMMED_RING(chan, RING_3D(COUNTER_RESET),
- NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+ nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
- BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
- OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
- OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
nvc0_query_get(chan, q, 0x10, 0x00005002);
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
default:
break;
}
q->ready = FALSE;
+ q->active = TRUE;
}
static void
const int index = 0; /* for multiple vertex streams */
+ if (!q->active) {
+ /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+ if (q->rotate)
+ nvc0_query_rotate(nvc0, q);
+ else
+ if (!q->is64bit)
+ q->data[0] = q->sequence++;
+ }
+ q->ready = FALSE;
+ q->active = FALSE;
+
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
nvc0_query_get(chan, q, 0, 0x0100f002);
- BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
- OUT_RING (chan, 0);
+ if (--nvc0->screen->num_occlusion_queries_active == 0)
+ IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+ nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
- nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
break;
+ case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
nvc0_query_get(chan, q, 0, 0x00005002);
case PIPE_QUERY_GPU_FINISHED:
nvc0_query_get(chan, q, 0, 0x1000f010);
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+ nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
+ nvc0_query_get(chan, q, 0x04, 0x1d005022);
+ nvc0_query_get(chan, q, 0x08, 0x1d005042);
+ nvc0_query_get(chan, q, 0x0c, 0x1d005062);
+ break;
default:
assert(0);
break;
uint32_t *res32 = result;
boolean *res8 = result;
uint64_t *data64 = (uint64_t *)q->data;
-
- if (q->type == PIPE_QUERY_GPU_FINISHED) {
- res8[0] = nvc0_query_ready(q);
- return TRUE;
- }
+ unsigned i;
if (!q->ready) /* update ? */
q->ready = nvc0_query_ready(q);
q->ready = TRUE;
switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res32[0] = 0;
+ res8[0] = TRUE;
+ break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
- res32[0] = q->data[1];
+ res64[0] = q->data[1] - q->data[5];
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ res32[0] = 0;
+ res8[0] = q->data[1] != q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
- res64[0] = data64[0];
+ res64[0] = data64[0] - data64[2];
break;
case PIPE_QUERY_SO_STATISTICS:
- res64[0] = data64[0];
- res64[1] = data64[1];
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ res32[0] = 0;
+ res8[0] = !q->data[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
res64[0] = 1000000000;
- res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+ res32[2] = 0;
+ res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ for (i = 0; i < 10; ++i)
+ res64[i] = data64[i * 2] - data64[24 + i * 2];
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+ res32[0] = q->data[0];
+ res32[1] = q->data[1];
+ res32[2] = q->data[2];
+ res32[3] = q->data[3];
+ break;
default:
return FALSE;
}
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q;
+ uint32_t cond;
+ boolean negated = FALSE;
+ boolean wait =
+ mode != PIPE_RENDER_COND_NO_WAIT &&
+ mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
if (!pq) {
IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
}
q = nvc0_query(pq);
- if (mode == PIPE_RENDER_COND_WAIT ||
- mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ /* NOTE: comparison of 2 queries only works if both have completed */
+ switch (q->type) {
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* query writes 1 if there was no overflow */
+ cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
+ NVC0_3D_COND_MODE_EQUAL;
+ wait = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ if (likely(!negated)) {
+ if (unlikely(q->nesting))
+ cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+ NVC0_3D_COND_MODE_ALWAYS;
+ else
+ cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+ } else {
+ cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+ }
+ break;
+ default:
+ assert(!"render condition query not a predicate");
+ mode = NVC0_3D_COND_MODE_ALWAYS;
+ break;
+ }
+
+ if (wait) {
MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+ OUT_RING (chan, cond);
+}
+
+void
+nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+ struct pipe_query *pq, unsigned result_offset)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+
+ nouveau_pushbuf_submit(nvc0->screen->base.channel,
+ q->bo, q->offset + result_offset, 4);
}
void