From: Christoph Bumiller
Date: Fri, 21 Oct 2011 20:58:18 +0000 (+0200)
Subject: nvc0: handle more query types
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b4ecef4b1b1e3c3d7bc3a9d7811af497ace5506d;p=mesa.git

nvc0: handle more query types
---

diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index c4e481c28cb..c739b1b68e2 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -168,6 +168,10 @@ void nvc0_program_library_upload(struct nvc0_context *);
 
 /* nvc0_query.c */
 void nvc0_init_query_functions(struct nvc0_context *);
+void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+                               struct pipe_query *pq, unsigned result_offset);
+
+#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0)
 
 /* nvc0_shader_state.c */
 void nvc0_vertprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
index ead015b6b84..be363a21e6d 100644
--- a/src/gallium/drivers/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -25,26 +25,22 @@
 #include "nvc0_context.h"
 #include "nouveau/nv_object.xml.h"
 
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
 struct nvc0_query {
    uint32_t *data;
    uint32_t type;
    uint32_t sequence;
    struct nouveau_bo *bo;
    uint32_t base;
-   uint32_t offset; /* base + i * 16 */
+   uint32_t offset; /* base + i * rotate */
    boolean ready;
+   boolean active;
    boolean is64bit;
+   uint8_t rotate;
+   int nesting; /* only used for occlusion queries */
    struct nouveau_mm_allocation *mm;
 };
 
-#define NVC0_QUERY_ALLOC_SPACE 128
+#define NVC0_QUERY_ALLOC_SPACE 256
 
 static INLINE struct nvc0_query *
 nvc0_query(struct pipe_query *pipe)
@@ -64,7 +60,8 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
          if (q->ready)
             nouveau_mm_free(q->mm);
          else
-            nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
+            nouveau_fence_work(screen->base.fence.current,
+                               nouveau_mm_free_work, q->mm);
       }
    }
    if (size) {
@@ -97,24 +94,53 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nvc0_query *q;
+   unsigned space = NVC0_QUERY_ALLOC_SPACE;
 
    q = CALLOC_STRUCT(nvc0_query);
    if (!q)
       return NULL;
 
-   if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+   switch (type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      q->rotate = 32;
+      space = NVC0_QUERY_ALLOC_SPACE;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      q->is64bit = TRUE;
+      space = 512;
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+      q->is64bit = TRUE;
+      space = 64;
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      space = 32;
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      space = 16;
+      break;
+   default:
+      FREE(q);
+      return NULL;
+   }
+   if (!nvc0_query_allocate(nvc0, q, space)) {
       FREE(q);
       return NULL;
    }
 
-   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
-                 type == PIPE_QUERY_PRIMITIVES_EMITTED ||
-                 type == PIPE_QUERY_SO_STATISTICS);
    q->type = type;
 
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset -= 16;
-      q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+   if (q->rotate) {
+      /* we advance before query_begin ! */
+      q->offset -= q->rotate;
+      q->data -= q->rotate / sizeof(*q->data);
    }
 
    return (struct pipe_query *)q;
@@ -134,6 +160,15 @@ nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
    OUT_RING (chan, get);
 }
 
+static void
+nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+   q->offset += q->rotate;
+   q->data += q->rotate / sizeof(*q->data);
+   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+}
+
 static void
 nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 {
@@ -141,50 +176,68 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
    struct nouveau_channel *chan = nvc0->screen->base.channel;
    struct nvc0_query *q = nvc0_query(pq);
 
+   const int index = 0; /* vertex stream */
+
    /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render conition to FALSE even *after* we re-
    * initialized it to TRUE.
    */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset += 16;
-      q->data += 16 / sizeof(*q->data);
-      if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
-         nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+   if (q->rotate) {
+      nvc0_query_rotate(nvc0, q);
 
       /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
       q->data[1] = 1; /* initial render condition = TRUE */
+      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
+      q->data[5] = 0;
    }
    if (!q->is64bit)
      q->data[0] = q->sequence++; /* the previously used one */
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
-      IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      q->nesting = nvc0->screen->num_occlusion_queries_active++;
+      if (q->nesting) {
+         nvc0_query_get(chan, q, 0x10, 0x0100f002);
+      } else {
+         BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+         OUT_RING (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+      }
       break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
-      IMMED_RING(chan, RING_3D(COUNTER_RESET),
-                 NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
       break;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
-      IMMED_RING(chan, RING_3D(COUNTER_RESET),
-                 NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+      nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
       break;
    case PIPE_QUERY_SO_STATISTICS:
-      BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
-      OUT_RING (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
-      OUT_RING (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+      nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
+      nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
    case PIPE_QUERY_TIME_ELAPSED:
       nvc0_query_get(chan, q, 0x10, 0x00005002);
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+      nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+      nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+      nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+      nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      break;
    default:
       break;
    }
    q->ready = FALSE;
+   q->active = TRUE;
 }
 
 static void
@@ -196,22 +249,38 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
    const int index = 0; /* for multiple vertex streams */
 
+   if (!q->active) {
+      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+      if (q->rotate)
+         nvc0_query_rotate(nvc0, q);
+      else
+      if (!q->is64bit)
+         q->data[0] = q->sequence++;
+   }
+   q->ready = FALSE;
+   q->active = FALSE;
+
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
       nvc0_query_get(chan, q, 0, 0x0100f002);
-      BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
-      OUT_RING (chan, 0);
+      if (--nvc0->screen->num_occlusion_queries_active == 0)
+         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
-      nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+      nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
       break;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
       nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
       break;
    case PIPE_QUERY_SO_STATISTICS:
       nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
-      nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
       break;
+   case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
    case PIPE_QUERY_TIME_ELAPSED:
       nvc0_query_get(chan, q, 0, 0x00005002);
@@ -219,6 +288,24 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
    case PIPE_QUERY_GPU_FINISHED:
       nvc0_query_get(chan, q, 0, 0x1000f010);
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+      nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+      nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+      nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+      nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
+      nvc0_query_get(chan, q, 0x04, 0x1d005022);
+      nvc0_query_get(chan, q, 0x08, 0x1d005042);
+      nvc0_query_get(chan, q, 0x0c, 0x1d005062);
+      break;
    default:
       assert(0);
       break;
@@ -250,11 +337,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    uint32_t *res32 = result;
    boolean *res8 = result;
    uint64_t *data64 = (uint64_t *)q->data;
-
-   if (q->type == PIPE_QUERY_GPU_FINISHED) {
-      res8[0] = nvc0_query_ready(q);
-      return TRUE;
-   }
+   unsigned i;
 
    if (!q->ready) /* update ? */
       q->ready = nvc0_query_ready(q);
@@ -271,24 +354,50 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    q->ready = TRUE;
 
    switch (q->type) {
+   case PIPE_QUERY_GPU_FINISHED:
+      res32[0] = 0;
+      res8[0] = TRUE;
+      break;
    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
-      res32[0] = q->data[1];
+      res64[0] = q->data[1] - q->data[5];
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      res32[0] = 0;
+      res8[0] = q->data[1] != q->data[5];
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
-      res64[0] = data64[0];
+      res64[0] = data64[0] - data64[2];
       break;
    case PIPE_QUERY_SO_STATISTICS:
-      res64[0] = data64[0];
-      res64[1] = data64[1];
+      res64[0] = data64[0] - data64[4];
+      res64[1] = data64[2] - data64[6];
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      res32[0] = 0;
+      res8[0] = !q->data[1];
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      res64[0] = data64[1];
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
       res64[0] = 1000000000;
-      res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+      res32[2] = 0;
+      res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
       break;
    case PIPE_QUERY_TIME_ELAPSED:
       res64[0] = data64[1] - data64[3];
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      for (i = 0; i < 10; ++i)
+         res64[i] = data64[i * 2] - data64[24 + i * 2];
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      res32[0] = q->data[0];
+      res32[1] = q->data[1];
+      res32[2] = q->data[2];
+      res32[3] = q->data[3];
+      break;
    default:
       return FALSE;
    }
@@ -303,6 +412,11 @@ nvc0_render_condition(struct pipe_context *pipe,
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nouveau_channel *chan = nvc0->screen->base.channel;
    struct nvc0_query *q;
+   uint32_t cond;
+   boolean negated = FALSE;
+   boolean wait =
+      mode != PIPE_RENDER_COND_NO_WAIT &&
+      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
 
    if (!pq) {
       IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
@@ -310,8 +424,33 @@ nvc0_render_condition(struct pipe_context *pipe,
    }
 
    q = nvc0_query(pq);
-   if (mode == PIPE_RENDER_COND_WAIT ||
-       mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+
+   /* NOTE: comparison of 2 queries only works if both have completed */
+   switch (q->type) {
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      /* query writes 1 if there was no overflow */
+      cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
+                       NVC0_3D_COND_MODE_EQUAL;
+      wait = TRUE;
+      break;
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      if (likely(!negated)) {
+         if (unlikely(q->nesting))
+            cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+                          NVC0_3D_COND_MODE_ALWAYS;
+         else
+            cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+      } else {
+         cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+      }
+      break;
+   default:
+      assert(!"render condition query not a predicate");
+      mode = NVC0_3D_COND_MODE_ALWAYS;
+      break;
+   }
+
+   if (wait) {
       MARK_RING (chan, 5, 2);
       BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
       OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -324,7 +463,17 @@ nvc0_render_condition(struct pipe_context *pipe,
    BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
    OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
    OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-   OUT_RING (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+   OUT_RING (chan, cond);
+}
+
+void
+nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+                          struct pipe_query *pq, unsigned result_offset)
+{
+   struct nvc0_query *q = nvc0_query(pq);
+
+   nouveau_pushbuf_submit(nvc0->screen->base.channel,
+                          q->bo, q->offset + result_offset, 4);
 }
 
 void
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index cad91737f51..98a94c65c5b 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -27,6 +27,8 @@ struct nvc0_screen {
 
    struct nvc0_context *cur_ctx;
 
+   int num_occlusion_queries_active;
+
    struct nouveau_bo *text;
    struct nouveau_bo *uniforms;
    struct nouveau_bo *tls;
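
Editor's note (illustration only, not part of the commit): the recurring pattern
in this patch is to stop resetting hardware counters at query begin and instead
write a counter snapshot into a higher slot of the query buffer at begin_query
and another at offset 0 at end_query, reporting the difference (for example
res64[0] = data64[0] - data64[2] for PRIMITIVES_GENERATED/EMITTED, or the
data64[i * 2] - data64[24 + i * 2] pairs for PIPELINE_STATISTICS). The small
stand-alone C sketch below mimics that begin/end snapshot-and-subtract scheme;
the names (toy_query, hw_counter) are invented for the example and are not
driver code.

#include <stdint.h>
#include <stdio.h>

/* Query buffer with the same slot roles as the driver's data64[]:
 * slot 0 holds the end-of-query snapshot, slot 2 the begin-of-query one. */
struct toy_query {
   uint64_t data[4];
};

static uint64_t hw_counter; /* stands in for a GPU statistics counter */

static void toy_query_begin(struct toy_query *q)
{
   q->data[2] = hw_counter; /* snapshot at begin instead of a counter reset */
}

static void toy_query_end(struct toy_query *q)
{
   q->data[0] = hw_counter; /* snapshot at end */
}

static uint64_t toy_query_result(const struct toy_query *q)
{
   return q->data[0] - q->data[2]; /* like res64[0] = data64[0] - data64[2] */
}

int main(void)
{
   struct toy_query q;

   hw_counter = 1000;  /* the counter already carries history */
   toy_query_begin(&q);
   hw_counter += 123;  /* work done while the query is active */
   toy_query_end(&q);

   printf("result: %llu\n", (unsigned long long)toy_query_result(&q));
   return 0;
}

Because nothing is reset, queries sharing a counter can overlap, which appears
to be what lets the patch track nested occlusion queries with
num_occlusion_queries_active rather than COUNTER_RESET.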