From 5f1eeb799bd9bcdb32382961e57ef74253701ed2 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Sat, 17 Oct 2015 01:04:27 +0200 Subject: [PATCH] nv50: move HW queries to nv50_query_hw.c/h files Signed-off-by: Samuel Pitoiset Reviewed-by: Pierre Moreau --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nv50/nv50_query.c | 354 ++-------------- src/gallium/drivers/nouveau/nv50/nv50_query.h | 26 +- .../drivers/nouveau/nv50/nv50_query_hw.c | 389 ++++++++++++++++++ .../drivers/nouveau/nv50/nv50_query_hw.h | 39 ++ .../drivers/nouveau/nv50/nv50_shader_state.c | 7 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 3 +- src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 5 +- 8 files changed, 476 insertions(+), 349 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 06d9d979e34..83f81135590 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -74,6 +74,8 @@ NV50_C_SOURCES := \ nv50/nv50_push.c \ nv50/nv50_query.c \ nv50/nv50_query.h \ + nv50/nv50_query_hw.c \ + nv50/nv50_query_hw.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 1b4abdbb99b..dd9b85b7208 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -26,334 +26,45 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query.h" -#include "nv_object.xml.h" - -#define NV50_QUERY_STATE_READY 0 -#define NV50_QUERY_STATE_ACTIVE 1 -#define NV50_QUERY_STATE_ENDED 2 -#define NV50_QUERY_STATE_FLUSHED 3 - -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts - * (since we use only a single GPU channel per screen) will not work properly. - * - * The first is not that big of an issue because OpenGL does not allow nested - * queries anyway. - */ - -#define NV50_QUERY_ALLOC_SPACE 256 - -static bool -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) -{ - struct nv50_screen *screen = nv50->screen; - int ret; - - if (q->bo) { - nouveau_bo_ref(NULL, &q->bo); - if (q->mm) { - if (q->state == NV50_QUERY_STATE_READY) - nouveau_mm_free(q->mm); - else - nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, - q->mm); - } - } - if (size) { - q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); - if (!q->bo) - return false; - q->offset = q->base; - - ret = nouveau_bo_map(q->bo, 0, screen->base.client); - if (ret) { - nv50_query_allocate(nv50, q, 0); - return false; - } - q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); - } - return true; -} - -static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); -} +#include "nv50/nv50_query_hw.h" static struct pipe_query * -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q; - q = CALLOC_STRUCT(nv50_query); - if (!q) - return NULL; - - if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { - FREE(q); - return NULL; - } - - q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - q->type = type; - - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 32; - q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ - } - + q = nv50_hw_create_query(nv50, type, index); return (struct pipe_query *)q; } static void -nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, - unsigned offset, uint32_t get) +nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq) { - offset += q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, get); + struct nv50_query *q = nv50_query(pq); + q->funcs->destroy_query(nv50_context(pipe), q); } static boolean -nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); - - /* For occlusion queries we have to change the storage, because a previous - * query might set the initial render conition to false even *after* we re- - * initialized it to true. - */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset += 32; - q->data += 32 / sizeof(*q->data); - if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) - nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); - - /* XXX: can we do this with the GPU, and sync with respect to a previous - * query ? - */ - q->data[0] = q->sequence; /* initialize sequence */ - q->data[1] = 1; /* initial render condition = true */ - q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ - q->data[5] = 0; - } - if (!q->is64bit) - q->data[0] = q->sequence++; /* the previously used one */ - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - q->nesting = nv50->screen->num_occlusion_queries_active++; - if (q->nesting) { - nv50_query_get(push, q, 0x10, 0x0100f002); - } else { - PUSH_SPACE(push, 4); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 1); - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - nv50_query_get(push, q, 0x10, 0x06805002); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - nv50_query_get(push, q, 0x10, 0x05805002); - break; - case PIPE_QUERY_SO_STATISTICS: - nv50_query_get(push, q, 0x20, 0x05805002); - nv50_query_get(push, q, 0x30, 0x06805002); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ - nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ - nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ - nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ - nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ - nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ - nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ - break; - case PIPE_QUERY_TIME_ELAPSED: - nv50_query_get(push, q, 0x10, 0x00005002); - break; - default: - break; - } - q->state = NV50_QUERY_STATE_ACTIVE; - return true; + return q->funcs->begin_query(nv50_context(pipe), q); } static void -nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); - - q->state = NV50_QUERY_STATE_ENDED; - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - nv50_query_get(push, q, 0, 0x0100f002); - if (--nv50->screen->num_occlusion_queries_active == 0) { - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 0); - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - nv50_query_get(push, q, 0, 0x06805002); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - nv50_query_get(push, q, 0, 0x05805002); - break; - case PIPE_QUERY_SO_STATISTICS: - nv50_query_get(push, q, 0x00, 0x05805002); - nv50_query_get(push, q, 0x10, 0x06805002); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ - nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ - nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ - nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ - nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ - nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ - nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ - break; - case PIPE_QUERY_TIMESTAMP: - q->sequence++; - /* fall through */ - case PIPE_QUERY_TIME_ELAPSED: - nv50_query_get(push, q, 0, 0x00005002); - break; - case PIPE_QUERY_GPU_FINISHED: - q->sequence++; - nv50_query_get(push, q, 0, 0x1000f010); - break; - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: - q->sequence++; - nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* This query is not issued on GPU because disjoint is forced to false */ - q->state = NV50_QUERY_STATE_READY; - break; - default: - assert(0); - break; - } - - if (q->is64bit) - nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence); -} - -static inline void -nv50_query_update(struct nv50_query *q) -{ - if (q->is64bit) { - if (nouveau_fence_signalled(q->fence)) - q->state = NV50_QUERY_STATE_READY; - } else { - if (q->data[0] == q->sequence) - q->state = NV50_QUERY_STATE_READY; - } + q->funcs->end_query(nv50_context(pipe), q); } static boolean -nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, union pipe_query_result *result) +nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) { - struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q = nv50_query(pq); - uint64_t *res64 = (uint64_t *)result; - uint32_t *res32 = (uint32_t *)result; - uint8_t *res8 = (uint8_t *)result; - uint64_t *data64 = (uint64_t *)q->data; - int i; - - if (q->state != NV50_QUERY_STATE_READY) - nv50_query_update(q); - - if (q->state != NV50_QUERY_STATE_READY) { - if (!wait) { - /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ - if (q->state != NV50_QUERY_STATE_FLUSHED) { - q->state = NV50_QUERY_STATE_FLUSHED; - PUSH_KICK(nv50->base.pushbuf); - } - return false; - } - if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) - return false; - } - q->state = NV50_QUERY_STATE_READY; - - switch (q->type) { - case PIPE_QUERY_GPU_FINISHED: - res8[0] = true; - break; - case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ - res64[0] = q->data[1] - q->data[5]; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ - case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ - res64[0] = data64[0] - data64[2]; - break; - case PIPE_QUERY_SO_STATISTICS: - res64[0] = data64[0] - data64[4]; - res64[1] = data64[2] - data64[6]; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - for (i = 0; i < 8; ++i) - res64[i] = data64[i * 2] - data64[16 + i * 2]; - break; - case PIPE_QUERY_TIMESTAMP: - res64[0] = data64[1]; - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - res64[0] = 1000000000; - res8[8] = false; - break; - case PIPE_QUERY_TIME_ELAPSED: - res64[0] = data64[1] - data64[3]; - break; - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: - res32[0] = q->data[1]; - break; - default: - return false; - } - - return true; -} - -void -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) -{ - unsigned offset = q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); + return q->funcs->get_query_result(nv50_context(pipe), q, wait, result); } static void @@ -363,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_query *q; + struct nv50_query *q = nv50_query(pq); + struct nv50_hw_query *hq = nv50_hw_query(q); uint32_t cond; bool wait = mode != PIPE_RENDER_COND_NO_WAIT && @@ -373,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe, cond = NV50_3D_COND_MODE_ALWAYS; } else { - q = nv50_query(pq); /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: @@ -384,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: if (likely(!condition)) { - if (unlikely(q->nesting)) + if (unlikely(hq->nesting)) cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; else @@ -419,28 +130,15 @@ nv50_render_condition(struct pipe_context *pipe, PUSH_DATA (push, 0); } - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3); - PUSH_DATAh(push, q->bo->offset + q->offset); - PUSH_DATA (push, q->bo->offset + q->offset); + PUSH_DATAh(push, hq->bo->offset + hq->offset); + PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2); - PUSH_DATAh(push, q->bo->offset + q->offset); - PUSH_DATA (push, q->bo->offset + q->offset); -} - -void -nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, - struct nv50_query *q, unsigned result_offset) -{ - nv50_query_update(q); - if (q->state != NV50_QUERY_STATE_READY) - nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); - q->state = NV50_QUERY_STATE_READY; - - BEGIN_NV04(push, SUBC_3D(method), 1); - PUSH_DATA (push, q->data[result_offset / 4]); + PUSH_DATAh(push, hq->bo->offset + hq->offset); + PUSH_DATA (push, hq->bo->offset + hq->offset); } void @@ -448,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; - pipe->create_query = nv50_query_create; - pipe->destroy_query = nv50_query_destroy; - pipe->begin_query = nv50_query_begin; - pipe->end_query = nv50_query_end; - pipe->get_query_result = nv50_query_result; + pipe->create_query = nv50_create_query; + pipe->destroy_query = nv50_destroy_query; + pipe->begin_query = nv50_begin_query; + pipe->end_query = nv50_end_query; + pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h index a7030133f56..d990285c857 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -4,23 +4,22 @@ #include "pipe/p_context.h" #include "nouveau_context.h" -#include "nouveau_mm.h" -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) +struct nv50_context; +struct nv50_query; + +struct nv50_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_query *); + void (*end_query)(struct nv50_context *, struct nv50_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_query *, + boolean, union pipe_query_result *); +}; struct nv50_query { - uint32_t *data; + const struct nv50_query_funcs *funcs; uint16_t type; uint16_t index; - uint32_t sequence; - struct nouveau_bo *bo; - uint32_t base; - uint32_t offset; /* base + i * 32 */ - uint8_t state; - bool is64bit; - int nesting; /* only used for occlusion queries */ - struct nouveau_mm_allocation *mm; - struct nouveau_fence *fence; }; static inline struct nv50_query * @@ -30,8 +29,5 @@ nv50_query(struct pipe_query *pipe) } void nv50_init_query_functions(struct nv50_context *); -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, - struct nv50_query *, unsigned result_offset); -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c new file mode 100644 index 00000000000..fcdd183e88a --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -0,0 +1,389 @@ +/* + * Copyright 2011 Christoph Bumiller + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" +#include "nv_object.xml.h" + +#define NV50_HW_QUERY_STATE_READY 0 +#define NV50_HW_QUERY_STATE_ACTIVE 1 +#define NV50_HW_QUERY_STATE_ENDED 2 +#define NV50_HW_QUERY_STATE_FLUSHED 3 + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. + */ + +#define NV50_HW_QUERY_ALLOC_SPACE 256 + +static bool +nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, + int size) +{ + struct nv50_screen *screen = nv50->screen; + struct nv50_hw_query *hq = nv50_hw_query(q); + int ret; + + if (hq->bo) { + nouveau_bo_ref(NULL, &hq->bo); + if (hq->mm) { + if (hq->state == NV50_HW_QUERY_STATE_READY) + nouveau_mm_free(hq->mm); + else + nouveau_fence_work(screen->base.fence.current, + nouveau_mm_free_work, hq->mm); + } + } + if (size) { + hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, + &hq->bo, &hq->base_offset); + if (!hq->bo) + return false; + hq->offset = hq->base_offset; + + ret = nouveau_bo_map(hq->bo, 0, screen->base.client); + if (ret) { + nv50_hw_query_allocate(nv50, q, 0); + return false; + } + hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset); + } + return true; +} + +static void +nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, + unsigned offset, uint32_t get) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + offset += hq->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + PUSH_DATA (push, get); +} + +static inline void +nv50_hw_query_update(struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + if (hq->is64bit) { + if (nouveau_fence_signalled(hq->fence)) + hq->state = NV50_HW_QUERY_STATE_READY; + } else { + if (hq->data[0] == hq->sequence) + hq->state = NV50_HW_QUERY_STATE_READY; + } +} + +static void +nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + nv50_hw_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &hq->fence); + FREE(hq); +} + +static boolean +nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_query *hq = nv50_hw_query(q); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render condition to false even *after* we re- + * initialized it to true. + */ + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + hq->offset += 32; + hq->data += 32 / sizeof(*hq->data); + if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) + nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + hq->data[0] = hq->sequence; /* initialize sequence */ + hq->data[1] = 1; /* initial render condition = true */ + hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */ + hq->data[5] = 0; + } + if (!hq->is64bit) + hq->data[0] = hq->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->nesting = nv50->screen->num_occlusion_queries_active++; + if (hq->nesting) { + nv50_hw_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 4); + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_hw_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_hw_query_get(push, q, 0x10, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_hw_query_get(push, q, 0x20, 0x05805002); + nv50_hw_query_get(push, q, 0x30, 0x06805002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ + break; + case PIPE_QUERY_TIME_ELAPSED: + nv50_hw_query_get(push, q, 0x10, 0x00005002); + break; + default: + assert(0); + return false; + } + hq->state = NV50_HW_QUERY_STATE_ACTIVE; + return true; +} + +static void +nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_query *hq = nv50_hw_query(q); + + hq->state = NV50_HW_QUERY_STATE_ENDED; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nv50_hw_query_get(push, q, 0, 0x0100f002); + if (--nv50->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 2); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_hw_query_get(push, q, 0, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_hw_query_get(push, q, 0, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_hw_query_get(push, q, 0x00, 0x05805002); + nv50_hw_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ + break; + case PIPE_QUERY_TIMESTAMP: + hq->sequence++; + /* fall through */ + case PIPE_QUERY_TIME_ELAPSED: + nv50_hw_query_get(push, q, 0, 0x00005002); + break; + case PIPE_QUERY_GPU_FINISHED: + hq->sequence++; + nv50_hw_query_get(push, q, 0, 0x1000f010); + break; + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + hq->sequence++; + nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* This query is not issued on GPU because disjoint is forced to false */ + hq->state = NV50_HW_QUERY_STATE_READY; + break; + default: + assert(0); + break; + } + if (hq->is64bit) + nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence); +} + +static boolean +nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, + boolean wait, union pipe_query_result *result) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + uint64_t *res64 = (uint64_t *)result; + uint32_t *res32 = (uint32_t *)result; + uint8_t *res8 = (uint8_t *)result; + uint64_t *data64 = (uint64_t *)hq->data; + int i; + + if (hq->state != NV50_HW_QUERY_STATE_READY) + nv50_hw_query_update(q); + + if (hq->state != NV50_HW_QUERY_STATE_READY) { + if (!wait) { + /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ + if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) { + hq->state = NV50_HW_QUERY_STATE_FLUSHED; + PUSH_KICK(nv50->base.pushbuf); + } + return false; + } + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) + return false; + } + hq->state = NV50_HW_QUERY_STATE_READY; + + switch (q->type) { + case PIPE_QUERY_GPU_FINISHED: + res8[0] = true; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = hq->data[1] - hq->data[5]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0] - data64[2]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + for (i = 0; i < 8; ++i) + res64[i] = data64[i * 2] - data64[16 + i * 2]; + break; + case PIPE_QUERY_TIMESTAMP: + res64[0] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + res64[0] = 1000000000; + res8[8] = false; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + res32[0] = hq->data[1]; + break; + default: + assert(0); + return false; + } + + return true; +} + +static const struct nv50_query_funcs hw_query_funcs = { + .destroy_query = nv50_hw_destroy_query, + .begin_query = nv50_hw_begin_query, + .end_query = nv50_hw_end_query, + .get_query_result = nv50_hw_get_query_result, +}; + +struct nv50_query * +nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) +{ + struct nv50_hw_query *hq; + struct nv50_query *q; + + hq = CALLOC_STRUCT(nv50_hw_query); + if (!hq) + return NULL; + + q = &hq->base; + q->funcs = &hw_query_funcs; + q->type = type; + + if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + FREE(hq); + return NULL; + } + + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + /* we advance before query_begin ! */ + hq->offset -= 32; + hq->data -= 32 / sizeof(*hq->data); + } + + hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || + type == PIPE_QUERY_PRIMITIVES_EMITTED || + type == PIPE_QUERY_SO_STATISTICS || + type == PIPE_QUERY_PIPELINE_STATISTICS); + + return q; +} + +void +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, + struct nv50_query *q, unsigned result_offset) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + nv50_hw_query_update(q); + if (hq->state != NV50_HW_QUERY_STATE_READY) + nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client); + hq->state = NV50_HW_QUERY_STATE_READY; + + BEGIN_NV04(push, SUBC_3D(method), 1); + PUSH_DATA (push, hq->data[result_offset / 4]); +} + +void +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + unsigned offset = hq->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h new file mode 100644 index 00000000000..fe518a55895 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -0,0 +1,39 @@ +#ifndef __NV50_QUERY_HW_H__ +#define __NV50_QUERY_HW_H__ + +#include "nouveau_fence.h" +#include "nouveau_mm.h" + +#include "nv50_query.h" + +#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) + +struct nv50_hw_query { + struct nv50_query base; + uint32_t *data; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base_offset; + uint32_t offset; /* base + i * 32 */ + uint8_t state; + bool is64bit; + int nesting; /* only used for occlusion queries */ + struct nouveau_mm_allocation *mm; + struct nouveau_fence *fence; +}; + +static inline struct nv50_hw_query * +nv50_hw_query(struct nv50_query *q) +{ + return (struct nv50_hw_query *)q; +} + +struct nv50_query * +nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +void +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, + struct nv50_query *, unsigned); +void +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 6b3ecbf3771..9b911043132 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -27,6 +27,7 @@ #include "util/u_inlines.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" void nv50_constbufs_validate(struct nv50_context *nv50) @@ -641,7 +642,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3; if (n == 4 && !targ->clean) - nv84_query_fifo_wait(push, nv50_query(targ->pq)); + nv84_hw_query_fifo_wait(push, nv50_query(targ->pq)); BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); @@ -650,8 +651,8 @@ nv50_stream_output_validate(struct nv50_context *nv50) PUSH_DATA(push, targ->pipe.buffer_size); if (!targ->clean) { assert(targ->pq); - nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), - nv50_query(targ->pq), 0x4); + nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), + nv50_query(targ->pq), 0x4); } else { BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); PUSH_DATA(push, 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index a78afc9d216..6c8c9f0b4e6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -30,6 +30,7 @@ #include "nv50/nv50_stateobj.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" #include "nv50/nv50_3d.xml.h" #include "nv50/nv50_texture.xml.h" @@ -1036,7 +1037,7 @@ nv50_so_target_create(struct pipe_context *pipe, if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { targ->pq = pipe->create_query(pipe, - NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); + NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); if (!targ->pq) { FREE(targ); return NULL; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index dbc6632c248..9fa6fceeefa 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -27,6 +27,7 @@ #include "translate/translate.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" #include "nv50/nv50_resource.h" #include "nv50/nv50_3d.xml.h" @@ -745,8 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50, PUSH_DATA (push, 0); BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); PUSH_DATA (push, so->stride); - nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, - nv50_query(so->pq), 0x4); + nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, + nv50_query(so->pq), 0x4); BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); -- 2.30.2