From 631c47384c1f45450359fd7d1df2c5f0c79f40bc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 14 Sep 2016 10:38:33 +0200 Subject: [PATCH] gallium/radeon: add query fences and r600_get_hw_query_params MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We will support the waiting option in ARB_query_buffer_object using WAIT_REG_MEM on an appropriate fence-like dword. Some queries conveniently write their results with the highest bit set, and we can just use that; for others, we have to write a fence explicitly. ZPASS_DONE for occlusion queries writes its results with the high bit set, but it writes up to 8 pairs of results (one for each DB). We have to wait for all of these results, so let's just add an explicit fence. The new function provides summary information to be used by subsequent patches. Reviewed-by: Edward O'Callaghan Reviewed-by: Marek Olšák --- src/gallium/drivers/radeon/r600_query.c | 107 ++++++++++++++++++++---- 1 file changed, 91 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 2c3d530aefa..b9041eb2c95 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -26,6 +26,14 @@ #include "r600_cs.h" #include "util/u_memory.h" +struct r600_hw_query_params { + unsigned start_offset; + unsigned end_offset; + unsigned fence_offset; + unsigned pair_stride; + unsigned pair_count; +}; + /* Queries without buffer handling or suspend/resume. */ struct r600_query_sw { struct r600_query b; @@ -359,7 +367,7 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_context *ctx, unsigned i, j; /* Set top bits for unused backends. */ - num_results = buffer->b.b.width0 / (16 * ctx->max_db); + num_results = buffer->b.b.width0 / query->result_size; for (j = 0; j < num_results; j++) { for (i = 0; i < ctx->max_db; i++) { if (!(ctx->backend_mask & (1<result_size = 16 * rctx->max_db; + query->result_size += 16; /* for the fence + alignment */ query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6; + query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen); query->flags |= R600_QUERY_HW_FLAG_PREDICATE; break; case PIPE_QUERY_TIME_ELAPSED: - query->result_size = 16; + query->result_size = 24; query->num_cs_dw_begin = 8; - query->num_cs_dw_end = 8; + query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen); break; case PIPE_QUERY_TIMESTAMP: - query->result_size = 8; - query->num_cs_dw_end = 8; + query->result_size = 16; + query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen); query->flags = R600_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: @@ -457,8 +466,9 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx, case PIPE_QUERY_PIPELINE_STATISTICS: /* 11 values on EG, 8 on R600. */ query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16; + query->result_size += 8; /* for the fence + alignment */ query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6; + query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen); break; default: assert(0); @@ -605,6 +615,9 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, va); radeon_emit(cs, (va >> 32) & 0xFFFF); + + va += ctx->max_db * 16 - 8; + r600_gfx_write_fence(ctx, va, 0, 0x80000000); break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -617,7 +630,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, radeon_emit(cs, (va >> 32) & 0xFFFF); break; case PIPE_QUERY_TIME_ELAPSED: - va += query->result_size/2; + va += 8; /* fall through */ case PIPE_QUERY_TIMESTAMP: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); @@ -626,14 +639,23 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF)); radeon_emit(cs, 0); radeon_emit(cs, 0); + + va += 8; + r600_gfx_write_fence(ctx, va, 0, 0x80000000); break; - case PIPE_QUERY_PIPELINE_STATISTICS: - va += query->result_size/2; + case PIPE_QUERY_PIPELINE_STATISTICS: { + unsigned sample_size = (query->result_size - 8) / 2; + + va += sample_size; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); radeon_emit(cs, (va >> 32) & 0xFFFF); + + va += sample_size; + r600_gfx_write_fence(ctx, va, 0, 0x80000000); break; + } default: assert(0); } @@ -832,6 +854,61 @@ bool r600_query_hw_end(struct r600_common_context *rctx, return true; } +static void r600_get_hw_query_params(struct r600_common_context *rctx, + struct r600_query_hw *rquery, int index, + struct r600_hw_query_params *params) +{ + params->pair_stride = 0; + params->pair_count = 1; + + switch (rquery->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + params->start_offset = 0; + params->end_offset = 8; + params->fence_offset = rctx->max_db * 16; + params->pair_stride = 16; + params->pair_count = rctx->max_db; + break; + case PIPE_QUERY_TIME_ELAPSED: + params->start_offset = 0; + params->end_offset = 8; + params->fence_offset = 16; + break; + case PIPE_QUERY_TIMESTAMP: + params->start_offset = 0; + params->end_offset = 0; + params->fence_offset = 8; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + params->start_offset = 8; + params->end_offset = 24; + params->fence_offset = params->end_offset + 4; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + params->start_offset = 0; + params->end_offset = 16; + params->fence_offset = params->end_offset + 4; + break; + case PIPE_QUERY_SO_STATISTICS: + params->start_offset = 8 - index * 8; + params->end_offset = 24 - index * 8; + params->fence_offset = params->end_offset + 4; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + { + /* Offsets apply to EG+ */ + static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80}; + params->start_offset = offsets[index]; + params->end_offset = 88 + offsets[index]; + params->fence_offset = 2 * 88; + break; + } + default: + unreachable("r600_get_hw_query_params unsupported"); + } +} + static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index, bool test_status_bit) { @@ -857,20 +934,18 @@ static void r600_query_hw_add_result(struct r600_common_context *ctx, { switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: { - unsigned results_base = 0; - while (results_base != query->result_size) { + for (unsigned i = 0; i < ctx->max_db; ++i) { + unsigned results_base = i * 16; result->u64 += r600_query_read_result(buffer + results_base, 0, 2, true); - results_base += 16; } break; } case PIPE_QUERY_OCCLUSION_PREDICATE: { - unsigned results_base = 0; - while (results_base != query->result_size) { + for (unsigned i = 0; i < ctx->max_db; ++i) { + unsigned results_base = i * 16; result->b = result->b || r600_query_read_result(buffer + results_base, 0, 2, true) != 0; - results_base += 16; } break; } -- 2.30.2