From 41e053954d326c53c3970575b2e09deb1f49dbb8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 15 Aug 2017 02:40:30 +0200 Subject: [PATCH] radeonsi/gfx9: prevent a GPU hang after a timestamp event MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeon/r600_pipe_common.c | 35 ++++++++++++++++++- src/gallium/drivers/radeon/r600_pipe_common.h | 4 ++- src/gallium/drivers/radeon/r600_query.c | 7 ++-- src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +- src/gallium/drivers/radeonsi/si_state_draw.c | 4 +-- 5 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 960b59c5b29..b28f385e2b5 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -103,7 +103,8 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx, unsigned event, unsigned event_flags, unsigned data_sel, struct r600_resource *buf, uint64_t va, - uint32_t old_fence, uint32_t new_fence) + uint32_t old_fence, uint32_t new_fence, + unsigned query_type) { struct radeon_winsys_cs *cs = ctx->gfx.cs; unsigned op = EVENT_TYPE(event) | @@ -111,6 +112,29 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx, event_flags; if (ctx->chip_class >= GFX9) { + /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion + * counters) must immediately precede every timestamp event to + * prevent a GPU hang on GFX9. + * + * Occlusion queries don't need to do it here, because they + * always do ZPASS_DONE before the timestamp. + */ + if (ctx->chip_class == GFX9 && + query_type != PIPE_QUERY_OCCLUSION_COUNTER && + query_type != PIPE_QUERY_OCCLUSION_PREDICATE) { + struct r600_resource *scratch = ctx->eop_bug_scratch; + + assert(16 * ctx->screen->info.num_render_backends <= + scratch->b.b.width0); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, scratch->gpu_address); + radeon_emit(cs, scratch->gpu_address >> 32); + + radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); + } + radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); radeon_emit(cs, op); radeon_emit(cs, EOP_DATA_SEL(data_sel)); @@ -655,6 +679,14 @@ bool r600_common_context_init(struct r600_common_context *rctx, r600_query_init(rctx); cayman_init_msaa(&rctx->b); + if (rctx->chip_class == GFX9) { + rctx->eop_bug_scratch = (struct r600_resource*) + pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, + 16 * rscreen->info.num_render_backends); + if (!rctx->eop_bug_scratch) + return false; + } + rctx->allocator_zeroed_memory = u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, 0, PIPE_USAGE_DEFAULT, 0, true); @@ -724,6 +756,7 @@ void r600_common_context_cleanup(struct r600_common_context *rctx) } rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL); rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL); + r600_resource_reference(&rctx->eop_bug_scratch, NULL); } /* diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 14bc63ed2ba..952fb77a453 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -560,6 +560,7 @@ struct r600_common_context { struct r600_ring dma; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; + struct r600_resource *eop_bug_scratch; unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; @@ -747,7 +748,8 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx, unsigned event, unsigned event_flags, unsigned data_sel, struct r600_resource *buf, uint64_t va, - uint32_t old_fence, uint32_t new_fence); + uint32_t old_fence, uint32_t new_fence, + unsigned query_type); unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); void r600_gfx_wait_fence(struct r600_common_context *ctx, uint64_t va, uint32_t ref, uint32_t mask); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 03ea04d48f7..53b795584c1 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -780,7 +780,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, * (bottom-of-pipe) */ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, - 0, 3, NULL, va, 0, 0); + 0, 3, NULL, va, 0, 0, query->b.type); } break; case PIPE_QUERY_PIPELINE_STATISTICS: @@ -865,7 +865,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, /* fall through */ case PIPE_QUERY_TIMESTAMP: r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, - 0, 3, NULL, va, 0, 0); + 0, 3, NULL, va, 0, 0, query->b.type); fence_va = va + 8; break; case PIPE_QUERY_PIPELINE_STATISTICS: { @@ -888,7 +888,8 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, if (fence_va) r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1, - query->buffer.buf, fence_va, 0, 0x80000000); + query->buffer.buf, fence_va, 0, 0x80000000, + query->b.type); } static void r600_query_hw_emit_stop(struct r600_common_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 41dd52edb11..df9eeaa8456 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -591,7 +591,7 @@ static void si_pc_emit_stop(struct r600_common_context *ctx, struct radeon_winsys_cs *cs = ctx->gfx.cs; r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1, - buffer, va, 1, 0); + buffer, va, 1, 0, 0); r600_gfx_wait_fence(ctx, va, 0, 0xffffffff); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index deb0691f5c3..abe2b5cc658 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -894,7 +894,7 @@ void si_emit_cache_flush(struct si_context *sctx) /* Necessary for DCC */ if (rctx->chip_class == VI) r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, - 0, 0, NULL, 0, 0, 0); + 0, 0, NULL, 0, 0, 0, 0); } if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | @@ -996,7 +996,7 @@ void si_emit_cache_flush(struct si_context *sctx) r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1, sctx->wait_mem_scratch, va, sctx->wait_mem_number - 1, - sctx->wait_mem_number); + sctx->wait_mem_number, 0); r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff); } -- 2.30.2