From: Nicolai Hähnle Date: Thu, 27 Jul 2017 17:24:59 +0000 (+0200) Subject: radeonsi: fix streamout overflow predication on VI+ X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dfc1502c841863b01481f5e88af658f699cca12e;p=mesa.git radeonsi: fix streamout overflow predication on VI+ There is a firmware regression that causes failures. Work around it by using the compute shader for query_buffer_objects to summarize the query results. v2: rename to PREDICATION_OP_BOOL64 (consistent with sid.h) Reviewed-by: Marek Olšák --- diff --git a/src/amd/common/r600d_common.h b/src/amd/common/r600d_common.h index 33744753368..0b61215f2bc 100644 --- a/src/amd/common/r600d_common.h +++ b/src/amd/common/r600d_common.h @@ -117,6 +117,7 @@ #define PREDICATION_OP_CLEAR 0x0 #define PREDICATION_OP_ZPASS 0x1 #define PREDICATION_OP_PRIMCOUNT 0x2 +#define PREDICATION_OP_BOOL64 0x3 #define PRED_OP(x) ((x) << 16) #define PREDICATION_CONTINUE (1 << 31) #define PREDICATION_HINT_WAIT (0 << 12) diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 62ab74575bf..5879a07586c 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -505,6 +505,7 @@ void r600_query_hw_destroy(struct r600_common_screen *rscreen, } r600_resource_reference(&query->buffer.buf, NULL); + r600_resource_reference(&query->workaround_buf, NULL); FREE(rquery); } @@ -946,19 +947,23 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; - switch (query->b.type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - op = PRED_OP(PREDICATION_OP_ZPASS); - break; - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - op = PRED_OP(PREDICATION_OP_PRIMCOUNT); - invert = !invert; - break; - default: - assert(0); - return; + if (query->workaround_buf) { + op = PRED_OP(PREDICATION_OP_BOOL64); + } else { + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + op = PRED_OP(PREDICATION_OP_ZPASS); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + op = PRED_OP(PREDICATION_OP_PRIMCOUNT); + invert = !invert; + break; + default: + assert(0); + return; + } } /* if true then invert, see GL_ARB_conditional_render_inverted */ @@ -967,8 +972,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, else op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */ + /* Use the value written by compute shader as a workaround. Note that + * the wait flag does not apply in this predication mode. + * + * The shader outputs the result value to L2. Workarounds only affect VI + * and later, where the CP reads data from L2, so we don't need an + * additional flush. + */ + if (query->workaround_buf) { + uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset; + emit_set_predicate(ctx, query->workaround_buf, va, op); + return; + } + op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW; - + /* emit predicate packets for all data blocks */ for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { unsigned results_base = 0; @@ -1064,6 +1082,8 @@ bool r600_query_hw_begin(struct r600_common_context *rctx, if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES)) r600_query_hw_reset_buffers(rctx, query); + r600_resource_reference(&query->workaround_buf, NULL); + r600_query_hw_emit_start(rctx, query); if (!query->buffer.buf) return false; @@ -1777,11 +1797,43 @@ static void r600_render_condition(struct pipe_context *ctx, /* Compute the size of SET_PREDICATION packets. */ atom->num_dw = 0; if (query) { - for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) - atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + bool needs_workaround = false; + + /* There is a firmware regression in VI which causes successive + * SET_PREDICATION packets to give the wrong answer for + * non-inverted stream overflow predication. + */ + if (rctx->chip_class >= VI && !condition && + (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || + (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && + (rquery->buffer.previous || + rquery->buffer.results_end > rquery->result_size)))) { + needs_workaround = true; + } - if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) - atom->num_dw *= R600_MAX_STREAMS; + if (needs_workaround && !rquery->workaround_buf) { + bool old_force_off = rctx->render_cond_force_off; + rctx->render_cond_force_off = true; + + u_suballocator_alloc( + rctx->allocator_zeroed_memory, 8, 8, + &rquery->workaround_offset, + (struct pipe_resource **)&rquery->workaround_buf); + + ctx->get_query_result_resource( + ctx, query, true, PIPE_QUERY_TYPE_U64, 0, + &rquery->workaround_buf->b.b, rquery->workaround_offset); + + atom->num_dw = 5; + + rctx->render_cond_force_off = old_force_off; + } else { + for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) + atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + + if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) + atom->num_dw *= R600_MAX_STREAMS; + } } rctx->set_atom_dirty(rctx, atom, query != NULL); diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h index e176246122d..5565d706be3 100644 --- a/src/gallium/drivers/radeon/r600_query.h +++ b/src/gallium/drivers/radeon/r600_query.h @@ -194,6 +194,10 @@ struct r600_query_hw { struct list_head list; /* For transform feedback: which stream the query is for */ unsigned stream; + + /* Workaround via compute shader */ + struct r600_resource *workaround_buf; + unsigned workaround_offset; }; bool r600_query_hw_init(struct r600_common_screen *rscreen,