From a44b65312e2cbba641652f4c78b8db9f24ba39b6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 28 Feb 2011 16:19:58 +1000 Subject: [PATCH] r600g: add NV_conditional_render support. This is reliant on a drm patch that I posted on the list + a version bump. These will appear in drm-next today. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreend.h | 3 +- src/gallium/drivers/r600/r600.h | 3 + src/gallium/drivers/r600/r600_query.c | 28 +++++++ src/gallium/drivers/r600/r600d.h | 4 + .../winsys/r600/drm/evergreen_hw_context.c | 12 +-- src/gallium/winsys/r600/drm/r600_hw_context.c | 79 ++++++++++++++----- src/gallium/winsys/r600/drm/r600d.h | 15 +++- 7 files changed, 117 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index f0a1ee0cd02..c51a163bd06 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -108,8 +108,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate)) /* Registers */ #define R_008C00_SQ_CONFIG 0x00008C00 diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 3568708785f..0b7d6f70968 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -253,6 +253,7 @@ struct r600_context { unsigned num_query_running; struct list_head fenced_bo; unsigned max_db; /* for OQ */ + boolean predicate_drawing; }; struct r600_draw { @@ -285,6 +286,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query); void r600_query_end(struct r600_context *ctx, struct r600_query *query); void r600_context_queries_suspend(struct r600_context *ctx); void r600_context_queries_resume(struct r600_context *ctx); +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 726668260cc..343403f92f3 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_pipe.h" +#include "r600d.h" static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { @@ -66,6 +67,30 @@ static boolean r600_get_query_result(struct pipe_context *ctx, return r600_context_query_result(&rctx->ctx, (struct r600_query *)query, wait, vresult); } +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + uint mode) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + int wait_flag = 0; + + if (!query) { + rctx->ctx.predicate_drawing = false; + r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + wait_flag = 1; + } + + rctx->ctx.predicate_drawing = true; + r600_query_predication(&rctx->ctx, rquery, PREDICATION_OP_ZPASS, wait_flag); + +} + void r600_init_query_functions(struct r600_pipe_context *rctx) { rctx->context.create_query = r600_create_query; @@ -73,4 +98,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx) rctx->context.begin_query = r600_begin_query; rctx->context.end_query = r600_end_query; rctx->context.get_query_result = r600_get_query_result; + + if (r600_get_num_backends(rctx->screen->radeon) > 0) + rctx->context.render_condition = r600_render_condition; } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index e8558c49a7c..df70e2889e2 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -67,6 +67,10 @@ #define PKT3_SET_CTL_CONST 0x6F #define PKT3_SURFACE_BASE_UPDATE 0x73 +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index aa4035a302b..66398afa698 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -834,25 +834,25 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr } /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); /* flush color buffer */ diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index f170640407d..6b3baa2c909 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -107,7 +107,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, /* initialize block */ block->start_offset = reg[i].offset; - block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n); + block->pm4[block->pm4_ndwords++] = PKT3(reg[i].opcode, n, 0); block->pm4[block->pm4_ndwords++] = (block->start_offset - reg[i].offset_base) >> 2; block->reg = &block->pm4[block->pm4_ndwords]; block->pm4_ndwords += n; @@ -119,7 +119,7 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, block->nbo++; assert(block->nbo < R600_BLOCK_MAX_BO); block->pm4_bo_index[j] = block->nbo; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); block->pm4[block->pm4_ndwords++] = 0x00000000; block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; @@ -771,12 +771,12 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, bo->last_flush &= flush_mask; return; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id; bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } @@ -1048,25 +1048,25 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) } /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, ctx->predicate_drawing); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); /* flush color buffer */ @@ -1101,15 +1101,15 @@ void r600_context_flush(struct r600_context *ctx) r600_context_queries_suspend(ctx); /* emit fence */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence; ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo); @@ -1282,6 +1282,7 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned required_space; + int num_backends = r600_get_num_backends(ctx->radeon); /* query request needs 6/8 dwords for begin + 6/8 dwords for end */ if (query->type == PIPE_QUERY_TIME_ELAPSED) @@ -1300,21 +1301,39 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) r600_query_result(ctx, query, TRUE); } + if (query->type == PIPE_QUERY_OCCLUSION_COUNTER && + num_backends > 0 && num_backends < ctx->max_db) { + /* as per info on ZPASS the driver must set the unusued DB top bits */ + u32 *results; + int i; + + results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL); + if (results) { + memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4); + + for (i = num_backends; i < ctx->max_db; i++) { + results[(i * 4)+1] = 0x80000000; + results[(i * 4)+3] = 0x80000000; + } + r600_bo_unmap(ctx->radeon, query->buffer); + } + } + /* emit begin query */ if (query->type == PIPE_QUERY_TIME_ELAPSED) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = 0; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1327,19 +1346,19 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) { /* emit begin query */ if (query->type == PIPE_QUERY_TIME_ELAPSED) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); ctx->pm4[ctx->pm4_cdwords++] = query->num_results*4 + 8 + r600_bo_offset(query->buffer); ctx->pm4[ctx->pm4_cdwords++] = 0; } - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = 0; r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); @@ -1349,6 +1368,28 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->num_query_running--; } +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait) +{ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); + + if (operation == PREDICATION_OP_CLEAR) { + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(PREDICATION_OP_CLEAR); + } else { + int results_base = query->num_results - (4 * ctx->max_db); + + if (results_base < 0) + results_base = 0; + + ctx->pm4[ctx->pm4_cdwords++] = results_base*4 + r600_bo_offset(query->buffer); + ctx->pm4[ctx->pm4_cdwords++] = PRED_OP(operation) | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW) | PREDICATION_DRAW_VISIBLE; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer); + } +} + struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) { struct r600_query *query; diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index 1c1ac76fe69..cb12865ff01 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -105,6 +105,18 @@ * 5 - TS events */ +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 + +#define PRED_OP(x) ((x) << 16) + +#define PREDICATION_HINT_WAIT (0 << 12) +#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) + +#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) +#define PREDICATION_DRAW_VISIBLE (1 << 8) + #define PKT_TYPE_S(x) (((x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) #define PKT_TYPE_C 0x3FFFFFFF @@ -117,8 +129,9 @@ #define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8) #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) #define PKT3_IT_OPCODE_C 0xFFFF00FF +#define PKT3_PRED_S(x) (((x) >> 0) & 0x1) #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) -#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count)) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate)) /* Registers */ #define R_0280A0_CB_COLOR0_INFO 0x0280A0 -- 2.30.2