From: Marek Olšák Date: Tue, 21 Jan 2014 23:06:32 +0000 (+0100) Subject: radeonsi: use queries from r600g X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=62d55c0a2d96cf482f955bc841006c2ac1e0d867;p=mesa.git radeonsi: use queries from r600g Reviewed-by: Michel Dänzer Reviewed-by: Tom Stellard --- diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 33f1492f6ce..56295720f4d 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -7,7 +7,6 @@ C_SOURCES := \ si_hw_context.c \ si_pipe.c \ si_pm4.c \ - si_query.c \ si_resource.c \ si_shader.c \ si_state.c \ diff --git a/src/gallium/drivers/radeonsi/si.h b/src/gallium/drivers/radeonsi/si.h index 46184ec33bd..5c69b07a05a 100644 --- a/src/gallium/drivers/radeonsi/si.h +++ b/src/gallium/drivers/radeonsi/si.h @@ -33,56 +33,11 @@ #include "si_resource.h" struct winsys_handle; - -/* R600/R700 STATES */ -struct si_query { - union { - uint64_t u64; - boolean b; - struct pipe_query_data_so_statistics so; - } result; - /* The kind of query */ - unsigned type; - /* Offset of the first result for current query */ - unsigned results_start; - /* Offset of the next free result after current query data */ - unsigned results_end; - /* Size of the result in memory for both begin_query and end_query, - * this can be one or two numbers, or it could even be a size of a structure. */ - unsigned result_size; - /* The buffer where query results are stored. It's used as a ring, - * data blocks for current query are stored sequentially from - * results_start to results_end, with wrapping on the buffer end */ - struct r600_resource *buffer; - /* The number of dwords for begin_query or end_query. 
*/ - unsigned num_cs_dw; - /* linked list of queries */ - struct list_head list; -}; - struct si_context; struct si_screen; -void si_get_backend_mask(struct si_context *ctx); void si_context_flush(struct si_context *ctx, unsigned flags); void si_begin_new_cs(struct si_context *ctx); - -struct si_query *si_context_query_create(struct si_context *ctx, unsigned query_type); -void si_context_query_destroy(struct si_context *ctx, struct si_query *query); -boolean si_context_query_result(struct si_context *ctx, - struct si_query *query, - boolean wait, void *vresult); -void si_query_begin(struct si_context *ctx, struct si_query *query); -void si_query_end(struct si_context *ctx, struct si_query *query); -void si_context_queries_suspend(struct si_context *ctx); -void si_context_queries_resume(struct si_context *ctx); -void si_query_predication(struct si_context *ctx, struct si_query *query, int operation, - int flag_wait); - -bool si_is_timer_query(unsigned type); -bool si_query_needs_begin(unsigned type); void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in); -int si_context_init(struct si_context *ctx); - #endif diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index ba2ebe7d7f0..250caab64e1 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -51,7 +51,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) { struct si_context *sctx = (struct si_context *)ctx; - si_context_queries_suspend(sctx); + r600_suspend_nontimer_queries(&sctx->b); util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend); util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); @@ -81,26 +81,18 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) sctx->samplers[PIPE_SHADER_FRAGMENT].views.views); } - if ((op & SI_DISABLE_RENDER_COND) && sctx->current_render_cond) { - sctx->saved_render_cond = 
sctx->current_render_cond; - sctx->saved_render_cond_cond = sctx->current_render_cond_cond; - sctx->saved_render_cond_mode = sctx->current_render_cond_mode; - sctx->b.b.render_condition(&sctx->b.b, NULL, FALSE, 0); + if ((op & SI_DISABLE_RENDER_COND) && sctx->b.current_render_cond) { + util_blitter_save_render_condition(sctx->blitter, + sctx->b.current_render_cond, + sctx->b.current_render_cond_cond, + sctx->b.current_render_cond_mode); } - } static void si_blitter_end(struct pipe_context *ctx) { struct si_context *sctx = (struct si_context *)ctx; - if (sctx->saved_render_cond) { - sctx->b.b.render_condition(&sctx->b.b, - sctx->saved_render_cond, - sctx->saved_render_cond_cond, - sctx->saved_render_cond_mode); - sctx->saved_render_cond = NULL; - } - si_context_queries_resume(sctx); + r600_resume_nontimer_queries(&sctx->b); } static unsigned u_max_sample(struct pipe_resource *r) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 517becbfec5..a7536e3fbbf 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -30,97 +30,6 @@ #include "util/u_memory.h" #include <errno.h> -#define GROUP_FORCE_NEW_BLOCK 0 - -/* Get backends mask */ -void si_get_backend_mask(struct si_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - struct r600_resource *buffer; - uint32_t *results; - unsigned num_backends = ctx->screen->b.info.r600_num_backends; - unsigned i, mask = 0; - - /* if backend_map query is supported by the kernel */ - if (ctx->screen->b.info.r600_backend_map_valid) { - unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes; - unsigned backend_map = ctx->screen->b.info.r600_backend_map; - unsigned item_width = 4, item_mask = 0x7; - - while(num_tile_pipes--) { - i = backend_map & item_mask; - mask |= (1<<i); - backend_map >>= item_width; - } - if (mask != 0) { - ctx->backend_mask = mask; - return; - } - } - - /* otherwise backup path for older kernels */ - - /* 
create buffer for event data */ - buffer = si_resource_create_custom(&ctx->screen->b.b, - PIPE_USAGE_STAGING, - ctx->max_db*16); - if (!buffer) - goto err; - - /* initialize buffer with zeroes */ - results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (results) { - uint64_t va = 0; - - memset(results, 0, ctx->max_db * 4 * 4); - ctx->b.ws->buffer_unmap(buffer->cs_buf); - - /* emit EVENT_WRITE for ZPASS_DONE */ - va = r600_resource_va(&ctx->screen->b.b, (void *)buffer); - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = va >> 32; - - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE); - - /* analyze results */ - results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ); - if (results) { - for(i = 0; i < ctx->max_db; i++) { - /* at least highest bit will be set if backend is used */ - if (results[i*4 + 1]) - mask |= (1<<i); - } - ctx->b.ws->buffer_unmap(buffer->cs_buf); - } - } - - r600_resource_reference(&buffer, NULL); - - if (mask != 0) { - ctx->backend_mask = mask; - return; - } - -err: - /* fallback to old method - set num_backends lower bits to 1 */ - ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); - return; -} - -bool si_is_timer_query(unsigned type) -{ - return type == PIPE_QUERY_TIME_ELAPSED || - type == PIPE_QUERY_TIMESTAMP || - type == PIPE_QUERY_TIMESTAMP_DISJOINT; -} - -bool si_query_needs_begin(unsigned type) -{ - return type != PIPE_QUERY_TIMESTAMP; -} /* initialize */ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, @@ -146,7 +55,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, } /* Count in queries_suspend. 
*/ - num_dw += ctx->num_cs_dw_nontimer_queries_suspend; + num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend; /* Count in streamout_end at the end of CS. */ if (ctx->b.streamout.begin_emitted) { @@ -154,7 +63,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, } /* Count in render_condition(NULL) at the end of CS. */ - if (ctx->predicate_drawing) { + if (ctx->b.predicate_drawing) { num_dw += 3; } @@ -181,10 +90,10 @@ void si_context_flush(struct si_context *ctx, unsigned flags) return; /* suspend queries */ - ctx->nontimer_queries_suspended = false; - if (ctx->num_cs_dw_nontimer_queries_suspend) { - si_context_queries_suspend(ctx); - ctx->nontimer_queries_suspended = true; + ctx->b.nontimer_queries_suspended = false; + if (ctx->b.num_cs_dw_nontimer_queries_suspend) { + r600_suspend_nontimer_queries(&ctx->b); + ctx->b.nontimer_queries_suspended = true; } ctx->b.streamout.suspended = false; @@ -270,431 +179,13 @@ void si_begin_new_cs(struct si_context *ctx) } /* resume queries */ - if (ctx->nontimer_queries_suspended) { - si_context_queries_resume(ctx); + if (ctx->b.nontimer_queries_suspended) { + r600_resume_nontimer_queries(&ctx->b); } si_all_descriptors_begin_new_cs(ctx); } -static unsigned si_query_read_result(char *map, unsigned start_index, unsigned end_index, - bool test_status_bit) -{ - uint32_t *current_result = (uint32_t*)map; - uint64_t start, end; - - start = (uint64_t)current_result[start_index] | - (uint64_t)current_result[start_index+1] << 32; - end = (uint64_t)current_result[end_index] | - (uint64_t)current_result[end_index+1] << 32; - - if (!test_status_bit || - ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { - return end - start; - } - return 0; -} - -static boolean si_query_result(struct si_context *ctx, struct si_query *query, boolean wait) -{ - unsigned results_base = query->results_start; - char *map; - - map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, - PIPE_TRANSFER_READ | - (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); - if (!map) - return FALSE; - - /* count all results across all data blocks */ - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - while (results_base != query->results_end) { - query->result.u64 += - si_query_read_result(map + results_base, 0, 2, true); - results_base = (results_base + 16) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - while (results_base != query->results_end) { - query->result.b = query->result.b || - si_query_read_result(map + results_base, 0, 2, true) != 0; - results_base = (results_base + 16) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_TIMESTAMP: - { - uint32_t *current_result = (uint32_t*)map; - query->result.u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32; - break; - } - case PIPE_QUERY_TIME_ELAPSED: - while (results_base != query->results_end) { - query->result.u64 += - si_query_read_result(map + results_base, 0, 2, false); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - /* SAMPLE_STREAMOUTSTATS stores this structure: - * { - * u64 NumPrimitivesWritten; - * u64 PrimitiveStorageNeeded; - * } - * We only need NumPrimitivesWritten here. */ - while (results_base != query->results_end) { - query->result.u64 += - si_query_read_result(map + results_base, 2, 6, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - /* Here we read PrimitiveStorageNeeded. 
*/ - while (results_base != query->results_end) { - query->result.u64 += - si_query_read_result(map + results_base, 0, 4, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_SO_STATISTICS: - while (results_base != query->results_end) { - query->result.so.num_primitives_written += - si_query_read_result(map + results_base, 2, 6, true); - query->result.so.primitives_storage_needed += - si_query_read_result(map + results_base, 0, 4, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - while (results_base != query->results_end) { - query->result.b = query->result.b || - si_query_read_result(map + results_base, 2, 6, true) != - si_query_read_result(map + results_base, 0, 4, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - default: - assert(0); - } - - query->results_start = query->results_end; - ctx->b.ws->buffer_unmap(query->buffer->cs_buf); - return TRUE; -} - -void si_query_begin(struct si_context *ctx, struct si_query *query) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - unsigned new_results_end, i; - uint32_t *results; - uint64_t va; - - si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); - - new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; - - /* collect current results if query buffer is full */ - if (new_results_end == query->results_start) { - si_query_result(ctx, query, TRUE); - } - - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (results) { - results = (uint32_t*)((char*)results + query->results_end); - memset(results, 0, query->result_size); - - /* Set top bits for unused backends */ - for (i = 0; i < ctx->max_db; i++) { - if 
(!(ctx->backend_mask & (1<<i))) { - results[(i * 4)+1] = 0x80000000; - results[(i * 4)+3] = 0x80000000; - } - } - ctx->b.ws->buffer_unmap(query->buffer->cs_buf); - } - break; - case PIPE_QUERY_TIME_ELAPSED: - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - results = (uint32_t*)((char*)results + query->results_end); - memset(results, 0, query->result_size); - ctx->b.ws->buffer_unmap(query->buffer->cs_buf); - break; - default: - assert(0); - } - - /* emit begin query */ - va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); - va += query->results_end; - - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_TIME_ELAPSED: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = 0; - break; - default: - assert(0); - } - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE); - - if (!si_is_timer_query(query->type)) { - ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; - } -} - -void 
si_query_end(struct si_context *ctx, struct si_query *query) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - uint64_t va; - unsigned new_results_end; - - /* The queries which need begin already called this in begin_query. */ - if (!si_query_needs_begin(query->type)) { - si_need_cs_space(ctx, query->num_cs_dw, TRUE); - - new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; - - /* collect current results if query buffer is full */ - if (new_results_end == query->results_start) { - si_query_result(ctx, query, TRUE); - } - } - - va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); - /* emit end query */ - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - va += query->results_end + 8; - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - va += query->results_end + query->result_size/2; - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_TIME_ELAPSED: - va += query->results_end + query->result_size/2; - /* fall through */ - case PIPE_QUERY_TIMESTAMP: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = 0; - break; - default: - assert(0); - } - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, 
&ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE); - - query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; - - if (si_query_needs_begin(query->type) && !si_is_timer_query(query->type)) { - ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw; - } -} - -void si_query_predication(struct si_context *ctx, struct si_query *query, int operation, - int flag_wait) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - uint64_t va; - - if (operation == PREDICATION_OP_CLEAR) { - si_need_cs_space(ctx, 3, FALSE); - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); - } else { - unsigned results_base = query->results_start; - unsigned count; - uint32_t op; - - /* find count of the query data blocks */ - count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0; - count /= query->result_size; - - si_need_cs_space(ctx, 5 * count, TRUE); - - op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | - (flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); - va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); - - /* emit predicate packets for all data blocks */ - while (results_base != query->results_end) { - cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; - cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, - query->buffer, RADEON_USAGE_READ); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - - /* set CONTINUE bit for all packets except the first */ - op |= PREDICATION_CONTINUE; - } - } -} - -struct si_query *si_context_query_create(struct si_context *ctx, unsigned query_type) -{ - struct si_query *query; - unsigned buffer_size = 4096; - - query = CALLOC_STRUCT(si_query); - if (query == NULL) - return NULL; - - query->type = query_type; - - switch (query_type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - query->result_size = 16 * ctx->max_db; - query->num_cs_dw = 6; - break; - case PIPE_QUERY_TIMESTAMP: - query->result_size = 8; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_TIME_ELAPSED: - query->result_size = 16; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ - query->result_size = 32; - query->num_cs_dw = 6; - break; - default: - assert(0); - FREE(query); - return NULL; - } - - /* adjust buffer size to simplify offsets wrapping math */ - buffer_size -= buffer_size % query->result_size; - - /* Queries are normally read by the CPU after - * being written by the gpu, hence staging is probably a good - * usage pattern. 
- */ - query->buffer = si_resource_create_custom(&ctx->screen->b.b, - PIPE_USAGE_STAGING, - buffer_size); - if (!query->buffer) { - FREE(query); - return NULL; - } - return query; -} - -void si_context_query_destroy(struct si_context *ctx, struct si_query *query) -{ - r600_resource_reference(&query->buffer, NULL); - free(query); -} - -boolean si_context_query_result(struct si_context *ctx, - struct si_query *query, - boolean wait, void *vresult) -{ - boolean *result_b = (boolean*)vresult; - uint64_t *result_u64 = (uint64_t*)vresult; - struct pipe_query_data_so_statistics *result_so = - (struct pipe_query_data_so_statistics*)vresult; - - if (!si_query_result(ctx, query, wait)) - return FALSE; - - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - *result_u64 = query->result.u64; - break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - *result_b = query->result.b; - break; - case PIPE_QUERY_TIMESTAMP: - case PIPE_QUERY_TIME_ELAPSED: - *result_u64 = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq; - break; - case PIPE_QUERY_SO_STATISTICS: - *result_so = query->result.so; - break; - default: - assert(0); - } - return TRUE; -} - -void si_context_queries_suspend(struct si_context *ctx) -{ - struct si_query *query; - - LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) { - si_query_end(ctx, query); - } - assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); -} - -void si_context_queries_resume(struct si_context *ctx) -{ - struct si_query *query; - - assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); - - LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) { - si_query_begin(ctx, query); - } -} - #if SI_TRACE_CS void si_trace_emit(struct si_context *sctx) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 662c5211f36..fee6251ee2f 100644 --- 
a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -67,10 +67,10 @@ void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, } /* Disable render condition. */ - if (sctx->current_render_cond) { - render_cond = sctx->current_render_cond; - render_cond_cond = sctx->current_render_cond_cond; - render_cond_mode = sctx->current_render_cond_mode; + if (sctx->b.current_render_cond) { + render_cond = sctx->b.current_render_cond; + render_cond_cond = sctx->b.current_render_cond_cond; + render_cond_mode = sctx->b.current_render_cond_mode; ctx->render_condition(ctx, NULL, FALSE, 0); } @@ -142,7 +142,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * goto fail; si_init_blit_functions(sctx); - si_init_query_functions(sctx); si_init_context_resource_functions(sctx); si_init_compute_functions(sctx); @@ -169,8 +168,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * case SI: case CIK: si_init_state_functions(sctx); - LIST_INITHEAD(&sctx->active_nontimer_query_list); - sctx->max_db = 8; si_init_config(sctx); break; default: @@ -192,7 +189,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * /* these must be last */ si_begin_new_cs(sctx); - si_get_backend_mask(sctx); + r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 56b704bfa5d..ecc66993230 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -127,12 +127,6 @@ struct si_context { struct si_pipe_shader_selector *ps_shader; struct si_pipe_shader_selector *vs_shader; struct si_cs_shader_state cs_shader_state; - struct pipe_query *current_render_cond; - unsigned current_render_cond_mode; - boolean current_render_cond_cond; - struct pipe_query *saved_render_cond; - unsigned saved_render_cond_mode; - boolean saved_render_cond_cond; /* shader information */ unsigned sprite_coord_enable; unsigned export_16bpc; @@ -148,16 +142,6 @@ struct si_context { */ unsigned pm4_dirty_cdwords; - /* The list of active queries. Only one query of each type can be active. */ - struct list_head active_nontimer_query_list; - unsigned num_cs_dw_nontimer_queries_suspend; - /* If queries have been suspended. */ - bool nontimer_queries_suspended; - - unsigned backend_mask; - unsigned max_db; /* for OQ */ - boolean predicate_drawing; - /* Vertex and index buffers. 
*/ bool vertex_buffers_dirty; struct pipe_index_buffer index_buffer; @@ -192,9 +176,6 @@ void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags); const char *si_get_llvm_processor_name(enum radeon_family family); -/* si_query.c */ -void si_init_query_functions(struct si_context *sctx); - /* si_resource.c */ void si_init_context_resource_functions(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c deleted file mode 100644 index d694eca8d92..00000000000 --- a/src/gallium/drivers/radeonsi/si_query.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include "si_pipe.h" -#include "sid.h" - -static struct pipe_query *si_create_query(struct pipe_context *ctx, unsigned query_type) -{ - struct si_context *sctx = (struct si_context *)ctx; - - return (struct pipe_query*)si_context_query_create(sctx, query_type); -} - -static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct si_context *sctx = (struct si_context *)ctx; - - si_context_query_destroy(sctx, (struct si_query *)query); -} - -static void si_begin_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; - - if (!si_query_needs_begin(rquery->type)) { - assert(0); - return; - } - - memset(&rquery->result, 0, sizeof(rquery->result)); - rquery->results_start = rquery->results_end; - si_query_begin(sctx, (struct si_query *)query); - - if (!si_is_timer_query(rquery->type)) { - LIST_ADDTAIL(&rquery->list, &sctx->active_nontimer_query_list); - } -} - -static void si_end_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; - - if (!si_query_needs_begin(rquery->type)) { - memset(&rquery->result, 0, sizeof(rquery->result)); - } - - si_query_end(sctx, rquery); - - if (si_query_needs_begin(rquery->type) && !si_is_timer_query(rquery->type)) { - LIST_DELINIT(&rquery->list); - } -} - -static boolean si_get_query_result(struct pipe_context *ctx, - struct pipe_query *query, - boolean wait, union pipe_query_result *vresult) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; - - return si_context_query_result(sctx, rquery, wait, vresult); -} - -static void si_render_condition(struct pipe_context *ctx, - struct pipe_query *query, - boolean condition, - uint mode) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct 
si_query *)query; - int wait_flag = 0; - - /* If we already have nonzero result, render unconditionally */ - if (query != NULL && rquery->result.u64 != 0) { - if (sctx->current_render_cond) { - si_render_condition(ctx, NULL, FALSE, 0); - } - return; - } - - sctx->current_render_cond = query; - sctx->current_render_cond_cond = condition; - sctx->current_render_cond_mode = mode; - - if (query == NULL) { - if (sctx->predicate_drawing) { - sctx->predicate_drawing = false; - si_query_predication(sctx, NULL, PREDICATION_OP_CLEAR, 1); - } - return; - } - - if (mode == PIPE_RENDER_COND_WAIT || - mode == PIPE_RENDER_COND_BY_REGION_WAIT) { - wait_flag = 1; - } - - sctx->predicate_drawing = true; - - switch (rquery->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - si_query_predication(sctx, rquery, PREDICATION_OP_ZPASS, wait_flag); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - si_query_predication(sctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag); - break; - default: - assert(0); - } -} - -void si_init_query_functions(struct si_context *sctx) -{ - sctx->b.b.create_query = si_create_query; - sctx->b.b.destroy_query = si_destroy_query; - sctx->b.b.begin_query = si_begin_query; - sctx->b.b.end_query = si_end_query; - sctx->b.b.get_query_result = si_get_query_result; - - if (sctx->screen->b.info.r600_num_backends > 0) - sctx->b.b.render_condition = si_render_condition; -} diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index a269335b6b9..8396ef51210 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3028,6 +3028,19 @@ static boolean si_dma_copy(struct pipe_context *ctx, return FALSE; } +static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) +{ + /* XXX Turn this into a proper state. 
Right now the queries are + * enabled in draw_vbo, which snoops r600_common_context to see + * if any occlusion queries are active. */ +} + +static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, + bool include_draw_vbo) +{ + si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo); +} + void si_init_state_functions(struct si_context *sctx) { int i; @@ -3090,6 +3103,8 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.create_surface = r600_create_surface; sctx->b.b.surface_destroy = r600_surface_destroy; sctx->b.dma_copy = si_dma_copy; + sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; + sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; sctx->b.b.draw_vbo = si_draw_vbo; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 4d347ed0774..90f9703cfd3 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -545,7 +545,7 @@ static void si_state_draw(struct si_context *sctx, /* queries need some special values * (this is non-zero if any query is active) */ - if (sctx->num_cs_dw_nontimer_queries_suspend) { + if (sctx->b.num_occlusion_queries > 0) { if (sctx->b.chip_class >= CIK) { si_pm4_set_reg(pm4, R_028004_DB_COUNT_CONTROL, S_028004_PERFECT_ZPASS_COUNTS(1) | @@ -592,11 +592,11 @@ static void si_state_draw(struct si_context *sctx, si_pm4_cmd_add(pm4, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ? 
V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); } - si_pm4_cmd_end(pm4, sctx->predicate_drawing); + si_pm4_cmd_end(pm4, sctx->b.predicate_drawing); si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES); si_pm4_cmd_add(pm4, info->instance_count); - si_pm4_cmd_end(pm4, sctx->predicate_drawing); + si_pm4_cmd_end(pm4, sctx->b.predicate_drawing); if (info->indexed) { uint32_t max_size = (ib->buffer->width0 - ib->offset) / @@ -608,11 +608,11 @@ static void si_state_draw(struct si_context *sctx, si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ); si_cmd_draw_index_2(pm4, max_size, va, info->count, V_0287F0_DI_SRC_SEL_DMA, - sctx->predicate_drawing); + sctx->b.predicate_drawing); } else { uint32_t initiator = V_0287F0_DI_SRC_SEL_AUTO_INDEX; initiator |= S_0287F0_USE_OPAQUE(!!info->count_from_stream_output); - si_cmd_draw_index_auto(pm4, info->count, initiator, sctx->predicate_drawing); + si_cmd_draw_index_auto(pm4, info->count, initiator, sctx->b.predicate_drawing); } si_pm4_set_state(sctx, draw, pm4);