{
unsigned j, i, num_results, buf_size = 4096;
uint32_t *results;
+
+ /* Non-GPU queries. */
+ switch (type) {
+ case R600_QUERY_DRAW_CALLS:
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ return NULL;
+ }
+
/* Queries are normally read by the CPU after
* being written by the gpu, hence staging is probably a good
* usage pattern.
*/
struct r600_resource *buf = (struct r600_resource*)
- pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
+ pipe_buffer_create(&ctx->screen->b.b, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, buf_size);
switch (type) {
}
results += 4 * ctx->max_db;
}
- ctx->ws->buffer_unmap(buf->cs_buf);
+ ctx->b.ws->buffer_unmap(buf->cs_buf);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_PIPELINE_STATISTICS:
results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
- ctx->ws->buffer_unmap(buf->cs_buf);
+ ctx->b.ws->buffer_unmap(buf->cs_buf);
break;
default:
assert(0);
static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
uint64_t va;
r600_update_occlusion_query_state(ctx, query->type, 1);
}
/* emit begin query */
- va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer.buf);
va += query->buffer.results_end;
switch (query->type) {
cs->buf[cs->cdw++] = 0;
cs->buf[cs->cdw++] = 0;
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ if (!ctx->num_pipelinestat_queries) {
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0);
+ }
+ ctx->num_pipelinestat_queries++;
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+ break;
default:
assert(0);
}
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
if (!r600_is_timer_query(query->type)) {
ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
uint64_t va;
/* The queries which need begin already called this in begin_query. */
r600_need_cs_space(ctx, query->num_cs_dw, FALSE);
}
- va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer.buf);
/* emit end query */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
cs->buf[cs->cdw++] = 0;
cs->buf[cs->cdw++] = 0;
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ assert(ctx->num_pipelinestat_queries > 0);
+ ctx->num_pipelinestat_queries--;
+ if (!ctx->num_pipelinestat_queries) {
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) | EVENT_INDEX(0);
+ }
+ va += query->buffer.results_end + query->result_size/2;
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+ break;
default:
assert(0);
}
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);
query->buffer.results_end += query->result_size;
static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
int operation, bool flag_wait)
{
- struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
if (operation == PREDICATION_OP_CLEAR) {
r600_need_cs_space(ctx, 3, FALSE);
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
- uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b);
+ uint64_t va = r600_resource_va(&ctx->screen->b.b, &qbuf->buf->b.b);
while (results_base < qbuf->results_end) {
cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, qbuf->buf, RADEON_USAGE_READ);
results_base += query->result_size;
/* set CONTINUE bit for all packets except the first */
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
struct r600_context *rctx = (struct r600_context *)ctx;
-
struct r600_query *query;
+ bool skip_allocation = false;
query = CALLOC_STRUCT(r600_query);
if (query == NULL)
query->result_size = 32;
query->num_cs_dw = 6;
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ /* 11 values on EG, 8 on R600. */
+ query->result_size = (rctx->b.chip_class >= EVERGREEN ? 11 : 8) * 16;
+ query->num_cs_dw = 8;
+ break;
+ /* Non-GPU queries. */
+ case R600_QUERY_DRAW_CALLS:
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ skip_allocation = true;
+ break;
default:
assert(0);
FREE(query);
return NULL;
}
- query->buffer.buf = r600_new_query_buffer(rctx, query_type);
- if (!query->buffer.buf) {
- FREE(query);
- return NULL;
+ if (!skip_allocation) {
+ query->buffer.buf = r600_new_query_buffer(rctx, query_type);
+ if (!query->buffer.buf) {
+ FREE(query);
+ return NULL;
+ }
}
return (struct pipe_query*)query;
}
return;
}
+ /* Non-GPU queries. */
+ switch (rquery->type) {
+ case R600_QUERY_DRAW_CALLS:
+ rquery->begin_result = rctx->num_draw_calls;
+ return;
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ rquery->begin_result = 0;
+ return;
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ rquery->begin_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_BUFFER_WAIT_TIME_NS);
+ return;
+ }
+
/* Discard the old query buffers. */
while (prev) {
struct r600_query_buffer *qbuf = prev;
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
+ rctx->b.ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
}
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
+ /* Non-GPU queries. */
+ switch (rquery->type) {
+ case R600_QUERY_DRAW_CALLS:
+ rquery->end_result = rctx->num_draw_calls;
+ return;
+ case R600_QUERY_REQUESTED_VRAM:
+ rquery->end_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_REQUESTED_VRAM_MEMORY);
+ return;
+ case R600_QUERY_REQUESTED_GTT:
+ rquery->end_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_REQUESTED_GTT_MEMORY);
+ return;
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ rquery->end_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_BUFFER_WAIT_TIME_NS);
+ return;
+ }
+
r600_emit_query_end(rctx, rquery);
if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
unsigned results_base = 0;
char *map;
+ /* Non-GPU queries. */
+ switch (query->type) {
+ case R600_QUERY_DRAW_CALLS:
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ result->u64 = query->end_result - query->begin_result;
+ return TRUE;
+ }
+
map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
PIPE_TRANSFER_READ |
(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
results_base += query->result_size;
}
break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ if (ctx->b.chip_class >= EVERGREEN) {
+ while (results_base != qbuf->results_end) {
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(map + results_base, 0, 22, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(map + results_base, 2, 24, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(map + results_base, 4, 26, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(map + results_base, 6, 28, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(map + results_base, 8, 30, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(map + results_base, 10, 32, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(map + results_base, 12, 34, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(map + results_base, 14, 36, false);
+ result->pipeline_statistics.hs_invocations +=
+ r600_query_read_result(map + results_base, 16, 38, false);
+ result->pipeline_statistics.ds_invocations +=
+ r600_query_read_result(map + results_base, 18, 40, false);
+ result->pipeline_statistics.cs_invocations +=
+ r600_query_read_result(map + results_base, 20, 42, false);
+ results_base += query->result_size;
+ }
+ } else {
+ while (results_base != qbuf->results_end) {
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(map + results_base, 0, 16, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(map + results_base, 2, 18, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(map + results_base, 4, 20, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(map + results_base, 6, 22, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(map + results_base, 8, 24, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(map + results_base, 10, 26, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(map + results_base, 12, 28, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(map + results_base, 14, 30, false);
+ results_base += query->result_size;
+ }
+ }
+#if 0 /* for testing */
+ printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
+ "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
+ "Clipper prims=%llu, PS=%llu, CS=%llu\n",
+ result->pipeline_statistics.ia_vertices,
+ result->pipeline_statistics.ia_primitives,
+ result->pipeline_statistics.vs_invocations,
+ result->pipeline_statistics.hs_invocations,
+ result->pipeline_statistics.ds_invocations,
+ result->pipeline_statistics.gs_invocations,
+ result->pipeline_statistics.gs_primitives,
+ result->pipeline_statistics.c_invocations,
+ result->pipeline_statistics.c_primitives,
+ result->pipeline_statistics.ps_invocations,
+ result->pipeline_statistics.cs_invocations);
+#endif
+ break;
default:
assert(0);
}
- ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
+ ctx->b.ws->buffer_unmap(qbuf->buf->cs_buf);
return TRUE;
}
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
- result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
+ result->u64 = (1000000 * result->u64) / rctx->screen->b.info.r600_clock_crystal_freq;
}
return TRUE;
}
static void r600_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
+ boolean condition,
uint mode)
{
struct r600_context *rctx = (struct r600_context *)ctx;
bool wait_flag = false;
rctx->current_render_cond = query;
+ rctx->current_render_cond_cond = condition;
rctx->current_render_cond_mode = mode;
if (query == NULL) {
/*
 * Hook this file's query entry points into the context's callback table:
 * create_query, destroy_query, begin_query, end_query and get_query_result
 * are always installed.
 *
 * render_condition is only installed when the screen info reports
 * r600_num_backends > 0; otherwise the callback is left untouched.
 * NOTE(review): presumably conditional rendering needs the backend count to
 * be known in order to interpret query results -- confirm against the
 * screen/winsys initialisation code.
 */
void r600_init_query_functions(struct r600_context *rctx)
{
- rctx->context.create_query = r600_create_query;
- rctx->context.destroy_query = r600_destroy_query;
- rctx->context.begin_query = r600_begin_query;
- rctx->context.end_query = r600_end_query;
- rctx->context.get_query_result = r600_get_query_result;
-
- if (rctx->screen->info.r600_num_backends > 0)
- rctx->context.render_condition = r600_render_condition;
+ rctx->b.b.create_query = r600_create_query;
+ rctx->b.b.destroy_query = r600_destroy_query;
+ rctx->b.b.begin_query = r600_begin_query;
+ rctx->b.b.end_query = r600_end_query;
+ rctx->b.b.get_query_result = r600_get_query_result;
+
+ if (rctx->screen->b.info.r600_num_backends > 0)
+ rctx->b.b.render_condition = r600_render_condition;
}