query->result_size += 16; /* for the fence + alignment */
query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen);
break;
+ case SI_QUERY_TIME_ELAPSED_SDMA:
+ /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */
+ query->result_size = 64;
+ query->num_cs_dw_end = 0;
+ break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 24;
query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen);
struct radeon_cmdbuf *cs = sctx->gfx_cs;
switch (query->b.type) {
+ case SI_QUERY_TIME_ELAPSED_SDMA:
+ si_dma_emit_timestamp(sctx, buffer, va - buffer->gpu_address);
+ return;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
si_update_occlusion_query_state(sctx, query->b.type, 1);
si_update_prims_generated_query_state(sctx, query->b.type, 1);
- si_need_gfx_cs_space(sctx);
+ if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
+ si_need_gfx_cs_space(sctx);
/* Get a new query buffer if needed. */
if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
uint64_t fence_va = 0;
switch (query->b.type) {
+ case SI_QUERY_TIME_ELAPSED_SDMA:
+ si_dma_emit_timestamp(sctx, buffer, va + 32 - buffer->gpu_address);
+ return;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
query_type == PIPE_QUERY_GPU_FINISHED ||
- query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ (query_type >= PIPE_QUERY_DRIVER_SPECIFIC &&
+ query_type != SI_QUERY_TIME_ELAPSED_SDMA))
return si_query_sw_create(query_type);
return si_query_hw_create(sscreen, query_type, index);
case PIPE_QUERY_TIME_ELAPSED:
result->u64 += si_query_read_result(buffer, 0, 2, false);
break;
+ case SI_QUERY_TIME_ELAPSED_SDMA:
+ result->u64 += si_query_read_result(buffer, 0, 32/4, false);
+ break;
case PIPE_QUERY_TIMESTAMP:
result->u64 = *(uint64_t*)buffer;
break;
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
+ rquery->type == SI_QUERY_TIME_ELAPSED_SDMA ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
result->u64 = (1000000 * result->u64) / sscreen->info.clock_crystal_freq;
}