unsigned num_db = device->physical_device->rad_info.num_render_backends;
MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;
- if (device->physical_device->rad_info.chip_class == SI)
- num_db = 8;
- else
- num_db = MAX2(8, num_db);
-
/* Otherwise we need to change the query reset procedure */
assert(rb_mask == ((1ull << num_db) - 1));
radv_load_push_int(nir_builder *b, unsigned offset, const char *name)
{
nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+ nir_intrinsic_set_base(flags, 0);
+ nir_intrinsic_set_range(flags, 16);
flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset));
flags->num_components = 1;
nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name);
*/
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "occlusion_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
global_id = nir_channel(&b, global_id, 0); // We only care about x here.
*/
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "pipeline_statistics_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
global_id = nir_channel(&b, global_id, 0); // We only care about x here.
struct radv_shader_module occlusion_cs = { .nir = NULL };
struct radv_shader_module pipeline_statistics_cs = { .nir = NULL };
- zero(device->meta_state.query);
-
occlusion_cs.nir = build_occlusion_query_shader(device);
pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device);
VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &pipeline_statistics_vk_pipeline_info, NULL,
&device->meta_state.query.pipeline_statistics_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
fail:
- radv_device_finish_meta_query_state(device);
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_query_state(device);
ralloc_free(occlusion_cs.nir);
ralloc_free(pipeline_statistics_cs.nir);
return result;
uint32_t pipeline_stats_mask, uint32_t avail_offset)
{
struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_compute_state saved_state;
- VkDescriptorSet ds;
+ struct radv_meta_saved_state saved_state;
- radv_meta_save_compute(&saved_state, cmd_buffer, 4);
-
- radv_temp_descriptor_set_create(device, cmd_buffer,
- device->meta_state.query.ds_layout,
- &ds);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {
.bo = dst_bo,
.size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset)
};
- radv_UpdateDescriptorSets(radv_device_to_handle(device),
- 2, /* writeCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstSet = ds,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstSet = ds,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- }
- }, 0, NULL);
-
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.query.p_layout, 0, 1,
- &ds, 0, NULL);
+ radv_meta_push_descriptor_set(cmd_buffer,
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.query.p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo) {
+ .buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE
+ }
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo) {
+ .buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE
+ }
+ }
+ });
/* Encode the number of elements for easy access by the shader. */
pipeline_stats_mask &= 0x7ff;
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
- radv_temp_descriptor_set_destroy(device, ds);
-
- radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
VkResult radv_CreateQueryPool(
VkQueryPool* pQueryPool)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- uint64_t size;
struct radv_query_pool *pool = vk_alloc2(&device->alloc, pAllocator,
sizeof(*pool), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!pool)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
switch(pCreateInfo->queryType) {
pool->type = pCreateInfo->queryType;
pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics;
pool->availability_offset = pool->stride * pCreateInfo->queryCount;
- size = pool->availability_offset;
+ pool->size = pool->availability_offset;
if (pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP ||
pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
- size += 4 * pCreateInfo->queryCount;
+ pool->size += 4 * pCreateInfo->queryCount;
- pool->bo = device->ws->buffer_create(device->ws, size,
- 64, RADEON_DOMAIN_GTT, 0);
+ pool->bo = device->ws->buffer_create(device->ws, pool->size,
+ 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!pool->bo) {
vk_free2(&device->alloc, pAllocator, pool);
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
pool->ptr = device->ws->buffer_map(pool->bo);
if (!pool->ptr) {
device->ws->buffer_destroy(pool->bo);
vk_free2(&device->alloc, pAllocator, pool);
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
- memset(pool->ptr, 0, size);
+ memset(pool->ptr, 0, pool->size);
*pQueryPool = radv_query_pool_to_handle(pool);
return VK_SUCCESS;
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
- uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
dest_va += dst_buffer->offset + dstOffset;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->bo, 8);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo, 8);
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
uint64_t avail_va = va + pool->availability_offset + 4 * query;
/* This waits on the ME. All copies below are done on the ME */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ si_emit_wait_fence(cs, false, avail_va, 1, 0xffffffff);
}
}
radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
uint64_t avail_va = va + pool->availability_offset + 4 * query;
/* This waits on the ME. All copies below are done on the ME */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ si_emit_wait_fence(cs, false, avail_va, 1, 0xffffffff);
}
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
uint64_t avail_va = va + pool->availability_offset + 4 * query;
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
+ uint32_t flush_bits = 0;
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
+ flush_bits |= radv_fill_buffer(cmd_buffer, pool->bo,
+ firstQuery * pool->stride,
+ queryCount * pool->stride, 0);
- si_cp_dma_clear_buffer(cmd_buffer, va + firstQuery * pool->stride,
- queryCount * pool->stride, 0);
if (pool->type == VK_QUERY_TYPE_TIMESTAMP ||
- pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS)
- si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4,
- queryCount * 4, 0);
+ pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+ flush_bits |= radv_fill_buffer(cmd_buffer, pool->bo,
+ pool->availability_offset + firstQuery * 4,
+ queryCount * 4, 0);
+ }
+
+ if (flush_bits) {
+ /* Only need to flush caches for the compute shader path. */
+ cmd_buffer->pending_reset_query = true;
+ cmd_buffer->state.flush_bits |= flush_bits;
+ }
}
-void radv_CmdBeginQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags)
+static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
+ uint64_t va,
+ VkQueryType query_type,
+ VkQueryControlFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
- va += pool->stride * query;
-
- cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);
-
- switch (pool->type) {
+ switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION:
radeon_check_space(cmd_buffer->device->ws, cs, 7);
++cmd_buffer->state.active_occlusion_queries;
- if (cmd_buffer->state.active_occlusion_queries == 1)
+ if (cmd_buffer->state.active_occlusion_queries == 1) {
+ if (flags & VK_QUERY_CONTROL_PRECISE_BIT) {
+ /* This is the first occlusion query, enable
+ * the hint if the precision bit is set.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = true;
+ }
+
radv_set_db_count_control(cmd_buffer);
+ } else {
+ if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) &&
+ !cmd_buffer->state.perfect_occlusion_queries_enabled) {
+ /* This is not the first query, but this one
+ * needs to enable precision, DB_COUNT_CONTROL
+ * has to be updated accordingly.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = true;
+
+ radv_set_db_count_control(cmd_buffer);
+ }
+ }
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
default:
unreachable("beginning unhandled query type");
}
-}
+}
-void radv_CmdEndQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query)
+static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
+ uint64_t va, uint64_t avail_va,
+ VkQueryType query_type)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
- va += pool->stride * query;
-
- cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);
-
- switch (pool->type) {
+ switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION:
radeon_check_space(cmd_buffer->device->ws, cs, 14);
cmd_buffer->state.active_occlusion_queries--;
- if (cmd_buffer->state.active_occlusion_queries == 0)
+ if (cmd_buffer->state.active_occlusion_queries == 0) {
radv_set_db_count_control(cmd_buffer);
+ /* Reset the perfect occlusion queries hint now that no
+ * queries are active.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = false;
+ }
+
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va + 8);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 10);
+ radeon_check_space(cmd_buffer->device->ws, cs, 16);
va += pipelinestat_block_size;
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
- EVENT_INDEX(5));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1));
- radeon_emit(cs, 1);
- radeon_emit(cs, 0);
+ si_cs_emit_write_event_eop(cs,
+ false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer),
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ 1, avail_va, 0, 1);
break;
default:
unreachable("ending unhandled query type");
}
}
+void radv_CmdBeginQuery(
+ VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t query,
+ VkQueryControlFlags flags)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 8);
+
+ if (cmd_buffer->pending_reset_query) {
+ if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
+ /* Only need to flush caches if the query pool size is
+ * large enough to be resetted using the compute shader
+ * path. Small pools don't need any cache flushes
+ * because we use a CP dma clear.
+ */
+ si_emit_cache_flush(cmd_buffer);
+ cmd_buffer->pending_reset_query = false;
+ }
+ }
+
+ va += pool->stride * query;
+
+ emit_begin_query(cmd_buffer, va, pool->type, flags);
+}
+
+
+void radv_CmdEndQuery(
+ VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t query)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+ va += pool->stride * query;
+
+ /* Do not need to add the pool BO to the list because the query must
+ * currently be active, which means the BO is already in the list.
+ */
+ emit_end_query(cmd_buffer, va, avail_va, pool->type);
+
+ /*
+ * For multiview we have to emit a query for each bit in the mask,
+ * however the first query we emit will get the totals for all the
+ * operations, so we don't want to get a real value in the other
+ * queries. This emits a fake begin/end sequence so the waiting
+ * code gets a completed query value and doesn't hang, but the
+ * query returns 0.
+ */
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+
+
+ for (unsigned i = 1; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) {
+ va += pool->stride;
+ avail_va += 4;
+ emit_begin_query(cmd_buffer, va, pool->type, 0);
+ emit_end_query(cmd_buffer, va, avail_va, pool->type);
+ }
+ }
+}
+
void radv_CmdWriteTimestamp(
VkCommandBuffer commandBuffer,
VkPipelineStageFlagBits pipelineStage,
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t avail_va = va + pool->availability_offset + 4 * query;
uint64_t query_va = va + pool->stride * query;
- cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
-
- if (mec) {
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, 3 << 29);
- radeon_emit(cs, query_va);
- radeon_emit(cs, query_va >> 32);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, query_va);
- radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- }
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo, 5);
+
+ int num_queries = 1;
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
+ num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
+ for (unsigned i = 0; i < num_queries; i++) {
+ switch(pipelineStage) {
+ case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
+ COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
+ COPY_DATA_DST_SEL(V_370_MEM_ASYNC));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, query_va);
+ radeon_emit(cs, query_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, avail_va);
+ radeon_emit(cs, avail_va >> 32);
+ radeon_emit(cs, 1);
+ break;
+ default:
+ si_cs_emit_write_event_eop(cs,
+ false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ mec,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ 3, query_va, 0, 0);
+ si_cs_emit_write_event_eop(cs,
+ false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ mec,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ 1, avail_va, 0, 1);
+ break;
+ }
+ query_va += pool->stride;
+ avail_va += 4;
+ }
assert(cmd_buffer->cs->cdw <= cdw_max);
}