X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_query.c;h=3c40774042dd67b43ae4f07a0fdeabc9bb7893fd;hb=e8d57802fea60d4b353761ba1bdb08545733c007;hp=7e0fd1d073f2fdab49e4e940d1b883635cb68686;hpb=5ab5d1bee4f01c56c9f87a43b82fd65473b75473;p=mesa.git diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 7e0fd1d073f..3c40774042d 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -44,11 +44,6 @@ static unsigned get_max_db(struct radv_device *device) unsigned num_db = device->physical_device->rad_info.num_render_backends; MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask; - if (device->physical_device->rad_info.chip_class == SI) - num_db = 8; - else - num_db = MAX2(8, num_db); - /* Otherwise we need to change the query reset procedure */ assert(rb_mask == ((1ull << num_db) - 1)); @@ -77,6 +72,8 @@ static struct nir_ssa_def * radv_load_push_int(nir_builder *b, unsigned offset, const char *name) { nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); + nir_intrinsic_set_base(flags, 0); + nir_intrinsic_set_range(flags, 16); flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset)); flags->num_components = 1; nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name); @@ -125,10 +122,10 @@ build_occlusion_query_shader(struct radv_device *device) { */ nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query"); - b.shader->info->cs.local_size[0] = 64; - b.shader->info->cs.local_size[1] = 1; - b.shader->info->cs.local_size[2] = 1; + b.shader->info.name = ralloc_strdup(b.shader, "occlusion_query"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result"); nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter"); @@ -158,9 +155,9 @@ build_occlusion_query_shader(struct radv_device *device) { nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); global_id = nir_channel(&b, global_id, 0); // We only care about x here. @@ -320,10 +317,10 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { */ nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query"); - b.shader->info->cs.local_size[0] = 64; - b.shader->info->cs.local_size[1] = 1; - b.shader->info->cs.local_size[2] = 1; + b.shader->info.name = ralloc_strdup(b.shader, "pipeline_statistics_query"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset"); @@ -350,9 +347,9 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); nir_ssa_def *block_size = nir_imm_ivec4(&b, - b.shader->info->cs.local_size[0], - b.shader->info->cs.local_size[1], - b.shader->info->cs.local_size[2], 0); + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); global_id = nir_channel(&b, global_id, 0); // We only care about x here. @@ -612,12 +609,10 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device) radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, &pipeline_statistics_vk_pipeline_info, NULL, &device->meta_state.query.pipeline_statistics_query_pipeline); - if (result != VK_SUCCESS) - goto fail; - return VK_SUCCESS; fail: - radv_device_finish_meta_query_state(device); + if (result != VK_SUCCESS) + radv_device_finish_meta_query_state(device); ralloc_free(occlusion_cs.nir); ralloc_free(pipeline_statistics_cs.nir); return result; @@ -997,13 +992,7 @@ void radv_CmdCopyQueryPoolResults( uint64_t avail_va = va + pool->availability_offset + 4 * query; /* This waits on the ME. All copies below are done on the ME */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + si_emit_wait_fence(cs, false, avail_va, 1, 0xffffffff); } } radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, @@ -1026,13 +1015,7 @@ void radv_CmdCopyQueryPoolResults( uint64_t avail_va = va + pool->availability_offset + 4 * query; /* This waits on the ME. All copies below are done on the ME */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + si_emit_wait_fence(cs, false, avail_va, 1, 0xffffffff); } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { uint64_t avail_va = va + pool->availability_offset + 4 * query; @@ -1156,7 +1139,7 @@ void radv_CmdEndQuery( break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - radeon_check_space(cmd_buffer->device->ws, cs, 10); + radeon_check_space(cmd_buffer->device->ws, cs, 16); va += pipelinestat_block_size; @@ -1165,13 +1148,12 @@ void radv_CmdEndQuery( radeon_emit(cs, va); radeon_emit(cs, va >> 32); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, avail_va); - radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, 1); - radeon_emit(cs, 0); + si_cs_emit_write_event_eop(cs, + false, + cmd_buffer->device->physical_device->rad_info.chip_class, + false, + EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, + 1, avail_va, 0, 1); break; default: unreachable("ending unhandled query type"); @@ -1194,32 +1176,42 @@ void radv_CmdWriteTimestamp( cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12); + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); - if (mec) { - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); - radeon_emit(cs, 3 << 29); - radeon_emit(cs, query_va); - radeon_emit(cs, query_va >> 32); + switch(pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | + COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | + COPY_DATA_DST_SEL(V_370_MEM_ASYNC)); radeon_emit(cs, 0); radeon_emit(cs, 0); - } else { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); radeon_emit(cs, query_va); - radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - } + radeon_emit(cs, query_va >> 32); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, avail_va); + radeon_emit(cs, avail_va >> 32); + radeon_emit(cs, 1); + break; + default: + si_cs_emit_write_event_eop(cs, + false, + cmd_buffer->device->physical_device->rad_info.chip_class, + mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + 3, query_va, 0, 0); + si_cs_emit_write_event_eop(cs, + false, + cmd_buffer->device->physical_device->rad_info.chip_class, + mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + 1, avail_va, 0, 1); + break; + } assert(cmd_buffer->cs->cdw <= cdw_max); }