unsigned num_db = device->physical_device->rad_info.num_render_backends;
MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;
- if (device->physical_device->rad_info.chip_class == SI)
- num_db = 8;
- else
- num_db = MAX2(8, num_db);
-
/* Otherwise we need to change the query reset procedure */
assert(rb_mask == ((1ull << num_db) - 1));
radv_load_push_int(nir_builder *b, unsigned offset, const char *name)
{
nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+ nir_intrinsic_set_base(flags, 0);
+ nir_intrinsic_set_range(flags, 16);
flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset));
flags->num_components = 1;
nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name);
*/
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "occlusion_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
global_id = nir_channel(&b, global_id, 0); // We only care about x here.
*/
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query");
- b.shader->info->cs.local_size[0] = 64;
- b.shader->info->cs.local_size[1] = 1;
- b.shader->info->cs.local_size[2] = 1;
+ b.shader->info.name = ralloc_strdup(b.shader, "pipeline_statistics_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info->cs.local_size[0],
- b.shader->info->cs.local_size[1],
- b.shader->info->cs.local_size[2], 0);
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
global_id = nir_channel(&b, global_id, 0); // We only care about x here.
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &pipeline_statistics_vk_pipeline_info, NULL,
&device->meta_state.query.pipeline_statistics_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
fail:
- radv_device_finish_meta_query_state(device);
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_query_state(device);
ralloc_free(occlusion_cs.nir);
ralloc_free(pipeline_statistics_cs.nir);
return result;
uint64_t avail_va = va + pool->availability_offset + 4 * query;
/* This waits on the ME. All copies below are done on the ME */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ si_emit_wait_fence(cs, false, avail_va, 1, 0xffffffff);
}
}
radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
uint64_t avail_va = va + pool->availability_offset + 4 * query;
/* This waits on the ME. All copies below are done on the ME */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1); /* reference value */
- radeon_emit(cs, 0xffffffff); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ si_emit_wait_fence(cs, false, avail_va, 1, 0xffffffff);
}
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
uint64_t avail_va = va + pool->availability_offset + 4 * query;
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 10);
+ radeon_check_space(cmd_buffer->device->ws, cs, 16);
va += pipelinestat_block_size;
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
- EVENT_INDEX(5));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1));
- radeon_emit(cs, 1);
- radeon_emit(cs, 0);
+ si_cs_emit_write_event_eop(cs,
+ false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ false,
+ EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
+ 1, avail_va, 0, 1);
break;
default:
unreachable("ending unhandled query type");
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
switch(pipelineStage) {
case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
radeon_emit(cs, 0);
radeon_emit(cs, query_va);
radeon_emit(cs, query_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, avail_va);
+ radeon_emit(cs, avail_va >> 32);
+ radeon_emit(cs, 1);
break;
default:
- if (mec) {
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, 3 << 29);
- radeon_emit(cs, query_va);
- radeon_emit(cs, query_va >> 32);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
- radeon_emit(cs, query_va);
- radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- }
+ si_cs_emit_write_event_eop(cs,
+ false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ mec,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ 3, query_va, 0, 0);
+ si_cs_emit_write_event_eop(cs,
+ false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ mec,
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ 1, avail_va, 0, 1);
break;
}
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, avail_va);
- radeon_emit(cs, avail_va >> 32);
- radeon_emit(cs, 1);
-
assert(cmd_buffer->cs->cdw <= cdw_max);
}