blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
}
- if (pipeline->device->physical_device->has_rbplus) {
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
/* Disable RB+ blend optimizations for dual source blending. */
if (blend.mrt0_is_dual_src) {
for (i = 0; i < 8; i++) {
else
topology = V_028B6C_OUTPUT_TRIANGLE_CW;
- if (pipeline->device->has_distributed_tess) {
+ if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS;
}
if (device->physical_device->rad_info.chip_class >= GFX10 &&
+ device->physical_device->rad_info.family != CHIP_NAVI14 &&
!(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
if (nir[MESA_SHADER_TESS_CTRL]) {
keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
}
+ /*
+ * Disable NGG with geometry shaders. There are a bunch of
+ * issues still:
+ * * GS primitives in pipeline statistic queries do not get
+ * updates. See dEQP-VK.query_pool.statistics_query.geometry_shader_primitives
+ * * dEQP-VK.clipping.user_defined.clip_cull_distance_dynamic_index.*geom* failures
+ * * Interactions with tessellation failing:
+ * dEQP-VK.tessellation.geometry_interaction.passthrough.tessellate_isolines_passthrough_geometry_no_change
+ * * General issues with the last primitive missing/corrupt:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=111248
+ *
+ * Furthermore, XGL/AMDVLK also disables this as of 9b632ef.
+ */
+ if (nir[MESA_SHADER_GEOMETRY]) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
+
/* TODO: Implement streamout support for NGG. */
gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
- if (pipeline->device->physical_device->has_rbplus) {
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
else
radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
tess->ls_hs_config);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+ !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(250) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
+ }
}
static void
else
z_order = V_02880C_LATE_Z;
- bool disable_rbplus = device->physical_device->has_rbplus &&
+ bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
!device->physical_device->rbplus_allowed;
/* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
radv_pipeline_has_gs(pipeline))
ia_multi_vgt_param.partial_vs_wave = true;
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (device->has_distributed_tess) {
+ if (device->physical_device->rad_info.has_distributed_tess) {
if (radv_pipeline_has_gs(pipeline)) {
if (device->physical_device->rad_info.chip_class <= GFX8)
ia_multi_vgt_param.partial_es_wave = true;
return result;
}
+VkResult radv_GetPipelineExecutableStatisticsKHR(
+ VkDevice _device,
+ const VkPipelineExecutableInfoKHR* pExecutableInfo,
+ uint32_t* pStatisticCount,
+ VkPipelineExecutableStatisticKHR* pStatistics)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+ gl_shader_stage stage;
+ struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
+ unsigned max_waves = radv_get_max_waves(device, shader, stage);
+
+ VkPipelineExecutableStatisticKHR *s = pStatistics;
+ VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
+ VkResult result = VK_SUCCESS;
+
+ if (s < end) {
+ desc_copy(s->name, "SGPRs");
+ desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.num_sgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "VGPRs");
+ desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.num_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Spilled SGPRs");
+ desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.spilled_sgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Spilled VGPRs");
+ desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.spilled_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "PrivMem VGPRs");
+ desc_copy(s->description, "Number of VGPRs stored in private memory per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->info.private_mem_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Code size");
+ desc_copy(s->description, "Code size in bytes");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->code_size;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "LDS size");
+ desc_copy(s->description, "LDS size in bytes per workgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.lds_size * lds_increment;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Scratch size");
+ desc_copy(s->description, "Private memory in bytes per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.scratch_bytes_per_wave;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Subgroups per SIMD");
+ desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = max_waves;
+ }
+ ++s;
+
+ if (!pStatistics)
+ *pStatisticCount = s - pStatistics;
+ else if (s > end) {
+ *pStatisticCount = end - pStatistics;
+ result = VK_INCOMPLETE;
+ } else {
+ *pStatisticCount = s - pStatistics;
+ }
+
+ return result;
+}
+
static VkResult radv_copy_representation(void *data, size_t *data_size, const char *src)
{
size_t total_size = strlen(src) + 1;