blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
}
- if (pipeline->device->physical_device->has_rbplus) {
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
/* Disable RB+ blend optimizations for dual source blending. */
if (blend.mrt0_is_dual_src) {
for (i = 0; i < 8; i++) {
if (es_info->info.so.num_outputs)
esvert_lds_size = 4 * es_info->info.so.num_outputs + 1;
*/
+
+ /* LDS size for passing data from GS to ES.
+ * GS stores Primitive IDs (one DWORD) into LDS at the address
+ * corresponding to the ES thread of the provoking vertex. All
+ * ES threads load and export PrimitiveID for their thread.
+ */
+ if (!radv_pipeline_has_tess(pipeline) &&
+ pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.export_prim_id)
+ esvert_lds_size = MAX2(esvert_lds_size, 1);
}
unsigned max_gsprims = max_gsprims_base;
else
topology = V_028B6C_OUTPUT_TRIANGLE_CW;
- if (pipeline->device->has_distributed_tess) {
+ if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS;
}
if (device->physical_device->rad_info.chip_class >= GFX10 &&
+ device->physical_device->rad_info.family != CHIP_NAVI14 &&
!(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
if (nir[MESA_SHADER_TESS_CTRL]) {
keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
}
+ /*
+ * Disable NGG with geometry shaders. There are a bunch of
+ * issues still:
+ * * GS primitives in pipeline statistic queries do not get
+ * updates. See dEQP-VK.query_pool.statistics_query.geometry_shader_primitives
+ * * dEQP-VK.clipping.user_defined.clip_cull_distance_dynamic_index.*geom* failures
+ * * Interactions with tessellation failing:
+ * dEQP-VK.tessellation.geometry_interaction.passthrough.tessellate_isolines_passthrough_geometry_no_change
+ * * General issues with the last primitive missing/corrupt:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=111248
+ *
+ * Furthermore, XGL/AMDVLK also disables this as of 9b632ef.
+ */
+ if (nir[MESA_SHADER_GEOMETRY]) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
+
/* TODO: Implement streamout support for NGG. */
gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
fpovs_per_batch = 63;
} else {
/* The context states are affected by the scissor bug. */
- context_states_per_bin = pipeline->device->physical_device->has_scissor_bug ? 1 : 6;
+ context_states_per_bin = pipeline->device->physical_device->rad_info.has_gfx9_scissor_bug ? 1 : 6;
/* 32 causes hangs for RAVEN. */
persistent_states_per_bin = 16;
fpovs_per_batch = 63;
radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
- if (pipeline->device->physical_device->has_rbplus) {
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
else
radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
tess->ls_hs_config);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+ !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(250) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
+ }
}
static void
else
z_order = V_02880C_LATE_Z;
- bool disable_rbplus = device->physical_device->has_rbplus &&
- !device->physical_device->rbplus_allowed;
+ bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
+ !device->physical_device->rad_info.rbplus_allowed;
/* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
* but this appears to break Project Cars (DXVK). See
radv_pipeline_has_gs(pipeline))
ia_multi_vgt_param.partial_vs_wave = true;
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (device->has_distributed_tess) {
+ if (device->physical_device->rad_info.has_distributed_tess) {
if (radv_pipeline_has_gs(pipeline)) {
if (device->physical_device->rad_info.chip_class <= GFX8)
ia_multi_vgt_param.partial_es_wave = true;
return ret;
}
+/* Map an executable index onto one of the pipeline's shader variants.
+ *
+ * Stages are visited in increasing stage order and stages without a shader
+ * are skipped. Every present stage consumes one index; the geometry stage
+ * consumes a second one that maps to pipeline->gs_copy_shader (returned
+ * without a NULL check).
+ *
+ * On a match, *stage receives the owning stage and the variant is returned.
+ * If the index matches nothing, *stage is set to -1 and NULL is returned.
+ */
+static struct radv_shader_variant *
+radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
+{
+	int remaining = index;
+
+	for (int s = 0; s < MESA_SHADER_STAGES; s++) {
+		struct radv_shader_variant *variant = pipeline->shaders[s];
+
+		if (variant == NULL)
+			continue;
+
+		if (remaining == 0) {
+			*stage = s;
+			return variant;
+		}
+		remaining--;
+
+		/* Only the geometry stage contributes a second executable. */
+		if (s != MESA_SHADER_GEOMETRY)
+			continue;
+
+		if (remaining == 0) {
+			*stage = s;
+			return pipeline->gs_copy_shader;
+		}
+		remaining--;
+	}
+
+	*stage = -1;
+	return NULL;
+}
+
/* Basically strlcpy (which does not exist on linux) specialized for
* descriptions. */
static void desc_copy(char *desc, const char *src) {
const uint32_t count = MIN2(total_count, *pExecutableCount);
for (unsigned i = 0, executable_idx = 0;
i < MESA_SHADER_STAGES && executable_idx < count; ++i) {
- if (pipeline->shaders[i])
+ if (!pipeline->shaders[i])
continue;
pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
const char *name = NULL;
break;
pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
- snprintf(pProperties[executable_idx].name, VK_MAX_DESCRIPTION_SIZE,
- "GS Copy Shader");
- snprintf(pProperties[executable_idx].description, VK_MAX_DESCRIPTION_SIZE,
- "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
+ desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
+ desc_copy(pProperties[executable_idx].description,
+ "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
+
+ ++executable_idx;
}
}
*pExecutableCount = count;
return result;
}
+
+/* Report compile statistics for one executable of a pipeline.
+ *
+ * The executable is selected with pExecutableInfo->executableIndex via
+ * radv_get_shader_from_executable_index().
+ *
+ * If pStatistics is NULL, only the number of available statistics is stored
+ * in *pStatisticCount. Otherwise at most *pStatisticCount entries are
+ * written, *pStatisticCount is updated to the number written, and
+ * VK_INCOMPLETE is returned when the array was too small to hold them all.
+ *
+ * Entries are addressed by index and bounded by the caller's count, so no
+ * pointer is ever formed from a NULL base or beyond the end of the array.
+ */
+VkResult radv_GetPipelineExecutableStatisticsKHR(
+	VkDevice _device,
+	const VkPipelineExecutableInfoKHR* pExecutableInfo,
+	uint32_t* pStatisticCount,
+	VkPipelineExecutableStatisticKHR* pStatistics)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+	gl_shader_stage stage;
+	struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+	/* config.lds_size is scaled by this factor to obtain bytes. */
+	unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
+	unsigned max_waves = radv_get_max_waves(device, shader, stage);
+
+	/* Must match the number of rows in the table below. */
+	enum { RADV_NUM_EXECUTABLE_STATISTICS = 9 };
+	const uint32_t total = RADV_NUM_EXECUTABLE_STATISTICS;
+
+	/* Count-only query: nothing else of the shader needs to be read. */
+	if (!pStatistics) {
+		*pStatisticCount = total;
+		return VK_SUCCESS;
+	}
+
+	const struct {
+		const char *name;
+		const char *description;
+		uint64_t value;
+	} stats[RADV_NUM_EXECUTABLE_STATISTICS] = {
+		{ "SGPRs", "Number of SGPR registers allocated per subgroup",
+		  shader->config.num_sgprs },
+		{ "VGPRs", "Number of VGPR registers allocated per subgroup",
+		  shader->config.num_vgprs },
+		{ "Spilled SGPRs", "Number of SGPR registers spilled per subgroup",
+		  shader->config.spilled_sgprs },
+		{ "Spilled VGPRs", "Number of VGPR registers spilled per subgroup",
+		  shader->config.spilled_vgprs },
+		{ "PrivMem VGPRs", "Number of VGPRs stored in private memory per subgroup",
+		  shader->info.private_mem_vgprs },
+		{ "Code size", "Code size in bytes",
+		  shader->code_size },
+		{ "LDS size", "LDS size in bytes per workgroup",
+		  shader->config.lds_size * lds_increment },
+		{ "Scratch size", "Private memory in bytes per subgroup",
+		  shader->config.scratch_bytes_per_wave },
+		{ "Subgroups per SIMD", "The maximum number of subgroups in flight on a SIMD unit",
+		  max_waves },
+	};
+
+	/* Never write more entries than the caller provided room for. */
+	const uint32_t written = MIN2(*pStatisticCount, total);
+
+	for (uint32_t i = 0; i < written; ++i) {
+		VkPipelineExecutableStatisticKHR *s = &pStatistics[i];
+
+		desc_copy(s->name, stats[i].name);
+		desc_copy(s->description, stats[i].description);
+		s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+		s->value.u64 = stats[i].value;
+	}
+
+	*pStatisticCount = written;
+	return written < total ? VK_INCOMPLETE : VK_SUCCESS;
+}
+
+/* Copy a NUL-terminated representation string into a caller-provided buffer.
+ *
+ * data      - destination buffer, or NULL to only query the required size.
+ * data_size - in: capacity of data in bytes (ignored when data is NULL);
+ *             out: required size including the terminator when data is
+ *             NULL, otherwise the number of bytes actually written.
+ * src       - string to copy.
+ *
+ * Returns VK_INCOMPLETE when the buffer was too small and the text was
+ * truncated, VK_SUCCESS otherwise. Whenever at least one byte is written,
+ * the last written byte is a NUL terminator.
+ */
+static VkResult radv_copy_representation(void *data, size_t *data_size, const char *src)
+{
+	/* NOTE(review): whether callers can pass a NULL string is not visible
+	 * here; treat it as empty rather than handing NULL to strlen(). */
+	if (!src)
+		src = "";
+
+	const size_t total_size = strlen(src) + 1;
+
+	if (!data) {
+		*data_size = total_size;
+		return VK_SUCCESS;
+	}
+
+	const size_t size = MIN2(total_size, *data_size);
+
+	memcpy(data, src, size);
+	if (size)
+		((char *)data)[size - 1] = 0;
+
+	/* Report how much was written so the caller does not have to assume
+	 * its whole buffer was filled. */
+	*data_size = size;
+
+	return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
+}
+
+/* Return the textual internal representations of one pipeline executable:
+ * the NIR string, the LLVM IR string and the disassembly string stored on
+ * the shader variant.
+ *
+ * If pInternalRepresentations is NULL, only the number of representations is
+ * stored in *pInternalRepresentationCount. Otherwise at most
+ * *pInternalRepresentationCount entries are filled in and the count is
+ * updated to the number filled. VK_INCOMPLETE is returned when the array was
+ * too small, or when any entry's text did not fit its pData buffer.
+ *
+ * Entries are addressed by index and bounded by the caller's count, so no
+ * pointer is ever formed from a NULL base or beyond the end of the array.
+ */
+VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
+	VkDevice device,
+	const VkPipelineExecutableInfoKHR* pExecutableInfo,
+	uint32_t* pInternalRepresentationCount,
+	VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
+{
+	RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+	gl_shader_stage stage;
+	struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+	/* Must match the number of rows in the table below. */
+	enum { RADV_NUM_INTERNAL_REPRESENTATIONS = 3 };
+	const uint32_t total = RADV_NUM_INTERNAL_REPRESENTATIONS;
+
+	/* Count-only query: the shader does not need to be read. */
+	if (!pInternalRepresentations) {
+		*pInternalRepresentationCount = total;
+		return VK_SUCCESS;
+	}
+
+	const struct {
+		const char *name;
+		const char *description;
+		const char *text;
+	} reprs[RADV_NUM_INTERNAL_REPRESENTATIONS] = {
+		/* optimized NIR */
+		{ "NIR Shader(s)", "The optimized NIR shader(s)", shader->nir_string },
+		/* LLVM IR */
+		{ "LLVM IR", "The LLVM IR after some optimizations", shader->llvm_ir_string },
+		/* Disassembler */
+		{ "Assembly", "Final Assembly", shader->disasm_string },
+	};
+
+	/* Never write more entries than the caller provided room for. */
+	const uint32_t written = MIN2(*pInternalRepresentationCount, total);
+	VkResult result = written < total ? VK_INCOMPLETE : VK_SUCCESS;
+
+	for (uint32_t i = 0; i < written; ++i) {
+		VkPipelineExecutableInternalRepresentationKHR *p = &pInternalRepresentations[i];
+
+		p->isText = true;
+		desc_copy(p->name, reprs[i].name);
+		desc_copy(p->description, reprs[i].description);
+		/* A truncated text makes the whole call incomplete. */
+		if (radv_copy_representation(p->pData, &p->dataSize, reprs[i].text) != VK_SUCCESS)
+			result = VK_INCOMPLETE;
+	}
+
+	*pInternalRepresentationCount = written;
+	return result;
+}