ac: add has_rbplus to ac_gpu_info

[mesa.git] / src / amd / vulkan / radv_pipeline.c
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c

index a0fe269e915c2abfaabce876b6b0df6368d056a6..97ab503bef31d49c1bb7d8982d915458cfc191bb 100644 (file)
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -865,7 +865,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
                 blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
         }
  
-       if (pipeline->device->physical_device->has_rbplus) {
+       if (pipeline->device->physical_device->rad_info.has_rbplus) {
                 /* Disable RB+ blend optimizations for dual source blending. */
                 if (blend.mrt0_is_dual_src) {
                         for (i = 0; i < 8; i++) {
@@ -2010,7 +2010,7 @@ calculate_tess_state(struct radv_pipeline *pipeline,
         else
                 topology = V_028B6C_OUTPUT_TRIANGLE_CW;
  
-       if (pipeline->device->has_distributed_tess) {
+       if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
                 if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
                     pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
                         distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS;
@@ -2320,6 +2320,7 @@ radv_fill_shader_keys(struct radv_device *device,
         }
  
         if (device->physical_device->rad_info.chip_class >= GFX10 &&
+           device->physical_device->rad_info.family != CHIP_NAVI14 &&
             !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
                 if (nir[MESA_SHADER_TESS_CTRL]) {
                         keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
@@ -2339,6 +2340,26 @@ radv_fill_shader_keys(struct radv_device *device,
                         keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
                 }
  
+               /*
+                * Disable NGG with geometry shaders. There are a bunch of
+                * issues still:
+                *   * GS primitives in pipeline statistic queries do not get
+                *     updates. See dEQP-VK.query_pool.statistics_query.geometry_shader_primitives
+                *   * dEQP-VK.clipping.user_defined.clip_cull_distance_dynamic_index.*geom* failures
+                *   * Interactions with tessellation failing:
+                *     dEQP-VK.tessellation.geometry_interaction.passthrough.tessellate_isolines_passthrough_geometry_no_change
+                *   * General issues with the last primitive missing/corrupt:
+                *     https://bugs.freedesktop.org/show_bug.cgi?id=111248
+                *
+                * Furthermore, XGL/AMDVLK also disables this as of 9b632ef.
+                */
+               if (nir[MESA_SHADER_GEOMETRY]) {
+                       if (nir[MESA_SHADER_TESS_CTRL])
+                               keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+                       else
+                               keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+               }
+
                 /* TODO: Implement streamout support for NGG. */
                 gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
  
@@ -3308,7 +3329,7 @@ radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs,
         radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
         radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
  
-       if (pipeline->device->physical_device->has_rbplus) {
+       if (pipeline->device->physical_device->rad_info.has_rbplus) {
  
                 radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
                 radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
@@ -3812,6 +3833,14 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
         else
                 radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
                                        tess->ls_hs_config);
+
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+           !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+               radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+                                      S_028A44_ES_VERTS_PER_SUBGRP(250) |
+                                      S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+                                      S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
+       }
  }
  
  static void
@@ -4025,7 +4054,7 @@ radv_compute_db_shader_control(const struct radv_device *device,
         else
                 z_order = V_02880C_LATE_Z;
  
-       bool disable_rbplus = device->physical_device->has_rbplus &&
+       bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
                               !device->physical_device->rbplus_allowed;
  
         /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
@@ -4349,7 +4378,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
                     radv_pipeline_has_gs(pipeline))
                         ia_multi_vgt_param.partial_vs_wave = true;
                 /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
-               if (device->has_distributed_tess) {
+               if (device->physical_device->rad_info.has_distributed_tess) {
                         if (radv_pipeline_has_gs(pipeline)) {
                                 if (device->physical_device->rad_info.chip_class <= GFX8)
                                         ia_multi_vgt_param.partial_es_wave = true;
@@ -4921,6 +4950,109 @@ VkResult radv_GetPipelineExecutablePropertiesKHR(
         return result;
  }
  
+VkResult radv_GetPipelineExecutableStatisticsKHR(
+    VkDevice                                    _device,
+    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
+    uint32_t*                                   pStatisticCount,
+    VkPipelineExecutableStatisticKHR*           pStatistics)
+{ /* Reports nine per-shader statistics for one pipeline executable; writes at most *pStatisticCount entries and updates *pStatisticCount. */
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+       gl_shader_stage stage;
+       struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); /* NOTE(review): shader is dereferenced below without a NULL check - confirm the lookup cannot fail for a valid index */
+
+       enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+       unsigned lds_increment = chip_class >= GFX7 ? 512 : 256; /* multiplier applied to config.lds_size for the "LDS size" statistic below */
+       unsigned max_waves = radv_get_max_waves(device, shader, stage);
+
+       VkPipelineExecutableStatisticKHR *s = pStatistics; /* cursor: next entry to fill */
+       VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0); /* one past the caller's capacity; equals s when pStatistics is NULL, so nothing is written */
+       VkResult result = VK_SUCCESS;
+
+       if (s < end) { /* fill the entry only if it fits in the caller's array (same pattern for every statistic) */
+               desc_copy(s->name, "SGPRs");
+               desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->config.num_sgprs;
+       }
+       ++s; /* always advance, even when nothing was written, so s ends up counting every statistic */
+
+       if (s < end) {
+               desc_copy(s->name, "VGPRs");
+               desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->config.num_vgprs;
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "Spilled SGPRs");
+               desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->config.spilled_sgprs;
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "Spilled VGPRs");
+               desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->config.spilled_vgprs;
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "PrivMem VGPRs");
+               desc_copy(s->description, "Number of VGPRs stored in private memory per subgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->info.private_mem_vgprs;
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "Code size");
+               desc_copy(s->description, "Code size in bytes");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->code_size;
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "LDS size");
+               desc_copy(s->description, "LDS size in bytes per workgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->config.lds_size * lds_increment; /* presumably lds_size is stored in lds_increment-byte units - confirm */
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "Scratch size");
+               desc_copy(s->description, "Private memory in bytes per subgroup");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = shader->config.scratch_bytes_per_wave;
+       }
+       ++s;
+
+       if (s < end) {
+               desc_copy(s->name, "Subgroups per SIMD");
+               desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
+               s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+               s->value.u64 = max_waves;
+       }
+       ++s;
+
+       if (!pStatistics) /* count query: report how many statistics exist */
+               *pStatisticCount = s - pStatistics; /* NOTE(review): s was advanced from a NULL base here - confirm this pointer arithmetic is acceptable */
+       else if (s > end) { /* caller's array was too small: report the entries actually written */
+               *pStatisticCount = end - pStatistics;
+               result = VK_INCOMPLETE;
+       } else { /* everything fit: report the number of statistics written */
+               *pStatisticCount = s - pStatistics;
+       }
+
+       return result;
+}
+
  static VkResult radv_copy_representation(void *data, size_t *data_size, const char *src)
  {
         size_t total_size  = strlen(src) + 1;