radv: add an assertion in radv_gfx10_compute_bin_size()

[mesa.git] / src / amd / vulkan / radv_pipeline.c
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c

index 5036fa69d2084e269ecb4f0f44f0c6ac5f42c15c..9c83e22fda2c197bda5f2df7b43f0898614cccf8 100644 (file)
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
                 hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
         if (device->physical_device->ge_wave_size == 32)
                 hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+       if (device->physical_device->use_aco)
+               hash_flags |= RADV_HASH_SHADER_ACO;
         return hash_flags;
  }
  
@@ -2261,6 +2263,9 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
         if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
                 radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
  
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+               key.topology = pCreateInfo->pInputAssemblyState->topology;
+
         return key;
  }
  
@@ -2290,6 +2295,7 @@ radv_fill_shader_keys(struct radv_device *device,
                 keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
                 keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
         }
+       keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
  
         if (nir[MESA_SHADER_TESS_CTRL]) {
                 keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
@@ -2307,9 +2313,7 @@ radv_fill_shader_keys(struct radv_device *device,
                         keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
         }
  
-       if (device->physical_device->rad_info.chip_class >= GFX10 &&
-           device->physical_device->rad_info.family != CHIP_NAVI14 &&
-           !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
+       if (device->physical_device->use_ngg) {
                 if (nir[MESA_SHADER_TESS_CTRL]) {
                         keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
                 } else {
@@ -2551,6 +2555,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
                            (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
  }
  
+static /* Per-stage ACO filter; callers AND the result with use_aco. */
+bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+{
+       return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) || /* VS: only without GS and tess */
+              stage == MESA_SHADER_FRAGMENT || /* FS: regardless of has_gs/has_ts */
+              stage == MESA_SHADER_COMPUTE; /* CS: regardless of has_gs/has_ts */
+}
+
  static
  void radv_create_shaders(struct radv_pipeline *pipeline,
                           struct radv_device *device,
@@ -2613,6 +2625,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                 modules[MESA_SHADER_FRAGMENT] = &fs_m;
         }
  
+       bool has_gs = modules[MESA_SHADER_GEOMETRY];
+       bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+       bool use_aco = device->physical_device->use_aco;
+
         for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                 const VkPipelineShaderStageCreateInfo *stage = pStages[i];
  
@@ -2621,10 +2637,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
  
                 radv_start_feedback(stage_feedbacks[i]);
  
+               bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                 nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                     stage ? stage->pName : "main", i,
                                                     stage ? stage->pSpecializationInfo : NULL,
-                                                   flags, pipeline->layout);
+                                                   flags, pipeline->layout, aco);
  
                 /* We don't want to alter meta shaders IR directly so clone it
                  * first.
@@ -2651,7 +2668,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                                            nir_lower_non_uniform_ssbo_access |
                                            nir_lower_non_uniform_texture_access |
                                            nir_lower_non_uniform_image_access);
-                       NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+                       if (!aco)
+                               NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
                 }
  
                 if (radv_can_dump_shader(device, modules[i], false))
@@ -2690,11 +2710,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                 if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
                         radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
  
+                       bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
                         pipeline->shaders[MESA_SHADER_FRAGMENT] =
                                radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
                                                           pipeline->layout, keys + MESA_SHADER_FRAGMENT,
                                                           infos + MESA_SHADER_FRAGMENT,
-                                                         keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+                                                         keep_executable_info, aco,
+                                                         &binaries[MESA_SHADER_FRAGMENT]);
  
                         radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
                 }
@@ -2725,7 +2747,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                         pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
                                                                                               pipeline->layout,
                                                                                               &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
-                                                                                             &binaries[MESA_SHADER_TESS_CTRL]);
+                                                                                             false, &binaries[MESA_SHADER_TESS_CTRL]);
  
                         radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
                 }
@@ -2744,7 +2766,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                         pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
                                                                                              pipeline->layout,
                                                                                              &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
-                                                                                            &binaries[MESA_SHADER_GEOMETRY]);
+                                                                                            false, &binaries[MESA_SHADER_GEOMETRY]);
  
                         radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
                 }
@@ -2763,10 +2785,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
  
                         radv_start_feedback(stage_feedbacks[i]);
  
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                         pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
                                                                           pipeline->layout,
                                                                           keys + i, infos + i,keep_executable_info,
-                                                                         &binaries[i]);
+                                                                         aco, &binaries[i]);
  
                         radv_stop_feedback(stage_feedbacks[i], false);
                 }
@@ -3203,6 +3226,7 @@ radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipe
                         color_bytes_per_pixel += vk_format_get_blocksize(format);
  
                         if (total_samples > 1) {
+                               assert(samples_log <= 3);
                                 const unsigned fmask_array[] = {0, 1, 1, 4};
                                 fmask_bytes_per_pixel += fmask_array[samples_log];
                         }
@@ -3372,6 +3396,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
         const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
         RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
         struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+       struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
         struct radv_render_pass_attachment *attachment = NULL;
         uint32_t db_depth_control = 0, db_stencil_control = 0;
         uint32_t db_render_control = 0, db_render_override2 = 0;
@@ -3420,7 +3445,8 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
         db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                               S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
  
-       if (!pCreateInfo->pRasterizationState->depthClampEnable) {
+       if (!pCreateInfo->pRasterizationState->depthClampEnable &&
+           ps->info.ps.writes_z) {
                 /* From VK_EXT_depth_range_unrestricted spec:
                  *
                  * "The behavior described in Primitive Clipping still applies.
@@ -4810,8 +4836,8 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
         unsigned max_waves_per_sh = 0;
         uint64_t va;
  
-       pipeline->cs.buf = malloc(20 * 4);
-       pipeline->cs.max_dw = 20;
+       pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 22 : 20;
+       pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4);
  
         compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
         va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
@@ -4823,6 +4849,9 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
         radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
         radeon_emit(&pipeline->cs, compute_shader->config.rsrc1);
         radeon_emit(&pipeline->cs, compute_shader->config.rsrc2);
+       if (device->physical_device->rad_info.chip_class >= GFX10) {
+               radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
+       }
  
         radeon_set_sh_reg(&pipeline->cs, R_00B860_COMPUTE_TMPRING_SIZE,
                           S_00B860_WAVES(pipeline->max_waves) |
@@ -5220,12 +5249,17 @@ VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
         }
         ++p;
  
-       /* LLVM IR */
+       /* backend IR */
         if (p < end) {
                 p->isText = true;
-               desc_copy(p->name, "LLVM IR");
-               desc_copy(p->description, "The LLVM IR after some optimizations");
-               if (radv_copy_representation(p->pData, &p->dataSize, shader->llvm_ir_string) != VK_SUCCESS)
+               if (shader->aco_used) {
+                       desc_copy(p->name, "ACO IR");
+                       desc_copy(p->description, "The ACO IR after some optimizations");
+               } else {
+                       desc_copy(p->name, "LLVM IR");
+                       desc_copy(p->description, "The LLVM IR after some optimizations");
+               }
+               if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
                         result = VK_INCOMPLETE;
         }
         ++p;