radv: fix dumping disassembly with RADV_DEBUG=shaders

[mesa.git] / src / amd / vulkan / radv_shader.c
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c

index d055b6c96ca253aa928fe52bdbfc109a9eb024e4..736388c555cb871f30b099cd990404375c6a2cd4 100644 (file)
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -194,12 +194,12 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
                  }
  
                  NIR_PASS(progress, shader, nir_opt_undef);
-                NIR_PASS(progress, shader, nir_opt_conditional_discard);
                  if (shader->options->max_unroll_iterations) {
                          NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
                  }
          } while (progress && !optimize_conservatively);
  
+       NIR_PASS(progress, shader, nir_opt_conditional_discard);
          NIR_PASS(progress, shader, nir_opt_shrink_load);
          NIR_PASS(progress, shader, nir_opt_move_load_ubo);
  }
@@ -270,6 +270,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                 .int64_atomics = true,
                                 .multiview = true,
                                 .physical_storage_buffer_address = true,
+                               .post_depth_coverage = true,
                                 .runtime_descriptor_array = true,
                                 .shader_viewport_index_layer = true,
                                 .stencil_export = true,
@@ -796,7 +797,7 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                 break;
         }
  
-       if (pdevice->rad_info.chip_class >= GFX10 &&
+       if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
             (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY)) {
                 unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
                 gl_shader_stage es_stage = stage;
@@ -809,7 +810,8 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                 } else if (es_stage == MESA_SHADER_TESS_EVAL) {
                         bool enable_prim_id = info->tes.export_prim_id || info->info.uses_prim_id;
                         es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
-               }
+               } else
+                       unreachable("Unexpected ES shader stage");
  
                 bool tes_triangles = stage == MESA_SHADER_TESS_EVAL &&
                         info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
@@ -826,7 +828,8 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                 config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
                                      S_00B228_WGP_MODE(1);
                 config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
-                                    S_00B22C_LDS_SIZE(config_in->lds_size);
+                                    S_00B22C_LDS_SIZE(config_in->lds_size) |
+                                    S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
         } else if (pdevice->rad_info.chip_class >= GFX9 &&
                    stage == MESA_SHADER_GEOMETRY) {
                 unsigned es_type = info->gs.es_type;
@@ -921,15 +924,34 @@ radv_shader_variant_create(struct radv_device *device,
                 unsigned num_lds_symbols = 0;
                 const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
                 size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
+               unsigned esgs_ring_size = 0;
  
                 if (device->physical_device->rad_info.chip_class >= GFX9 &&
                     binary->stage == MESA_SHADER_GEOMETRY && !binary->is_gs_copy_shader) {
+                       /* TODO: Do not hardcode this value */
+                       esgs_ring_size = 32 * 1024;
+               }
+
+               if (binary->variant_info.is_ngg) {
+                       /* GS stores Primitive IDs into LDS at the address
+                        * corresponding to the ES thread of the provoking
+                        * vertex. All ES threads load and export PrimitiveID
+                        * for their thread.
+                        */
+                       if (binary->stage == MESA_SHADER_VERTEX &&
+                           binary->variant_info.vs.export_prim_id) {
+                               /* TODO: Do not harcode this value */
+                               esgs_ring_size = 256 /* max_out_verts */ * 4;
+                       }
+               }
+
+               if (esgs_ring_size) {
                         /* We add this symbol even on LLVM <= 8 to ensure that
                          * shader->config.lds_size is set correctly below.
                          */
                         struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
                         sym->name = "esgs_ring";
-                       sym->size = 32 * 1024;
+                       sym->size = esgs_ring_size;
                         sym->align = 64 * 1024;
  
                         /* Make sure to have LDS space for NGG scratch. */
@@ -940,6 +962,7 @@ radv_shader_variant_create(struct radv_device *device,
                 struct ac_rtld_open_info open_info = {
                         .info = &device->physical_device->rad_info,
                         .shader_type = binary->stage,
+                       .wave_size = 64,
                         .num_parts = 1,
                         .elf_ptrs = &elf_data,
                         .elf_sizes = &elf_size,
@@ -990,18 +1013,21 @@ radv_shader_variant_create(struct radv_device *device,
                         return NULL;
                 }
  
-               const char *disasm_data;
-               size_t disasm_size;
-               if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
-                       radv_shader_variant_destroy(device, variant);
-                       ac_rtld_close(&rtld_binary);
-                       return NULL;
-               }
+               if (device->keep_shader_info ||
+                   (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) {
+                       const char *disasm_data;
+                       size_t disasm_size;
+                       if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
+                               radv_shader_variant_destroy(device, variant);
+                               ac_rtld_close(&rtld_binary);
+                               return NULL;
+                       }
  
-               variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
-               variant->disasm_string = malloc(disasm_size + 1);
-               memcpy(variant->disasm_string, disasm_data, disasm_size);
-               variant->disasm_string[disasm_size] = 0;
+                       variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
+                       variant->disasm_string = malloc(disasm_size + 1);
+                       memcpy(variant->disasm_string, disasm_data, disasm_size);
+                       variant->disasm_string[disasm_size] = 0;
+               }
  
                 ac_rtld_close(&rtld_binary);
         } else {
@@ -1211,7 +1237,7 @@ generate_shader_stats(struct radv_device *device,
                                      lds_increment);
         } else if (stage == MESA_SHADER_COMPUTE) {
                 unsigned max_workgroup_size =
-                               radv_nir_get_max_workgroup_size(chip_class, variant->nir);
+                       radv_nir_get_max_workgroup_size(chip_class, stage, variant->nir);
                 lds_per_wave = (conf->lds_size * lds_increment) /
                                DIV_ROUND_UP(max_workgroup_size, 64);
         }