}
NIR_PASS(progress, shader, nir_opt_undef);
- NIR_PASS(progress, shader, nir_opt_conditional_discard);
if (shader->options->max_unroll_iterations) {
NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
}
} while (progress && !optimize_conservatively);
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
NIR_PASS(progress, shader, nir_opt_shrink_load);
NIR_PASS(progress, shader, nir_opt_move_load_ubo);
}
.int64_atomics = true,
.multiview = true,
.physical_storage_buffer_address = true,
+ .post_depth_coverage = true,
.runtime_descriptor_array = true,
.shader_viewport_index_layer = true,
.stencil_export = true,
break;
}
- if (pdevice->rad_info.chip_class >= GFX10 &&
+ if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY)) {
unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
gl_shader_stage es_stage = stage;
} else if (es_stage == MESA_SHADER_TESS_EVAL) {
bool enable_prim_id = info->tes.export_prim_id || info->info.uses_prim_id;
es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
- }
+ } else
+ unreachable("Unexpected ES shader stage");
bool tes_triangles = stage == MESA_SHADER_TESS_EVAL &&
info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
S_00B228_WGP_MODE(1);
config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_LDS_SIZE(config_in->lds_size);
+ S_00B22C_LDS_SIZE(config_in->lds_size) |
+ S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
} else if (pdevice->rad_info.chip_class >= GFX9 &&
stage == MESA_SHADER_GEOMETRY) {
unsigned es_type = info->gs.es_type;
unsigned num_lds_symbols = 0;
const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
+ unsigned esgs_ring_size = 0;
if (device->physical_device->rad_info.chip_class >= GFX9 &&
binary->stage == MESA_SHADER_GEOMETRY && !binary->is_gs_copy_shader) {
+ /* TODO: Do not hardcode this value */
+ esgs_ring_size = 32 * 1024;
+ }
+
+ if (binary->variant_info.is_ngg) {
+ /* GS stores Primitive IDs into LDS at the address
+ * corresponding to the ES thread of the provoking
+ * vertex. All ES threads load and export PrimitiveID
+ * for their thread.
+ */
+ if (binary->stage == MESA_SHADER_VERTEX &&
+ binary->variant_info.vs.export_prim_id) {
+ /* TODO: Do not harcode this value */
+ esgs_ring_size = 256 /* max_out_verts */ * 4;
+ }
+ }
+
+ if (esgs_ring_size) {
/* We add this symbol even on LLVM <= 8 to ensure that
* shader->config.lds_size is set correctly below.
*/
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring";
- sym->size = 32 * 1024;
+ sym->size = esgs_ring_size;
sym->align = 64 * 1024;
/* Make sure to have LDS space for NGG scratch. */
struct ac_rtld_open_info open_info = {
.info = &device->physical_device->rad_info,
.shader_type = binary->stage,
+ .wave_size = 64,
.num_parts = 1,
.elf_ptrs = &elf_data,
.elf_sizes = &elf_size,
return NULL;
}
- const char *disasm_data;
- size_t disasm_size;
- if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
- radv_shader_variant_destroy(device, variant);
- ac_rtld_close(&rtld_binary);
- return NULL;
- }
+ if (device->keep_shader_info ||
+ (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) {
+ const char *disasm_data;
+ size_t disasm_size;
+ if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
+ radv_shader_variant_destroy(device, variant);
+ ac_rtld_close(&rtld_binary);
+ return NULL;
+ }
- variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
- variant->disasm_string = malloc(disasm_size + 1);
- memcpy(variant->disasm_string, disasm_data, disasm_size);
- variant->disasm_string[disasm_size] = 0;
+ variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
+ variant->disasm_string = malloc(disasm_size + 1);
+ memcpy(variant->disasm_string, disasm_data, disasm_size);
+ variant->disasm_string[disasm_size] = 0;
+ }
ac_rtld_close(&rtld_binary);
} else {
lds_increment);
} else if (stage == MESA_SHADER_COMPUTE) {
unsigned max_workgroup_size =
- radv_nir_get_max_workgroup_size(chip_class, variant->nir);
+ radv_nir_get_max_workgroup_size(chip_class, stage, variant->nir);
lds_per_wave = (conf->lds_size * lds_increment) /
DIV_ROUND_UP(max_workgroup_size, 64);
}