*/
nir_lower_var_copies(nir);
+ /* Lower large variables that are always constant with load_constant
+ * intrinsics, which get turned into PC-relative loads from a data
+ * section next to the shader.
+ */
+ NIR_PASS_V(nir, nir_opt_large_constants,
+ glsl_get_natural_size_align_bytes, 16);
+
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
* bloat the instruction count of the loop and cause it to be
return nir;
}
-static void mark_16bit_fs_input(struct radv_shader_variant_info *shader_info,
- const struct glsl_type *type,
- int location)
-{
- if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
- unsigned attrib_count = glsl_count_attribute_slots(type, false);
- if (glsl_type_is_16bit(type)) {
- shader_info->fs.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
- }
- } else if (glsl_type_is_array(type)) {
- unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
- for (unsigned i = 0; i < glsl_get_length(type); ++i) {
- mark_16bit_fs_input(shader_info, glsl_get_array_element(type), location + i * stride);
- }
- } else {
- assert(glsl_type_is_struct_or_ifc(type));
- for (unsigned i = 0; i < glsl_get_length(type); i++) {
- mark_16bit_fs_input(shader_info, glsl_get_struct_field(type, i), location);
- location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
- }
- }
-}
-
-static void
-handle_fs_input_decl(struct radv_shader_variant_info *shader_info,
- struct nir_variable *variable)
-{
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
-
- if (variable->data.compact) {
- unsigned component_count = variable->data.location_frac +
- glsl_get_length(variable->type);
- attrib_count = (component_count + 3) / 4;
- } else {
- mark_16bit_fs_input(shader_info, variable->type,
- variable->data.driver_location);
- }
-
- uint64_t mask = ((1ull << attrib_count) - 1);
-
- if (variable->data.interpolation == INTERP_MODE_FLAT)
- shader_info->fs.flat_shaded_mask |= mask << variable->data.driver_location;
-
- if (variable->data.location >= VARYING_SLOT_VAR0)
- shader_info->fs.input_mask |= mask << (variable->data.location - VARYING_SLOT_VAR0);
-}
-
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
return progress;
}
-/* Gather information needed to setup the vs<->ps linking registers in
- * radv_pipeline_generate_ps_inputs().
- */
-
-static void
-handle_fs_inputs(nir_shader *nir, struct radv_shader_variant_info *shader_info)
-{
- shader_info->fs.num_interp = nir->num_inputs;
-
- nir_foreach_variable(variable, &nir->inputs)
- handle_fs_input_decl(shader_info, variable);
-}
-
-static void
-lower_fs_io(nir_shader *nir, struct radv_shader_variant_info *shader_info)
+void
+radv_lower_fs_io(nir_shader *nir)
{
NIR_PASS_V(nir, lower_view_index);
nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs,
MESA_SHADER_FRAGMENT);
- handle_fs_inputs(nir, shader_info);
-
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
/* This pass needs actual constants */
}
}
-static void radv_init_llvm_target()
-{
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetMC();
- LLVMInitializeAMDGPUAsmPrinter();
-
- /* For inline assembly. */
- LLVMInitializeAMDGPUAsmParser();
-
- /* Workaround for bug in llvm 4.0 that causes image intrinsics
- * to disappear.
- * https://reviews.llvm.org/D26348
- *
- * Workaround for bug in llvm that causes the GPU to hang in presence
- * of nested loops because there is an exec mask issue. The proper
- * solution is to fix LLVM but this might require a bunch of work.
- * https://bugs.llvm.org/show_bug.cgi?id=37744
- *
- * "mesa" is the prefix for error messages.
- */
- const char *argv[2] = { "mesa", "-simplifycfg-sink-common=false" };
- LLVMParseCommandLineOptions(2, argv, NULL);
-}
-
-static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
-
-static void radv_init_llvm_once(void)
-{
- call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
-}
-
struct radv_shader_variant *
radv_shader_variant_create(struct radv_device *device,
const struct radv_shader_binary *binary,
}
variant->code_size = rtld_binary.rx_size;
+ variant->exec_size = rtld_binary.exec_size;
} else {
assert(binary->type == RADV_BINARY_TYPE_LEGACY);
config = ((struct radv_shader_binary_legacy *)binary)->config;
- variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
+ variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
+ variant->exec_size = variant->code_size;
}
variant->info = binary->variant_info;
struct radv_shader_variant_info variant_info = {0};
bool thread_compiler;
- if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
- lower_fs_io(shaders[0], &variant_info);
-
options->family = chip_family;
options->chip_class = device->physical_device->rad_info.chip_class;
options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
tm_options |= AC_TM_NO_LOAD_STORE_OPT;
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
- radv_init_llvm_once();
+ ac_init_llvm_once();
radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options,
if (stage == MESA_SHADER_FRAGMENT) {
lds_per_wave = conf->lds_size * lds_increment +
- align(variant->info.fs.num_interp * 48,
+ align(variant->info.info.ps.num_interp * 48,
lds_increment);
} else if (stage == MESA_SHADER_COMPUTE) {
unsigned max_workgroup_size =
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs,
- variant->info.private_mem_vgprs, variant->code_size,
+ variant->info.private_mem_vgprs, variant->exec_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves);
}