#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Support.h>
#include "sid.h"
#include "gfx9d.h"
sizeof(*module) + pCreateInfo->codeSize, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (module == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
module->nir = NULL;
module->size = pCreateInfo->codeSize;
}
void
-radv_optimize_nir(struct nir_shader *shader)
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
{
bool progress;
if (shader->options->max_unroll_iterations) {
NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
}
- } while (progress);
+ } while (progress && !optimize_conservatively);
NIR_PASS(progress, shader, nir_opt_shrink_load);
NIR_PASS(progress, shader, nir_opt_move_load_ubo);
struct radv_shader_module *module,
const char *entrypoint_name,
gl_shader_stage stage,
- const VkSpecializationInfo *spec_info)
+ const VkSpecializationInfo *spec_info,
+ const VkPipelineCreateFlags flags)
{
- if (strcmp(entrypoint_name, "main") != 0) {
- radv_finishme("Multiple shaders per module not really supported");
- }
-
nir_shader *nir;
nir_function *entry_point;
if (module->nir) {
nir_lower_tex(nir, &tex_options);
nir_lower_vars_to_ssa(nir);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_GEOMETRY) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, true);
+ } else if (nir->info.stage == MESA_SHADER_TESS_EVAL||
+ nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, false);
+ }
+
+ nir_split_var_copies(nir);
nir_lower_var_copies(nir);
+
nir_lower_global_vars_to_local(nir);
nir_remove_dead_variables(nir, nir_var_local);
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
.lower_vote_eq_to_ballot = 1,
});
- radv_optimize_nir(nir);
+ if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+ radv_optimize_nir(nir, false);
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
* considered too large for unrolling.
*/
ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
- radv_optimize_nir(nir);
+ radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
return nir;
}
struct radv_shader_info *info = &variant->info.info;
unsigned vgpr_comp_cnt = 0;
- if (scratch_enabled && !device->llvm_supports_spill)
- radv_finishme("shader scratch support only available with LLVM 4.0");
-
variant->code_size = binary->code_size;
variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
S_00B12C_SCRATCH_EN(scratch_enabled);
memcpy(ptr, binary->code, binary->code_size);
}
+static void radv_init_llvm_target()
+{
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetMC();
+ LLVMInitializeAMDGPUAsmPrinter();
+
+ /* For inline assembly. */
+ LLVMInitializeAMDGPUAsmParser();
+
+ /* Workaround for bug in llvm 4.0 that causes image intrinsics
+ * to disappear.
+ * https://reviews.llvm.org/D26348
+ *
+ * Workaround for bug in llvm that causes the GPU to hang in presence
+ * of nested loops because there is an exec mask issue. The proper
+ * solution is to fix LLVM but this might require a bunch of work.
+ * https://bugs.llvm.org/show_bug.cgi?id=37744
+ *
+ * "mesa" is the prefix for error messages.
+ */
+ const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false",
+ "-amdgpu-skip-threshold=1" };
+ LLVMParseCommandLineOptions(3, argv, NULL);
+}
+
+static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+
+static LLVMTargetRef radv_get_llvm_target(const char *triple)
+{
+ LLVMTargetRef target = NULL;
+ char *err_message = NULL;
+
+ call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
+
+ if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
+ fprintf(stderr, "Cannot find target for triple %s ", triple);
+ if (err_message) {
+ fprintf(stderr, "%s\n", err_message);
+ }
+ LLVMDisposeMessage(err_message);
+ return NULL;
+ }
+ return target;
+}
+
+static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family family,
+ enum ac_target_machine_options tm_options,
+ const char **out_triple)
+{
+ assert(family >= CHIP_TAHITI);
+ char features[256];
+ const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
+ LLVMTargetRef target = radv_get_llvm_target(triple);
+
+ snprintf(features, sizeof(features),
+ "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
+ tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
+ tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+ tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+ tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
+
+ LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
+ target,
+ triple,
+ ac_get_llvm_processor_name(family),
+ features,
+ LLVMCodeGenLevelDefault,
+ LLVMRelocDefault,
+ LLVMCodeModelDefault);
+
+ if (out_triple)
+ *out_triple = triple;
+ return tm;
+}
+
static struct radv_shader_variant *
shader_variant_create(struct radv_device *device,
struct radv_shader_module *module,
options->family = chip_family;
options->chip_class = device->physical_device->rad_info.chip_class;
- options->dump_shader = radv_can_dump_shader(device, module);
+ options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
options->dump_preoptir = options->dump_shader &&
device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
options->record_llvm_ir = device->keep_shader_info;
options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
+ options->address32_hi = device->physical_device->rad_info.address32_hi;
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
- tm = ac_create_target_machine(chip_family, tm_options, NULL);
+ tm = radv_create_target_machine(chip_family, tm_options, NULL);
if (gs_copy_shader) {
assert(shader_count == 1);
options.key = *key;
options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
- options.supports_spill = device->llvm_supports_spill;
+ options.supports_spill = true;
return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
&options, false, code_out, code_size_out);
/* Spec doesn't indicate what to do if the stage is invalid, so just
* return no info for this. */
if (!variant)
- return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
+ return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
switch (infoType) {
case VK_SHADER_INFO_TYPE_STATISTICS_AMD: