From: Samuel Pitoiset Date: Mon, 3 Jun 2019 13:09:38 +0000 (+0200) Subject: ac,radv: do not emit vec3 for raw load/store on SI X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=33f4e04d5a7396886f0b2ea6463d295ea038f8c5;p=mesa.git ac,radv: do not emit vec3 for raw load/store on SI Raw vec3 load/store is unsupported on SI; only format load/store supports vec3. Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with LLVM 9+") Signed-off-by: Samuel Pitoiset Reviewed-by: Marek Olšák --- diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 613c1eef942..b4d7eb0d0e3 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1167,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx, args[idx++] = voffset ? voffset : ctx->i32_0; args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; + unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, { /* Split 3 channel stores, because only LLVM 9+ support 3-channel * intrinsics. */ - if (num_channels == 3 && HAVE_LLVM < 0x900) { + if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) { LLVMValueRef v[3], v01; for (int i = 0; i < 3; i++) { @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, args[idx++] = voffset ? voffset : ctx->i32_0; args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; + unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 
4 : num_channels; const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, if (num_channels == 1) return result[0]; - if (num_channels == 3 && HAVE_LLVM < 0x900) + if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) result[num_channels++] = LLVMGetUndef(ctx->f32); return ac_build_gather_values(ctx, result, num_channels); } @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; + unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx, args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; + unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? 
"struct" : "raw"; char name[256], type_name[8]; diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 18102be5207..16941e07b17 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -147,6 +147,17 @@ bool ac_compile_module_to_binary(struct ac_compiler_passes *p, LLVMModuleRef mod void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr); void ac_enable_global_isel(LLVMTargetMachineRef tm); +static inline bool +ac_has_vec3_support(enum chip_class chip, bool use_format) +{ + if (chip == GFX6 && !use_format) { + /* GFX6 only supports vec3 with load/store format. */ + return false; + } + + return HAVE_LLVM >= 0x900; +} + #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6b37cecebd5..833b1e54abc 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, /* Due to an LLVM limitation with LLVM < 9, split 3-element * writes into a 2-element and a 1-element write. */ - if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < 0x900)) { + if (count == 3 && + (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) { writemask |= 1 << (start + 2); count = 2; } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 3e5d40e4c9c..755b7cb0246 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2764,7 +2764,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx, /* fall through */ case 4: /* as v4i32 */ vdata = ac_build_gather_values(&ctx->ac, out, - HAVE_LLVM < 0x900 ? + !ac_has_vec3_support(ctx->ac.chip_class, false) ? util_next_power_of_two(num_comps) : num_comps); break;