From: Samuel Pitoiset Date: Thu, 2 May 2019 14:15:03 +0000 (+0200) Subject: ac,radv: remove the vec3 restriction with LLVM 9+ X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6970a9a6ca9ea26a9e6cf968d3fec3565bef2e9b;p=mesa.git ac,radv: remove the vec3 restriction with LLVM 9+ This change requires LLVM r356755. 32706 shaders in 16744 tests Totals: SGPRS: 1448848 -> 1455984 (0.49 %) VGPRS: 1016684 -> 1016220 (-0.05 %) Spilled SGPRs: 25871 -> 25815 (-0.22 %) Spilled VGPRs: 122 -> 122 (0.00 %) Scratch size: 11964 -> 11956 (-0.07 %) dwords per thread Code Size: 55324500 -> 55301152 (-0.04 %) bytes Max Waves: 235660 -> 235586 (-0.03 %) Totals from affected shaders: SGPRS: 293704 -> 300840 (2.43 %) VGPRS: 246716 -> 246252 (-0.19 %) Spilled SGPRs: 159 -> 103 (-35.22 %) Scratch size: 188 -> 180 (-4.26 %) dwords per thread Code Size: 8653664 -> 8630316 (-0.27 %) bytes Max Waves: 60811 -> 60737 (-0.12 %) Signed-off-by: Samuel Pitoiset Reviewed-by: Marek Olšák --- diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 3ad9bb34805..613c1eef942 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -84,6 +84,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->v3i32 = LLVMVectorType(ctx->i32, 3); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v2f32 = LLVMVectorType(ctx->f32, 2); + ctx->v3f32 = LLVMVectorType(ctx->f32, 3); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); @@ -1166,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx, args[idx++] = voffset ? voffset : ctx->i32_0; args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = num_channels == 3 ? 4 : num_channels; + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? 
"struct" : "raw"; char name[256], type_name[8]; @@ -1224,9 +1225,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, bool writeonly_memory, bool swizzle_enable_hint) { - /* Split 3 channel stores, becase LLVM doesn't support 3-channel + /* Split 3 channel stores, because only LLVM 9+ support 3-channel * intrinsics. */ - if (num_channels == 3) { + if (num_channels == 3 && HAVE_LLVM < 0x900) { LLVMValueRef v[3], v01; for (int i = 0; i < 3; i++) { @@ -1353,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, args[idx++] = voffset ? voffset : ctx->i32_0; args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = num_channels == 3 ? 4 : num_channels; + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; @@ -1419,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, if (num_channels == 1) return result[0]; - if (num_channels == 3) + if (num_channels == 3 && HAVE_LLVM < 0x900) result[num_channels++] = LLVMGetUndef(ctx->f32); return ac_build_gather_values(ctx, result, num_channels); } @@ -1511,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); - unsigned func = num_channels == 3 ? 4 : num_channels; + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; @@ -2010,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx, args[idx++] = soffset ? soffset : ctx->i32_0; args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 
2 : 0), 0); - unsigned func = num_channels == 3 ? 4 : num_channels; + unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : num_channels; const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index c284526727a..5ed9a112457 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -71,6 +71,7 @@ struct ac_llvm_context { LLVMTypeRef v3i32; LLVMTypeRef v4i32; LLVMTypeRef v2f32; + LLVMTypeRef v3f32; LLVMTypeRef v4f32; LLVMTypeRef v8i32; diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 265e3b636c4..51f92a6b062 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1574,9 +1574,9 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, u_bit_scan_consecutive_range(&writemask, &start, &count); - /* Due to an LLVM limitation, split 3-element writes - * into a 2-element and a 1-element write. */ - if (count == 3) { + /* Due to an LLVM limitation with LLVM < 9, split 3-element + * writes into a 2-element and a 1-element write. */ + if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < 0x900)) { writemask |= 1 << (start + 2); count = 2; } @@ -1618,6 +1618,9 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, case 16: /* v4f32 */ data_type = ctx->ac.v4f32; break; + case 12: /* v3f32 */ + data_type = ctx->ac.v3f32; + break; case 8: /* v2f32 */ data_type = ctx->ac.v2f32; break; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 341f6388f32..dca4bebcdd1 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2768,7 +2768,9 @@ radv_emit_stream_output(struct radv_shader_context *ctx, /* fall through */ case 4: /* as v4i32 */ vdata = ac_build_gather_values(&ctx->ac, out, - util_next_power_of_two(num_comps)); + HAVE_LLVM < 0x900 ? + util_next_power_of_two(num_comps) : + num_comps); break; }