From: Bas Nieuwenhuizen Date: Mon, 12 Nov 2018 21:42:36 +0000 (+0100) Subject: radv: Use structured intrinsics instead of indexing workaround for GFX9. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dd0172e865f3e452554a90df33a8464efc2ba235;p=mesa.git radv: Use structured intrinsics instead of indexing workaround for GFX9. These force the index to be used in the instruction so we don't need the workaround. Totals: SGPRS: 1321642 -> 1321802 (0.01 %) VGPRS: 943664 -> 943788 (0.01 %) Spilled SGPRs: 28468 -> 28480 (0.04 %) Spilled VGPRs: 88 -> 89 (1.14 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 80 -> 80 (0.00 %) dwords per thread Code Size: 52415292 -> 52338932 (-0.15 %) bytes LDS: 400 -> 400 (0.00 %) blocks Max Waves: 233903 -> 233803 (-0.04 %) Wait states: 0 -> 0 (0.00 %) Totals from affected shaders: SGPRS: 238344 -> 238504 (0.07 %) VGPRS: 232732 -> 232856 (0.05 %) Spilled SGPRs: 13125 -> 13137 (0.09 %) Spilled VGPRs: 88 -> 89 (1.14 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 80 -> 80 (0.00 %) dwords per thread Code Size: 15752712 -> 15676352 (-0.48 %) bytes LDS: 139 -> 139 (0.00 %) blocks Max Waves: 31680 -> 31580 (-0.32 %) Wait states: 0 -> 0 (0.00 %) Reviewed-by: Samuel Pitoiset --- diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 1392ec0f238..22245aadba1 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1161,6 +1161,47 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx, ac_get_load_intr_attribs(can_speculate)); } +static LLVMValueRef +ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + unsigned num_channels, + bool glc, + bool slc, + bool can_speculate, + bool use_format, + bool structurized) +{ + LLVMValueRef args[5]; + int idx = 0; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); + unsigned func = CLAMP(num_channels, 1, 3) - 1; + + LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32}; + const char *type_names[] = {"f32", "v2f32", "v4f32"}; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256]; + + if (use_format) { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", + indexing_kind, type_names[func]); + } else { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", + indexing_kind, type_names[func]); + } + + return ac_build_intrinsic(ctx, name, types[func], args, + idx, + ac_get_load_intr_attribs(can_speculate)); +} + LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -1218,6 +1259,11 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, bool glc, bool can_speculate) { + if (HAVE_LLVM >= 0x800) { + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, + num_channels, glc, false, + can_speculate, true, true); + } return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, num_channels, glc, false, can_speculate, true); @@ -1231,6 +1277,12 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, bool glc, bool can_speculate) { + if (HAVE_LLVM >= 0x800) { + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, + num_channels, glc, false, + can_speculate, true, true); + } + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, ""); stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c950b81dca2..a19e66fe2a0 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2387,10 +2387,17 @@ static void visit_image_store(struct ac_nir_context *ctx, params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ - params[4] = glc; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, - params, 6, 0); + if (HAVE_LLVM >= 0x800) { + params[4] = ctx->ac.i32_0; /* soffset */ + params[5] = glc ? ctx->ac.i32_1 : ctx->ac.i32_0; + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.struct.buffer.store.format.v4f32", ctx->ac.voidt, + params, 6, 0); + } else { + params[4] = glc; /* glc */ + params[5] = ctx->ac.i1false; /* slc */ + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, + params, 6, 0); + } } else { struct ac_image_args args = {}; args.opcode = ac_image_store; @@ -2470,10 +2477,18 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[param_count++] = ctx->ac.i32_0; /* voffset */ - params[param_count++] = ctx->ac.i1false; /* slc */ + if (HAVE_LLVM >= 0x800) { + params[param_count++] = ctx->ac.i32_0; /* soffset */ + params[param_count++] = ctx->ac.i32_0; /* slc */ - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", atomic_name); + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); + } else { + params[param_count++] = ctx->ac.i1false; /* slc */ + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", atomic_name); + } assert(length < sizeof(intrinsic_name)); return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index f56eb01dc52..2e6f88ac342 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3500,7 +3500,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; ctx.abi.clamp_shadow_reference = false; - ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800; if (shader_count >= 2) ac_init_exec_full_mask(&ctx.ac);