X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_llvm_build.c;h=f4d91567fa1a48b63febb93d90573e8538e0a2f1;hb=af65f9431e0fea5df0957987efeb0b87cee6cadc;hp=221850139b19e23bfd1af1e60f3f055472bc517c;hpb=39d0c68321dfb592afc1f6f63d90d5508cdf7333;p=mesa.git diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 221850139b1..f4d91567fa1 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -58,7 +58,10 @@ struct ac_llvm_flow { */ void ac_llvm_context_init(struct ac_llvm_context *ctx, - enum chip_class chip_class, enum radeon_family family) + struct ac_llvm_compiler *compiler, + enum chip_class chip_class, enum radeon_family family, + enum ac_float_mode float_mode, unsigned wave_size, + unsigned ballot_mask_bits) { LLVMValueRef args[1]; @@ -66,8 +69,12 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->chip_class = chip_class; ctx->family = family; - ctx->module = NULL; - ctx->builder = NULL; + ctx->wave_size = wave_size; + ctx->ballot_mask_bits = ballot_mask_bits; + ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32 + : compiler->tm, + ctx->context); + ctx->builder = ac_create_builder(ctx->context, float_mode); ctx->voidt = LLVMVoidTypeInContext(ctx->context); ctx->i1 = LLVMInt1TypeInContext(ctx->context); @@ -87,6 +94,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->v3f32 = LLVMVectorType(ctx->f32, 3); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size); + ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits); ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); @@ -121,14 +130,15 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, "amdgpu.uniform", 14); ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); + ctx->flow = calloc(1, sizeof(*ctx->flow)); } void ac_llvm_context_dispose(struct ac_llvm_context *ctx) { + free(ctx->flow->stack); free(ctx->flow); ctx->flow = NULL; - ctx->flow_depth_max = 0; } int @@ -344,6 +354,7 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) char *type_name = LLVMPrintTypeToString(type); fprintf(stderr, "Error building type name for: %s\n", type_name); + LLVMDisposeMessage(type_name); return; } elem_type = LLVMGetElementType(type); @@ -442,7 +453,16 @@ LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value) { - const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i32" : "llvm.amdgcn.icmp.i32"; + const char *name; + + if (HAVE_LLVM >= 0x900) { + if (ctx->wave_size == 64) + name = "llvm.amdgcn.icmp.i64.i32"; + else + name = "llvm.amdgcn.icmp.i32.i32"; + } else { + name = "llvm.amdgcn.icmp.i32"; + } LLVMValueRef args[3] = { value, ctx->i32_0, @@ -456,8 +476,7 @@ ac_build_ballot(struct ac_llvm_context *ctx, args[0] = ac_to_integer(ctx, args[0]); - return ac_build_intrinsic(ctx, name, - ctx->i64, args, 3, + return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3, AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); @@ -473,7 +492,6 @@ LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMConstInt(ctx->i32, LLVMIntNE, 0), }; - assert(HAVE_LLVM >= 0x0800); return ac_build_intrinsic(ctx, name, ctx->i64, args, 3, AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE | @@ -493,7 +511,7 @@ ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) { LLVMValueRef vote_set = ac_build_ballot(ctx, value); return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, - LLVMConstInt(ctx->i64, 0, 0), ""); + LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); } LLVMValueRef @@ -506,7 +524,7 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) vote_set, active_set, ""); LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, - LLVMConstInt(ctx->i64, 0, 0), ""); + LLVMConstInt(ctx->iN_wavemask, 0, 0), ""); return LLVMBuildOr(ctx->builder, all, none, ""); } @@ -610,6 +628,22 @@ ac_build_expand(struct ac_llvm_context *ctx, return ac_build_gather_values(ctx, chan, dst_channels); } +/* Extract components [start, start + channels) from a vector. + */ +LLVMValueRef +ac_extract_components(struct ac_llvm_context *ctx, + LLVMValueRef value, + unsigned start, + unsigned channels) +{ + LLVMValueRef chan[channels]; + + for (unsigned i = 0; i < channels; i++) + chan[i] = ac_llvm_extract_elem(ctx, value, i + start); + + return ac_build_gather_values(ctx, chan, channels); +} + /* Expand a scalar or vector to <4 x type> by filling the remaining channels * with undef. Extract at most num_channels components from the input. */ @@ -1116,52 +1150,17 @@ static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, } static void -ac_build_llvm7_buffer_store_common(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef data, - LLVMValueRef vindex, - LLVMValueRef voffset, - unsigned num_channels, - unsigned cache_policy, - bool use_format) -{ - LLVMValueRef args[] = { - data, - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), - vindex ? vindex : ctx->i32_0, - voffset, - LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), 0), - LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), 0) - }; - unsigned func = CLAMP(num_channels, 1, 3) - 1; - - const char *type_names[] = {"f32", "v2f32", "v4f32"}; - char name[256]; - - if (use_format) { - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.format.%s", - type_names[func]); - } else { - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s", - type_names[func]); - } - - ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args), - AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); -} - -static void -ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef data, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned num_channels, - LLVMTypeRef return_channel_type, - unsigned cache_policy, - bool use_format, - bool structurized) +ac_build_buffer_store_common(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef data, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + unsigned num_channels, + LLVMTypeRef return_channel_type, + unsigned cache_policy, + bool use_format, + bool structurized) { LLVMValueRef args[6]; int idx = 0; @@ -1200,16 +1199,10 @@ ac_build_buffer_store_format(struct ac_llvm_context *ctx, unsigned num_channels, unsigned cache_policy) { - if (HAVE_LLVM >= 0x800) { - ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex, - voffset, NULL, num_channels, - ctx->f32, cache_policy, - true, true); - } else { - ac_build_llvm7_buffer_store_common(ctx, rsrc, data, vindex, voffset, - num_channels, cache_policy, - true); - } + ac_build_buffer_store_common(ctx, rsrc, data, vindex, + voffset, NULL, num_channels, + ctx->f32, cache_policy, + true, true); } /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. @@ -1259,25 +1252,10 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, inst_offset, 0), ""); - if (HAVE_LLVM >= 0x800) { - ac_build_llvm8_buffer_store_common(ctx, rsrc, - ac_to_float(ctx, vdata), - ctx->i32_0, - voffset, offset, - num_channels, - ctx->f32, - cache_policy, - false, false); - } else { - if (voffset) - offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); - - ac_build_llvm7_buffer_store_common(ctx, rsrc, - ac_to_float(ctx, vdata), - ctx->i32_0, offset, - num_channels, cache_policy, - false); - } + ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), + ctx->i32_0, voffset, offset, + num_channels, ctx->f32, + cache_policy, false, false); return; } @@ -1296,53 +1274,17 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, } static LLVMValueRef -ac_build_llvm7_buffer_load_common(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - unsigned num_channels, - unsigned cache_policy, - bool can_speculate, - bool use_format) -{ - LLVMValueRef args[] = { - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), - vindex ? vindex : ctx->i32_0, - voffset, - LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), 0), - LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), 0) - }; - unsigned func = CLAMP(num_channels, 1, 3) - 1; - - LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32}; - const char *type_names[] = {"f32", "v2f32", "v4f32"}; - char name[256]; - - if (use_format) { - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.format.%s", - type_names[func]); - } else { - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", - type_names[func]); - } - - return ac_build_intrinsic(ctx, name, types[func], args, - ARRAY_SIZE(args), - ac_get_load_intr_attribs(can_speculate)); -} - -static LLVMValueRef -ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned num_channels, - LLVMTypeRef channel_type, - unsigned cache_policy, - bool can_speculate, - bool use_format, - bool structurized) +ac_build_buffer_load_common(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + unsigned num_channels, + LLVMTypeRef channel_type, + unsigned cache_policy, + bool can_speculate, + bool use_format, + bool structurized) { LLVMValueRef args[5]; int idx = 0; @@ -1390,7 +1332,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); if (allow_smem && !(cache_policy & ac_slc) && - (!(cache_policy & ac_glc) || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= GFX8))) { + (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) { assert(vindex == NULL); LLVMValueRef result[8]; @@ -1400,19 +1342,15 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), ""); } - const char *intrname = - HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.s.buffer.load.f32" - : "llvm.SI.load.const.v4i32"; - unsigned num_args = HAVE_LLVM >= 0x0800 ? 3 : 2; LLVMValueRef args[3] = { rsrc, offset, LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0), }; - result[i] = ac_build_intrinsic(ctx, intrname, - ctx->f32, args, num_args, - AC_FUNC_ATTR_READNONE | - (HAVE_LLVM < 0x0800 ? AC_FUNC_ATTR_LEGACY : 0)); + result[i] = ac_build_intrinsic(ctx, + "llvm.amdgcn.s.buffer.load.f32", + ctx->f32, args, 3, + AC_FUNC_ATTR_READNONE); } if (num_channels == 1) return result[0]; @@ -1422,18 +1360,11 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, return ac_build_gather_values(ctx, result, num_channels); } - if (HAVE_LLVM >= 0x0800) { - return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, - offset, ctx->i32_0, - num_channels, ctx->f32, - cache_policy, - can_speculate, false, - false); - } - - return ac_build_llvm7_buffer_load_common(ctx, rsrc, vindex, offset, - num_channels, cache_policy, - can_speculate, false); + return ac_build_buffer_load_common(ctx, rsrc, vindex, + offset, ctx->i32_0, + num_channels, ctx->f32, + cache_policy, + can_speculate, false, false); } LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, @@ -1444,44 +1375,10 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, unsigned cache_policy, bool can_speculate) { - if (HAVE_LLVM >= 0x800) { - return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, - num_channels, ctx->f32, - cache_policy, can_speculate, true, true); - } - return ac_build_llvm7_buffer_load_common(ctx, rsrc, vindex, voffset, - num_channels, cache_policy, - can_speculate, true); -} - -LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - unsigned num_channels, - unsigned cache_policy, - bool can_speculate) -{ - if (HAVE_LLVM >= 0x800) { - return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, - num_channels, ctx->f32, - cache_policy, can_speculate, true, true); - } - - LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); - LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, ""); - stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); - - LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""), - elem_count, stride, ""); - - LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count, - LLVMConstInt(ctx->i32, 2, 0), ""); - - return ac_build_llvm7_buffer_load_common(ctx, new_rsrc, vindex, voffset, - num_channels, cache_policy, - can_speculate, true); + return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, + ctx->i32_0, num_channels, ctx->f32, + cache_policy, can_speculate, + true, true); } /// Translate a (dfmt, nfmt) pair into a chip-appropriate combined format @@ -1494,6 +1391,7 @@ ac_get_tbuffer_format(struct ac_llvm_context *ctx, unsigned format; switch (dfmt) { default: unreachable("bad dfmt"); + case V_008F0C_BUF_DATA_FORMAT_INVALID: format = V_008F0C_IMG_FORMAT_INVALID; break; case V_008F0C_BUF_DATA_FORMAT_8: format = V_008F0C_IMG_FORMAT_8_UINT; break; case V_008F0C_BUF_DATA_FORMAT_8_8: format = V_008F0C_IMG_FORMAT_8_8_UINT; break; case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT; break; @@ -1530,11 +1428,12 @@ ac_get_tbuffer_format(struct ac_llvm_context *ctx, } static LLVMValueRef -ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, +ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, + LLVMValueRef immoffset, unsigned num_channels, unsigned dfmt, unsigned nfmt, @@ -1542,6 +1441,8 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, bool can_speculate, bool structurized) { + voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); + LLVMValueRef args[6]; int idx = 0; args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); @@ -1565,52 +1466,6 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, ac_get_load_intr_attribs(can_speculate)); } -static LLVMValueRef -ac_build_tbuffer_load(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool can_speculate, - bool structurized) /* only matters for LLVM 8+ */ -{ - if (HAVE_LLVM >= 0x800) { - voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); - - return ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset, - soffset, num_channels, - dfmt, nfmt, cache_policy, - can_speculate, structurized); - } - - LLVMValueRef args[] = { - rsrc, - vindex ? vindex : ctx->i32_0, - voffset, - soffset, - immoffset, - LLVMConstInt(ctx->i32, dfmt, false), - LLVMConstInt(ctx->i32, nfmt, false), - LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), false), - LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), false), - }; - unsigned func = CLAMP(num_channels, 1, 3) - 1; - LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32}; - const char *type_names[] = {"i32", "v2i32", "v4i32"}; - char name[256]; - - snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.load.%s", - type_names[func]); - - return ac_build_intrinsic(ctx, name, types[func], args, 9, - ac_get_load_intr_attribs(can_speculate)); -} - LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -1660,10 +1515,10 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ - res = ac_build_llvm8_buffer_load_common(ctx, rsrc, NULL, - voffset, soffset, - 1, ctx->i16, cache_policy, - false, false, false); + res = ac_build_buffer_load_common(ctx, rsrc, NULL, + voffset, soffset, + 1, ctx->i16, cache_policy, + false, false, false); } else { unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; @@ -1692,10 +1547,10 @@ ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx, voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, ""); /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ - res = ac_build_llvm8_buffer_load_common(ctx, rsrc, NULL, - voffset, soffset, - 1, ctx->i8, cache_policy, - false, false, false); + res = ac_build_buffer_load_common(ctx, rsrc, NULL, + voffset, soffset, + 1, ctx->i8, cache_policy, + false, false, false); } else { unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; @@ -1836,20 +1691,13 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx, for (unsigned i = 0; i < load_num_channels; ++i) { tmp = LLVMBuildAdd(ctx->builder, soffset, LLVMConstInt(ctx->i32, i << load_log_size, false), ""); - if (HAVE_LLVM >= 0x0800) { - LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 : - load_log_size == 1 ? ctx->i16 : ctx->i32; - unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2); - loads[i] = ac_build_llvm8_buffer_load_common( - ctx, rsrc, vindex, voffset, tmp, - num_channels, channel_type, cache_policy, - can_speculate, false, true); - } else { - tmp = LLVMBuildAdd(ctx->builder, voffset, tmp, ""); - loads[i] = ac_build_llvm7_buffer_load_common( - ctx, rsrc, vindex, tmp, - 1 << (load_log_size - 2), cache_policy, can_speculate, false); - } + LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 : + load_log_size == 1 ? ctx->i16 : ctx->i32; + unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2); + loads[i] = ac_build_buffer_load_common( + ctx, rsrc, vindex, voffset, tmp, + num_channels, channel_type, cache_policy, + can_speculate, false, true); if (load_log_size >= 2) loads[i] = ac_to_integer(ctx, loads[i]); } @@ -2022,18 +1870,22 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx, } static void -ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool structurized) +ac_build_tbuffer_store(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + LLVMValueRef immoffset, + unsigned num_channels, + unsigned dfmt, + unsigned nfmt, + unsigned cache_policy, + bool structurized) { + voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, + immoffset, ""); + LLVMValueRef args[7]; int idx = 0; args[idx++] = vdata; @@ -2058,53 +1910,6 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); } -static void -ac_build_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - LLVMValueRef immoffset, - unsigned num_channels, - unsigned dfmt, - unsigned nfmt, - unsigned cache_policy, - bool structurized) /* only matters for LLVM 8+ */ -{ - if (HAVE_LLVM >= 0x800) { - voffset = LLVMBuildAdd(ctx->builder, - voffset ? voffset : ctx->i32_0, - immoffset, ""); - - ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, - soffset, num_channels, dfmt, nfmt, - cache_policy, structurized); - } else { - LLVMValueRef params[] = { - vdata, - rsrc, - vindex ? vindex : ctx->i32_0, - voffset ? voffset : ctx->i32_0, - soffset ? soffset : ctx->i32_0, - immoffset, - LLVMConstInt(ctx->i32, dfmt, false), - LLVMConstInt(ctx->i32, nfmt, false), - LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), false), - LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), false), - }; - unsigned func = CLAMP(num_channels, 1, 3) - 1; - const char *type_names[] = {"i32", "v2i32", "v4i32"}; - char name[256]; - - snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s", - type_names[func]); - - ac_build_intrinsic(ctx, name, ctx->voidt, params, 10, - AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY); - } -} - void ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -2152,10 +1957,10 @@ ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, if (HAVE_LLVM >= 0x900) { /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ - ac_build_llvm8_buffer_store_common(ctx, rsrc, vdata, NULL, - voffset, soffset, 1, - ctx->i16, cache_policy, - false, false); + ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, + voffset, soffset, 1, + ctx->i16, cache_policy, + false, false); } else { unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; @@ -2179,10 +1984,10 @@ ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx, if (HAVE_LLVM >= 0x900) { /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */ - ac_build_llvm8_buffer_store_common(ctx, rsrc, vdata, NULL, - voffset, soffset, 1, - ctx->i8, cache_policy, - false, false); + ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, + voffset, soffset, 1, + ctx->i8, cache_policy, + false, false); } else { unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8; unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; @@ -2225,10 +2030,14 @@ ac_get_thread_id(struct ac_llvm_context *ctx) "llvm.amdgcn.mbcnt.lo", ctx->i32, tid_args, 2, AC_FUNC_ATTR_READNONE); - tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", - ctx->i32, tid_args, - 2, AC_FUNC_ATTR_READNONE); - set_range_metadata(ctx, tid, 0, 64); + if (ctx->wave_size == 32) { + tid = tid_args[1]; + } else { + tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", + ctx->i32, tid_args, + 2, AC_FUNC_ATTR_READNONE); + } + set_range_metadata(ctx, tid, 0, ctx->wave_size); return tid; } @@ -2559,6 +2368,8 @@ static const char *get_atomic_name(enum ac_atomic_op op) case ac_atomic_and: return "and"; case ac_atomic_or: return "or"; case ac_atomic_xor: return "xor"; + case ac_atomic_inc_wrap: return "inc"; + case ac_atomic_dec_wrap: return "dec"; } unreachable("bad atomic op"); } @@ -2837,22 +2648,10 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, width, }; - LLVMValueRef result = ac_build_intrinsic(ctx, - is_signed ? "llvm.amdgcn.sbfe.i32" : - "llvm.amdgcn.ubfe.i32", - ctx->i32, args, 3, - AC_FUNC_ATTR_READNONE); - - if (HAVE_LLVM < 0x0800) { - /* FIXME: LLVM 7+ returns incorrect result when count is 0. - * https://bugs.freedesktop.org/show_bug.cgi?id=107276 - */ - LLVMValueRef zero = ctx->i32_0; - LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, width, zero, ""); - result = LLVMBuildSelect(ctx->builder, icond, zero, result, ""); - } + return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : + "llvm.amdgcn.ubfe.i32", + ctx->i32, args, 3, AC_FUNC_ATTR_READNONE); - return result; } LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, @@ -3476,17 +3275,17 @@ LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) static struct ac_llvm_flow * get_current_flow(struct ac_llvm_context *ctx) { - if (ctx->flow_depth > 0) - return &ctx->flow[ctx->flow_depth - 1]; + if (ctx->flow->depth > 0) + return &ctx->flow->stack[ctx->flow->depth - 1]; return NULL; } static struct ac_llvm_flow * get_innermost_loop(struct ac_llvm_context *ctx) { - for (unsigned i = ctx->flow_depth; i > 0; --i) { - if (ctx->flow[i - 1].loop_entry_block) - return &ctx->flow[i - 1]; + for (unsigned i = ctx->flow->depth; i > 0; --i) { + if (ctx->flow->stack[i - 1].loop_entry_block) + return &ctx->flow->stack[i - 1]; } return NULL; } @@ -3496,16 +3295,16 @@ push_flow(struct ac_llvm_context *ctx) { struct ac_llvm_flow *flow; - if (ctx->flow_depth >= ctx->flow_depth_max) { - unsigned new_max = MAX2(ctx->flow_depth << 1, + if (ctx->flow->depth >= ctx->flow->depth_max) { + unsigned new_max = MAX2(ctx->flow->depth << 1, AC_LLVM_INITIAL_CF_DEPTH); - ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow)); - ctx->flow_depth_max = new_max; + ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack)); + ctx->flow->depth_max = new_max; } - flow = &ctx->flow[ctx->flow_depth]; - ctx->flow_depth++; + flow = &ctx->flow->stack[ctx->flow->depth]; + ctx->flow->depth++; flow->next_block = NULL; flow->loop_entry_block = NULL; @@ -3525,10 +3324,10 @@ static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, const char *name) { - assert(ctx->flow_depth >= 1); + assert(ctx->flow->depth >= 1); - if (ctx->flow_depth >= 2) { - struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2]; + if (ctx->flow->depth >= 2) { + struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2]; return LLVMInsertBasicBlockInContext(ctx->context, flow->next_block, name); @@ -3598,7 +3397,7 @@ void ac_build_endif(struct ac_llvm_context *ctx, int label_id) LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); set_basicblock_name(current_branch->next_block, "endif", label_id); - ctx->flow_depth--; + ctx->flow->depth--; } void ac_build_endloop(struct ac_llvm_context *ctx, int label_id) @@ -3611,7 +3410,7 @@ void ac_build_endloop(struct ac_llvm_context *ctx, int label_id) LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); set_basicblock_name(current_loop->next_block, "endloop", label_id); - ctx->flow_depth--; + ctx->flow->depth--; } void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) @@ -3819,22 +3618,27 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la LLVMConstInt(ctx->i32, i, 0), ""); } } + if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) + return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane) { - /* TODO: Use the actual instruction when LLVM adds an intrinsic for it. - */ - LLVMValueRef pred = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane, - ac_get_thread_id(ctx), ""); - return LLVMBuildSelect(ctx->builder, pred, value, src, ""); + return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32, + (LLVMValueRef []) {value, lane, src}, 3, + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT); } LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask) { + if (ctx->wave_size == 32) { + return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32, + (LLVMValueRef []) { mask, ctx->i32_0 }, + 2, AC_FUNC_ATTR_READNONE); + } LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, LLVMVectorType(ctx->i32, 2), ""); @@ -4168,10 +3972,9 @@ ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValu if (ctx->chip_class >= GFX10) { result = inclusive ? src : identity; } else { - if (inclusive) - result = src; - else - result = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false); + if (!inclusive) + src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false); + result = src; } if (maxprefix <= 1) return result; @@ -4256,7 +4059,7 @@ ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity, 64, true); + result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true); return ac_build_wwm(ctx, result); } @@ -4280,7 +4083,7 @@ ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity, 64, false); + result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false); return ac_build_wwm(ctx, result); } @@ -4356,12 +4159,12 @@ ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) if (ws->maxwaves <= 1) return; - const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false); + const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false); LLVMBuilderRef builder = ctx->builder; LLVMValueRef tid = ac_get_thread_id(ctx); LLVMValueRef tmp; - tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, ""); + tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, ""); ac_build_ifcc(ctx, tmp, 1000); LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); ac_build_endif(ctx, 1000);