*/
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
- enum chip_class chip_class, enum radeon_family family)
+ struct ac_llvm_compiler *compiler,
+ enum chip_class chip_class, enum radeon_family family,
+ enum ac_float_mode float_mode, unsigned wave_size)
{
LLVMValueRef args[1];
ctx->chip_class = chip_class;
ctx->family = family;
- ctx->module = NULL;
- ctx->builder = NULL;
+ ctx->wave_size = wave_size;
+ ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
+ : compiler->tm,
+ ctx->context);
+ ctx->builder = ac_create_builder(ctx->context, float_mode);
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
ctx->i1 = LLVMInt1TypeInContext(ctx->context);
ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+ ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
"amdgpu.uniform", 14);
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
+ ctx->flow = calloc(1, sizeof(*ctx->flow));
}
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx)
{
+ free(ctx->flow->stack);
free(ctx->flow);
ctx->flow = NULL;
- ctx->flow_depth_max = 0;
}
int
char *type_name = LLVMPrintTypeToString(type);
fprintf(stderr, "Error building type name for: %s\n",
type_name);
+ LLVMDisposeMessage(type_name);
return;
}
elem_type = LLVMGetElementType(type);
LLVMValueRef
ac_build_shader_clock(struct ac_llvm_context *ctx)
{
- LLVMValueRef tmp = ac_build_intrinsic(ctx, "llvm.readcyclecounter",
- ctx->i64, NULL, 0, 0);
+ const char *intr = HAVE_LLVM >= 0x0900 && ctx->chip_class >= GFX8 ?
+ "llvm.amdgcn.s.memrealtime" : "llvm.readcyclecounter";
+ LLVMValueRef tmp = ac_build_intrinsic(ctx, intr, ctx->i64, NULL, 0, 0);
return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
}
ac_build_ballot(struct ac_llvm_context *ctx,
LLVMValueRef value)
{
+ const char *name;
+
+ if (HAVE_LLVM >= 0x900) {
+ if (ctx->wave_size == 64)
+ name = "llvm.amdgcn.icmp.i64.i32";
+ else
+ name = "llvm.amdgcn.icmp.i32.i32";
+ } else {
+ name = "llvm.amdgcn.icmp.i32";
+ }
LLVMValueRef args[3] = {
value,
ctx->i32_0,
args[0] = ac_to_integer(ctx, args[0]);
- return ac_build_intrinsic(ctx,
- "llvm.amdgcn.icmp.i32",
- ctx->i64, args, 3,
+ return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
AC_FUNC_ATTR_NOUNWIND |
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
LLVMValueRef value)
{
+ const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i1" : "llvm.amdgcn.icmp.i1";
LLVMValueRef args[3] = {
value,
ctx->i1false,
};
assert(HAVE_LLVM >= 0x0800);
- return ac_build_intrinsic(ctx, "llvm.amdgcn.icmp.i1", ctx->i64, args, 3,
+ return ac_build_intrinsic(ctx, name, ctx->i64, args, 3,
AC_FUNC_ATTR_NOUNWIND |
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
{
LLVMValueRef vote_set = ac_build_ballot(ctx, value);
return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
- LLVMConstInt(ctx->i64, 0, 0), "");
+ LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
}
LLVMValueRef
vote_set, active_set, "");
LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
vote_set,
- LLVMConstInt(ctx->i64, 0, 0), "");
+ LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
return LLVMBuildOr(ctx->builder, all, none, "");
}
return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
}
+/**
+ * Derive the cache policy bits to use for a load.
+ *
+ * Returns cache_policy unchanged, except that on GFX10 and newer ac_dlc
+ * is OR-ed in whenever ac_glc is set.
+ */
+static unsigned get_load_cache_policy(struct ac_llvm_context *ctx,
+				      unsigned cache_policy)
+{
+	unsigned policy = cache_policy;
+
+	if (ctx->chip_class >= GFX10 && (cache_policy & ac_glc))
+		policy |= ac_dlc;
+
+	return policy;
+}
+
static void
ac_build_llvm7_buffer_store_common(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool use_format)
{
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : ctx->i32_0,
voffset,
- LLVMConstInt(ctx->i1, glc, 0),
- LLVMConstInt(ctx->i1, slc, 0)
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), 0),
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), 0)
};
unsigned func = CLAMP(num_channels, 1, 3) - 1;
}
ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
- ac_get_store_intr_attribs(writeonly_memory));
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
}
static void
LLVMValueRef soffset,
unsigned num_channels,
LLVMTypeRef return_channel_type,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool use_format,
bool structurized)
{
args[idx++] = vindex ? vindex : ctx->i32_0;
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = structurized ? "struct" : "raw";
char name[256], type_name[8];
}
ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
- ac_get_store_intr_attribs(writeonly_memory));
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
}
void
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
- bool writeonly_memory)
+ unsigned cache_policy)
{
if (HAVE_LLVM >= 0x800) {
ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
voffset, NULL, num_channels,
- ctx->f32, glc, false,
- writeonly_memory, true, true);
+ ctx->f32, cache_policy,
+ true, true);
} else {
ac_build_llvm7_buffer_store_common(ctx, rsrc, data, vindex, voffset,
- num_channels, glc, false,
- writeonly_memory, true);
+ num_channels, cache_policy,
+ true);
}
}
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool swizzle_enable_hint)
{
/* Split 3 channel stores, because only LLVM 9+ support 3-channel
v01 = ac_build_gather_values(ctx, v, 2);
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
- soffset, inst_offset, glc, slc,
- writeonly_memory, swizzle_enable_hint);
+ soffset, inst_offset, cache_policy,
+ swizzle_enable_hint);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
soffset, inst_offset + 8,
- glc, slc,
- writeonly_memory, swizzle_enable_hint);
+ cache_policy,
+ swizzle_enable_hint);
return;
}
voffset, offset,
num_channels,
ctx->f32,
- glc, slc,
- writeonly_memory,
+ cache_policy,
false, false);
} else {
if (voffset)
ac_build_llvm7_buffer_store_common(ctx, rsrc,
ac_to_float(ctx, vdata),
ctx->i32_0, offset,
- num_channels, glc, slc,
- writeonly_memory, false);
+ num_channels, cache_policy,
+ false);
}
return;
}
LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, glc,
- slc, writeonly_memory);
+ immoffset, num_channels, dfmt, nfmt, cache_policy);
}
static LLVMValueRef
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate,
bool use_format)
{
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : ctx->i32_0,
voffset,
- LLVMConstInt(ctx->i1, glc, 0),
- LLVMConstInt(ctx->i1, slc, 0)
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), 0),
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), 0)
};
unsigned func = CLAMP(num_channels, 1, 3) - 1;
LLVMValueRef soffset,
unsigned num_channels,
LLVMTypeRef channel_type,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate,
bool use_format,
bool structurized)
args[idx++] = vindex ? vindex : ctx->i32_0;
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = structurized ? "struct" : "raw";
char name[256], type_name[8];
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- unsigned glc,
- unsigned slc,
+ unsigned cache_policy,
bool can_speculate,
bool allow_smem)
{
if (soffset)
offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
- if (allow_smem && !slc &&
- (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= GFX8))) {
+ if (allow_smem && !(cache_policy & ac_slc) &&
+ (!(cache_policy & ac_glc) || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= GFX8))) {
assert(vindex == NULL);
LLVMValueRef result[8];
LLVMValueRef args[3] = {
rsrc,
offset,
- glc ? ctx->i32_1 : ctx->i32_0,
+ LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
};
result[i] = ac_build_intrinsic(ctx, intrname,
ctx->f32, args, num_args,
return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex,
offset, ctx->i32_0,
num_channels, ctx->f32,
- glc, slc,
+ cache_policy,
can_speculate, false,
false);
}
return ac_build_llvm7_buffer_load_common(ctx, rsrc, vindex, offset,
- num_channels, glc, slc,
+ num_channels, cache_policy,
can_speculate, false);
}
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
+ unsigned cache_policy,
bool can_speculate)
{
if (HAVE_LLVM >= 0x800) {
return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
num_channels, ctx->f32,
- glc, false,
- can_speculate, true, true);
+ cache_policy, can_speculate, true, true);
}
return ac_build_llvm7_buffer_load_common(ctx, rsrc, vindex, voffset,
- num_channels, glc, false,
+ num_channels, cache_policy,
can_speculate, true);
}
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
+ unsigned cache_policy,
bool can_speculate)
{
if (HAVE_LLVM >= 0x800) {
return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
num_channels, ctx->f32,
- glc, false,
- can_speculate, true, true);
+ cache_policy, can_speculate, true, true);
}
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
LLVMConstInt(ctx->i32, 2, 0), "");
return ac_build_llvm7_buffer_load_common(ctx, new_rsrc, vindex, voffset,
- num_channels, glc, false,
+ num_channels, cache_policy,
can_speculate, true);
}
+/// Combine a buffer data format (dfmt) and numeric format (nfmt) into the
+/// single format value passed to the LLVM 8+ tbuffer intrinsics.
+///
+/// Before GFX10 the result is dfmt | (nfmt << 4). On GFX10 and newer the
+/// pair is translated to a V_008F0C_IMG_FORMAT_* value.
+static unsigned
+ac_get_tbuffer_format(struct ac_llvm_context *ctx,
+		      unsigned dfmt, unsigned nfmt)
+{
+	unsigned format;
+
+	if (ctx->chip_class < GFX10)
+		return dfmt | (nfmt << 4);
+
+	// Map the data format to its *_UINT image format
+	// (INVALID maps to INVALID).
+	switch (dfmt) {
+	default:
+		unreachable("bad dfmt");
+	case V_008F0C_BUF_DATA_FORMAT_INVALID:
+		format = V_008F0C_IMG_FORMAT_INVALID;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_8:
+		format = V_008F0C_IMG_FORMAT_8_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_8_8:
+		format = V_008F0C_IMG_FORMAT_8_8_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
+		format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_16:
+		format = V_008F0C_IMG_FORMAT_16_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_16_16:
+		format = V_008F0C_IMG_FORMAT_16_16_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
+		format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_32:
+		format = V_008F0C_IMG_FORMAT_32_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_32_32:
+		format = V_008F0C_IMG_FORMAT_32_32_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_32_32_32:
+		format = V_008F0C_IMG_FORMAT_32_32_32_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
+		format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT;
+		break;
+	case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
+		format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT;
+		break;
+	}
+
+	// Use the regularity properties of the combined format enum: the
+	// numeric format is applied as a fixed offset from the UINT entry.
+	//
+	// Note: float is incompatible with 8-bit data formats,
+	//       [us]{norm,scaled} are incompatible with 32-bit data formats.
+	//       [us]scaled are not writable.
+	switch (nfmt) {
+	case V_008F0C_BUF_NUM_FORMAT_UNORM:
+		format -= 4;
+		break;
+	case V_008F0C_BUF_NUM_FORMAT_SNORM:
+		format -= 3;
+		break;
+	case V_008F0C_BUF_NUM_FORMAT_USCALED:
+		format -= 2;
+		break;
+	case V_008F0C_BUF_NUM_FORMAT_SSCALED:
+		format -= 1;
+		break;
+	case V_008F0C_BUF_NUM_FORMAT_SINT:
+		format += 1;
+		break;
+	case V_008F0C_BUF_NUM_FORMAT_FLOAT:
+		format += 2;
+		break;
+	default:
+		unreachable("bad nfmt");
+	case V_008F0C_BUF_NUM_FORMAT_UINT:
+		break;
+	}
+
+	return format;
+}
+
static LLVMValueRef
ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate,
bool structurized)
{
args[idx++] = vindex ? vindex : ctx->i32_0;
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
- args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx, dfmt, nfmt), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = structurized ? "struct" : "raw";
char name[256], type_name[8];
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate,
bool structurized) /* only matters for LLVM 8+ */
{
return ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
soffset, num_channels,
- dfmt, nfmt, glc, slc,
+ dfmt, nfmt, cache_policy,
can_speculate, structurized);
}
immoffset,
LLVMConstInt(ctx->i32, dfmt, false),
LLVMConstInt(ctx->i32, nfmt, false),
- LLVMConstInt(ctx->i1, glc, false),
- LLVMConstInt(ctx->i1, slc, false),
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), false),
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), false),
};
unsigned func = CLAMP(num_channels, 1, 3) - 1;
LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate)
{
return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, glc,
- slc, can_speculate, true);
+ immoffset, num_channels, dfmt, nfmt,
+ cache_policy, can_speculate, true);
}
LLVMValueRef
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate)
{
return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, glc,
- slc, can_speculate, false);
+ immoffset, num_channels, dfmt, nfmt,
+ cache_policy, can_speculate, false);
}
LLVMValueRef
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
- bool glc)
+ unsigned cache_policy)
{
LLVMValueRef res;
/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
res = ac_build_llvm8_buffer_load_common(ctx, rsrc, NULL,
voffset, soffset,
- 1, ctx->i16, glc, false,
+ 1, ctx->i16, cache_policy,
false, false, false);
} else {
unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
- immoffset, 1, dfmt, nfmt, glc, false,
+ immoffset, 1, dfmt, nfmt, cache_policy,
false);
res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
- bool glc)
+ unsigned cache_policy)
{
LLVMValueRef res;
/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
res = ac_build_llvm8_buffer_load_common(ctx, rsrc, NULL,
voffset, soffset,
- 1, ctx->i8, glc, false,
+ 1, ctx->i8, cache_policy,
false, false, false);
} else {
unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
- immoffset, 1, dfmt, nfmt, glc, false,
+ immoffset, 1, dfmt, nfmt, cache_policy,
false);
res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, "");
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate)
{
LLVMValueRef tmp;
unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2);
loads[i] = ac_build_llvm8_buffer_load_common(
ctx, rsrc, vindex, voffset, tmp,
- num_channels, channel_type, glc, slc,
+ num_channels, channel_type, cache_policy,
can_speculate, false, true);
} else {
tmp = LLVMBuildAdd(ctx->builder, voffset, tmp, "");
loads[i] = ac_build_llvm7_buffer_load_common(
ctx, rsrc, vindex, tmp,
- 1 << (load_log_size - 2), glc, slc, can_speculate, false);
+ 1 << (load_log_size - 2), cache_policy, can_speculate, false);
}
if (load_log_size >= 2)
loads[i] = ac_to_integer(ctx, loads[i]);
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool structurized)
{
LLVMValueRef args[7];
args[idx++] = vindex ? vindex : ctx->i32_0;
args[idx++] = voffset ? voffset : ctx->i32_0;
args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
- args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx, dfmt, nfmt), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = structurized ? "struct" : "raw";
char name[256], type_name[8];
indexing_kind, type_name);
ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
- ac_get_store_intr_attribs(writeonly_memory));
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
}
static void
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool structurized) /* only matters for LLVM 8+ */
{
if (HAVE_LLVM >= 0x800) {
ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
soffset, num_channels, dfmt, nfmt,
- glc, slc, writeonly_memory,
- structurized);
+ cache_policy, structurized);
} else {
LLVMValueRef params[] = {
vdata,
immoffset,
LLVMConstInt(ctx->i32, dfmt, false),
LLVMConstInt(ctx->i32, nfmt, false),
- LLVMConstInt(ctx->i1, glc, false),
- LLVMConstInt(ctx->i1, slc, false),
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_glc), false),
+ LLVMConstInt(ctx->i1, !!(cache_policy & ac_slc), false),
};
unsigned func = CLAMP(num_channels, 1, 3) - 1;
const char *type_names[] = {"i32", "v2i32", "v4i32"};
type_names[func]);
ac_build_intrinsic(ctx, name, ctx->voidt, params, 10,
- ac_get_store_intr_attribs(writeonly_memory));
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
}
}
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
- bool writeonly_memory)
+ unsigned cache_policy)
{
ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, glc, slc,
- writeonly_memory, true);
+ immoffset, num_channels, dfmt, nfmt, cache_policy,
+ true);
}
void
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
- bool writeonly_memory)
+ unsigned cache_policy)
{
ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, glc, slc,
- writeonly_memory, false);
+ immoffset, num_channels, dfmt, nfmt, cache_policy,
+ false);
}
void
LLVMValueRef vdata,
LLVMValueRef voffset,
LLVMValueRef soffset,
- bool glc,
- bool writeonly_memory)
+ unsigned cache_policy)
{
vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
ac_build_llvm8_buffer_store_common(ctx, rsrc, vdata, NULL,
voffset, soffset, 1,
- ctx->i16, glc, false,
- writeonly_memory, false,
- false);
+ ctx->i16, cache_policy,
+ false, false);
} else {
unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- ctx->i32_0, 1, dfmt, nfmt, glc, false,
- writeonly_memory);
+ ctx->i32_0, 1, dfmt, nfmt, cache_policy);
}
}
LLVMValueRef vdata,
LLVMValueRef voffset,
LLVMValueRef soffset,
- bool glc,
- bool writeonly_memory)
+ unsigned cache_policy)
{
vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
ac_build_llvm8_buffer_store_common(ctx, rsrc, vdata, NULL,
voffset, soffset, 1,
- ctx->i8, glc, false,
- writeonly_memory, false,
- false);
+ ctx->i8, cache_policy,
+ false, false);
} else {
unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- ctx->i32_0, 1, dfmt, nfmt, glc, false,
- writeonly_memory);
+ ctx->i32_0, 1, dfmt, nfmt, cache_policy);
}
}
/**
"llvm.amdgcn.mbcnt.lo", ctx->i32,
tid_args, 2, AC_FUNC_ATTR_READNONE);
- tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
- ctx->i32, tid_args,
- 2, AC_FUNC_ATTR_READNONE);
- set_range_metadata(ctx, tid, 0, 64);
+ if (ctx->wave_size == 32) {
+ tid = tid_args[1];
+ } else {
+ tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
+ ctx->i32, tid_args,
+ 2, AC_FUNC_ATTR_READNONE);
+ }
+ set_range_metadata(ctx, tid, 0, ctx->wave_size);
return tid;
}
a->opcode == ac_image_get_lod;
bool atomic = a->opcode == ac_image_atomic ||
a->opcode == ac_image_atomic_cmpswap;
+ bool load = a->opcode == ac_image_sample ||
+ a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_load ||
+ a->opcode == ac_image_load_mip;
LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
}
args[num_args++] = ctx->i32_0; /* texfailctrl */
- args[num_args++] = LLVMConstInt(ctx->i32, a->cache_policy, false);
+ args[num_args++] = LLVMConstInt(ctx->i32,
+ load ? get_load_cache_policy(ctx, a->cache_policy) :
+ a->cache_policy, false);
const char *name;
const char *atomic_subop = "";
width,
};
- return ac_build_intrinsic(ctx,
- is_signed ? "llvm.amdgcn.sbfe.i32" :
- "llvm.amdgcn.ubfe.i32",
- ctx->i32, args, 3,
- AC_FUNC_ATTR_READNONE);
+ LLVMValueRef result = ac_build_intrinsic(ctx,
+ is_signed ? "llvm.amdgcn.sbfe.i32" :
+ "llvm.amdgcn.ubfe.i32",
+ ctx->i32, args, 3,
+ AC_FUNC_ATTR_READNONE);
+
+ if (HAVE_LLVM < 0x0800) {
+ /* FIXME: LLVM 7+ returns incorrect result when count is 0.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=107276
+ */
+ LLVMValueRef zero = ctx->i32_0;
+ LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, width, zero, "");
+ result = LLVMBuildSelect(ctx->builder, icond, zero, result, "");
+ }
+
+ return result;
}
LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
}
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
{
+ if (!wait_flags)
+ return;
+
+ unsigned lgkmcnt = 63;
+ unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
+ unsigned vscnt = 63;
+
+ if (wait_flags & AC_WAIT_LGKM)
+ lgkmcnt = 0;
+ if (wait_flags & AC_WAIT_VLOAD)
+ vmcnt = 0;
+
+ if (wait_flags & AC_WAIT_VSTORE) {
+ if (ctx->chip_class >= GFX10)
+ vscnt = 0;
+ else
+ vmcnt = 0;
+ }
+
+ /* There is no intrinsic for vscnt(0), so use a fence. */
+ if ((wait_flags & AC_WAIT_LGKM &&
+ wait_flags & AC_WAIT_VLOAD &&
+ wait_flags & AC_WAIT_VSTORE) ||
+ vscnt == 0) {
+ LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
+ return;
+ }
+
+ unsigned simm16 = (lgkmcnt << 8) |
+ (7 << 4) | /* expcnt */
+ (vmcnt & 0xf) |
+ ((vmcnt >> 4) << 14);
+
LLVMValueRef args[1] = {
LLVMConstInt(ctx->i32, simm16, false),
};
static struct ac_llvm_flow *
get_current_flow(struct ac_llvm_context *ctx)
{
- if (ctx->flow_depth > 0)
- return &ctx->flow[ctx->flow_depth - 1];
+ if (ctx->flow->depth > 0)
+ return &ctx->flow->stack[ctx->flow->depth - 1];
return NULL;
}
static struct ac_llvm_flow *
get_innermost_loop(struct ac_llvm_context *ctx)
{
- for (unsigned i = ctx->flow_depth; i > 0; --i) {
- if (ctx->flow[i - 1].loop_entry_block)
- return &ctx->flow[i - 1];
+ for (unsigned i = ctx->flow->depth; i > 0; --i) {
+ if (ctx->flow->stack[i - 1].loop_entry_block)
+ return &ctx->flow->stack[i - 1];
}
return NULL;
}
{
struct ac_llvm_flow *flow;
- if (ctx->flow_depth >= ctx->flow_depth_max) {
- unsigned new_max = MAX2(ctx->flow_depth << 1,
+ if (ctx->flow->depth >= ctx->flow->depth_max) {
+ unsigned new_max = MAX2(ctx->flow->depth << 1,
AC_LLVM_INITIAL_CF_DEPTH);
- ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow));
- ctx->flow_depth_max = new_max;
+ ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack));
+ ctx->flow->depth_max = new_max;
}
- flow = &ctx->flow[ctx->flow_depth];
- ctx->flow_depth++;
+ flow = &ctx->flow->stack[ctx->flow->depth];
+ ctx->flow->depth++;
flow->next_block = NULL;
flow->loop_entry_block = NULL;
static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
const char *name)
{
- assert(ctx->flow_depth >= 1);
+ assert(ctx->flow->depth >= 1);
- if (ctx->flow_depth >= 2) {
- struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
+ if (ctx->flow->depth >= 2) {
+ struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2];
return LLVMInsertBasicBlockInContext(ctx->context,
flow->next_block, name);
LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "endif", label_id);
- ctx->flow_depth--;
+ ctx->flow->depth--;
}
void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
set_basicblock_name(current_loop->next_block, "endloop", label_id);
- ctx->flow_depth--;
+ ctx->flow->depth--;
}
void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
LLVMConstInt(ctx->i32, i, 0), "");
}
}
+ if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+ return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
LLVMValueRef
ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane)
{
- /* TODO: Use the actual instruction when LLVM adds an intrinsic for it.
- */
+ if (HAVE_LLVM >= 0x0800) {
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
+ (LLVMValueRef []) {value, lane, src}, 3,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+ }
+
LLVMValueRef pred = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane,
ac_get_thread_id(ctx), "");
return LLVMBuildSelect(ctx->builder, pred, value, src, "");
LLVMValueRef
ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
{
+ if (ctx->wave_size == 32) {
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+ (LLVMValueRef []) { mask, ctx->i32_0 },
+ 2, AC_FUNC_ATTR_READNONE);
+ }
LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
LLVMVectorType(ctx->i32, 2),
"");
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
+/**
+ * Emit a single permlane16 / permlanex16 intrinsic call returning i32.
+ *
+ * src is passed for both of the first two operands. The 64-bit selector
+ * is handed over as two i32 constants (sel and sel >> 32), fi is always
+ * true and bound_ctrl is forwarded as an i1 constant. exchange_rows
+ * selects "llvm.amdgcn.permlanex16" instead of "llvm.amdgcn.permlane16".
+ */
+static LLVMValueRef
+_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
+		     bool exchange_rows, bool bound_ctrl)
+{
+	const char *intr_name;
+	LLVMValueRef sel_lo = LLVMConstInt(ctx->i32, sel, false);
+	LLVMValueRef sel_hi = LLVMConstInt(ctx->i32, sel >> 32, false);
+	LLVMValueRef bc = ctx->i1false;
+
+	if (bound_ctrl)
+		bc = ctx->i1true;
+
+	if (exchange_rows)
+		intr_name = "llvm.amdgcn.permlanex16";
+	else
+		intr_name = "llvm.amdgcn.permlane16";
+
+	LLVMValueRef args[6] = {
+		src,
+		src,
+		sel_lo,
+		sel_hi,
+		ctx->i1true, /* fi */
+		bc,
+	};
+
+	return ac_build_intrinsic(ctx, intr_name, ctx->i32, args, 6,
+				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Apply permlane16 / permlanex16 to a value whose integer width is 32
+ * bits or a multiple of 32 bits.
+ *
+ * src is converted with ac_to_integer first. A 32-bit value is permuted
+ * directly; a wider one is viewed as a vector of i32, each element is
+ * permuted separately with the same sel / exchange_rows / bound_ctrl,
+ * and the elements are reassembled. The result is bitcast back to the
+ * type src had on entry.
+ */
+static LLVMValueRef
+ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
+		    bool exchange_rows, bool bound_ctrl)
+{
+	LLVMTypeRef orig_type = LLVMTypeOf(src);
+	LLVMValueRef result;
+
+	src = ac_to_integer(ctx, src);
+	const unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+
+	if (bits == 32) {
+		result = _ac_build_permlane16(ctx, src, sel, exchange_rows,
+					      bound_ctrl);
+		return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+	}
+
+	assert(bits % 32 == 0);
+
+	const unsigned num_dwords = bits / 32;
+	LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, num_dwords);
+	LLVMValueRef src_vec = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+
+	result = LLVMGetUndef(vec_type);
+	for (unsigned i = 0; i < num_dwords; i++) {
+		LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
+		LLVMValueRef dword =
+			LLVMBuildExtractElement(ctx->builder, src_vec, idx, "");
+		LLVMValueRef permuted =
+			_ac_build_permlane16(ctx, dword, sel, exchange_rows,
+					     bound_ctrl);
+
+		result = LLVMBuildInsertElement(ctx->builder, result,
+						permuted, idx, "");
+	}
+
+	return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
static inline unsigned
ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
*/
static LLVMValueRef
ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
- unsigned maxprefix)
+ unsigned maxprefix, bool inclusive)
{
LLVMValueRef result, tmp;
- result = src;
+
+ if (ctx->chip_class >= GFX10) {
+ result = inclusive ? src : identity;
+ } else {
+ if (inclusive)
+ result = src;
+ else
+ result = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
+ }
if (maxprefix <= 1)
return result;
tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
result = ac_build_alu_op(ctx, result, tmp, op);
if (maxprefix <= 16)
return result;
+
+ if (ctx->chip_class >= GFX10) {
+ /* dpp_row_bcast{15,31} are not supported on gfx10. */
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMValueRef cc;
+ /* TODO-GFX10: Can we get better code-gen by putting this into
+ * a branch so that LLVM generates EXEC mask manipulations? */
+ if (inclusive)
+ tmp = result;
+ else
+ tmp = ac_build_alu_op(ctx, result, src, op);
+ tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
+ tmp = ac_build_alu_op(ctx, result, tmp, op);
+ cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, false), "");
+ cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
+ result = LLVMBuildSelect(builder, cc, tmp, result, "");
+ if (maxprefix <= 32)
+ return result;
+
+ if (inclusive)
+ tmp = result;
+ else
+ tmp = ac_build_alu_op(ctx, result, src, op);
+ tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, false));
+ tmp = ac_build_alu_op(ctx, result, tmp, op);
+ cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
+ LLVMConstInt(ctx->i32, 32, false), "");
+ result = LLVMBuildSelect(builder, cc, tmp, result, "");
+ return result;
+ }
+
tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
result = ac_build_alu_op(ctx, result, tmp, op);
if (maxprefix <= 32)
get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
LLVMTypeOf(identity), "");
- result = ac_build_scan(ctx, op, result, identity, 64);
+ result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true);
return ac_build_wwm(ctx, result);
}
get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
LLVMTypeOf(identity), "");
- result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false);
- result = ac_build_scan(ctx, op, result, identity, 64);
+ result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false);
return ac_build_wwm(ctx, result);
}
result = ac_build_alu_op(ctx, result, swap, op);
if (cluster_size == 16) return ac_build_wwm(ctx, result);
- if (ctx->chip_class >= GFX8 && cluster_size != 32)
+ if (ctx->chip_class >= GFX10)
+ swap = ac_build_permlane16(ctx, result, 0, true, false);
+ else if (ctx->chip_class >= GFX8 && cluster_size != 32)
swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
else
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
if (cluster_size == 32) return ac_build_wwm(ctx, result);
if (ctx->chip_class >= GFX8) {
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
+ if (ctx->chip_class >= GFX10)
+ swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
+ else
+ swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
result = ac_build_alu_op(ctx, result, swap, op);
result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
return ac_build_wwm(ctx, result);
if (ws->maxwaves <= 1)
return;
- const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false);
+ const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tid = ac_get_thread_id(ctx);
LLVMValueRef tmp;
- tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, "");
+ tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, last_lane, "");
ac_build_ifcc(ctx, tmp, 1000);
LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
ac_build_endif(ctx, 1000);
ac_build_optimization_barrier(ctx, &tmp);
bbs[1] = LLVMGetInsertBlock(builder);
- phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves);
+ phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
}
ac_build_endif(ctx, 1001);
result = LLVMBuildNot(ctx->builder, result, "");
return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
}
+
+/**
+ * Emit a call to func with the given arguments and give the call
+ * instruction the calling convention reported for func.
+ *
+ * Returns the call instruction.
+ */
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+			   LLVMValueRef *args, unsigned num_args)
+{
+	const unsigned callconv = LLVMGetFunctionCallConv(func);
+	LLVMValueRef call = LLVMBuildCall(ctx->builder, func, args, num_args, "");
+
+	LLVMSetInstructionCallConv(call, callconv);
+	return call;
+}