ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+ ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
+ ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
+ ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
+ ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- if (t == ctx->f16 || t == ctx->i16)
+ if (t == ctx->i8)
+ return ctx->i8;
+ else if (t == ctx->f16 || t == ctx->i16)
return ctx->i16;
else if (t == ctx->f32 || t == ctx->i32)
return ctx->i32;
static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- if (t == ctx->i16 || t == ctx->f16)
+ if (t == ctx->i8)
+ return ctx->i8;
+ else if (t == ctx->i16 || t == ctx->f16)
return ctx->f16;
else if (t == ctx->i32 || t == ctx->f32)
return ctx->f32;
* If we do (num * (1 / den)), LLVM does:
* return num * v_rcp_f32(den);
*/
- LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : ctx->f32_1;
+ LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}
+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+ LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number,
+ LLVMValueRef params,
+ LLVMValueRef i,
+ LLVMValueRef j)
+{
+ LLVMValueRef args[6];
+ LLVMValueRef p1;
+
+ args[0] = i;
+ args[1] = llvm_chan;
+ args[2] = attr_number;
+ args[3] = ctx->i1false;
+ args[4] = params;
+
+ p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
+ ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+
+ args[0] = p1;
+ args[1] = j;
+ args[2] = llvm_chan;
+ args[3] = attr_number;
+ args[4] = ctx->i1false;
+ args[5] = params;
+
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
+ ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
+}
+
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
}
+static void
+ac_build_buffer_store_common(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef data,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ bool glc,
+ bool slc,
+ bool writeonly_memory,
+ bool use_format)
+{
+ LLVMValueRef args[] = {
+ data,
+ LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+ vindex ? vindex : ctx->i32_0,
+ voffset,
+ LLVMConstInt(ctx->i1, glc, 0),
+ LLVMConstInt(ctx->i1, slc, 0)
+ };
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+ const char *type_names[] = {"f32", "v2f32", "v4f32"};
+ char name[256];
+
+ if (use_format) {
+ snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.format.%s",
+ type_names[func]);
+ } else {
+ snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+ type_names[func]);
+ }
+
+ ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
+ ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef data,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned num_channels,
+ bool glc,
+ bool slc,
+ bool writeonly_memory,
+ bool use_format,
+ bool structurized)
+{
+ LLVMValueRef args[6];
+ int idx = 0;
+ args[idx++] = data;
+ args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+ if (structurized)
+ args[idx++] = vindex ? vindex : ctx->i32_0;
+ args[idx++] = voffset ? voffset : ctx->i32_0;
+ args[idx++] = soffset ? soffset : ctx->i32_0;
+ args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+ const char *type_names[] = {"f32", "v2f32", "v4f32"};
+ const char *indexing_kind = structurized ? "struct" : "raw";
+ char name[256];
+
+ if (use_format) {
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
+ indexing_kind, type_names[func]);
+ } else {
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
+ indexing_kind, type_names[func]);
+ }
+
+ ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+ ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef data,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ bool glc,
+ bool writeonly_memory)
+{
+ if (HAVE_LLVM >= 0x800) {
+ ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
+ voffset, NULL, num_channels,
+ glc, false, writeonly_memory,
+ true, true);
+ } else {
+ ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
+ num_channels, glc, false,
+ writeonly_memory, true);
+ }
+}
+
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
* The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
* or v4i32 (num_channels=3,4).
if (!swizzle_enable_hint) {
LLVMValueRef offset = soffset;
- static const char *types[] = {"f32", "v2f32", "v4f32"};
-
if (inst_offset)
offset = LLVMBuildAdd(ctx->builder, offset,
LLVMConstInt(ctx->i32, inst_offset, 0), "");
- if (voffset)
- offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
-
- LLVMValueRef args[] = {
- ac_to_float(ctx, vdata),
- LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
- ctx->i32_0,
- offset,
- LLVMConstInt(ctx->i1, glc, 0),
- LLVMConstInt(ctx->i1, slc, 0),
- };
- char name[256];
- snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
-
- ac_build_intrinsic(ctx, name, ctx->voidt,
- args, ARRAY_SIZE(args),
- writeonly_memory ?
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
- AC_FUNC_ATTR_WRITEONLY);
+ if (HAVE_LLVM >= 0x800) {
+ ac_build_llvm8_buffer_store_common(ctx, rsrc,
+ ac_to_float(ctx, vdata),
+ ctx->i32_0,
+ voffset, offset,
+ num_channels,
+ glc, slc,
+ writeonly_memory,
+ false, false);
+ } else {
+ if (voffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+
+ ac_build_buffer_store_common(ctx, rsrc,
+ ac_to_float(ctx, vdata),
+ ctx->i32_0, offset,
+ num_channels, glc, slc,
+ writeonly_memory, false);
+ }
return;
}
- static const unsigned dfmt[] = {
+ static const unsigned dfmts[] = {
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_DATA_FORMAT_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32_32
};
- static const char *types[] = {"i32", "v2i32", "v4i32"};
- LLVMValueRef args[] = {
- vdata,
- LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
- ctx->i32_0,
- voffset ? voffset : ctx->i32_0,
- soffset,
- LLVMConstInt(ctx->i32, inst_offset, 0),
- LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
- LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
- LLVMConstInt(ctx->i1, glc, 0),
- LLVMConstInt(ctx->i1, slc, 0),
- };
- char name[256];
- snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
+ unsigned dfmt = dfmts[num_channels - 1];
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+ LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
- ac_build_intrinsic(ctx, name, ctx->voidt,
- args, ARRAY_SIZE(args),
- writeonly_memory ?
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
- AC_FUNC_ATTR_WRITEONLY);
+ ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
+ immoffset, num_channels, dfmt, nfmt, glc,
+ slc, writeonly_memory);
}
static LLVMValueRef
return ac_build_gather_values(ctx, result, num_channels);
}
+ if (HAVE_LLVM >= 0x0800) {
+ return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex,
+ offset, ctx->i32_0,
+ num_channels, glc, slc,
+ can_speculate, false,
+ false);
+ }
+
return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
num_channels, glc, slc,
can_speculate, false);
can_speculate, true);
}
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+static LLVMValueRef
+ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- LLVMValueRef glc)
+ LLVMValueRef soffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool can_speculate,
+ bool structurized)
{
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
- LLVMValueRef res;
+ LLVMValueRef args[6];
+ int idx = 0;
+ args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+ if (structurized)
+ args[idx++] = vindex ? vindex : ctx->i32_0;
+ args[idx++] = voffset ? voffset : ctx->i32_0;
+ args[idx++] = soffset ? soffset : ctx->i32_0;
+ args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
+ args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
- if (HAVE_LLVM >= 0x0800) {
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+ LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
+ const char *type_names[] = {"i32", "v2i32", "v4i32"};
+ const char *indexing_kind = structurized ? "struct" : "raw";
+ char name[256];
- res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
- soffset, 1, dfmt, nfmt, glc,
- false, true, true);
- } else {
- const char *name = "llvm.amdgcn.tbuffer.load.i32";
- LLVMTypeRef type = ctx->i32;
- LLVMValueRef params[] = {
- rsrc,
- vindex,
- voffset,
- soffset,
- immoffset,
- LLVMConstInt(ctx->i32, dfmt, false),
- LLVMConstInt(ctx->i32, nfmt, false),
- glc,
- ctx->i1false,
- };
- res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
- }
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
+ indexing_kind, type_names[func]);
- return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
+ return ac_build_intrinsic(ctx, name, types[func], args,
+ idx,
+ ac_get_load_intr_attribs(can_speculate));
}
-LLVMValueRef
-ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
+static LLVMValueRef
+ac_build_tbuffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
+ LLVMValueRef immoffset,
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
bool glc,
bool slc,
bool can_speculate,
- bool structurized)
+ bool structurized) /* only matters for LLVM 8+ */
{
- LLVMValueRef args[6];
+ if (HAVE_LLVM >= 0x800) {
+ voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+ return ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
+ soffset, num_channels,
+ dfmt, nfmt, glc, slc,
+ can_speculate, structurized);
+ }
+
+ LLVMValueRef args[] = {
+ rsrc,
+ vindex ? vindex : ctx->i32_0,
+ voffset,
+ soffset,
+ immoffset,
+ LLVMConstInt(ctx->i32, dfmt, false),
+ LLVMConstInt(ctx->i32, nfmt, false),
+ LLVMConstInt(ctx->i1, glc, false),
+ LLVMConstInt(ctx->i1, slc, false),
+ };
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
+ LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
+ const char *type_names[] = {"i32", "v2i32", "v4i32"};
+ char name[256];
+
+ snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.load.%s",
+ type_names[func]);
+
+ return ac_build_intrinsic(ctx, name, types[func], args, 9,
+ ac_get_load_intr_attribs(can_speculate));
+}
+
+LLVMValueRef
+ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool can_speculate)
+{
+ return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
+ immoffset, num_channels, dfmt, nfmt, glc,
+ slc, can_speculate, true);
+}
+
+LLVMValueRef
+ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool can_speculate)
+{
+ return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset,
+ immoffset, num_channels, dfmt, nfmt, glc,
+ slc, can_speculate, false);
+}
+
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ bool glc)
+{
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+ LLVMValueRef res;
+
+ res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
+ immoffset, 1, dfmt, nfmt, glc, false,
+ false);
+
+ return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
+}
+
+LLVMValueRef
+ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ bool glc)
+{
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+ LLVMValueRef res;
+
+ res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
+ immoffset, 1, dfmt, nfmt, glc, false,
+ false);
+
+ return LLVMBuildTrunc(ctx->builder, res, ctx->i8, "");
+}
+static void
+ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory,
+ bool structurized)
+{
+ LLVMValueRef args[7];
int idx = 0;
+ args[idx++] = vdata;
args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
if (structurized)
args[idx++] = vindex ? vindex : ctx->i32_0;
args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
unsigned func = CLAMP(num_channels, 1, 3) - 1;
- LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
const char *type_names[] = {"i32", "v2i32", "v4i32"};
const char *indexing_kind = structurized ? "struct" : "raw";
char name[256];
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
+ snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
indexing_kind, type_names[func]);
- return ac_build_intrinsic(ctx, name, types[func], args,
- idx,
- ac_get_load_intr_attribs(can_speculate));
+ ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+ ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory,
+ bool structurized) /* only matters for LLVM 8+ */
+{
+ if (HAVE_LLVM >= 0x800) {
+ voffset = LLVMBuildAdd(ctx->builder,
+ voffset ? voffset : ctx->i32_0,
+ immoffset, "");
+
+ ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
+ soffset, num_channels, dfmt, nfmt,
+ glc, slc, writeonly_memory,
+ structurized);
+ } else {
+ LLVMValueRef params[] = {
+ vdata,
+ rsrc,
+ vindex ? vindex : ctx->i32_0,
+ voffset ? voffset : ctx->i32_0,
+ soffset ? soffset : ctx->i32_0,
+ immoffset,
+ LLVMConstInt(ctx->i32, dfmt, false),
+ LLVMConstInt(ctx->i32, nfmt, false),
+ LLVMConstInt(ctx->i1, glc, false),
+ LLVMConstInt(ctx->i1, slc, false),
+ };
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
+ const char *type_names[] = {"i32", "v2i32", "v4i32"};
+ char name[256];
+
+ snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
+ type_names[func]);
+
+ ac_build_intrinsic(ctx, name, ctx->voidt, params, 10,
+ ac_get_store_intr_attribs(writeonly_memory));
+ }
+}
+
+void
+ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory)
+{
+ ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
+ immoffset, num_channels, dfmt, nfmt, glc, slc,
+ writeonly_memory, true);
+}
+
+void
+ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory)
+{
+ ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset,
+ immoffset, num_channels, dfmt, nfmt, glc, slc,
+ writeonly_memory, false);
+}
+
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ bool glc,
+ bool writeonly_memory)
+{
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+ vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
+ vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
+
+ ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
+ ctx->i32_0, 1, dfmt, nfmt, glc, false,
+ writeonly_memory);
}
+void
+ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ bool glc,
+ bool writeonly_memory)
+{
+ unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
+ unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+ vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
+ vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
+
+ ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
+ ctx->i32_0, 1, dfmt, nfmt, glc, false,
+ writeonly_memory);
+}
/**
* Set range metadata on an instruction. This can only be used on load and
* call instructions. If you know an instruction can only produce the values
LLVMValueRef val)
{
unsigned tl_lanes[4], trbl_lanes[4];
+ char name[32], type[8];
LLVMValueRef tl, trbl;
+ LLVMTypeRef result_type;
LLVMValueRef result;
+ result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
+
+ if (result_type == ctx->f16)
+ val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+
for (unsigned i = 0; i < 4; ++i) {
tl_lanes[i] = i & mask;
trbl_lanes[i] = (i & mask) + idx;
trbl_lanes[0], trbl_lanes[1],
trbl_lanes[2], trbl_lanes[3]);
- tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
- trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
+ if (result_type == ctx->f16) {
+ tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
+ trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
+ }
+
+ tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
+ trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
- result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32,
- &result, 1, 0);
+ ac_build_type_name_for_intr(result_type, type, sizeof(type));
+ snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
- return result;
+ return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
}
void
highest_bit = LLVMConstInt(ctx->i16, 15, false);
zero = ctx->i16_0;
break;
+ case 8:
+ intrin_name = "llvm.ctlz.i8";
+ type = ctx->i8;
+ highest_bit = LLVMConstInt(ctx->i8, 7, false);
+ zero = ctx->i8_0;
+ break;
default:
unreachable(!"invalid bitsize");
break;
/* The HW returns the last bit index from MSB, but TGSI/NIR wants
* the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
- msb = LLVMBuildTruncOrBitCast(ctx->builder, msb, ctx->i32, "");
+
+ if (bitsize == 64) {
+ msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
+ } else if (bitsize < 32) {
+ msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
+ }
/* check for zero */
return LLVMBuildSelect(ctx->builder,
return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
}
+LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
LLVMTypeRef t = LLVMTypeOf(value);
ctx->voidt, args, 1, 0);
}
+LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1, LLVMValueRef src2,
+ unsigned bitsize)
+{
+ LLVMTypeRef type;
+ char *intr;
+
+ if (bitsize == 16) {
+ intr = "llvm.amdgcn.fmed3.f16";
+ type = ctx->f16;
+ } else if (bitsize == 32) {
+ intr = "llvm.amdgcn.fmed3.f32";
+ type = ctx->f32;
+ } else {
+ intr = "llvm.amdgcn.fmed3.f64";
+ type = ctx->f64;
+ }
+
+ LLVMValueRef params[] = {
+ src0,
+ src1,
+ src2,
+ };
+ return ac_build_intrinsic(ctx, intr, type, params, 3,
+ AC_FUNC_ATTR_READNONE);
+}
+
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
{
LLVMTypeRef type;
char *intr;
- if (bitsize == 32) {
- intr = "llvm.floor.f32";
+ if (bitsize == 16) {
+ intr = "llvm.amdgcn.fract.f16";
+ type = ctx->f16;
+ } else if (bitsize == 32) {
+ intr = "llvm.amdgcn.fract.f32";
type = ctx->f32;
} else {
- intr = "llvm.floor.f64";
+ intr = "llvm.amdgcn.fract.f64";
type = ctx->f64;
}
LLVMValueRef params[] = {
src0,
};
- LLVMValueRef floor = ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildFSub(ctx->builder, src0, floor, "");
+ return ac_build_intrinsic(ctx, intr, type, params, 1,
+ AC_FUNC_ATTR_READNONE);
}
LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef cmp, val, zero, one;
LLVMTypeRef type;
- if (bitsize == 32) {
+ if (bitsize == 16) {
+ type = ctx->f16;
+ zero = ctx->f16_0;
+ one = ctx->f16_1;
+ } else if (bitsize == 32) {
type = ctx->f32;
zero = ctx->f32_0;
one = ctx->f32_1;
result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
(LLVMValueRef []) { src0 }, 1,
AC_FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+ break;
+ case 8:
+ result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8,
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
break;
default:
unreachable(!"invalid bitsize");
bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
switch (bitsize) {
+ case 64:
+ result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64,
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);
+
+ result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+ break;
case 32:
result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32,
(LLVMValueRef []) { src0 }, 1,
result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16,
(LLVMValueRef []) { src0 }, 1,
AC_FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+ break;
+ case 8:
+ result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8,
+ (LLVMValueRef []) { src0 }, 1,
+ AC_FUNC_ATTR_READNONE);
+
+ result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
break;
default:
unreachable(!"invalid bitsize");
type = ctx->i16;
zero = ctx->i16_0;
break;
+ case 8:
+ intrin_name = "llvm.cttz.i8";
+ type = ctx->i8;
+ zero = ctx->i8_0;
+ break;
default:
unreachable(!"invalid bitsize");
}
if (src0_bitsize == 64) {
lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
+ } else if (src0_bitsize < 32) {
+ lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
}
/* TODO: We need an intrinsic to skip this conditional. */
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
}
+
+LLVMValueRef
+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize)
+{
+ LLVMTypeRef type;
+ char *intr;
+
+ if (bitsize == 16) {
+ intr = "llvm.amdgcn.frexp.exp.i16.f16";
+ type = ctx->i16;
+ } else if (bitsize == 32) {
+ intr = "llvm.amdgcn.frexp.exp.i32.f32";
+ type = ctx->i32;
+ } else {
+ intr = "llvm.amdgcn.frexp.exp.i32.f64";
+ type = ctx->i32;
+ }
+
+ LLVMValueRef params[] = {
+ src0,
+ };
+ return ac_build_intrinsic(ctx, intr, type, params, 1,
+ AC_FUNC_ATTR_READNONE);
+}
+LLVMValueRef
+ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize)
+{
+ LLVMTypeRef type;
+ char *intr;
+
+ if (bitsize == 16) {
+ intr = "llvm.amdgcn.frexp.mant.f16";
+ type = ctx->f16;
+ } else if (bitsize == 32) {
+ intr = "llvm.amdgcn.frexp.mant.f32";
+ type = ctx->f32;
+ } else {
+ intr = "llvm.amdgcn.frexp.mant.f64";
+ type = ctx->f64;
+ }
+
+ LLVMValueRef params[] = {
+ src0,
+ };
+ return ac_build_intrinsic(ctx, intr, type, params, 1,
+ AC_FUNC_ATTR_READNONE);
+}
+
+/*
+ * this takes an I,J coordinate pair,
+ * and works out the X and Y derivatives.
+ * it returns DDX(I), DDX(J), DDY(I), DDY(J).
+ */
+LLVMValueRef
+ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
+{
+ LLVMValueRef result[4], a;
+ unsigned i;
+
+ for (i = 0; i < 2; i++) {
+ a = LLVMBuildExtractElement(ctx->builder, interp_ij,
+ LLVMConstInt(ctx->i32, i, false), "");
+ result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a);
+ result[2+i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a);
+ }
+ return ac_build_gather_values(ctx, result, 4);
+}
+
+LLVMValueRef
+ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
+{
+ LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
+ ctx->i1, NULL, 0,
+ AC_FUNC_ATTR_READNONE);
+ result = LLVMBuildNot(ctx->builder, result, "");
+ return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
+}