#include <stdio.h>
#include "ac_llvm_util.h"
-
+#include "ac_exp_param.h"
#include "util/bitscan.h"
#include "util/macros.h"
+#include "util/u_atomic.h"
#include "sid.h"
+#include "shader_enums.h"
+
/* Initialize module-independent parts of the context.
*
* The caller is responsible for initializing ctx::module and ctx::builder.
*/
void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ enum chip_class chip_class)
{
LLVMValueRef args[1];
+ ctx->chip_class = chip_class;
+
ctx->context = context;
ctx->module = NULL;
ctx->builder = NULL;
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
ctx->i1 = LLVMInt1TypeInContext(ctx->context);
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
+ ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
+ ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+ ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+ ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
- ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
+ ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+ ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
+ ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
+ ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
+ ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"range", 5);
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
+unsigned
+ac_get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8;
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMDoubleTypeKind:
+ case LLVMPointerTypeKind:
+ return 8;
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) *
+ ac_get_type_size(LLVMGetElementType(type));
+ case LLVMArrayTypeKind:
+ return LLVMGetArrayLength(type) *
+ ac_get_type_size(LLVMGetElementType(type));
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->f16 || t == ctx->i16)
+ return ctx->i16;
+ else if (t == ctx->f32 || t == ctx->i32)
+ return ctx->i32;
+ else if (t == ctx->f64 || t == ctx->i64)
+ return ctx->i64;
+ else
+ unreachable("Unhandled integer size");
+}
+
+LLVMTypeRef
+ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
+ LLVMGetVectorSize(t));
+ }
+ return to_integer_type_scalar(ctx, t);
+}
+
+LLVMValueRef
+ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
+}
+
+static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->i16 || t == ctx->f16)
+ return ctx->f16;
+ else if (t == ctx->i32 || t == ctx->f32)
+ return ctx->f32;
+ else if (t == ctx->i64 || t == ctx->f64)
+ return ctx->f64;
+ else
+ unreachable("Unhandled float size");
+}
+
+LLVMTypeRef
+ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
+ LLVMGetVectorSize(t));
+ }
+ return to_float_type_scalar(ctx, t);
+}
+
+LLVMValueRef
+ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
+}
+
+
LLVMValueRef
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
LLVMTypeRef return_type, LLVMValueRef *params,
return call;
}
-static LLVMValueRef bitcast_to_float(struct ac_llvm_context *ctx,
- LLVMValueRef value)
-{
- LLVMTypeRef type = LLVMTypeOf(value);
- LLVMTypeRef new_type;
-
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
- new_type = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
- else
- new_type = ctx->f32;
-
- return LLVMBuildBitCast(ctx->builder, value, new_type, "");
-}
-
/**
* Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
* intrinsic names).
}
}
+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+ unsigned count_incoming, LLVMValueRef *values,
+ LLVMBasicBlockRef *blocks)
+{
+ LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+ LLVMAddIncoming(phi, values, blocks, count_incoming);
+ return phi;
+}
+
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program by emitting empty inline assembly that is marked as
+ * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
+ */
+void
+ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr)
+{
+ static int counter = 0;
+
+ LLVMBuilderRef builder = ctx->builder;
+ char code[16];
+
+ snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+ if (!pvgpr) {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+ LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+ } else {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+ LLVMValueRef vgpr = *pvgpr;
+ LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+ unsigned vgpr_size = ac_get_type_size(vgpr_type);
+ LLVMValueRef vgpr0;
+
+ assert(vgpr_size % 4 == 0);
+
+ vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+ vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+ vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+ vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+ vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+ *pvgpr = vgpr;
+ }
+}
+
+LLVMValueRef
+ac_build_ballot(struct ac_llvm_context *ctx,
+ LLVMValueRef value)
+{
+ LLVMValueRef args[3] = {
+ value,
+ ctx->i32_0,
+ LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+ };
+
+ /* We currently have no other way to prevent LLVM from lifting the icmp
+ * calls to a dominating basic block.
+ */
+ ac_build_optimization_barrier(ctx, &args[0]);
+
+ if (LLVMTypeOf(args[0]) != ctx->i32)
+ args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
+
+ return ac_build_intrinsic(ctx,
+ "llvm.amdgcn.icmp.i32",
+ ctx->i64, args, 3,
+ AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+}
+
+LLVMValueRef
+ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
+}
+
+LLVMValueRef
+ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+}
+
+LLVMValueRef
+ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+
+ LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ vote_set, active_set, "");
+ LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+ return LLVMBuildOr(ctx->builder, all, none, "");
+}
+
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
- bool load)
+ bool load,
+ bool always_vector)
{
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef vec = NULL;
unsigned i;
- if (value_count == 1) {
+ if (value_count == 1 && !always_vector) {
if (load)
return LLVMBuildLoad(builder, values[0], "");
return values[0];
LLVMValueRef *values,
unsigned value_count)
{
- return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
+ return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
}
LLVMValueRef
LLVMValueRef in[3],
struct cube_selection_coords *out)
{
- LLVMBuilderRef builder = ctx->builder;
-
- if (HAVE_LLVM >= 0x0309) {
- LLVMTypeRef f32 = ctx->f32;
-
- out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- } else {
- LLVMValueRef c[4] = {
- in[0],
- in[1],
- in[2],
- LLVMGetUndef(LLVMTypeOf(in[0]))
- };
- LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
-
- LLVMValueRef tmp =
- ac_build_intrinsic(ctx, "llvm.AMDGPU.cube",
- LLVMTypeOf(vec), &vec, 1,
- AC_FUNC_ATTR_READNONE);
-
- out->stc[1] = LLVMBuildExtractElement(builder, tmp,
- LLVMConstInt(ctx->i32, 0, 0), "");
- out->stc[0] = LLVMBuildExtractElement(builder, tmp,
- LLVMConstInt(ctx->i32, 1, 0), "");
- out->ma = LLVMBuildExtractElement(builder, tmp,
- LLVMConstInt(ctx->i32, 2, 0), "");
- out->id = LLVMBuildExtractElement(builder, tmp,
- LLVMConstInt(ctx->i32, 3, 0), "");
- }
+ LLVMTypeRef f32 = ctx->f32;
+
+ out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
+ f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
+ f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
+ f32, in, 3, AC_FUNC_ATTR_READNONE);
+ out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
+ f32, in, 3, AC_FUNC_ATTR_READNONE);
}
/**
* selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
* the selcoords major axis.
*/
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
const struct cube_selection_coords *selcoords,
const LLVMValueRef *coords,
LLVMValueRef *out_st,
LLVMValueRef *out_ma)
{
+ LLVMBuilderRef builder = ctx->builder;
LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
LLVMValueRef is_ma_positive;
LLVMValueRef sgn_ma;
is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
/* Select sc */
- tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+ tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
- LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+ LLVMBuildSelect(builder, is_ma_z, sgn_ma,
LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
/* Select tc */
tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+ sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
LLVMConstReal(f32, -1.0), "");
out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
/* Select ma */
tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- sgn = LLVMBuildSelect(builder, is_ma_positive,
- LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
- *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+ tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+ ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+ *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
}
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
- bool is_deriv, bool is_array,
+ bool is_deriv, bool is_array, bool is_lod,
LLVMValueRef *coords_arg,
LLVMValueRef *derivs_arg)
{
LLVMValueRef coords[3];
LLVMValueRef invma;
+ if (is_array && !is_lod) {
+ LLVMValueRef tmp = coords_arg[3];
+ tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+
+ /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+ *
+ * "For Array forms, the array layer used will be
+ *
+ * max(0, min(d−1, floor(layer+0.5)))
+ *
+ * where d is the depth of the texture array and layer
+ * comes from the component indicated in the tables below.
+ * Workaroudn for an issue where the layer is taken from a
+ * helper invocation which happens to fall on a different
+ * layer due to extrapolation."
+ *
+ * VI and earlier attempt to implement this in hardware by
+ * clamping the value of coords[2] = (8 * layer) + face.
+ * Unfortunately, this means that the we end up with the wrong
+ * face when clamping occurs.
+ *
+ * Clamp the layer earlier to work around the issue.
+ */
+ if (ctx->chip_class <= VI) {
+ LLVMValueRef ge0;
+ ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
+ tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
+ }
+
+ coords_arg[3] = tmp;
+ }
+
build_cube_intrinsic(ctx, coords_arg, &selcoords);
invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
* seems awfully quiet about how textureGrad for cube
* maps should be handled.
*/
- build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+ build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
deriv_st, &deriv_ma);
deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
bool has_add_tid)
{
/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
- if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
+ if (!has_add_tid) {
/* Split 3 channel stores, becase LLVM doesn't support 3-channel
* intrinsics. */
if (num_channels == 3) {
offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
LLVMValueRef args[] = {
- bitcast_to_float(ctx, vdata),
+ ac_to_float(ctx, vdata),
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
LLVMConstInt(ctx->i32, 0, 0),
offset,
unsigned inst_offset,
unsigned glc,
unsigned slc,
- bool readonly_memory)
+ bool can_speculate,
+ bool allow_smem)
{
- unsigned func = CLAMP(num_channels, 1, 3) - 1;
-
- if (HAVE_LLVM >= 0x309) {
- LLVMValueRef args[] = {
- LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
- vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
- LLVMConstInt(ctx->i32, inst_offset, 0),
- LLVMConstInt(ctx->i1, glc, 0),
- LLVMConstInt(ctx->i1, slc, 0)
- };
-
- LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
- ctx->v4f32};
- const char *type_names[] = {"f32", "v2f32", "v4f32"};
- char name[256];
-
- if (voffset) {
- args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
- "");
- }
-
- if (soffset) {
- args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
- "");
+ LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+ if (voffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+ if (soffset)
+ offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+ /* TODO: VI and later generations can use SMEM with GLC=1.*/
+ if (allow_smem && !glc && !slc) {
+ assert(vindex == NULL);
+
+ LLVMValueRef result[4];
+
+ for (int i = 0; i < num_channels; i++) {
+ if (i) {
+ offset = LLVMBuildAdd(ctx->builder, offset,
+ LLVMConstInt(ctx->i32, 4, 0), "");
+ }
+ LLVMValueRef args[2] = {rsrc, offset};
+ result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
+ ctx->f32, args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_LEGACY);
}
+ if (num_channels == 1)
+ return result[0];
- snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
- type_names[func]);
-
- return ac_build_intrinsic(ctx, name, types[func], args,
- ARRAY_SIZE(args),
- /* READNONE means writes can't
- * affect it, while READONLY means
- * that writes can affect it. */
- readonly_memory && HAVE_LLVM >= 0x0400 ?
- AC_FUNC_ATTR_READNONE :
- AC_FUNC_ATTR_READONLY);
- } else {
- LLVMValueRef args[] = {
- LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
- voffset ? voffset : vindex,
- soffset,
- LLVMConstInt(ctx->i32, inst_offset, 0),
- LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
- LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
- LLVMConstInt(ctx->i32, glc, 0),
- LLVMConstInt(ctx->i32, slc, 0),
- LLVMConstInt(ctx->i32, 0, 0), // TFE
- };
+ if (num_channels == 3)
+ result[num_channels++] = LLVMGetUndef(ctx->f32);
+ return ac_build_gather_values(ctx, result, num_channels);
+ }
- LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
- ctx->v4i32};
- const char *type_names[] = {"i32", "v2i32", "v4i32"};
- const char *arg_type = "i32";
- char name[256];
+ unsigned func = CLAMP(num_channels, 1, 3) - 1;
- if (voffset && vindex) {
- LLVMValueRef vaddr[] = {vindex, voffset};
+ LLVMValueRef args[] = {
+ LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+ vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
+ offset,
+ LLVMConstInt(ctx->i1, glc, 0),
+ LLVMConstInt(ctx->i1, slc, 0)
+ };
- arg_type = "v2i32";
- args[1] = ac_build_gather_values(ctx, vaddr, 2);
- }
+ LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
+ ctx->v4f32};
+ const char *type_names[] = {"f32", "v2f32", "v4f32"};
+ char name[256];
- snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
- type_names[func], arg_type);
+ snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
+ type_names[func]);
- return ac_build_intrinsic(ctx, name, types[func], args,
- ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
- }
+ return ac_build_intrinsic(ctx, name, types[func], args,
+ ARRAY_SIZE(args),
+ /* READNONE means writes can't affect it, while
+ * READONLY means that writes can affect it. */
+ can_speculate && HAVE_LLVM >= 0x0400 ?
+ AC_FUNC_ATTR_READNONE :
+ AC_FUNC_ATTR_READONLY);
}
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
- bool readonly_memory)
+ bool can_speculate)
{
- if (HAVE_LLVM >= 0x0309) {
- LLVMValueRef args [] = {
- LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
- vindex,
- voffset,
- LLVMConstInt(ctx->i1, 0, 0), /* glc */
- LLVMConstInt(ctx->i1, 0, 0), /* slc */
- };
-
- return ac_build_intrinsic(ctx,
- "llvm.amdgcn.buffer.load.format.v4f32",
- ctx->v4f32, args, ARRAY_SIZE(args),
- /* READNONE means writes can't
- * affect it, while READONLY means
- * that writes can affect it. */
- readonly_memory && HAVE_LLVM >= 0x0400 ?
- AC_FUNC_ATTR_READNONE :
- AC_FUNC_ATTR_READONLY);
- }
-
- LLVMValueRef args[] = {
- rsrc,
- voffset,
+ LLVMValueRef args [] = {
+ LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex,
+ voffset,
+ LLVMConstInt(ctx->i1, 0, 0), /* glc */
+ LLVMConstInt(ctx->i1, 0, 0), /* slc */
};
- return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
- ctx->v4f32, args, 3,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_LEGACY);
+
+ return ac_build_intrinsic(ctx,
+ "llvm.amdgcn.buffer.load.format.v4f32",
+ ctx->v4f32, args, ARRAY_SIZE(args),
+ /* READNONE means writes can't affect it, while
+ * READONLY means that writes can affect it. */
+ can_speculate && HAVE_LLVM >= 0x0400 ?
+ AC_FUNC_ATTR_READNONE :
+ AC_FUNC_ATTR_READONLY);
}
/**
*/
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
- bool has_ds_bpermute,
uint32_t mask,
int idx,
- LLVMValueRef lds,
LLVMValueRef val)
{
- LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+ LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
- thread_id = ac_get_thread_id(ctx);
+ if (ctx->chip_class >= VI) {
+ LLVMValueRef thread_id, tl_tid, trbl_tid;
+ thread_id = ac_get_thread_id(ctx);
- tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
- LLVMConstInt(ctx->i32, mask, false), "");
+ tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+ LLVMConstInt(ctx->i32, mask, false), "");
- trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
- LLVMConstInt(ctx->i32, idx, false), "");
+ trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+ LLVMConstInt(ctx->i32, idx, false), "");
- if (has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, 4, false), "");
args[1] = val;
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
} else {
- LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+ uint32_t masks[2] = {};
- store_ptr = ac_build_gep0(ctx, lds, thread_id);
- load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
- load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+ switch (mask) {
+ case AC_TID_MASK_TOP_LEFT:
+ masks[0] = 0x8000;
+ if (idx == 1)
+ masks[1] = 0x8055;
+ else
+ masks[1] = 0x80aa;
- LLVMBuildStore(ctx->builder, val, store_ptr);
- tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
- trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+ break;
+ case AC_TID_MASK_TOP:
+ masks[0] = 0x8044;
+ masks[1] = 0x80ee;
+ break;
+ case AC_TID_MASK_LEFT:
+ masks[0] = 0x80a0;
+ masks[1] = 0x80f5;
+ break;
+ default:
+ assert(0);
+ }
+
+ args[0] = val;
+ args[1] = LLVMConstInt(ctx->i32, masks[0], false);
+
+ tl = ac_build_intrinsic(ctx,
+ "llvm.amdgcn.ds.swizzle", ctx->i32,
+ args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+
+ args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+ trbl = ac_build_intrinsic(ctx,
+ "llvm.amdgcn.ds.swizzle", ctx->i32,
+ args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
}
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
LLVMConstInt(ctx->i32, -1, true), msb, "");
}
+LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
if (HAVE_LLVM >= 0x0500) {
a->opcode == ac_image_get_lod;
if (sample)
- args[num_args++] = bitcast_to_float(ctx, a->addr);
+ args[num_args++] = ac_to_float(ctx, a->addr);
else
args[num_args++] = a->addr;
case ac_image_get_resinfo:
name = "llvm.amdgcn.image.getresinfo";
break;
+ default:
+ unreachable("invalid image opcode");
}
ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
data_type_name, coords_type_name, rsrc_type_name);
}
}
+
+#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
+#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
+
+enum ac_ir_type {
+ AC_IR_UNDEF,
+ AC_IR_CONST,
+ AC_IR_VALUE,
+};
+
+struct ac_vs_exp_chan
+{
+ LLVMValueRef value;
+ float const_float;
+ enum ac_ir_type type;
+};
+
+struct ac_vs_exp_inst {
+ unsigned offset;
+ LLVMValueRef inst;
+ struct ac_vs_exp_chan chan[4];
+};
+
+struct ac_vs_exports {
+ unsigned num;
+ struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
+};
+
+/* Return true if the PARAM export has been eliminated. */
+static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
+ uint32_t num_outputs,
+ struct ac_vs_exp_inst *exp)
+{
+ unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+ bool is_zero[4] = {}, is_one[4] = {};
+
+ for (i = 0; i < 4; i++) {
+ /* It's a constant expression. Undef outputs are eliminated too. */
+ if (exp->chan[i].type == AC_IR_UNDEF) {
+ is_zero[i] = true;
+ is_one[i] = true;
+ } else if (exp->chan[i].type == AC_IR_CONST) {
+ if (exp->chan[i].const_float == 0)
+ is_zero[i] = true;
+ else if (exp->chan[i].const_float == 1)
+ is_one[i] = true;
+ else
+ return false; /* other constant */
+ } else
+ return false;
+ }
+
+ /* Only certain combinations of 0 and 1 can be eliminated. */
+ if (is_zero[0] && is_zero[1] && is_zero[2])
+ default_val = is_zero[3] ? 0 : 1;
+ else if (is_one[0] && is_one[1] && is_one[2])
+ default_val = is_zero[3] ? 2 : 3;
+ else
+ return false;
+
+ /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+ LLVMInstructionEraseFromParent(exp->inst);
+
+ /* Change OFFSET to DEFAULT_VAL. */
+ for (i = 0; i < num_outputs; i++) {
+ if (vs_output_param_offset[i] == exp->offset) {
+ vs_output_param_offset[i] =
+ AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+ break;
+ }
+ }
+ return true;
+}
+
+static bool ac_eliminate_duplicated_output(uint8_t *vs_output_param_offset,
+ uint32_t num_outputs,
+ struct ac_vs_exports *processed,
+ struct ac_vs_exp_inst *exp)
+{
+ unsigned p, copy_back_channels = 0;
+
+ /* See if the output is already in the list of processed outputs.
+ * The LLVMValueRef comparison relies on SSA.
+ */
+ for (p = 0; p < processed->num; p++) {
+ bool different = false;
+
+ for (unsigned j = 0; j < 4; j++) {
+ struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
+ struct ac_vs_exp_chan *c2 = &exp->chan[j];
+
+ /* Treat undef as a match. */
+ if (c2->type == AC_IR_UNDEF)
+ continue;
+
+ /* If c1 is undef but c2 isn't, we can copy c2 to c1
+ * and consider the instruction duplicated.
+ */
+ if (c1->type == AC_IR_UNDEF) {
+ copy_back_channels |= 1 << j;
+ continue;
+ }
+
+ /* Test whether the channels are not equal. */
+ if (c1->type != c2->type ||
+ (c1->type == AC_IR_CONST &&
+ c1->const_float != c2->const_float) ||
+ (c1->type == AC_IR_VALUE &&
+ c1->value != c2->value)) {
+ different = true;
+ break;
+ }
+ }
+ if (!different)
+ break;
+
+ copy_back_channels = 0;
+ }
+ if (p == processed->num)
+ return false;
+
+ /* If a match was found, but the matching export has undef where the new
+ * one has a normal value, copy the normal value to the undef channel.
+ */
+ struct ac_vs_exp_inst *match = &processed->exp[p];
+
+ while (copy_back_channels) {
+ unsigned chan = u_bit_scan(©_back_channels);
+
+ assert(match->chan[chan].type == AC_IR_UNDEF);
+ LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
+ exp->chan[chan].value);
+ match->chan[chan] = exp->chan[chan];
+ }
+
+ /* The PARAM export is duplicated. Kill it. */
+ LLVMInstructionEraseFromParent(exp->inst);
+
+ /* Change OFFSET to the matching export. */
+ for (unsigned i = 0; i < num_outputs; i++) {
+ if (vs_output_param_offset[i] == exp->offset) {
+ vs_output_param_offset[i] = match->offset;
+ break;
+ }
+ }
+ return true;
+}
+
+void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
+ LLVMValueRef main_fn,
+ uint8_t *vs_output_param_offset,
+ uint32_t num_outputs,
+ uint8_t *num_param_exports)
+{
+ LLVMBasicBlockRef bb;
+ bool removed_any = false;
+ struct ac_vs_exports exports;
+
+ exports.num = 0;
+
+ /* Process all LLVM instructions. */
+ bb = LLVMGetFirstBasicBlock(main_fn);
+ while (bb) {
+ LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+ while (inst) {
+ LLVMValueRef cur = inst;
+ inst = LLVMGetNextInstruction(inst);
+ struct ac_vs_exp_inst exp;
+
+ if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+ continue;
+
+ LLVMValueRef callee = ac_llvm_get_called_value(cur);
+
+ if (!ac_llvm_is_function(callee))
+ continue;
+
+ const char *name = LLVMGetValueName(callee);
+ unsigned num_args = LLVMCountParams(callee);
+
+ /* Check if this is an export instruction. */
+ if ((num_args != 9 && num_args != 8) ||
+ (strcmp(name, "llvm.SI.export") &&
+ strcmp(name, "llvm.amdgcn.exp.f32")))
+ continue;
+
+ LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
+ unsigned target = LLVMConstIntGetZExtValue(arg);
+
+ if (target < V_008DFC_SQ_EXP_PARAM)
+ continue;
+
+ target -= V_008DFC_SQ_EXP_PARAM;
+
+ /* Parse the instruction. */
+ memset(&exp, 0, sizeof(exp));
+ exp.offset = target;
+ exp.inst = cur;
+
+ for (unsigned i = 0; i < 4; i++) {
+ LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
+
+ exp.chan[i].value = v;
+
+ if (LLVMIsUndef(v)) {
+ exp.chan[i].type = AC_IR_UNDEF;
+ } else if (LLVMIsAConstantFP(v)) {
+ LLVMBool loses_info;
+ exp.chan[i].type = AC_IR_CONST;
+ exp.chan[i].const_float =
+ LLVMConstRealGetDouble(v, &loses_info);
+ } else {
+ exp.chan[i].type = AC_IR_VALUE;
+ }
+ }
+
+ /* Eliminate constant and duplicated PARAM exports. */
+ if (ac_eliminate_const_output(vs_output_param_offset,
+ num_outputs, &exp) ||
+ ac_eliminate_duplicated_output(vs_output_param_offset,
+ num_outputs, &exports,
+ &exp)) {
+ removed_any = true;
+ } else {
+ exports.exp[exports.num++] = exp;
+ }
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+
+ /* Remove holes in export memory due to removed PARAM exports.
+ * This is done by renumbering all PARAM exports.
+ */
+ if (removed_any) {
+ uint8_t old_offset[VARYING_SLOT_MAX];
+ unsigned out, i;
+
+ /* Make a copy of the offsets. We need the old version while
+ * we are modifying some of them. */
+ memcpy(old_offset, vs_output_param_offset,
+ sizeof(old_offset));
+
+ for (i = 0; i < exports.num; i++) {
+ unsigned offset = exports.exp[i].offset;
+
+ /* Update vs_output_param_offset. Multiple outputs can
+ * have the same offset.
+ */
+ for (out = 0; out < num_outputs; out++) {
+ if (old_offset[out] == offset)
+ vs_output_param_offset[out] = i;
+ }
+
+ /* Change the PARAM offset in the instruction. */
+ LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
+ LLVMConstInt(ctx->i32,
+ V_008DFC_SQ_EXP_PARAM + i, 0));
+ }
+ *num_param_exports = exports.num;
+ }
+}