ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+ ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+ ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
+ ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
+ ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
+
ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"range", 5);
}
}
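
/* Illustrative sketch (not part of the change; "load" is a placeholder
 * for a previously built load instruction): the "range" metadata kind
 * registered above is typically attached to a load to tell LLVM that the
 * loaded value lies in a half-open interval [lo, hi).
 */
static void example_set_range(struct ac_llvm_context *ctx, LLVMValueRef load)
{
	LLVMValueRef md_args[] = {
		LLVMConstInt(ctx->i32, 0, 0),	/* lo (inclusive) */
		LLVMConstInt(ctx->i32, 4, 0),	/* hi (exclusive) */
	};

	LLVMSetMetadata(load, ctx->range_md_kind,
			LLVMMDNodeInContext(ctx->context, md_args, 2));
}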
+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+ unsigned count_incoming, LLVMValueRef *values,
+ LLVMBasicBlockRef *blocks)
+{
+ LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+ LLVMAddIncoming(phi, values, blocks, count_incoming);
+ return phi;
+}
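+
+/* Illustrative usage sketch (block and value names are placeholders):
+ * merge a value from the two arms of an if/else that the caller has
+ * already emitted; the insert point must be in the merge block.
+ */
+static LLVMValueRef
+example_merge_if_else(struct ac_llvm_context *ctx,
+		      LLVMValueRef then_value, LLVMBasicBlockRef then_bb,
+		      LLVMValueRef else_value, LLVMBasicBlockRef else_bb)
+{
+	LLVMValueRef incoming[2] = { then_value, else_value };
+	LLVMBasicBlockRef preds[2] = { then_bb, else_bb };
+
+	return ac_build_phi(ctx, LLVMTypeOf(then_value), 2, incoming, preds);
+}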
+
/* Prevent optimizations (at least of memory accesses) across the current
* point in the program by emitting empty inline assembly that is marked as
* having side effects.
* selcoords.ma; i.e., a positive out_ma means that coords points towards
* the selcoords major axis.
*/
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
const struct cube_selection_coords *selcoords,
const LLVMValueRef *coords,
LLVMValueRef *out_st,
LLVMValueRef *out_ma)
{
+ LLVMBuilderRef builder = ctx->builder;
LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
LLVMValueRef is_ma_positive;
LLVMValueRef sgn_ma;
is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
/* Select sc */
- tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+ tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
- LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+ LLVMBuildSelect(builder, is_ma_z, sgn_ma,
LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
/* Select tc */
tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+ sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
LLVMConstReal(f32, -1.0), "");
out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
/* Select ma */
tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- sgn = LLVMBuildSelect(builder, is_ma_positive,
- LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
- *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+ tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+ ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+ *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
}
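
/* A scalar sketch (illustrative only, not part of the change) of the
 * selection implemented by the IR above. "axis" is the major axis implied
 * by selcoords (0 = x, 1 = y, 2 = z) and "sgn_ma" the sign of its
 * component; both names are placeholders.
 */
#include <math.h>

static void cube_select_ref(int axis, float sgn_ma, const float coords[3],
			    float out_st[2], float *out_ma)
{
	/* sc: the x faces take z, the y and z faces take x */
	float sc = (axis == 0) ? coords[2] : coords[0];
	out_st[0] = sc * ((axis == 1) ? 1.0f :
			  (axis == 2) ? sgn_ma : -sgn_ma);

	/* tc: the y faces take z with sign sgn_ma, the others take -y */
	float tc = (axis == 1) ? coords[2] : coords[1];
	out_st[1] = tc * ((axis == 1) ? sgn_ma : -1.0f);

	/* ma: twice the magnitude of the major-axis component */
	*out_ma = 2.0f * fabsf(coords[axis]);
}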
void
* seems awfully quiet about how textureGrad for cube
* maps should be handled.
*/
- build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+ build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
deriv_st, &deriv_ma);
deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
* \param base_ptr Where the array starts.
* \param index The element index into the array.
* \param uniform Whether the base_ptr and index can be assumed to be
- * dynamically uniform
+ *                 dynamically uniform (i.e. the result can be kept in an SGPR)
+ * \param invariant Whether the load is invariant (the loaded memory is never
+ *                 written to, so the load can be freely moved and reused)
*/
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index, bool uniform, bool invariant)
{
- LLVMValueRef pointer;
+ LLVMValueRef pointer, result;
pointer = ac_build_gep0(ctx, base_ptr, index);
if (uniform)
LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
- return LLVMBuildLoad(ctx->builder, pointer, "");
+ result = LLVMBuildLoad(ctx->builder, pointer, "");
+ if (invariant)
+ LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+ return result;
}
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index)
{
- LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
- LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
- return result;
+ return ac_build_load_custom(ctx, base_ptr, index, false, false);
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index)
+{
+ return ac_build_load_custom(ctx, base_ptr, index, false, true);
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index)
+{
+ return ac_build_load_custom(ctx, base_ptr, index, true, true);
}
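
/* Illustrative usage sketch (all names are placeholders, not upstream
 * code): a read-only descriptor indexed by a constant goes through the
 * uniform + invariant path so the result may live in an SGPR, while data
 * indexed by a possibly divergent value uses the plain variant.
 */
static void example_loads(struct ac_llvm_context *ctx,
			  LLVMValueRef desc_array,
			  LLVMValueRef data_array,
			  LLVMValueRef lane_index,
			  LLVMValueRef *out_rsrc,
			  LLVMValueRef *out_elem)
{
	/* Constant index, never-written memory: uniform and invariant. */
	*out_rsrc = ac_build_load_to_sgpr(ctx, desc_array,
					  LLVMConstInt(ctx->i32, 0, 0));

	/* Per-lane index: neither uniformity nor invariance is assumed. */
	*out_elem = ac_build_load(ctx, data_array, lane_index);
}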
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
bool glc,
bool slc,
bool writeonly_memory,
- bool has_add_tid)
+ bool swizzle_enable_hint)
{
- /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
- if (!has_add_tid) {
+ /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+ * (voffset is swizzled, but soffset isn't swizzled).
+ * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+ */
+ if (!swizzle_enable_hint) {
/* Split 3-channel stores, because LLVM doesn't support 3-channel
* intrinsics. */
if (num_channels == 3) {
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
soffset, inst_offset, glc, slc,
- writeonly_memory, has_add_tid);
+ writeonly_memory, swizzle_enable_hint);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
soffset, inst_offset + 8,
glc, slc,
- writeonly_memory, has_add_tid);
+ writeonly_memory, swizzle_enable_hint);
return;
}
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex,
voffset,
- LLVMConstInt(ctx->i1, 0, 0), /* glc */
- LLVMConstInt(ctx->i1, 0, 0), /* slc */
+ ctx->i1false, /* glc */
+ ctx->i1false, /* slc */
};
return ac_build_intrinsic(ctx,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
} else {
- uint32_t masks[2];
+ uint32_t masks[2] = {0};
switch (mask) {
case AC_TID_MASK_TOP_LEFT:
masks[0] = 0x80a0;
masks[1] = 0x80f5;
break;
+ default:
+ assert(0);
}
args[0] = val;
{
LLVMValueRef args[2] = {
arg,
- LLVMConstInt(ctx->i1, 1, 0),
+ ctx->i1true,
};
LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
dst_type, args, ARRAY_SIZE(args),
LLVMTypeRef dst_type;
LLVMValueRef args[11];
unsigned num_args = 0;
- const char *name;
+ const char *name = NULL;
char intr_name[128], type[64];
if (HAVE_LLVM >= 0x0400) {
args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
if (sample)
args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
- args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
- args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
- args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
+ args[num_args++] = ctx->i1false; /* glc */
+ args[num_args++] = ctx->i1false; /* slc */
+ args[num_args++] = ctx->i1false; /* lwe */
args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
switch (a->opcode) {
AC_FUNC_ATTR_LEGACY);
}
-/**
- * KILL, AKA discard in GLSL.
- *
- * \param value kill if value < 0.0 or value == NULL.
- */
-void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
- if (value) {
- ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
- &value, 1, AC_FUNC_ATTR_LEGACY);
- } else {
- ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
- NULL, 0, AC_FUNC_ATTR_LEGACY);
+ assert(HAVE_LLVM >= 0x0600);
+ return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
+ &i1, 1, AC_FUNC_ATTR_READNONE);
+}
+
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
+{
+ if (HAVE_LLVM >= 0x0600) {
+ ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
+ &i1, 1, 0);
+ return;
}
+
+ LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
+ LLVMConstReal(ctx->f32, 1),
+ LLVMConstReal(ctx->f32, -1), "");
+ ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
+ &value, 1, AC_FUNC_ATTR_LEGACY);
}
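
/* Illustrative usage sketch ("alpha" is a placeholder): a GLSL-style
 * "if (alpha < 0.5) discard;". The helper keeps lanes for which the i1
 * argument is true, so the discard condition must be inverted.
 */
static void example_alpha_kill(struct ac_llvm_context *ctx, LLVMValueRef alpha)
{
	LLVMValueRef discard =
		LLVMBuildFCmp(ctx->builder, LLVMRealOLT, alpha,
			      LLVMConstReal(ctx->f32, 0.5), "");

	ac_build_kill_if_false(ctx, LLVMBuildNot(ctx->builder, discard, ""));
}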
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
*num_param_exports = exports.num;
}
}
+
+void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
+{
+ LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
+ ac_build_intrinsic(ctx,
+ "llvm.amdgcn.init.exec", ctx->voidt,
+ &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
+}
+
+void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
+{
+ unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
+ ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
+ LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE),
+ "lds");
+}
+
+LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
+ LLVMValueRef dw_addr)
+{
+ return ac_build_load(ctx, ctx->lds, dw_addr);
+}
+
+void ac_lds_store(struct ac_llvm_context *ctx,
+ LLVMValueRef dw_addr,
+ LLVMValueRef value)
+{
+ value = ac_to_integer(ctx, value);
+ ac_build_indexed_store(ctx, ctx->lds,
+ dw_addr, value);
+}
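+
+/* Illustrative usage sketch (names are placeholders): these helpers
+ * address LDS in dwords, so a byte offset must be shifted right by 2.
+ */
+static LLVMValueRef example_lds_roundtrip(struct ac_llvm_context *ctx,
+					  LLVMValueRef byte_offset,
+					  LLVMValueRef value)
+{
+	LLVMValueRef dw_addr = LLVMBuildLShr(ctx->builder, byte_offset,
+					     LLVMConstInt(ctx->i32, 2, 0), "");
+
+	ac_lds_store(ctx, dw_addr, value);
+
+	/* Read back the dword just written by this lane. */
+	return ac_lds_load(ctx, dw_addr);
+}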
+
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
+ LLVMTypeRef dst_type,
+ LLVMValueRef src0)
+{
+ LLVMValueRef params[2] = {
+ src0,
+
+ /* Passing i1true here means that cttz(x=0) is undefined, so LLVM
+ * won't add special code to check for x == 0, which is what we want:
+ * LLVM's behavior for 0 differs from what we need here anyway, since
+ * it assumes the result of ffs(x) is in [0, 31] while GLSL requires
+ * findLSB(0) = -1. A conditional assignment to handle the 0 case is
+ * therefore still required below.
+ *
+ * The hardware already implements the correct behavior.
+ */
+ ctx->i1true,
+ };
+
+ LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+ params, 2,
+ AC_FUNC_ATTR_READNONE);
+
+ /* TODO: We need an intrinsic to skip this conditional. */
+ /* Check for zero: */
+ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
+ LLVMIntEQ, src0,
+ ctx->i32_0, ""),
+ LLVMConstInt(ctx->i32, -1, 0), lsb, "");
+}
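+
+/* A scalar reference sketch (illustrative only) of the GLSL findLSB()
+ * semantics that ac_find_lsb implements:
+ */
+#include <stdint.h>
+
+static int ref_find_lsb(uint32_t x)
+{
+	if (x == 0)
+		return -1;	/* GLSL: findLSB(0) == -1 */
+
+	int i = 0;
+	while (!(x & 1u)) {	/* count trailing zero bits */
+		x >>= 1;
+		i++;
+	}
+	return i;
+}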