*/
void
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
- enum chip_class chip_class)
+ enum chip_class chip_class, enum radeon_family family)
{
LLVMValueRef args[1];
ctx->chip_class = chip_class;
+ ctx->family = family;
ctx->context = context;
ctx->module = NULL;
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+ ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+ ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
+ ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
+ ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
+
ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"range", 5);
return LLVMBuildOr(ctx->builder, all, none, "");
}
+/**
+ * Gather value_count consecutive entries of values, starting at index
+ * component, into a single vector.
+ *
+ * \param ctx          context supplying the builder and the i32 type
+ * \param values       array holding at least component + value_count entries
+ * \param value_count  number of entries to gather; must not be 0
+ * \param component    index in values of the first entry to gather
+ * \return values[component] itself when value_count is 1, otherwise a
+ *         vector of value_count elements whose element type is the type
+ *         of values[component]
+ */
+LLVMValueRef
+ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+			       unsigned value_count, unsigned component)
+{
+	if (!value_count)
+		unreachable("value_count is 0");
+
+	if (value_count == 1)
+		return values[component];
+
+	/* Create the destination vector before the loop, typed after the
+	 * first gathered entry. Doing this unconditionally (instead of only
+	 * when the source index is 0) keeps the vector valid when component
+	 * is non-zero.
+	 */
+	LLVMTypeRef elem_type = LLVMTypeOf(values[component]);
+	LLVMValueRef vec = LLVMGetUndef(LLVMVectorType(elem_type, value_count));
+
+	for (unsigned i = 0; i < value_count; i++) {
+		/* Destination lane i receives source entry component + i. */
+		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
+		vec = LLVMBuildInsertElement(ctx->builder, vec,
+					     values[component + i], index, "");
+	}
+	return vec;
+}
+
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
{
LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
+ /* Use v_rcp_f32 instead of precise division. */
if (!LLVMIsConstant(ret))
LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
return ret;
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex,
voffset,
- LLVMConstInt(ctx->i1, 0, 0), /* glc */
- LLVMConstInt(ctx->i1, 0, 0), /* slc */
+ ctx->i1false, /* glc */
+ ctx->i1false, /* slc */
};
return ac_build_intrinsic(ctx,
{
LLVMValueRef args[2] = {
arg,
- LLVMConstInt(ctx->i1, 1, 0),
+ ctx->i1true,
};
LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
dst_type, args, ARRAY_SIZE(args),
args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
if (sample)
args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
- args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
- args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
- args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
+ args[num_args++] = ctx->i1false; /* glc */
+ args[num_args++] = ctx->i1false; /* slc */
+ args[num_args++] = ctx->i1false; /* lwe */
args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
switch (a->opcode) {
AC_FUNC_ATTR_LEGACY);
}
+/**
+ * Emit a call to the llvm.amdgcn.s.waitcnt intrinsic.
+ *
+ * \param ctx     context supplying the i32 and void types
+ * \param simm16  value passed to the intrinsic as its single i32 operand
+ */
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+{
+	LLVMValueRef imm = LLVMConstInt(ctx->i32, simm16, false);
+
+	/* The intrinsic returns nothing and is declared with no attributes. */
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
+			   ctx->voidt, &imm, 1, 0);
+}
+
void ac_get_image_intr_name(const char *base_name,
LLVMTypeRef data_type,
LLVMTypeRef coords_type,
"llvm.amdgcn.init.exec", ctx->voidt,
&full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
}
+
+/**
+ * Set ctx->lds to a pointer to an array of i32 in AC_LOCAL_ADDR_SPACE.
+ *
+ * The pointer is produced by an inttoptr cast of ctx->i32_0. The array
+ * spans 65536 bytes on CIK and newer chips and 32768 bytes otherwise,
+ * expressed as a count of 4-byte elements.
+ */
+void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
+{
+	unsigned lds_bytes;
+
+	if (ctx->chip_class >= CIK)
+		lds_bytes = 65536;
+	else
+		lds_bytes = 32768;
+
+	LLVMTypeRef lds_array = LLVMArrayType(ctx->i32, lds_bytes / 4);
+	LLVMTypeRef lds_ptr = LLVMPointerType(lds_array, AC_LOCAL_ADDR_SPACE);
+
+	ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, lds_ptr, "lds");
+}
+
+/**
+ * Load from ctx->lds at index dw_addr via ac_build_load.
+ *
+ * \param ctx      context whose lds pointer is used as the base
+ * \param dw_addr  index handed to ac_build_load (presumably a dword
+ *                 offset, going by the name - confirm against callers)
+ * \return the value produced by ac_build_load
+ */
+LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
+			 LLVMValueRef dw_addr)
+{
+	LLVMValueRef loaded = ac_build_load(ctx, ctx->lds, dw_addr);
+
+	return loaded;
+}
+
+/**
+ * Store value into ctx->lds at index dw_addr.
+ *
+ * The value is first passed through ac_to_integer, and the result is
+ * written with ac_build_indexed_store.
+ *
+ * \param ctx      context whose lds pointer is used as the base
+ * \param dw_addr  index handed to ac_build_indexed_store
+ * \param value    value to store
+ */
+void ac_lds_store(struct ac_llvm_context *ctx,
+		  LLVMValueRef dw_addr,
+		  LLVMValueRef value)
+{
+	LLVMValueRef as_int = ac_to_integer(ctx, value);
+
+	ac_build_indexed_store(ctx, ctx->lds, dw_addr, as_int);
+}
+
+/**
+ * Build a find-least-significant-bit operation on src0.
+ *
+ * Emits llvm.cttz.i32 on src0 and then selects -1 instead of the cttz
+ * result when src0 equals ctx->i32_0.
+ *
+ * \param ctx       context supplying the builder and integer types
+ * \param dst_type  not consulted by this function; the result is always i32
+ * \param src0      value to scan; passed to llvm.cttz.i32
+ * \return i32 select result: -1 when src0 compares equal to ctx->i32_0,
+ *         the cttz result otherwise
+ */
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
+			 LLVMTypeRef dst_type,
+			 LLVMValueRef src0)
+{
+	/* The value of 1 for the second cttz operand means that
+	 * ffs(x=0) = undef, so LLVM won't add special code to check for
+	 * x=0. The reason is that the LLVM behavior for x=0 is different
+	 * from what we need here. However, LLVM also assumes that ffs(x)
+	 * is in [0, 31], but GLSL expects that ffs(0) = -1, so a
+	 * conditional assignment to handle 0 is still required.
+	 *
+	 * The hardware already implements the correct behavior.
+	 */
+	LLVMValueRef zero_is_undef = LLVMConstInt(ctx->i1, 1, false);
+	LLVMValueRef cttz_args[2] = { src0, zero_is_undef };
+
+	LLVMValueRef cttz = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+					       cttz_args, 2,
+					       AC_FUNC_ATTR_READNONE);
+
+	/* TODO: We need an intrinsic to skip this conditional. */
+	/* Check for zero: */
+	LLVMValueRef src_is_zero = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+						 src0, ctx->i32_0, "");
+	LLVMValueRef minus_one = LLVMConstInt(ctx->i32, -1, 0);
+
+	return LLVMBuildSelect(ctx->builder, src_is_zero, minus_one, cttz, "");
+}