enum ac_float_mode float_mode, unsigned wave_size,
unsigned ballot_mask_bits)
{
- LLVMValueRef args[1];
-
ctx->context = LLVMContextCreate();
ctx->chip_class = chip_class;
ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"invariant.load", 14);
- ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
- args[0] = LLVMConstReal(ctx->f32, 2.5);
- ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"amdgpu.uniform", 14);
ac_get_llvm_num_components(LLVMValueRef value)
{
LLVMTypeRef type = LLVMTypeOf(value);
- unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
+ unsigned num_components = LLVMGetTypeKind(type) == LLVMFixedVectorTypeKind
? LLVMGetVectorSize(type)
: 1;
return num_components;
LLVMValueRef value,
int index)
{
- if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMFixedVectorTypeKind) {
assert(index == 0);
return value;
}
int
ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
{
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+ if (LLVMGetTypeKind(type) == LLVMFixedVectorTypeKind)
type = LLVMGetElementType(type);
if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
return 4;
return 8;
- case LLVMVectorTypeKind:
+ case LLVMFixedVectorTypeKind:
return LLVMGetVectorSize(type) *
ac_get_type_size(LLVMGetElementType(type));
case LLVMArrayTypeKind:
LLVMTypeRef
ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ if (LLVMGetTypeKind(t) == LLVMFixedVectorTypeKind) {
LLVMTypeRef elem_type = LLVMGetElementType(t);
return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
LLVMGetVectorSize(t));
LLVMTypeRef
ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ if (LLVMGetTypeKind(t) == LLVMFixedVectorTypeKind) {
LLVMTypeRef elem_type = LLVMGetElementType(t);
return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
LLVMGetVectorSize(t));
assert(bufsize >= 8);
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+ if (LLVMGetTypeKind(type) == LLVMFixedVectorTypeKind) {
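+ /* Mangle vector types as "v<N><elem>", e.g. "v4f32", matching the
+ * overload naming used by LLVM intrinsics. */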
int ret = snprintf(buf, bufsize, "v%u",
LLVMGetVectorSize(type));
if (ret < 0) {
LLVMTypeRef elemtype;
LLVMValueRef chan[dst_channels];
- if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFixedVectorTypeKind) {
unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
if (src_channels == dst_channels && vec_size == dst_channels)
LLVMValueRef num,
LLVMValueRef den)
{
- /* If we do (num / den), LLVM >= 7.0 does:
- * return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
- *
- * If we do (num * (1 / den)), LLVM does:
- * return num * v_rcp_f32(den);
- */
- LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
- LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
- LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
+ unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+ const char *name;
- /* Use v_rcp_f32 instead of precise division. */
- if (!LLVMIsConstant(ret))
- LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
- return ret;
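+ /* llvm.amdgcn.rcp.* maps to v_rcp_f16/f32/f64, a fast approximate
+ * reciprocal (roughly 1 ulp of error for f32). */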
+ if (type_size == 2)
+ name = "llvm.amdgcn.rcp.f16";
+ else if (type_size == 4)
+ name = "llvm.amdgcn.rcp.f32";
+ else
+ name = "llvm.amdgcn.rcp.f64";
+
+ LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
+ &den, 1, AC_FUNC_ATTR_READNONE);
+
+ return LLVMBuildFMul(ctx->builder, num, rcp, "");
}
/* See fast_idiv_by_const.h. */
(a->lod ? 1 : 0) +
(a->level_zero ? 1 : 0) +
(a->derivs[0] ? 1 : 0) <= 1);
+ assert((a->min_lod ? 1 : 0) +
+ (a->lod ? 1 : 0) +
+ (a->level_zero ? 1 : 0) <= 1);
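+ /* min_lod (LOD clamp) is mutually exclusive with an explicit LOD and
+ * with level_zero, though it can still be combined with derivatives. */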
if (a->opcode == ac_image_get_lod) {
switch (dim) {
args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
if (a->lod)
args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
+ if (a->min_lod)
+ args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
+
overload[num_overloads++] = sample ? ".f32" : ".i32";
args[num_args++] = a->resource;
char intr_name[96];
snprintf(intr_name, sizeof(intr_name),
"llvm.amdgcn.image.%s%s" /* base name */
- "%s%s%s" /* sample/gather modifiers */
+ "%s%s%s%s" /* sample/gather modifiers */
".%s.%s%s%s%s", /* dimension and type overloads */
name, atomic_subop,
a->compare ? ".c" : "",
lod_suffix ? ".l" :
a->derivs[0] ? ".d" :
a->level_zero ? ".lz" : "",
+ a->min_lod ? ".cl" : "",
a->offset ? ".o" : "",
dimname,
atomic ? "i32" : "v4f32",
LLVMValueRef src1, LLVMValueRef src2,
unsigned bitsize)
{
- LLVMTypeRef type;
- char *intr;
+ LLVMValueRef result;
- if (bitsize == 16) {
- intr = "llvm.amdgcn.fmed3.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.fmed3.f32";
- type = ctx->f32;
+ if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) {
+ /* Lower 64-bit fmed3 because LLVM doesn't expose an intrinsic for it,
+ * and lower 16-bit fmed3 because v_med3_f16 is only available on GFX9+.
+ */
+ LLVMValueRef min1, min2, max1;
+
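+ /* med3(a, b, c) = max(min(a, b), min(max(a, b), c));
+ * e.g. med3(1, 3, 2) = max(1, min(3, 2)) = 2. */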
+ min1 = ac_build_fmin(ctx, src0, src1);
+ max1 = ac_build_fmax(ctx, src0, src1);
+ min2 = ac_build_fmin(ctx, max1, src2);
+
+ result = ac_build_fmax(ctx, min2, min1);
} else {
- intr = "llvm.amdgcn.fmed3.f64";
- type = ctx->f64;
+ LLVMTypeRef type;
+ char *intr;
+
+ if (bitsize == 16) {
+ intr = "llvm.amdgcn.fmed3.f16";
+ type = ctx->f16;
+ } else {
+ assert(bitsize == 32);
+ intr = "llvm.amdgcn.fmed3.f32";
+ type = ctx->f32;
+ }
+
+ LLVMValueRef params[] = {
+ src0,
+ src1,
+ src2,
+ };
+
+ result = ac_build_intrinsic(ctx, intr, type, params, 3,
+ AC_FUNC_ATTR_READNONE);
}
- LLVMValueRef params[] = {
- src0,
- src1,
- src2,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 3,
- AC_FUNC_ATTR_READNONE);
+ if (ctx->chip_class < GFX9 && bitsize == 32) {
+ /* Pre-GFX9 chips do not flush denorms in v_med3_f32; canonicalize
+ * the result to restore the expected flushing behavior. */
+ result = ac_build_canonicalize(ctx, result, bitsize);
+ }
+
+ return result;
}
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef main_fn,
uint8_t *vs_output_param_offset,
uint32_t num_outputs,
+ uint32_t skip_output_mask,
uint8_t *num_param_exports)
{
LLVMBasicBlockRef bb;
}
/* Eliminate constant and duplicated PARAM exports. */
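+ /* Outputs whose bit is set in skip_output_mask are exported
+ * unchanged, so callers can exempt outputs that are consumed in ways
+ * this pass cannot see. */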
- if (ac_eliminate_const_output(vs_output_param_offset,
- num_outputs, &exp) ||
- ac_eliminate_duplicated_output(ctx,
- vs_output_param_offset,
- num_outputs, &exports,
- &exp)) {
+ if (!((1u << target) & skip_output_mask) &&
+ (ac_eliminate_const_output(vs_output_param_offset,
+ num_outputs, &exp) ||
+ ac_eliminate_duplicated_output(ctx,
+ vs_output_param_offset,
+ num_outputs, &exports,
+ &exp))) {
removed_any = true;
} else {
exports.exp[exports.num++] = exp;
}
static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+ LLVMValueRef lane, bool with_opt_barrier)
{
LLVMTypeRef type = LLVMTypeOf(src);
LLVMValueRef result;
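+
+ /* The optimization barrier keeps LLVM from moving the computation of
+ * src across control flow, where a different set of lanes could be
+ * active. */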
+ if (with_opt_barrier)
+ ac_build_optimization_barrier(ctx, &src);
+
src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
if (lane)
lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
return LLVMBuildTrunc(ctx->builder, result, type, "");
}
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- *
- * The optimization barrier is not needed if the value is the same in all lanes
- * or if this is called in the outermost block.
- *
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef src, LLVMValueRef lane)
+static LLVMValueRef
+ac_build_readlane_common(struct ac_llvm_context *ctx,
+ LLVMValueRef src, LLVMValueRef lane,
+ bool with_opt_barrier)
{
LLVMTypeRef src_type = LLVMTypeOf(src);
src = ac_to_integer(ctx, src);
LLVMBuildBitCast(ctx->builder, src, vec_type, "");
ret = LLVMGetUndef(vec_type);
for (unsigned i = 0; i < bits / 32; i++) {
+ LLVMValueRef ret_comp;
+
src = LLVMBuildExtractElement(ctx->builder, src_vector,
LLVMConstInt(ctx->i32, i, 0), "");
- LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
+
+ ret_comp = _ac_build_readlane(ctx, src, lane,
+ with_opt_barrier);
+
ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
LLVMConstInt(ctx->i32, i, 0), "");
}
} else {
- ret = _ac_build_readlane(ctx, src, lane);
+ ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
}
if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
}
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ *
+ * The optimization barrier is not needed if the value is the same in all lanes
+ * or if this is called in the outermost block.
+ *
+ * @param ctx - the LLVM builder context
+ * @param src - the source value to read from the selected lane
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef src, LLVMValueRef lane)
+{
+ return ac_build_readlane_common(ctx, src, lane, false);
+}
+
LLVMValueRef
ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
{
- ac_build_optimization_barrier(ctx, &src);
-
- return ac_build_readlane_no_opt_barrier(ctx, src, lane);
+ return ac_build_readlane_common(ctx, src, lane, true);
}
LLVMValueRef
return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
}
+LLVMValueRef
+ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
+{
+ if (!ctx->postponed_kill)
+ return ac_build_load_helper_invocation(ctx);
+
+ /* helper = !(exact && postponed): a lane is a helper invocation
+ * unless it is live (ps.live) and has not been killed yet. */
+ LLVMValueRef exact = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
+ ctx->i1, NULL, 0,
+ AC_FUNC_ATTR_READNONE);
+
+ LLVMValueRef postponed = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
+ LLVMValueRef result = LLVMBuildAnd(ctx->builder, exact, postponed, "");
+
+ return LLVMBuildSelect(ctx->builder, result, ctx->i32_0,
+ LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), "");
+}
+
LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
LLVMValueRef *args, unsigned num_args)
{
{
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tmp;
+ bool export_dummy_prim = false;
+
+ /* HW workaround for a GPU hang with 100% culling.
+ * We always have to export at least 1 primitive.
+ * Export a degenerate triangle using vertex 0 for all 3 vertices.
+ */
+ if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
+ assert(vtx_cnt == ctx->i32_0);
+ prim_cnt = ctx->i32_1;
+ vtx_cnt = ctx->i32_1;
+ export_dummy_prim = true;
+ }
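+ /* Only the first wave sends the GS_ALLOC_REQ message. */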
ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
+ if (export_dummy_prim) {
+ struct ac_ngg_prim prim = {};
+ /* The vertex indices are 0,0,0. */
+ prim.passthrough = ctx->i32_0;
+
+ struct ac_export_args pos = {};
+ pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
+ pos.target = V_008DFC_SQ_EXP_POS;
+ pos.enabled_channels = 0xf;
+ pos.done = true;
+
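+ /* A single lane is enough to export the one degenerate triangle. */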
+ ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx),
+ ctx->i32_0, ""), 5021);
+ ac_build_export_prim(ctx, &prim);
+ ac_build_export(ctx, &pos);
+ ac_build_endif(ctx, 5021);
+ }
+
ac_build_endif(ctx, 5020);
}
}
ctx->main_function = main_function;
+
+ if (LLVM_VERSION_MAJOR >= 11) {
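+ /* The attribute value encodes "<output mode>,<input mode>". */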
+ /* Enable denormals for FP16 and FP64: */
+ LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math",
+ "ieee,ieee");
+ /* Disable denormals for FP32: */
+ LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
+ "preserve-sign,preserve-sign");
+ }
return main_function;
}