unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
const char *name;
+ /* For doubles, we need precise division to pass GLCTS. */
+ if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+ type_size == 8)
+ return LLVMBuildFDiv(ctx->builder, num, den, "");
+
if (type_size == 2)
name = "llvm.amdgcn.rcp.f16";
else if (type_size == 4)
char name[256], type_name[8];
/* D16 is only supported on gfx8+ */
- assert((channel_type != ctx->f16 && channel_type != ctx->i16) ||
+ assert(!use_format ||
+ (channel_type != ctx->f16 && channel_type != ctx->i16) ||
ctx->chip_class >= GFX8);
LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
}
int log_recombine = 0;
- if (ctx->chip_class == GFX6 && !known_aligned) {
+ if ((ctx->chip_class == GFX6 || ctx->chip_class == GFX10) && !known_aligned) {
/* Avoid alignment restrictions by loading one byte at a time. */
load_num_channels <<= load_log_size;
log_recombine = load_log_size;
if (result_type == ctx->f16)
val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+ else if (result_type == ctx->v2f16)
+ val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
for (unsigned i = 0; i < 4; ++i) {
tl_lanes[i] = i & mask;
assert((a->min_lod ? 1 : 0) +
(a->lod ? 1 : 0) +
(a->level_zero ? 1 : 0) <= 1);
+ assert(!a->d16 || (ctx->chip_class >= GFX8 &&
+ a->opcode != ac_image_atomic &&
+ a->opcode != ac_image_atomic_cmpswap &&
+ a->opcode != ac_image_get_lod &&
+ a->opcode != ac_image_get_resinfo));
if (a->opcode == ac_image_get_lod) {
switch (dim) {
a->min_lod ? ".cl" : "",
a->offset ? ".o" : "",
dimname,
- atomic ? "i32" : "v4f32",
+ atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"),
overload[0], overload[1], overload[2]);
LLVMTypeRef retty;
else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
retty = ctx->voidt;
else
- retty = ctx->v4f32;
+ retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
LLVMValueRef result =
ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
a->attributes);
- if (!sample && retty == ctx->v4f32) {
- result = LLVMBuildBitCast(ctx->builder, result,
- ctx->v4i32, "");
- }
+ if (!sample && !atomic && retty != ctx->voidt)
+ result = ac_to_integer(ctx, result);
+
return result;
}
ctx->voidt, args, 1, 0);
}
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
- LLVMValueRef src1, LLVMValueRef src2,
- unsigned bitsize)
-{
- LLVMValueRef result;
-
- if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) {
- /* Lower 64-bit fmed because LLVM doesn't expose an intrinsic,
- * or lower 16-bit fmed because it's only supported on GFX9+.
- */
- LLVMValueRef min1, min2, max1;
-
- min1 = ac_build_fmin(ctx, src0, src1);
- max1 = ac_build_fmax(ctx, src0, src1);
- min2 = ac_build_fmin(ctx, max1, src2);
-
- result = ac_build_fmax(ctx, min2, min1);
- } else {
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.fmed3.f16";
- type = ctx->f16;
- } else {
- assert(bitsize == 32);
- intr = "llvm.amdgcn.fmed3.f32";
- type = ctx->f32;
- }
-
- LLVMValueRef params[] = {
- src0,
- src1,
- src2,
- };
-
- result = ac_build_intrinsic(ctx, intr, type, params, 3,
- AC_FUNC_ATTR_READNONE);
- }
-
- if (ctx->chip_class < GFX9 && bitsize == 32) {
- /* Only pre-GFX9 chips do not flush denorms. */
- result = ac_build_canonicalize(ctx, result, bitsize);
- }
-
- return result;
-}
-
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize)
{
if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
+ ac_add_attr_alignment(P, 32);
}
}