radv: Fix threading issue with submission refcounts.

[mesa.git] / src / amd / llvm / ac_llvm_build.c
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c

index 0874cd99a57d2e5d06f3325970174a20515f40f1..77d3f7e73fbe84cf49548ad95d247b14a8a04d0b 100644 (file)
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -705,6 +705,11 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
         unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
         const char *name;
  
+       /* For doubles, we need precise division to pass GLCTS. */
+       if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
+           type_size == 8)
+               return LLVMBuildFDiv(ctx->builder, num, den, "");
+
         if (type_size == 2)
                 name = "llvm.amdgcn.rcp.f16";
         else if (type_size == 4)
@@ -1315,7 +1320,8 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
         char name[256], type_name[8];
  
         /* D16 is only supported on gfx8+ */
-       assert((channel_type != ctx->f16 && channel_type != ctx->i16) ||
+       assert(!use_format ||
+              (channel_type != ctx->f16 && channel_type != ctx->i16) ||
                ctx->chip_class >= GFX8);
  
         LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
@@ -1650,7 +1656,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
         }
  
         int log_recombine = 0;
-       if (ctx->chip_class == GFX6 && !known_aligned) {
+       if ((ctx->chip_class == GFX6 || ctx->chip_class == GFX10) && !known_aligned) {
                 /* Avoid alignment restrictions by loading one byte at a time. */
                 load_num_channels <<= load_log_size;
                 log_recombine = load_log_size;
@@ -2055,6 +2061,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  
         if (result_type == ctx->f16)
                 val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+       else if (result_type == ctx->v2f16)
+               val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
  
         for (unsigned i = 0; i < 4; ++i) {
                 tl_lanes[i] = i & mask;
@@ -2375,6 +2383,11 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
         assert((a->min_lod ? 1 : 0) +
                (a->lod ? 1 : 0) +
                (a->level_zero ? 1 : 0) <= 1);
+       assert(!a->d16 || (ctx->chip_class >= GFX8 &&
+                          a->opcode != ac_image_atomic &&
+                          a->opcode != ac_image_atomic_cmpswap &&
+                          a->opcode != ac_image_get_lod &&
+                          a->opcode != ac_image_get_resinfo));
  
         if (a->opcode == ac_image_get_lod) {
                 switch (dim) {
@@ -2497,7 +2510,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                  a->min_lod ? ".cl" : "",
                  a->offset ? ".o" : "",
                  dimname,
-                atomic ? "i32" : "v4f32",
+                atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"),
                  overload[0], overload[1], overload[2]);
  
         LLVMTypeRef retty;
@@ -2506,15 +2519,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
         else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
                 retty = ctx->voidt;
         else
-               retty = ctx->v4f32;
+               retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
  
         LLVMValueRef result =
                 ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
                                    a->attributes);
-       if (!sample && retty == ctx->v4f32) {
-               result = LLVMBuildBitCast(ctx->builder, result,
-                                         ctx->v4i32, "");
-       }
+       if (!sample && !atomic && retty != ctx->voidt)
+               result = ac_to_integer(ctx, result);
+
         return result;
  }
  
@@ -2715,54 +2727,6 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
                            ctx->voidt, args, 1, 0);
  }
  
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
-                           LLVMValueRef src1, LLVMValueRef src2,
-                           unsigned bitsize)
-{
-       LLVMValueRef result;
-
-       if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) {
-               /* Lower 64-bit fmed because LLVM doesn't expose an intrinsic,
-                * or lower 16-bit fmed because it's only supported on GFX9+.
-                */
-               LLVMValueRef min1, min2, max1;
-
-               min1 = ac_build_fmin(ctx, src0, src1);
-               max1 = ac_build_fmax(ctx, src0, src1);
-               min2 = ac_build_fmin(ctx, max1, src2);
-
-               result = ac_build_fmax(ctx, min2, min1);
-       } else {
-               LLVMTypeRef type;
-               char *intr;
-
-               if (bitsize == 16) {
-                       intr = "llvm.amdgcn.fmed3.f16";
-                       type = ctx->f16;
-               } else {
-                       assert(bitsize == 32);
-                       intr = "llvm.amdgcn.fmed3.f32";
-                       type = ctx->f32;
-               }
-
-               LLVMValueRef params[] = {
-                       src0,
-                       src1,
-                       src2,
-               };
-
-               result = ac_build_intrinsic(ctx, intr, type, params, 3,
-                                           AC_FUNC_ATTR_READNONE);
-       }
-
-       if (ctx->chip_class < GFX9 && bitsize == 32) {
-               /* Only pre-GFX9 chips do not flush denorms. */
-               result = ac_build_canonicalize(ctx, result, bitsize);
-       }
-
-       return result;
-}
-
  LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
                             unsigned bitsize)
  {
@@ -4976,6 +4940,7 @@ ac_build_main(const struct ac_shader_args *args,
                 if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
                         ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
                         ac_add_attr_dereferenceable(P, UINT64_MAX);
+                       ac_add_attr_alignment(P, 32);
                 }
         }