ac/llvm: add support for texturing with clamped LOD

[mesa.git] / src / amd / llvm / ac_llvm_build.c
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c

index 760d9123c623c4afe6e9a54808b69aa133808593..ebcb91bd4b3374c02d1bb60e19773d15c4822cdd 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -65,8 +65,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
                      enum ac_float_mode float_mode, unsigned wave_size,
                      unsigned ballot_mask_bits)
  {
-       LLVMValueRef args[1];
-
         ctx->context = LLVMContextCreate();
  
         ctx->chip_class = chip_class;
@@ -127,11 +125,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
         ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                                "invariant.load", 14);
  
-       ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
-       args[0] = LLVMConstReal(ctx->f32, 2.5);
-       ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
         ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                         "amdgpu.uniform", 14);
  
@@ -151,7 +144,7 @@ int
  ac_get_llvm_num_components(LLVMValueRef value)
  {
         LLVMTypeRef type = LLVMTypeOf(value);
-       unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
+       unsigned num_components = LLVMGetTypeKind(type) == LLVMFixedVectorTypeKind
                                       ? LLVMGetVectorSize(type)
                                       : 1;
         return num_components;
@@ -162,7 +155,7 @@ ac_llvm_extract_elem(struct ac_llvm_context *ac,
                      LLVMValueRef value,
                      int index)
  {
-       if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
+       if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMFixedVectorTypeKind) {
                 assert(index == 0);
                 return value;
         }
@@ -174,7 +167,7 @@ ac_llvm_extract_elem(struct ac_llvm_context *ac,
  int
  ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
  {
-       if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+       if (LLVMGetTypeKind(type) == LLVMFixedVectorTypeKind)
                 type = LLVMGetElementType(type);
  
         if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
@@ -213,7 +206,7 @@ ac_get_type_size(LLVMTypeRef type)
                 if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
                         return 4;
                 return 8;
-       case LLVMVectorTypeKind:
+       case LLVMFixedVectorTypeKind:
                 return LLVMGetVectorSize(type) *
                        ac_get_type_size(LLVMGetElementType(type));
         case LLVMArrayTypeKind:
@@ -242,7 +235,7 @@ static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeR
  LLVMTypeRef
  ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
  {
-       if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+       if (LLVMGetTypeKind(t) == LLVMFixedVectorTypeKind) {
                 LLVMTypeRef elem_type = LLVMGetElementType(t);
                 return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
                                       LLVMGetVectorSize(t));
@@ -297,7 +290,7 @@ static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef
  LLVMTypeRef
  ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
  {
-       if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+       if (LLVMGetTypeKind(t) == LLVMFixedVectorTypeKind) {
                 LLVMTypeRef elem_type = LLVMGetElementType(t);
                 return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
                                       LLVMGetVectorSize(t));
@@ -359,7 +352,7 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
  
         assert(bufsize >= 8);
  
-       if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+       if (LLVMGetTypeKind(type) == LLVMFixedVectorTypeKind) {
                 int ret = snprintf(buf, bufsize, "v%u",
                                         LLVMGetVectorSize(type));
                 if (ret < 0) {
@@ -634,7 +627,7 @@ ac_build_expand(struct ac_llvm_context *ctx,
         LLVMTypeRef elemtype;
         LLVMValueRef chan[dst_channels];
  
-       if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+       if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFixedVectorTypeKind) {
                 unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
  
                 if (src_channels == dst_channels && vec_size == dst_channels)
@@ -707,20 +700,20 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
               LLVMValueRef num,
               LLVMValueRef den)
  {
-       /* If we do (num / den), LLVM >= 7.0 does:
-        *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
-        *
-        * If we do (num * (1 / den)), LLVM does:
-        *    return num * v_rcp_f32(den);
-        */
-       LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
-       LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
-       LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
+       unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+       const char *name;
  
-       /* Use v_rcp_f32 instead of precise division. */
-       if (!LLVMIsConstant(ret))
-               LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
-       return ret;
+       if (type_size == 2)
+               name = "llvm.amdgcn.rcp.f16";
+       else if (type_size == 4)
+               name = "llvm.amdgcn.rcp.f32";
+       else
+               name = "llvm.amdgcn.rcp.f64";
+
+        LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
+                                              &den, 1, AC_FUNC_ATTR_READNONE);
+
+       return LLVMBuildFMul(ctx->builder, num, rcp, "");
  }
  
  /* See fast_idiv_by_const.h. */
@@ -2380,6 +2373,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                (a->lod ? 1 : 0) +
                (a->level_zero ? 1 : 0) +
                (a->derivs[0] ? 1 : 0) <= 1);
+       assert((a->min_lod ? 1 : 0) +
+              (a->lod ? 1 : 0) +
+              (a->level_zero ? 1 : 0) <= 1);
  
         if (a->opcode == ac_image_get_lod) {
                 switch (dim) {
@@ -2435,6 +2431,9 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
         if (a->lod)
                 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
+       if (a->min_lod)
+               args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
+
         overload[num_overloads++] = sample ? ".f32" : ".i32";
  
         args[num_args++] = a->resource;
@@ -2488,7 +2487,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
         char intr_name[96];
         snprintf(intr_name, sizeof(intr_name),
                  "llvm.amdgcn.image.%s%s" /* base name */
-                "%s%s%s" /* sample/gather modifiers */
+                "%s%s%s%s" /* sample/gather modifiers */
                  ".%s.%s%s%s%s", /* dimension and type overloads */
                  name, atomic_subop,
                  a->compare ? ".c" : "",
@@ -2496,6 +2495,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                  lod_suffix ? ".l" :
                  a->derivs[0] ? ".d" :
                  a->level_zero ? ".lz" : "",
+                a->min_lod ? ".cl" : "",
                  a->offset ? ".o" : "",
                  dimname,
                  atomic ? "i32" : "v4f32",
@@ -3088,6 +3088,7 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
                             LLVMValueRef main_fn,
                             uint8_t *vs_output_param_offset,
                             uint32_t num_outputs,
+                           uint32_t skip_output_mask,
                             uint8_t *num_param_exports)
  {
         LLVMBasicBlockRef bb;
@@ -3154,12 +3155,13 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
                         }
  
                         /* Eliminate constant and duplicated PARAM exports. */
-                       if (ac_eliminate_const_output(vs_output_param_offset,
-                                                     num_outputs, &exp) ||
-                           ac_eliminate_duplicated_output(ctx,
-                                                          vs_output_param_offset,
-                                                          num_outputs, &exports,
-                                                          &exp)) {
+                       if (!((1u << target) & skip_output_mask) &&
+                            (ac_eliminate_const_output(vs_output_param_offset,
+                                                      num_outputs, &exp) ||
+                            ac_eliminate_duplicated_output(ctx,
+                                                           vs_output_param_offset,
+                                                           num_outputs, &exports,
+                                                           &exp))) {
                                 removed_any = true;
                         } else {
                                 exports.exp[exports.num++] = exp;
@@ -3611,11 +3613,15 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
  }
  
  static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+                 LLVMValueRef lane, bool with_opt_barrier)
  {
         LLVMTypeRef type = LLVMTypeOf(src);
         LLVMValueRef result;
  
+       if (with_opt_barrier)
+               ac_build_optimization_barrier(ctx, &src);
+
         src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
         if (lane)
                 lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
@@ -3630,20 +3636,13 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l
         return LLVMBuildTrunc(ctx->builder, result, type, "");
  }
  
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- *
- * The optimization barrier is not needed if the value is the same in all lanes
- * or if this is called in the outermost block.
- *
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
-                                             LLVMValueRef src, LLVMValueRef lane)
+static LLVMValueRef
+ac_build_readlane_common(struct ac_llvm_context *ctx,
+                        LLVMValueRef src, LLVMValueRef lane,
+                        bool with_opt_barrier)
  {
+       LLVMTypeRef src_type = LLVMTypeOf(src);
+       src = ac_to_integer(ctx, src);
         unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
         LLVMValueRef ret;
  
@@ -3654,32 +3653,48 @@ LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
                         LLVMBuildBitCast(ctx->builder, src, vec_type, "");
                 ret = LLVMGetUndef(vec_type);
                 for (unsigned i = 0; i < bits / 32; i++) {
+                       LLVMValueRef ret_comp;
+
                         src = LLVMBuildExtractElement(ctx->builder, src_vector,
                                                 LLVMConstInt(ctx->i32, i, 0), "");
-                       LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
+
+                       ret_comp = _ac_build_readlane(ctx, src, lane,
+                                                     with_opt_barrier);
+
                         ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
                                                 LLVMConstInt(ctx->i32, i, 0), "");
                 }
         } else {
-               ret = _ac_build_readlane(ctx, src, lane);
+               ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
         }
  
-       return ret;
+       if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+               return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
+       return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
  }
  
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ *
+ * The optimization barrier is not needed if the value is the same in all lanes
+ * or if this is called in the outermost block.
+ *
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
+                                             LLVMValueRef src, LLVMValueRef lane)
  {
-       LLVMTypeRef src_type = LLVMTypeOf(src);
-       src = ac_to_integer(ctx, src);
-       LLVMValueRef ret;
+       return ac_build_readlane_common(ctx, src, lane, false);
+}
  
-       ac_build_optimization_barrier(ctx, &src);
  
-       ret = ac_build_readlane_no_opt_barrier(ctx, src, lane);
-       if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
-               return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
-       return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+       return ac_build_readlane_common(ctx, src, lane, true);
  }
  
  LLVMValueRef
@@ -4700,6 +4715,9 @@ ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
  LLVMValueRef
  ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
  {
+       if (!ctx->postponed_kill)
+               return ac_build_load_helper_invocation(ctx);
+
         /* !(exact && postponed) */
         LLVMValueRef exact = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
                                                 ctx->i1, NULL, 0,
@@ -4805,10 +4823,7 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
          * We always have to export at least 1 primitive.
          * Export a degenerate triangle using vertex 0 for all 3 vertices.
          */
-       if (prim_cnt == ctx->i32_0 &&
-           (ctx->family == CHIP_NAVI10 ||
-            ctx->family == CHIP_NAVI12 ||
-            ctx->family == CHIP_NAVI14)) {
+       if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
                 assert(vtx_cnt == ctx->i32_0);
                 prim_cnt = ctx->i32_1;
                 vtx_cnt = ctx->i32_1;
@@ -4971,6 +4986,15 @@ ac_build_main(const struct ac_shader_args *args,
         }
  
         ctx->main_function = main_function;
+
+       if (LLVM_VERSION_MAJOR >= 11) {
+               /* Enable denormals for FP16 and FP64: */
+               LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math",
+                                                  "ieee,ieee");
+               /* Disable denormals for FP32: */
+               LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32",
+                                                  "preserve-sign,preserve-sign");
+       }
         return main_function;
  }