ac: Silence a compiler warning about results[0].
[mesa.git] / src / amd / common / ac_llvm_build.c
index 6c010e8c3a68a758d69829a22eae072f3c35daf7..4d8f42d572c27ff488f969519ae78aa9a816e522 100644 (file)
@@ -252,6 +252,20 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
        }
 }
 
+/**
+ * Build an unnamed PHI node of \p type with ctx->builder and immediately add
+ * \p count_incoming incoming edges from \p values / \p blocks. Returns the PHI.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+            unsigned count_incoming, LLVMValueRef *values,
+            LLVMBasicBlockRef *blocks)
+{
+       LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+       LLVMAddIncoming(phi, values, blocks, count_incoming);
+       return phi;
+}
+
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
@@ -438,12 +452,13 @@ build_cube_intrinsic(struct ac_llvm_context *ctx,
  * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
  * the selcoords major axis.
  */
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
                              const struct cube_selection_coords *selcoords,
                              const LLVMValueRef *coords,
                              LLVMValueRef *out_st,
                              LLVMValueRef *out_ma)
 {
+       LLVMBuilderRef builder = ctx->builder;
        LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
        LLVMValueRef is_ma_positive;
        LLVMValueRef sgn_ma;
@@ -465,24 +480,24 @@ static void build_cube_select(LLVMBuilderRef builder,
        is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
 
        /* Select sc */
-       tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+       tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
        sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
-               LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+               LLVMBuildSelect(builder, is_ma_z, sgn_ma,
                        LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
        out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
 
        /* Select tc */
        tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
-       sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+       sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
                LLVMConstReal(f32, -1.0), "");
        out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
 
        /* Select ma */
        tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
                LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
-       sgn = LLVMBuildSelect(builder, is_ma_positive,
-               LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
-       *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+       tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+                                ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+       *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
 }
 
 void
@@ -498,8 +513,35 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
        LLVMValueRef invma;
 
        if (is_array && !is_lod) {
-               coords_arg[3] = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32,
-                                                  &coords_arg[3], 1, 0);
+               LLVMValueRef tmp = coords_arg[3];
+               tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+
+               /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+                *
+                *    "For Array forms, the array layer used will be
+                *
+                *       max(0, min(d−1, floor(layer+0.5)))
+                *
+                *     where d is the depth of the texture array and layer
+                *     comes from the component indicated in the tables below.
+                *     Workaround for an issue where the layer is taken from a
+                *     helper invocation which happens to fall on a different
+                *     layer due to extrapolation."
+                *
+                * VI and earlier attempt to implement this in hardware by
+                * clamping the value of coords[2] = (8 * layer) + face.
+                * Unfortunately, this means that we end up with the wrong
+                * face when clamping occurs.
+                *
+                * Clamp the layer earlier to work around the issue.
+                */
+               if (ctx->chip_class <= VI) {
+                       LLVMValueRef ge0;
+                       ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
+                       tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
+               }
+
+               coords_arg[3] = tmp;
        }
 
        build_cube_intrinsic(ctx, coords_arg, &selcoords);
@@ -543,7 +585,7 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
                         * seems awfully quiet about how textureGrad for cube
                         * maps should be handled.
                         */
-                       build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+                       build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
                                          deriv_st, &deriv_ma);
 
                        deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
@@ -675,32 +717,40 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
  * \param base_ptr  Where the array starts.
  * \param index     The element index into the array.
  * \param uniform   Whether the base_ptr and index can be assumed to be
- *                  dynamically uniform
+ *                  dynamically uniform (i.e. load to an SGPR)
+ * \param invariant Whether the load is invariant (no other opcodes affect it)
  */
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-                     LLVMValueRef base_ptr, LLVMValueRef index,
-                     bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+                    LLVMValueRef index, bool uniform, bool invariant)
 {
-       LLVMValueRef pointer;
+       LLVMValueRef pointer, result;
 
        pointer = ac_build_gep0(ctx, base_ptr, index);
        if (uniform)
                LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
-       return LLVMBuildLoad(ctx->builder, pointer, "");
+       result = LLVMBuildLoad(ctx->builder, pointer, ""); /* kept in a local so it can be tagged below */
+       if (invariant) /* unlike the uniform tag, this metadata is set on the load, not on the pointer */
+               LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+       return result;
 }
 
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-                           LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+                          LLVMValueRef index)
 {
-       LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
-       LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
-       return result;
+       return ac_build_load_custom(ctx, base_ptr, index, false, false); /* uniform=false, invariant=false */
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+                                    LLVMValueRef base_ptr, LLVMValueRef index)
+{
+       return ac_build_load_custom(ctx, base_ptr, index, false, true); /* uniform=false, invariant=true */
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index)
+{
+       return ac_build_load_custom(ctx, base_ptr, index, true, true); /* uniform=true, invariant=true */
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
@@ -718,10 +768,13 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            bool glc,
                            bool slc,
                            bool writeonly_memory,
-                           bool has_add_tid)
+                           bool swizzle_enable_hint)
 {
-       /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
-       if (!has_add_tid) {
+       /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+        * (voffset is swizzled, but soffset isn't swizzled).
+        * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+        */
+       if (!swizzle_enable_hint) {
                /* Split 3 channel stores, becase LLVM doesn't support 3-channel
                 * intrinsics. */
                if (num_channels == 3) {
@@ -735,11 +788,11 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
                        ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
                                                    soffset, inst_offset, glc, slc,
-                                                   writeonly_memory, has_add_tid);
+                                                   writeonly_memory, swizzle_enable_hint);
                        ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
                                                    soffset, inst_offset + 8,
                                                    glc, slc,
-                                                   writeonly_memory, has_add_tid);
+                                                   writeonly_memory, swizzle_enable_hint);
                        return;
                }
 
@@ -1006,7 +1059,7 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
                                          AC_FUNC_ATTR_READNONE |
                                          AC_FUNC_ATTR_CONVERGENT);
        } else {
-               uint32_t masks[2];
+               uint32_t masks[2] = {};
 
                switch (mask) {
                case AC_TID_MASK_TOP_LEFT:
@@ -1025,6 +1078,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
                        masks[0] = 0x80a0;
                        masks[1] = 0x80f5;
                        break;
+               default:
+                       assert(0);
                }
 
                args[0] = val;
@@ -1202,7 +1257,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
        LLVMTypeRef dst_type;
        LLVMValueRef args[11];
        unsigned num_args = 0;
-       const char *name;
+       const char *name = NULL;
        char intr_name[128], type[64];
 
        if (HAVE_LLVM >= 0x0400) {
@@ -1679,3 +1734,11 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
                *num_param_exports = exports.num;
        }
 }
+
+void ac_init_exec_full_mask(struct ac_llvm_context *ctx) /* emits a call to llvm.amdgcn.init.exec with an all-ones mask */
+{
+       LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); /* ~0ull: every bit set */
+       ac_build_intrinsic(ctx,
+                          "llvm.amdgcn.init.exec", ctx->voidt,
+                          &full_mask, 1, AC_FUNC_ATTR_CONVERGENT); /* single argument; the returned value is not used */
+}