ac/llvm: load 1 byte at a time if unaligned on gfx10
[mesa.git] / src / amd / llvm / ac_llvm_build.c
index 6b8e2a98b5889ab777215815adf3d2e4f62721ad..77681834ffae7c31889110bc5b80118cf90d4e83 100644 (file)
@@ -1183,8 +1183,6 @@ ac_build_buffer_store_common(struct ac_llvm_context *ctx,
                             LLVMValueRef vindex,
                             LLVMValueRef voffset,
                             LLVMValueRef soffset,
-                            unsigned num_channels,
-                            LLVMTypeRef return_channel_type,
                             unsigned cache_policy,
                             bool use_format,
                             bool structurized)
@@ -1198,12 +1196,10 @@ ac_build_buffer_store_common(struct ac_llvm_context *ctx,
        args[idx++] = voffset ? voffset : ctx->i32_0;
        args[idx++] = soffset ? soffset : ctx->i32_0;
        args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
-       unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
        const char *indexing_kind = structurized ? "struct" : "raw";
        char name[256], type_name[8];
 
-       LLVMTypeRef type = func > 1 ? LLVMVectorType(return_channel_type, func) : return_channel_type;
-       ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+       ac_build_type_name_for_intr(LLVMTypeOf(data), type_name, sizeof(type_name));
 
        if (use_format) {
                snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
@@ -1223,13 +1219,10 @@ ac_build_buffer_store_format(struct ac_llvm_context *ctx,
                             LLVMValueRef data,
                             LLVMValueRef vindex,
                             LLVMValueRef voffset,
-                            unsigned num_channels,
                             unsigned cache_policy)
 {
-       ac_build_buffer_store_common(ctx, rsrc, data, vindex,
-                                    voffset, NULL, num_channels,
-                                    ctx->f32, cache_policy,
-                                    true, true);
+       ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL,
+                                    cache_policy, true, true);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
@@ -1278,7 +1271,6 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
                ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata),
                                             ctx->i32_0, voffset, offset,
-                                            num_channels, ctx->f32,
                                             cache_policy, false, false);
                return;
        }
@@ -1322,6 +1314,11 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
        const char *indexing_kind = structurized ? "struct" : "raw";
        char name[256], type_name[8];
 
+       /* D16 is only supported on gfx8+ */
+       assert(!use_format ||
+              (channel_type != ctx->f16 && channel_type != ctx->i16) ||
+              ctx->chip_class >= GFX8);
+
        LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
        ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
 
@@ -1397,10 +1394,12 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef voffset,
                                         unsigned num_channels,
                                         unsigned cache_policy,
-                                        bool can_speculate)
+                                        bool can_speculate,
+                                        bool d16)
 {
        return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
-                                          ctx->i32_0, num_channels, ctx->f32,
+                                          ctx->i32_0, num_channels,
+                                          d16 ? ctx->f16 : ctx->f32,
                                           cache_policy, can_speculate,
                                           true, true);
 }
@@ -1652,7 +1651,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
        }
 
        int log_recombine = 0;
-       if (ctx->chip_class == GFX6 && !known_aligned) {
+       if ((ctx->chip_class == GFX6 || ctx->chip_class == GFX10) && !known_aligned) {
                /* Avoid alignment restrictions by loading one byte at a time. */
                load_num_channels <<= load_log_size;
                log_recombine = load_log_size;
@@ -1936,8 +1935,7 @@ ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
        if (LLVM_VERSION_MAJOR >= 9) {
                /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
                ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
-                                            voffset, soffset, 1,
-                                            ctx->i16, cache_policy,
+                                            voffset, soffset, cache_policy,
                                             false, false);
        } else {
                unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
@@ -1963,8 +1961,7 @@ ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
        if (LLVM_VERSION_MAJOR >= 9) {
                /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
                ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
-                                            voffset, soffset, 1,
-                                            ctx->i8, cache_policy,
+                                            voffset, soffset, cache_policy,
                                             false, false);
        } else {
                unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
@@ -2059,6 +2056,8 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 
        if (result_type == ctx->f16)
                val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+       else if (result_type == ctx->v2f16)
+               val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
 
        for (unsigned i = 0; i < 4; ++i) {
                tl_lanes[i] = i & mask;
@@ -2379,6 +2378,11 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
        assert((a->min_lod ? 1 : 0) +
               (a->lod ? 1 : 0) +
               (a->level_zero ? 1 : 0) <= 1);
+       assert(!a->d16 || (ctx->chip_class >= GFX8 &&
+                          a->opcode != ac_image_atomic &&
+                          a->opcode != ac_image_atomic_cmpswap &&
+                          a->opcode != ac_image_get_lod &&
+                          a->opcode != ac_image_get_resinfo));
 
        if (a->opcode == ac_image_get_lod) {
                switch (dim) {
@@ -2501,7 +2505,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                 a->min_lod ? ".cl" : "",
                 a->offset ? ".o" : "",
                 dimname,
-                atomic ? "i32" : "v4f32",
+                atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"),
                 overload[0], overload[1], overload[2]);
 
        LLVMTypeRef retty;
@@ -2510,15 +2514,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
        else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
                retty = ctx->voidt;
        else
-               retty = ctx->v4f32;
+               retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
 
        LLVMValueRef result =
                ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
                                   a->attributes);
-       if (!sample && retty == ctx->v4f32) {
-               result = LLVMBuildBitCast(ctx->builder, result,
-                                         ctx->v4i32, "");
-       }
+       if (!sample && !atomic && retty != ctx->voidt)
+               result = ac_to_integer(ctx, result);
+
        return result;
 }