ac/nir: support 16-bit data in image opcodes
author: Marek Olšák <marek.olsak@amd.com>
Mon, 4 May 2020 13:30:40 +0000 (09:30 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 2 Jun 2020 20:29:25 +0000 (16:29 -0400)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5003>

src/amd/llvm/ac_llvm_build.c
src/amd/llvm/ac_llvm_build.h
src/amd/llvm/ac_nir_to_llvm.c

index 0874cd99a57d2e5d06f3325970174a20515f40f1..7fa2e38361e4730df9c9dcbbc73e02d0d423fe03 100644 (file)
@@ -2375,6 +2375,11 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
        assert((a->min_lod ? 1 : 0) +
               (a->lod ? 1 : 0) +
               (a->level_zero ? 1 : 0) <= 1);
+       assert(!a->d16 || (ctx->chip_class >= GFX8 &&
+                          a->opcode != ac_image_atomic &&
+                          a->opcode != ac_image_atomic_cmpswap &&
+                          a->opcode != ac_image_get_lod &&
+                          a->opcode != ac_image_get_resinfo));
 
        if (a->opcode == ac_image_get_lod) {
                switch (dim) {
@@ -2497,7 +2502,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                 a->min_lod ? ".cl" : "",
                 a->offset ? ".o" : "",
                 dimname,
-                atomic ? "i32" : "v4f32",
+                atomic ? "i32" : (a->d16 ? "v4f16" : "v4f32"),
                 overload[0], overload[1], overload[2]);
 
        LLVMTypeRef retty;
@@ -2506,15 +2511,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
        else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
                retty = ctx->voidt;
        else
-               retty = ctx->v4f32;
+               retty = a->d16 ? ctx->v4f16 : ctx->v4f32;
 
        LLVMValueRef result =
                ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
                                   a->attributes);
-       if (!sample && retty == ctx->v4f32) {
-               result = LLVMBuildBitCast(ctx->builder, result,
-                                         ctx->v4i32, "");
-       }
+       if (!sample && !atomic && retty != ctx->voidt)
+               result = ac_to_integer(ctx, result);
+
        return result;
 }
 
index 5ccd535723bf0a053700c3d9a398ca512ad6314a..1d54b9a1da963cbfdcd5c754694db30257aeed45 100644 (file)
@@ -554,6 +554,7 @@ struct ac_image_args {
        unsigned cache_policy : 3;
        bool unorm : 1;
        bool level_zero : 1;
+       bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */
        unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
 
        LLVMValueRef resource;
index 91707b92d42db6f1ce5020a6e28e81d9781fab1d..24be5ca866e1e2981212f341e4f479ff351e1b78 100644 (file)
@@ -2807,6 +2807,9 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
                args.dmask = 15;
                args.attributes = AC_FUNC_ATTR_READONLY;
 
+               assert(instr->dest.is_ssa);
+               args.d16 = instr->dest.ssa.bit_size == 16;
+
                res = ac_build_image_opcode(&ctx->ac, &args);
        }
        return exit_waterfall(ctx, &wctx, res);
@@ -2873,6 +2876,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
                if (!level_zero)
                        args.lod = get_src(ctx, instr->src[4]);
                args.dmask = 15;
+               args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
 
                ac_build_image_opcode(&ctx->ac, &args);
        }
@@ -4736,6 +4740,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
                }
        }
 
+       assert(instr->dest.is_ssa);
+       args.d16 = instr->dest.ssa.bit_size == 16;
+
        result = build_tex_intrinsic(ctx, instr, &args);
 
        if (instr->op == nir_texop_query_levels)