ac/nir: support 16-bit data in buffer_load_format opcodes
author: Marek Olšák <marek.olsak@amd.com>
Mon, 4 May 2020 14:43:07 +0000 (10:43 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 2 Jun 2020 20:29:25 +0000 (16:29 -0400)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5003>

src/amd/llvm/ac_llvm_build.c
src/amd/llvm/ac_llvm_build.h
src/amd/llvm/ac_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_compute_prim_discard.c
src/gallium/drivers/radeonsi/si_shader_llvm_vs.c

index 5483b3146c01c284eb7153f4cb67e5f1720a319b..0874cd99a57d2e5d06f3325970174a20515f40f1 100644 (file)
@@ -1314,6 +1314,10 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
        const char *indexing_kind = structurized ? "struct" : "raw";
        char name[256], type_name[8];
 
+       /* D16 is only supported on gfx8+ */
+       assert((channel_type != ctx->f16 && channel_type != ctx->i16) ||
+              ctx->chip_class >= GFX8);
+
        LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
        ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
 
@@ -1389,10 +1393,12 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef voffset,
                                         unsigned num_channels,
                                         unsigned cache_policy,
-                                        bool can_speculate)
+                                        bool can_speculate,
+                                        bool d16)
 {
        return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
-                                          ctx->i32_0, num_channels, ctx->f32,
+                                          ctx->i32_0, num_channels,
+                                          d16 ? ctx->f16 : ctx->f32,
                                           cache_policy, can_speculate,
                                           true, true);
 }
index ceebd3bb2cefff8a10dc0fe50b52a01bf2fe2b8e..5ccd535723bf0a053700c3d9a398ca512ad6314a 100644 (file)
@@ -338,7 +338,8 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef voffset,
                                         unsigned num_channels,
                                         unsigned cache_policy,
-                                        bool can_speculate);
+                                        bool can_speculate,
+                                        bool d16);
 
 LLVMValueRef
 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
index 01040dc3506e2837dba8649cdbf3ea3f29866a01..91707b92d42db6f1ce5020a6e28e81d9781fab1d 100644 (file)
@@ -1436,12 +1436,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
        if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
                unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
 
+               assert(instr->dest.is_ssa);
                return ac_build_buffer_load_format(&ctx->ac,
                                                   args->resource,
                                                   args->coords[0],
                                                   ctx->ac.i32_0,
                                                   util_last_bit(mask),
-                                                  0, true);
+                                                  0, true,
+                                                  instr->dest.ssa.bit_size == 16);
        }
 
        args->opcode = ac_image_sample;
@@ -2782,11 +2784,13 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
                vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
                                                 ctx->ac.i32_0, "");
 
+               assert(instr->dest.is_ssa);
                bool can_speculate = access & ACCESS_CAN_REORDER;
                res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
                                                  ctx->ac.i32_0, num_channels,
                                                  args.cache_policy,
-                                                 can_speculate);
+                                                 can_speculate,
+                                                 instr->dest.ssa.bit_size == 16);
                res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
 
                res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
index 09a2d904a2bca02ad34caff1d5dd7fca6081f83f..2313f74a0233461e8f7bc8c2b0d85896b5e9c502 100644 (file)
@@ -460,7 +460,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
    if (key->opt.cs_indexed) {
       for (unsigned i = 0; i < 3; i++) {
          index[i] = ac_build_buffer_load_format(&ctx->ac, input_indexbuf, index[i], ctx->ac.i32_0,
-                                                1, 0, true);
+                                                1, 0, true, false);
          index[i] = ac_to_integer(&ctx->ac, index[i]);
       }
    }
index 081d7fb908766b71e6bd9574d8a40521e6b18b62..69022cd723483a6fecf01e5d902bdae7fa217360 100644 (file)
@@ -152,7 +152,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
    for (unsigned i = 0; i < num_fetches; ++i) {
       LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
       fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
-                                               channels_per_fetch, 0, true);
+                                               channels_per_fetch, 0, true, false);
    }
 
    if (num_fetches == 1 && channels_per_fetch > 1) {