From c3e0ba52a0ac89c163ada8791151226b5a1b6efa Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 4 May 2020 10:43:07 -0400 Subject: [PATCH] ac/nir: support 16-bit data in buffer_load_format opcodes Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/llvm/ac_llvm_build.c | 10 ++++++++-- src/amd/llvm/ac_llvm_build.h | 3 ++- src/amd/llvm/ac_nir_to_llvm.c | 8 ++++++-- src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 2 +- src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 2 +- 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 5483b3146c0..0874cd99a57 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1314,6 +1314,10 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx, const char *indexing_kind = structurized ? "struct" : "raw"; char name[256], type_name[8]; + /* D16 is only supported on gfx8+ */ + assert((channel_type != ctx->f16 && channel_type != ctx->i16) || + ctx->chip_class >= GFX8); + LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type; ac_build_type_name_for_intr(type, type_name, sizeof(type_name)); @@ -1389,10 +1393,12 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef voffset, unsigned num_channels, unsigned cache_policy, - bool can_speculate) + bool can_speculate, + bool d16) { return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, - ctx->i32_0, num_channels, ctx->f32, + ctx->i32_0, num_channels, + d16 ? ctx->f16 : ctx->f32, cache_policy, can_speculate, true, true); } diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index ceebd3bb2ce..5ccd535723b 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -338,7 +338,8 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef voffset, unsigned num_channels, unsigned cache_policy, - bool can_speculate); + bool can_speculate, + bool d16); LLVMValueRef ac_build_tbuffer_load_short(struct ac_llvm_context *ctx, diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 01040dc3506..91707b92d42 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1436,12 +1436,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + assert(instr->dest.is_ssa); return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0, util_last_bit(mask), - 0, true); + 0, true, + instr->dest.ssa.bit_size == 16); } args->opcode = ac_image_sample; @@ -2782,11 +2784,13 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); + assert(instr->dest.is_ssa); bool can_speculate = access & ACCESS_CAN_REORDER; res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, args.cache_policy, - can_speculate); + can_speculate, + instr->dest.ssa.bit_size == 16); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index 09a2d904a2b..2313f74a023 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -460,7 +460,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx) if (key->opt.cs_indexed) { for (unsigned i = 0; i < 3; i++) { index[i] = ac_build_buffer_load_format(&ctx->ac, input_indexbuf, index[i], ctx->ac.i32_0, - 1, 0, true); + 1, 0, true, false); index[i] = ac_to_integer(&ctx->ac, index[i]); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 081d7fb9087..69022cd7234 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -152,7 +152,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L for (unsigned i = 0; i < num_fetches; ++i) { LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0); fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset, - channels_per_fetch, 0, true); + channels_per_fetch, 0, true, false); } if (num_fetches == 1 && channels_per_fetch > 1) { -- 2.30.2