From 1647e098e94d6aab0b4c454ccdd5300afd1d0079 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 8 May 2020 16:22:53 +0200 Subject: [PATCH] aco: implement 16-bit interp For 16-bank LDS (i.e. Kabini/Stoney) we need a slightly different path. It's completely untested, though, because I don't have these chips, but according to vkpipeline-db the generated assembly seems fine. Note that 16-bit I/O is currently only exposed on GFX9+ for both compiler backends. Signed-off-by: Samuel Pitoiset Reviewed-by: Rhys Perry Part-of: --- .../compiler/aco_instruction_selection.cpp | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 539e548bb81..7747e6bbeea 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4424,10 +4424,40 @@ void emit_interp_instr(isel_context *ctx, unsigned idx, unsigned component, Temp Temp coord2 = emit_extract_vector(ctx, src, 1, v1); Builder bld(ctx->program, ctx->block); - Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1, bld.m0(prim_mask), idx, component); - if (ctx->program->has_16bank_lds) - interp_p1.instr->operands[0].setLateKill(true); - bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx, component); + + if (dst.regClass() == v2b) { + if (ctx->program->has_16bank_lds) { + assert(ctx->options->chip_class <= GFX8); + Builder::Result interp_p1 = + bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), + Operand(2u) /* P0 */, bld.m0(prim_mask), idx, component); + interp_p1 = bld.vintrp(aco_opcode::v_interp_p1lv_f16, bld.def(v2b), + coord1, bld.m0(prim_mask), interp_p1, idx, component); + bld.vintrp(aco_opcode::v_interp_p2_legacy_f16, Definition(dst), coord2, + bld.m0(prim_mask), interp_p1, idx, component); + } else { + aco_opcode interp_p2_op = 
aco_opcode::v_interp_p2_f16; + + if (ctx->options->chip_class == GFX8) + interp_p2_op = aco_opcode::v_interp_p2_legacy_f16; + + Builder::Result interp_p1 = + bld.vintrp(aco_opcode::v_interp_p1ll_f16, bld.def(v1), + coord1, bld.m0(prim_mask), idx, component); + bld.vintrp(interp_p2_op, Definition(dst), coord2, bld.m0(prim_mask), + interp_p1, idx, component); + } + } else { + Builder::Result interp_p1 = + bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1, + bld.m0(prim_mask), idx, component); + + if (ctx->program->has_16bank_lds) + interp_p1.instr->operands[0].setLateKill(true); + + bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, + bld.m0(prim_mask), interp_p1, idx, component); + } } void emit_load_frag_coord(isel_context *ctx, Temp dst, unsigned num_components) -- 2.30.2