From 44a6b17df76bebcccff0a928987ea2a3272e32a4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Fri, 10 Jan 2020 17:13:06 +0100 Subject: [PATCH] aco/wave32: Set the definitions of v_cmp instructions to the lane mask. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The output of v_cmp instructions is s1 (a single SGPR) in wave32 mode, as opposed to s2 (an SGPR-pair) in wave64 mode. A couple of cases where this should have been fixed were omitted from the previous patch by mistake. Fixes: e0bcefc3a0a15a8c7ec00cfa53fd8fffcc07342a Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/aco_instruction_selection.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index cb28dffa65e..de2132c91db 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -188,7 +188,7 @@ static Temp emit_bpermute(isel_context *ctx, Builder &bld, Temp index, Temp data Temp lane_id = emit_mbcnt(ctx, bld.def(v1)); Temp lane_is_hi = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x20u), lane_id); Temp index_is_hi = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x20u), index); - Temp cmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(s2, vcc), lane_is_hi, index_is_hi); + Temp cmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), lane_is_hi, index_is_hi); return bld.reduction(aco_opcode::p_wave64_bpermute, bld.def(v1), bld.def(s2), bld.def(s1, scc), bld.vcc(cmp), Operand(v2.as_linear()), index_x4, data, gfx10_wave64_bpermute); @@ -591,6 +591,7 @@ void emit_comparison(isel_context *ctx, nir_alu_instr *instr, Temp dst, ctx->allocated[instr->src[1].src.ssa->index].type() == RegType::vgpr; aco_opcode op = use_valu ? v_op : s_op; assert(op != aco_opcode::num_opcodes); + assert(dst.regClass() == ctx->program->lane_mask); if (use_valu) emit_vopc_instruction(ctx, instr, op, dst); @@ -2142,6 +2143,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) if (src.type() == RegType::vgpr) { assert(src.regClass() == v1 || src.regClass() == v2); + assert(dst.regClass() == bld.lm); bld.vopc(src.size() == 2 ? aco_opcode::v_cmp_lg_u64 : aco_opcode::v_cmp_lg_u32, Definition(dst), Operand(0u), src).def(0).setHint(vcc); } else { @@ -2229,7 +2231,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) */ f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u)); - Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(s2)), f32, smallest); + Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest); static_cast(vop3)->abs[0] = true; cmp_res = vop3->definitions[0].getTemp(); } @@ -6342,7 +6344,7 @@ void build_cube_select(isel_context *ctx, Temp ma, Temp id, Temp deriv, Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand(0u), sgn_ma); Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), four, id); - Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(s2), two, id); + Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id); is_ma_y = bld.sop2(Builder::s_andn2, bld.hint_vcc(bld.def(bld.lm)), is_ma_y, is_ma_z); Temp is_not_ma_x = bld.sop2(aco_opcode::s_or_b64, bld.hint_vcc(bld.def(bld.lm)), bld.def(s1, scc), is_ma_z, is_ma_y); @@ -7913,7 +7915,7 @@ static void emit_streamout(isel_context *ctx, unsigned stream) Temp tid = emit_mbcnt(ctx, bld.def(v1)); - Temp can_emit = bld.vopc(aco_opcode::v_cmp_gt_i32, bld.def(s2), so_vtx_count, tid); + Temp can_emit = bld.vopc(aco_opcode::v_cmp_gt_i32, bld.def(bld.lm), so_vtx_count, tid); if_context ic; begin_divergent_if_then(ctx, &ic, can_emit); -- 2.30.2