Temp src = get_alu_src(ctx, instr->src[0]);
if (instr->src[0].src.ssa->bit_size == 64)
src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
- bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
+ if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne)
+ /* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
+ * keep value numbering and the scheduler simpler.
+ */
+ bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
+ else
+ bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
break;
}
case nir_op_f2f16_rtz: {
reduce->operands[2].physReg(), // vtmp
reduce->definitions[2].physReg(), // sitmp
reduce->operands[0], reduce->definitions[0]);
+ } else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) { // lower the pseudo conversion: set mode, convert, restore mode
+ float_mode new_mode = block->fp_mode; // start from the float mode recorded on the current block
+ new_mode.round16_64 = fp_round_ne; // override only the 16/64-bit rounding field (presumably round-to-nearest-even)
+ bool set_round = new_mode.round != block->fp_mode.round; // true only if the override actually changed the rounding bits
+
+ emit_set_mode(bld, new_mode, set_round, false); // NOTE(review): presumably a no-op when set_round is false -- confirm in emit_set_mode
+
+ instr->opcode = aco_opcode::v_cvt_f16_f32; // rewrite the pseudo opcode to the real conversion instruction
+ ctx.instructions.emplace_back(std::move(instr)); // instr is moved-from after this line; do not touch it below
+
+ emit_set_mode(bld, block->fp_mode, set_round, false); // switch back to the block's own mode, using the same set_round flag
} else {
ctx.instructions.emplace_back(std::move(instr));
}
(0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
(0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
+ ( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),