assert(dst.size() == 1);
Temp src = get_alu_src(ctx, instr->src[0]);
if (instr->src[0].src.ssa->bit_size == 8) {
- //TODO: we should use v_cvt_f32_ubyte1/v_cvt_f32_ubyte2/etc depending on the register assignment
bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src);
} else {
if (instr->src[0].src.ssa->bit_size == 16)
case nir_op_f2i8:
case nir_op_f2i16: {
Temp src = get_alu_src(ctx, instr->src[0]);
+ Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1); /* conversion target: dst itself when dst is a VGPR,
+  * otherwise a fresh v1 temporary obtained from bld.tmp() */
if (instr->src[0].src.ssa->bit_size == 16)
- src = bld.vop1(aco_opcode::v_cvt_i16_f16, bld.def(v1), src);
+ src = bld.vop1(aco_opcode::v_cvt_i16_f16, Definition(tmp), src); /* 16-bit source; src is rebound to the vop1 result */
else if (instr->src[0].src.ssa->bit_size == 32)
- src = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), src);
+ src = bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(tmp), src); /* 32-bit source */
else
- src = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), src);
+ src = bld.vop1(aco_opcode::v_cvt_i32_f64, Definition(tmp), src); /* any other source size; presumably 64-bit given the f64 opcode */
- if (dst.type() == RegType::vgpr)
- bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
- else
+ if (dst.type() != RegType::vgpr) /* a VGPR dst was already written by the conversion above;
+  * only other dst types need the extra p_as_uniform into dst, fed by the vop1 result held in src */
bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
break;
}
case nir_op_f2u8:
case nir_op_f2u16: {
Temp src = get_alu_src(ctx, instr->src[0]);
+ Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1); /* conversion target: dst itself when dst is a VGPR,
+  * otherwise a fresh v1 temporary obtained from bld.tmp() */
if (instr->src[0].src.ssa->bit_size == 16)
- src = bld.vop1(aco_opcode::v_cvt_u16_f16, bld.def(v1), src);
+ bld.vop1(aco_opcode::v_cvt_u16_f16, Definition(tmp), src); /* 16-bit source; result lands in tmp, src is left untouched */
else if (instr->src[0].src.ssa->bit_size == 32)
- src = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), src);
+ bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(tmp), src); /* 32-bit source */
else
- src = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), src);
+ bld.vop1(aco_opcode::v_cvt_u32_f64, Definition(tmp), src); /* any other source size; presumably 64-bit given the f64 opcode */
- if (dst.type() == RegType::vgpr)
- bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
- else
- bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
+ if (dst.type() != RegType::vgpr) /* a VGPR dst was already written by the conversion above */
+ bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp); /* otherwise move the converted value from tmp into dst */
break;
}
case nir_op_f2i32: {
}
/* shift result right if needed */
- if (info->component_size < 4) {
+ if (info->component_size < 4 && byte_align_loads) {
Operand align((uint32_t)byte_align);
if (byte_align == -1) {
if (offset.isConstant())
if (target == V_008DFC_SQ_EXP_NULL)
return false;
+ /* Replace NaN by zero (only 32-bit) to fix game bugs if requested.
+  * Runs only when the enable_mrt_output_nan_fixup option is set, the export
+  * is not 16-bit, and col_format is one of the formats listed below. Each
+  * component enabled in write_mask is tested and values[i] is replaced by
+  * the result of a conditional select before the code that follows uses it. */
+ if (ctx->options->enable_mrt_output_nan_fixup &&
+ !is_16bit &&
+ (col_format == V_028714_SPI_SHADER_32_R ||
+ col_format == V_028714_SPI_SHADER_32_GR ||
+ col_format == V_028714_SPI_SHADER_32_AR ||
+ col_format == V_028714_SPI_SHADER_32_ABGR ||
+ col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+ for (int i = 0; i < 4; i++) {
+ if (!(write_mask & (1 << i))) /* skip components that are not enabled in write_mask */
+ continue;
+
+ Temp isnan = bld.vopc(aco_opcode::v_cmp_class_f32,
+ bld.hint_vcc(bld.def(bld.lm)), values[i],
+ bld.copy(bld.def(v1), Operand(3u))); /* class mask 3u: presumably selects the signaling and quiet NaN classes - confirm against the ISA docs */
+ values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), values[i],
+ bld.copy(bld.def(v1), Operand(0u)), isnan); /* NOTE(review): presumably yields the zero constant where isnan is set and the
+  * original values[i] elsewhere - confirm the v_cndmask_b32 operand order */
+ }
+ }
+
if ((bool) compr_op) {
for (int i = 0; i < 2; i++) {
/* check if at least one of the values to be compressed is enabled */