if (dst.type() == RegType::vgpr) {
aco_ptr<Instruction> bcsel;
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ then = as_vgpr(ctx, then);
+ els = as_vgpr(ctx, els);
+
+ Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), els, then, cond);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
then = as_vgpr(ctx, then);
els = as_vgpr(ctx, els);
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), els, then, cond);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
bld.pseudo(aco_opcode::p_split_vector, Definition(then_lo), Definition(then_hi), then);
Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
break;
}
case nir_op_fmul: {
- if (dst.size() == 1) {
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.tmp(v1);
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f16, tmp, true);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true);
- } else if (dst.size() == 2) {
- bld.vop3(aco_opcode::v_mul_f64, Definition(dst), get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ } else if (dst.regClass() == v2) {
+ bld.vop3(aco_opcode::v_mul_f64, Definition(dst), src0, src1);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
break;
}
case nir_op_fadd: {
- if (dst.size() == 1) {
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.tmp(v1);
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f16, tmp, true);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true);
- } else if (dst.size() == 2) {
- bld.vop3(aco_opcode::v_add_f64, Definition(dst), get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ } else if (dst.regClass() == v2) {
+ bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, src1);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
}
case nir_op_fsub: {
Temp src0 = get_alu_src(ctx, instr->src[0]);
- Temp src1 = get_alu_src(ctx, instr->src[1]);
- if (dst.size() == 1) {
+ Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.tmp(v1);
+ if (src1.type() == RegType::vgpr || src0.type() != RegType::vgpr)
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_sub_f16, tmp, false);
+ else
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_subrev_f16, tmp, true);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
if (src1.type() == RegType::vgpr || src0.type() != RegType::vgpr)
emit_vop2_instruction(ctx, instr, aco_opcode::v_sub_f32, dst, false);
else
emit_vop2_instruction(ctx, instr, aco_opcode::v_subrev_f32, dst, true);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
- get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ src0, src1);
VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
sub->neg[1] = true;
} else {
break;
}
case nir_op_fmax: {
- if (dst.size() == 1) {
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
+ if (dst.regClass() == v2b) {
+ // TODO: check fp_mode.must_flush_denorms16_64
+ Temp tmp = bld.tmp(v1);
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f16, tmp, true);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f32, dst, true, false, ctx->block->fp_mode.must_flush_denorms32);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
if (ctx->block->fp_mode.must_flush_denorms16_64 && ctx->program->chip_class < GFX9) {
- Temp tmp = bld.vop3(aco_opcode::v_max_f64, bld.def(v2),
- get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ Temp tmp = bld.vop3(aco_opcode::v_max_f64, bld.def(v2), src0, src1);
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand(0x3FF0000000000000lu), tmp);
} else {
- bld.vop3(aco_opcode::v_max_f64, Definition(dst),
- get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ bld.vop3(aco_opcode::v_max_f64, Definition(dst), src0, src1);
}
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
break;
}
case nir_op_fmin: {
- if (dst.size() == 1) {
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
+ if (dst.regClass() == v2b) {
+ // TODO: check fp_mode.must_flush_denorms16_64
+ Temp tmp = bld.tmp(v1);
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f16, tmp, true);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f32, dst, true, false, ctx->block->fp_mode.must_flush_denorms32);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
if (ctx->block->fp_mode.must_flush_denorms16_64 && ctx->program->chip_class < GFX9) {
- Temp tmp = bld.vop3(aco_opcode::v_min_f64, bld.def(v2),
- get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ Temp tmp = bld.vop3(aco_opcode::v_min_f64, bld.def(v2), src0, src1);
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand(0x3FF0000000000000lu), tmp);
} else {
- bld.vop3(aco_opcode::v_min_f64, Definition(dst),
- get_alu_src(ctx, instr->src[0]),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
+ bld.vop3(aco_opcode::v_min_f64, Definition(dst), src0, src1);
}
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
break;
}
case nir_op_frsq: {
- if (dst.size() == 1) {
- emit_rsq(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0]));
- } else if (dst.size() == 2) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_rsq_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ emit_rsq(ctx, bld, Definition(dst), src);
+ } else if (dst.regClass() == v2) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
}
case nir_op_fneg: {
Temp src = get_alu_src(ctx, instr->src[0]);
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x8000u), as_vgpr(ctx, src));
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
if (ctx->block->fp_mode.must_flush_denorms32)
src = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f800000u), as_vgpr(ctx, src));
bld.vop2(aco_opcode::v_xor_b32, Definition(dst), Operand(0x80000000u), as_vgpr(ctx, src));
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
if (ctx->block->fp_mode.must_flush_denorms16_64)
src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand(0x3FF0000000000000lu), as_vgpr(ctx, src));
Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
}
case nir_op_fabs: {
Temp src = get_alu_src(ctx, instr->src[0]);
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7FFFu), as_vgpr(ctx, src));
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
if (ctx->block->fp_mode.must_flush_denorms32)
src = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f800000u), as_vgpr(ctx, src));
bld.vop2(aco_opcode::v_and_b32, Definition(dst), Operand(0x7FFFFFFFu), as_vgpr(ctx, src));
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
if (ctx->block->fp_mode.must_flush_denorms16_64)
src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand(0x3FF0000000000000lu), as_vgpr(ctx, src));
Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
}
case nir_op_fsat: {
Temp src = get_alu_src(ctx, instr->src[0]);
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ Temp one = bld.copy(bld.def(s1), Operand(0x3c00u));
+ Temp tmp = bld.vop3(aco_opcode::v_med3_f16, bld.def(v1), Operand(0u), one, src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand(0u), Operand(0x3f800000u), src);
/* apparently, it is not necessary to flush denorms if this instruction is used with these operands */
// TODO: confirm that this holds under any circumstances
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add);
vop3->clamp = true;
break;
}
case nir_op_flog2: {
- if (dst.size() == 1) {
- emit_log2(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0]));
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_log_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ emit_log2(ctx, bld, Definition(dst), src);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
break;
}
case nir_op_frcp: {
- if (dst.size() == 1) {
- emit_rcp(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0]));
- } else if (dst.size() == 2) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_rcp_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ emit_rcp(ctx, bld, Definition(dst), src);
+ } else if (dst.regClass() == v2) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
break;
}
case nir_op_fexp2: {
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ Temp tmp = bld.vop1(aco_opcode::v_exp_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f32, dst);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
break;
}
case nir_op_fsqrt: {
- if (dst.size() == 1) {
- emit_sqrt(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0]));
- } else if (dst.size() == 2) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_sqrt_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ emit_sqrt(ctx, bld, Definition(dst), src);
+ } else if (dst.regClass() == v2) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
break;
}
case nir_op_ffract: {
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ Temp tmp = bld.vop1(aco_opcode::v_fract_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f32, dst);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f64, dst);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
break;
}
case nir_op_ffloor: {
- if (dst.size() == 1) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_floor_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_floor_f32, dst);
- } else if (dst.size() == 2) {
- emit_floor_f64(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0]));
+ } else if (dst.regClass() == v2) {
+ emit_floor_f64(ctx, bld, Definition(dst), src);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
break;
}
case nir_op_fceil: {
- if (dst.size() == 1) {
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_ceil_f16, bld.def(v1), src0);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f32, dst);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
if (ctx->options->chip_class >= GFX7) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f64, dst);
} else {
/* GFX6 doesn't support V_CEIL_F64, lower it. */
- Temp src0 = get_alu_src(ctx, instr->src[0]);
-
/* trunc = trunc(src0)
* if (src0 > 0.0 && src0 != trunc)
* trunc += 1.0
break;
}
case nir_op_ftrunc: {
- if (dst.size() == 1) {
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_trunc_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_trunc_f32, dst);
- } else if (dst.size() == 2) {
- emit_trunc_f64(ctx, bld, Definition(dst), get_alu_src(ctx, instr->src[0]));
+ } else if (dst.regClass() == v2) {
+ emit_trunc_f64(ctx, bld, Definition(dst), src);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
break;
}
case nir_op_fround_even: {
- if (dst.size() == 1) {
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_rndne_f16, bld.def(v1), src0);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f32, dst);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
if (ctx->options->chip_class >= GFX7) {
emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f64, dst);
} else {
/* GFX6 doesn't support V_RNDNE_F64, lower it. */
- Temp src0 = get_alu_src(ctx, instr->src[0]);
-
Temp src0_lo = bld.tmp(v1), src0_hi = bld.tmp(v1);
bld.pseudo(aco_opcode::p_split_vector, Definition(src0_lo), Definition(src0_hi), src0);
}
case nir_op_fsin:
case nir_op_fcos: {
- Temp src = get_alu_src(ctx, instr->src[0]);
+ Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
aco_ptr<Instruction> norm;
- if (dst.size() == 1) {
- Temp half_pi = bld.copy(bld.def(s1), Operand(0x3e22f983u));
- Temp tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, as_vgpr(ctx, src));
+ Temp half_pi = bld.copy(bld.def(s1), Operand(0x3e22f983u));
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v1), half_pi, src);
+ aco_opcode opcode = instr->op == nir_op_fsin ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
+ tmp = bld.vop1(opcode, bld.def(v1), tmp);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ Temp tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src);
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
if (ctx->options->chip_class < GFX9)
break;
}
case nir_op_ldexp: {
- if (dst.size() == 1) {
- bld.vop3(aco_opcode::v_ldexp_f32, Definition(dst),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[0])),
- get_alu_src(ctx, instr->src[1]));
- } else if (dst.size() == 2) {
- bld.vop3(aco_opcode::v_ldexp_f64, Definition(dst),
- as_vgpr(ctx, get_alu_src(ctx, instr->src[0])),
- get_alu_src(ctx, instr->src[1]));
+ Temp src0 = get_alu_src(ctx, instr->src[0]);
+ Temp src1 = get_alu_src(ctx, instr->src[1]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.tmp(v1);
+ emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, tmp, false);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ bld.vop3(aco_opcode::v_ldexp_f32, Definition(dst), as_vgpr(ctx, src0), src1);
+ } else if (dst.regClass() == v2) {
+ bld.vop3(aco_opcode::v_ldexp_f64, Definition(dst), as_vgpr(ctx, src0), src1);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
break;
}
case nir_op_frexp_sig: {
- if (dst.size() == 1) {
- bld.vop1(aco_opcode::v_frexp_mant_f32, Definition(dst),
- get_alu_src(ctx, instr->src[0]));
- } else if (dst.size() == 2) {
- bld.vop1(aco_opcode::v_frexp_mant_f64, Definition(dst),
- get_alu_src(ctx, instr->src[0]));
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (dst.regClass() == v2b) {
+ Temp tmp = bld.vop1(aco_opcode::v_frexp_mant_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
+ bld.vop1(aco_opcode::v_frexp_mant_f32, Definition(dst), src);
+ } else if (dst.regClass() == v2) {
+ bld.vop1(aco_opcode::v_frexp_mant_f64, Definition(dst), src);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
break;
}
case nir_op_frexp_exp: {
- if (instr->src[0].src.ssa->bit_size == 32) {
- bld.vop1(aco_opcode::v_frexp_exp_i32_f32, Definition(dst),
- get_alu_src(ctx, instr->src[0]));
+ Temp src = get_alu_src(ctx, instr->src[0]);
+ if (instr->src[0].src.ssa->bit_size == 16) {
+ Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src);
+ bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), tmp, Operand(0u));
+ } else if (instr->src[0].src.ssa->bit_size == 32) {
+ bld.vop1(aco_opcode::v_frexp_exp_i32_f32, Definition(dst), src);
} else if (instr->src[0].src.ssa->bit_size == 64) {
- bld.vop1(aco_opcode::v_frexp_exp_i32_f64, Definition(dst),
- get_alu_src(ctx, instr->src[0]));
+ bld.vop1(aco_opcode::v_frexp_exp_i32_f64, Definition(dst), src);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
}
case nir_op_fsign: {
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
- if (dst.size() == 1) {
+ if (dst.regClass() == v2b) {
+ Temp one = bld.copy(bld.def(v1), Operand(0x3c00u));
+ Temp minus_one = bld.copy(bld.def(v1), Operand(0xbc00u));
+ Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
+ src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), one, src, cond);
+ cond = bld.vopc(aco_opcode::v_cmp_le_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
+ Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), minus_one, src, cond);
+ bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
+ } else if (dst.regClass() == v1) {
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0x3f800000u), src, cond);
cond = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0xbf800000u), src, cond);
- } else if (dst.size() == 2) {
+ } else if (dst.regClass() == v2) {
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
Temp tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0x3FF00000u));
Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, emit_extract_vector(ctx, src, 1, v1), cond);
}
case nir_op_f2i32: {
Temp src = get_alu_src(ctx, instr->src[0]);
- if (instr->src[0].src.ssa->bit_size == 32) {
+ if (instr->src[0].src.ssa->bit_size == 16) {
+ Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
+ if (dst.type() == RegType::vgpr) {
+ bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(dst), tmp);
+ } else {
+ bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
+ bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp));
+ }
+ } else if (instr->src[0].src.ssa->bit_size == 32) {
if (dst.type() == RegType::vgpr)
bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(dst), src);
else
}
case nir_op_f2u32: {
Temp src = get_alu_src(ctx, instr->src[0]);
- if (instr->src[0].src.ssa->bit_size == 32) {
+ if (instr->src[0].src.ssa->bit_size == 16) {
+ Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
+ if (dst.type() == RegType::vgpr) {
+ bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(dst), tmp);
+ } else {
+ bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
+ bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp));
+ }
+ } else if (instr->src[0].src.ssa->bit_size == 32) {
if (dst.type() == RegType::vgpr)
bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(dst), src);
else