static bool
is_fp16_conv(struct ir3_instruction *instr)
{
- if (instr->opc == OPC_MOV &&
- instr->cat1.src_type == TYPE_F32 &&
+ if (instr->opc != OPC_MOV)
+ return false;
+
+ struct ir3_register *dst = instr->regs[0];
+ struct ir3_register *src = instr->regs[1];
+
+ /* disallow conversions that cannot be folded into
+ * alu instructions:
+ */
+ if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF))
+ return false;
+
+ if (instr->cat1.src_type == TYPE_F32 &&
instr->cat1.dst_type == TYPE_F16)
return true;
+ if (instr->cat1.src_type == TYPE_F16 &&
+ instr->cat1.dst_type == TYPE_F32)
+ return true;
+
return false;
}
if (!is_alu(src))
return;
+ /* avoid folding f2f32(f2f16) together, in cases where this is legal to
+ * do (glsl) nir should have handled that for us already:
+ */
+ if (is_fp16_conv(src))
+ return;
+
switch (src->opc) {
case OPC_SEL_B32:
+ case OPC_SEL_B16:
case OPC_MAX_F:
case OPC_MIN_F:
case OPC_SIGN_F:
* change the dst type to F32 to get the right behavior, since we
* could be moving a float with a u32.u32 move.
*/
- src->cat1.dst_type = TYPE_F16;
- src->cat1.src_type = TYPE_F32;
+ src->cat1.dst_type = conv->cat1.dst_type;
+ src->cat1.src_type = conv->cat1.src_type;
} else {
/* Otherwise, for typechanging movs, we can just change the dst
* type to F16 to collaps the two conversions. For example
* cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
*/
- src->cat1.dst_type = TYPE_F16;
+ src->cat1.dst_type = conv->cat1.dst_type;
}
}
- src->regs[0]->flags |= IR3_REG_HALF;
+ if (conv->regs[0]->flags & IR3_REG_HALF) {
+ src->regs[0]->flags |= IR3_REG_HALF;
+ } else {
+ src->regs[0]->flags &= ~IR3_REG_HALF;
+ }
rewrite_uses(ir, conv, src);
}