unsigned bs[info->num_inputs]; /* bit size */
struct ir3_block *b = ctx->block;
unsigned dst_sz, wrmask;
+ type_t dst_type = nir_dest_bit_size(alu->dest.dest) < 32 ?
+ TYPE_U16 : TYPE_U32;
if (alu->dest.dest.is_ssa) {
dst_sz = alu->dest.dest.ssa.num_components;
src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[0]];
if (!src[i])
- src[i] = create_immed(ctx->block, 0);
- dst[i] = ir3_MOV(b, src[i], TYPE_U32);
+ src[i] = create_immed_typed(ctx->block, 0, dst_type);
+ dst[i] = ir3_MOV(b, src[i], dst_type);
}
ir3_put_dst(ctx, &alu->dest.dest);
* handle those specially:
*/
if (alu->op == nir_op_mov) {
- type_t type = TYPE_U32;
nir_alu_src *asrc = &alu->src[0];
struct ir3_instruction *const *src0 = ir3_get_src(ctx, &asrc->src);
for (unsigned i = 0; i < dst_sz; i++) {
if (wrmask & (1 << i)) {
- dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type);
+ dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], dst_type);
} else {
dst[i] = NULL;
}
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_b2f16:
+ dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F16);
+ break;
case nir_op_b2f32:
dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32);
break;
(list_length(&alu->src[0].src.ssa->uses) == 1) &&
((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) {
src[0]->flags |= IR3_INSTR_SAT;
- dst[0] = ir3_MOV(b, src[0], TYPE_U32);
+ dst[0] = ir3_MOV(b, src[0], dst_type);
} else {
/* otherwise generate a max.f that saturates.. blob does
* similar (generating a cat2 mov using max.f)