freedreno/ir3: set dst type of alu instructions correctly.
authorHyunjun Ko <zzoon@igalia.com>
Tue, 19 Mar 2019 07:17:40 +0000 (07:17 +0000)
committerRob Clark <robdclark@chromium.org>
Mon, 3 Jun 2019 20:31:26 +0000 (13:31 -0700)
Though it should be fixed in RA pass, it needs to be set correctly from
the beginning according to the bitsize of NIR dest.

v2: Would be better for mad,fddx,fddy to fixup later in RA pass.

[small cleanup of fallout from imov/fmov removal fallout]
Signed-off-by: Rob Clark <robdclark@chromium.org>
src/freedreno/ir3/ir3_compiler_nir.c

index 8fbb45c489c7f082904ee8056b1a489ed9d158fc..8692869d5ef4229ebde30e810ddc4fffbdd87a83 100644 (file)
@@ -294,6 +294,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
        unsigned bs[info->num_inputs];     /* bit size */
        struct ir3_block *b = ctx->block;
        unsigned dst_sz, wrmask;
+       type_t dst_type = nir_dest_bit_size(alu->dest.dest) < 32 ?
+                       TYPE_U16 : TYPE_U32;
 
        if (alu->dest.dest.is_ssa) {
                dst_sz = alu->dest.dest.ssa.num_components;
@@ -321,8 +323,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
 
                        src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[0]];
                        if (!src[i])
-                               src[i] = create_immed(ctx->block, 0);
-                       dst[i] = ir3_MOV(b, src[i], TYPE_U32);
+                               src[i] = create_immed_typed(ctx->block, 0, dst_type);
+                       dst[i] = ir3_MOV(b, src[i], dst_type);
                }
 
                ir3_put_dst(ctx, &alu->dest.dest);
@@ -333,13 +335,12 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
         * handle those specially:
         */
        if (alu->op == nir_op_mov) {
-               type_t type = TYPE_U32;
                nir_alu_src *asrc = &alu->src[0];
                struct ir3_instruction *const *src0 = ir3_get_src(ctx, &asrc->src);
 
                for (unsigned i = 0; i < dst_sz; i++) {
                        if (wrmask & (1 << i)) {
-                               dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type);
+                               dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], dst_type);
                        } else {
                                dst[i] = NULL;
                        }
@@ -392,6 +393,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                dst[0] = ir3_n2b(b, dst[0]);
                break;
        case nir_op_b2f16:
+               dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F16);
+               break;
        case nir_op_b2f32:
                dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32);
                break;
@@ -430,7 +433,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                                (list_length(&alu->src[0].src.ssa->uses) == 1) &&
                                ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) {
                        src[0]->flags |= IR3_INSTR_SAT;
-                       dst[0] = ir3_MOV(b, src[0], TYPE_U32);
+                       dst[0] = ir3_MOV(b, src[0], dst_type);
                } else {
                        /* otherwise generate a max.f that saturates.. blob does
                         * similar (generating a cat2 mov using max.f)