pan/mdg: Schedule based on liveness
[mesa.git] / src / panfrost / midgard / mir.c
index e19ed4f98a6c38b574d34fde9309fbb68e6d7788..343c4bdb1d76a191fe7865603b6c8cf5e40e2fb0 100644 (file)
@@ -38,144 +38,14 @@ void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsign
                 ins->dest = new;
 }
 
-unsigned
-mir_get_swizzle(midgard_instruction *ins, unsigned idx)
-{
-        if (ins->type == TAG_ALU_4) {
-                if (idx == 2 || ins->compact_branch)
-                        return ins->cond_swizzle;
-
-                unsigned b = (idx == 0) ? ins->alu.src1 : ins->alu.src2;
-
-                midgard_vector_alu_src s =
-                        vector_alu_from_unsigned(b);
-
-                return s.swizzle;
-        } else if (ins->type == TAG_LOAD_STORE_4) {
-                /* Main swizzle of a load is on the destination */
-                if (!OP_IS_STORE(ins->load_store.op))
-                        idx++;
-
-                switch (idx) {
-                case 0:
-                        return ins->load_store.swizzle;
-                case 1:
-                case 2: {
-                        uint8_t raw =
-                                (idx == 2) ? ins->load_store.arg_2 : ins->load_store.arg_1;
-
-                        /* TODO: Integrate component count with properties */
-                        unsigned components = 1;
-                        switch (ins->load_store.op) {
-                        case midgard_op_ld_int4:
-                                components = (idx == 0) ? 2 : 1;
-                                break;
-                        case midgard_op_st_int4:
-                                components = (idx == 1) ? 2 : 1;
-                                break;
-                        case midgard_op_ld_cubemap_coords:
-                                components = 3;
-                                break;
-                        default:
-                                components = 1;
-                                break;
-                        }
-
-                        return component_to_swizzle(midgard_ldst_select(raw).component, components);
-                }
-                default:
-                        unreachable("Unknown load/store source");
-                }
-        } else if (ins->type == TAG_TEXTURE_4) {
-                switch (idx) {
-                case 0:
-                        return ins->texture.in_reg_swizzle;
-                case 1:
-                        /* Swizzle on bias doesn't make sense */
-                        return 0;
-                default:
-                        unreachable("Unknown texture source");
-                }
-        } else {
-                unreachable("Unknown type");
-        }
-}
-
-void
-mir_set_swizzle(midgard_instruction *ins, unsigned idx, unsigned new)
-{
-        if (ins->type == TAG_ALU_4) {
-                if (idx == 2 || ins->compact_branch) {
-                        ins->cond_swizzle = new;
-                        return;
-                }
-
-                unsigned b = (idx == 0) ? ins->alu.src1 : ins->alu.src2;
-
-                midgard_vector_alu_src s =
-                        vector_alu_from_unsigned(b);
-
-                s.swizzle = new;
-                unsigned pack = vector_alu_srco_unsigned(s);
-
-                if (idx == 0)
-                        ins->alu.src1 = pack;
-                else
-                        ins->alu.src2 = pack;
-        } else if (ins->type == TAG_LOAD_STORE_4) {
-                /* Main swizzle of a load is on the destination */
-                if (!OP_IS_STORE(ins->load_store.op))
-                        idx++;
-
-                switch (idx) {
-                case 0:
-                        ins->load_store.swizzle = new;
-                        break;
-                case 1:
-                case 2: {
-                        uint8_t raw =
-                                (idx == 2) ? ins->load_store.arg_2 : ins->load_store.arg_1;
-
-                        midgard_ldst_register_select sel
-                                = midgard_ldst_select(raw);
-                        sel.component = swizzle_to_component(new);
-                        uint8_t packed = midgard_ldst_pack(sel);
-
-                        if (idx == 2)
-                                ins->load_store.arg_2 = packed;
-                        else
-                                ins->load_store.arg_1 = packed;
-
-                        break;
-                }
-                default:
-                        assert(new == 0);
-                        break;
-                }
-        } else if (ins->type == TAG_TEXTURE_4) {
-                switch (idx) {
-                case 0:
-                        ins->texture.in_reg_swizzle = new;
-                        break;
-                default:
-                        assert(new == 0);
-                        break;
-                }
-        } else {
-                unreachable("Unknown type");
-        }
-}
-
+/* Rewrites every source slot of `ins` that currently holds `old` so that it
+ * holds `new`, and hands the slot's swizzle array together with the caller's
+ * `swizzle` to mir_compose_swizzle(), passing the slot's own array as the
+ * final argument (presumably the in-place output -- confirm against
+ * mir_compose_swizzle). */
+
 static void
-mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned swizzle)
+mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned *swizzle)
 {
         for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) {
                 if (ins->src[i] != old) continue;
 
                 ins->src[i] = new;
-
-                mir_set_swizzle(ins, i,
-                        pan_compose_swizzle(mir_get_swizzle(ins, i), swizzle));
+                mir_compose_swizzle(ins->swizzle[i], swizzle, ins->swizzle[i]);
         }
 }
 
@@ -188,7 +58,7 @@ mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new)
 }
 
 void
-mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned swizzle)
+mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle)
 {
         mir_foreach_instr_global(ctx, ins) {
                 mir_rewrite_index_src_single_swizzle(ins, old, new, swizzle);
@@ -201,6 +71,10 @@ mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new)
         mir_foreach_instr_global(ctx, ins) {
                 mir_rewrite_index_dst_single(ins, old, new);
         }
+
+        /* Implicitly written before the shader */
+        if (ctx->blend_input == old)
+                ctx->blend_input = new;
 }
 
 void
@@ -236,112 +110,142 @@ mir_single_use(compiler_context *ctx, unsigned value)
         return mir_use_count(ctx, value) <= 1;
 }
 
-static bool
-mir_nontrivial_raw_mod(midgard_vector_alu_src src, bool is_int)
-{
-        if (is_int)
-                return src.mod == midgard_int_shift;
-        else
-                return src.mod;
-}
-
+/* Reports whether source `i` of ALU instruction `ins` carries anything beyond
+ * a plain read: a nonzero src_shift (integer ops), a negate or absolute value
+ * (other ops), a source type that differs from the destination type, or --
+ * only when `check_swizzle` is set -- a non-identity swizzle entry on a
+ * component that is enabled in ins->mask. */
+
 bool
-mir_nontrivial_mod(midgard_vector_alu_src src, bool is_int, unsigned mask)
+mir_nontrivial_mod(midgard_instruction *ins, unsigned i, bool check_swizzle)
 {
-        if (mir_nontrivial_raw_mod(src, is_int)) return true;
+        bool is_int = midgard_is_integer_op(ins->alu.op);
 
-        /* size-conversion */
-        if (src.half) return true;
+        if (is_int) {
+                if (ins->src_shift[i]) return true;
+        } else {
+                if (ins->src_neg[i]) return true;
+                if (ins->src_abs[i]) return true;
+        }
+
+        /* A source/destination type mismatch is reported as a modifier too */
+        if (ins->dest_type != ins->src_types[i]) return true;
 
-        /* swizzle */
-        for (unsigned c = 0; c < 4; ++c) {
-                if (!(mask & (1 << c))) continue;
-                if (((src.swizzle >> (2*c)) & 3) != c) return true;
+        if (check_swizzle) {
+                /* Components that are masked off are not inspected */
+                for (unsigned c = 0; c < 16; ++c) {
+                        if (!(ins->mask & (1 << c))) continue;
+                        if (ins->swizzle[i][c] != c) return true;
+                }
         }
 
         return false;
 }
 
+/* Reports whether the output side of `ins` is non-trivial: true when the
+ * destination type differs from the type of source 1, or when alu.outmod is
+ * anything other than midgard_outmod_int_wrap (integer ops) or
+ * midgard_outmod_none (all other ops).
+ *
+ * NOTE(review): only src_types[1] is compared against dest_type; confirm that
+ * ignoring source 0 is intended. */
+
 bool
-mir_nontrivial_source2_mod(midgard_instruction *ins)
+mir_nontrivial_outmod(midgard_instruction *ins)
 {
         bool is_int = midgard_is_integer_op(ins->alu.op);
+        unsigned mod = ins->alu.outmod;
 
-        midgard_vector_alu_src src2 =
-                vector_alu_from_unsigned(ins->alu.src2);
+        if (ins->dest_type != ins->src_types[1])
+                return true;
 
-        return mir_nontrivial_mod(src2, is_int, ins->mask);
+        if (is_int)
+                return mod != midgard_outmod_int_wrap;
+        else
+                return mod != midgard_outmod_none;
 }
 
-bool
-mir_nontrivial_source2_mod_simple(midgard_instruction *ins)
-{
-        bool is_int = midgard_is_integer_op(ins->alu.op);
+/* Number of components of `bits` bits each that fit in a 128-bit vector:
+ *
+ * 128 / sz = exp2(log2(128 / sz))
+ *          = exp2(log2(128) - log2(sz))
+ *          = exp2(7 - log2(sz))
+ *          = 1 << (7 - log2(sz))
+ *
+ * NOTE(review): the identity is exact only when `bits` is a power of two no
+ * larger than 128 -- presumably always true for callers; confirm.
+ */
 
-        midgard_vector_alu_src src2 =
-                vector_alu_from_unsigned(ins->alu.src2);
+static unsigned
+mir_components_for_bits(unsigned bits)
+{
+        return 1 << (7 - util_logbase2(bits));
+}
 
-        return mir_nontrivial_raw_mod(src2, is_int) || src2.half;
+/* Same computation as mir_components_for_bits(), with the component size
+ * taken from the NIR ALU type `T` via nir_alu_type_get_type_size(). */
+
+unsigned
+mir_components_for_type(nir_alu_type T)
+{
+        unsigned sz = nir_alu_type_get_type_size(T);
+        return mir_components_for_bits(sz);
 }
 
-bool
-mir_nontrivial_outmod(midgard_instruction *ins)
+/* Converts a per-byte mask over a 16-byte vector into a per-component mask
+ * for components that are `bits` bits wide. Bit d of the result is taken from
+ * the first byte of component d; the remaining bytes of that component are
+ * asserted to agree with it, since a partially covered component cannot be
+ * expressed per-component. `bits` must be at least 8, otherwise the byte
+ * stride below is zero. */
+
+uint16_t
+mir_from_bytemask(uint16_t bytemask, unsigned bits)
 {
-        bool is_int = midgard_is_integer_op(ins->alu.op);
-        unsigned mod = ins->alu.outmod;
+        unsigned value = 0;
+        unsigned count = bits / 8;
 
-        /* Pseudo-outmod */
-        if (ins->invert)
-                return true;
+        for (unsigned c = 0, d = 0; c < 16; c += count, ++d) {
+                bool a = (bytemask & (1 << c)) != 0;
 
-        /* Type conversion is a sort of outmod */
-        if (ins->alu.dest_override != midgard_dest_override_none)
-                return true;
+                /* Bytes [c, c + count) make up component d. The bound has to
+                 * be relative to c: bounding by count alone only ever checked
+                 * component 0 and skipped every later component. */
+                for (unsigned q = c; q < c + count; ++q)
+                        assert(((bytemask & (1 << q)) != 0) == a);
 
-        if (is_int)
-                return mod != midgard_outmod_int_wrap;
-        else
-                return mod != midgard_outmod_none;
+                value |= (a << d);
+        }
+
+        return value;
 }
 
-/* Checks if an index will be used as a special register -- basically, if we're
- * used as the input to a non-ALU op */
+/* Rounds a bytemask up to whole components of `bits` bits each: for every
+ * component, if any of its bytes is set in `mask`, all of its bytes are set
+ * in the returned mask. Bytes of untouched components are left as they are. */
 
-bool
-mir_special_index(compiler_context *ctx, unsigned idx)
+uint16_t
+mir_round_bytemask_up(uint16_t mask, unsigned bits)
 {
-        mir_foreach_instr_global(ctx, ins) {
-                bool is_ldst = ins->type == TAG_LOAD_STORE_4;
-                bool is_tex = ins->type == TAG_TEXTURE_4;
-                bool is_writeout = ins->compact_branch && ins->writeout;
+        unsigned bytes = bits / 8;
+        /* presumably mask_of(n) yields a mask of the low n bits -- confirm */
+        unsigned maxmask = mask_of(bytes);
+        unsigned channels = mir_components_for_bits(bits);
 
-                if (!(is_ldst || is_tex || is_writeout))
-                        continue;
+        for (unsigned c = 0; c < channels; ++c) {
+                /* The bytes belonging to component c */
+                unsigned submask = maxmask << (c * bytes);
 
-                if (mir_has_arg(ins, idx))
-                        return true;
+                if (mask & submask)
+                        mask |= submask;
         }
 
-        return false;
+        return mask;
 }
 
-/* Is a node written before a given instruction? */
+/* Grabs the per-byte mask of an instruction (as opposed to per-component),
+ * by passing the size of the destination type and ins->mask to
+ * pan_to_bytemask() */
 
-bool
-mir_is_written_before(compiler_context *ctx, midgard_instruction *ins, unsigned node)
+uint16_t
+mir_bytemask(midgard_instruction *ins)
 {
-        if (node >= SSA_FIXED_MINIMUM)
-                return true;
+        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
+        return pan_to_bytemask(type_size, ins->mask);
+}
 
-        mir_foreach_instr_global(ctx, q) {
-                if (q == ins)
-                        break;
+/* Counterpart of mir_bytemask(): stores `bytemask` into ins->mask as a
+ * per-component mask, converting with mir_from_bytemask() at the size of the
+ * instruction's destination type. */
+
+void
+mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask)
+{
+        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
+        ins->mask = mir_from_bytemask(bytemask, type_size);
+}
 
-                if (q->dest == node)
-                        return true;
-        }
+/* Checks if we should use an upper destination override, rather than the lower
+ * one in the IR. Returns -1 if the destination type size equals inst_size (no
+ * override at all), 0 if the mask does not start far enough up for an upper
+ * override, and otherwise half the per-vector component count of the
+ * destination type (originally described as "the bytes to shift" -- TODO
+ * confirm the unit with callers). */
 
-        return false;
+signed
+mir_upper_override(midgard_instruction *ins, unsigned inst_size)
+{
+        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
+
+        /* If the sizes are the same, there's nothing to override */
+        if (type_size == inst_size)
+                return -1;
+
+        /* There are 16 bytes per vector, so there are (16/bytes)
+         * components per vector. So the magic half is half of
+         * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits
+         * */
+
+        unsigned threshold = mir_components_for_bits(type_size) >> 1;
+
+        /* How many components did we shift over?
+         * NOTE(review): __builtin_ctz is undefined for 0, so this relies on
+         * ins->mask being nonzero -- confirm callers guarantee that */
+        unsigned zeroes = __builtin_ctz(ins->mask);
+
+        /* Did we hit the threshold? */
+        return (zeroes >= threshold) ? threshold : 0;
 }
 
 /* Creates a mask of the components of a node read by an instruction, by
@@ -352,90 +256,61 @@ mir_is_written_before(compiler_context *ctx, midgard_instruction *ins, unsigned
  * will return a mask of Z/Y for r2
  */
 
-static unsigned
-mir_mask_of_read_components_single(unsigned swizzle, unsigned outmask)
+static uint16_t
+mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, unsigned bits)
 {
-        unsigned mask = 0;
-
-        for (unsigned c = 0; c < 4; ++c) {
-                if (!(outmask & (1 << c))) continue;
+        /* Per-component set of the source channels that get selected */
+        unsigned cmask = 0;
 
-                unsigned comp = (swizzle >> (2*c)) & 3;
-                mask |= (1 << comp);
+        /* For each channel enabled in inmask, mark the source component
+         * that its swizzle entry points at */
+        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) {
+                if (!(inmask & (1 << c))) continue;
+                cmask |= (1 << swizzle[c]);
         }
 
-        return mask;
-}
-
-static unsigned
-mir_source_count(midgard_instruction *ins)
-{
-        if (ins->type == TAG_ALU_4) {
-                /* ALU is always binary, except csel */
-                return OP_IS_CSEL(ins->alu.op) ? 3 : 2;
-        } else if (ins->type == TAG_LOAD_STORE_4) {
-                bool load = !OP_IS_STORE(ins->load_store.op);
-                return (load ? 2 : 3);
-        } else if (ins->type == TAG_TEXTURE_4) {
-                /* Coords, bias.. TODO: Offsets? */
-                return 2;
-        } else {
-                unreachable("Invalid instruction type");
-        }
+        /* Hand the component set to pan_to_bytemask() together with the
+         * component size to get the per-byte form */
+        return pan_to_bytemask(bits, cmask);
 }
 
-unsigned
-mir_mask_of_read_components(midgard_instruction *ins, unsigned node)
+/* Returns the per-byte mask of what `ins` reads through source slot `i`.
+ * Slot 0 of a conditional compact branch is a fixed 4 bytes. Otherwise the
+ * slot's swizzle is evaluated over a channel mask: the opcode's channel-count
+ * override if it has one, else ins->mask for non-branch ALU ops, and all
+ * channels for everything else. The result is sized by the slot's source
+ * type. */
+
+uint16_t
+mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i)
 {
-        unsigned mask = 0;
+        /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
+        if (ins->compact_branch && ins->branch.conditional && (i == 0))
+                return 0xF;
 
-        for (unsigned i = 0; i < mir_source_count(ins); ++i) {
-                if (ins->src[i] != node) continue;
-
-                /* Branch writeout uses all components */
-                if (ins->compact_branch && ins->writeout && (i == 0))
-                        return 0xF;
+        /* ALU ops act componentwise so we need to pay attention to
+         * their mask. Texture/ldst does not so we don't clamp source
+         * readmasks based on the writemask */
+        unsigned qmask = ~0;
 
-                /* ALU ops act componentwise so we need to pay attention to
-                 * their mask. Texture/ldst does not so we don't clamp source
-                 * readmasks based on the writemask */
-                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : 0xF;
+        /* Handle dot products and things */
+        if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
+                unsigned props = alu_opcode_props[ins->alu.op].props;
 
-                unsigned swizzle = mir_get_swizzle(ins, i);
-                unsigned m = mir_mask_of_read_components_single(swizzle, qmask);
+                unsigned channel_override = GET_CHANNEL_COUNT(props);
 
-                /* Handle dot products and things */
-                if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
-                        unsigned channel_override =
-                                GET_CHANNEL_COUNT(alu_opcode_props[ins->alu.op].props);
-
-                        if (channel_override)
-                                m = mask_of(channel_override);
-                }
-
-                mask |= m;
+                /* A nonzero override replaces the write mask entirely */
+                if (channel_override)
+                        qmask = mask_of(channel_override);
+                else
+                        qmask = ins->mask;
         }
 
-        return mask;
+        return mir_bytemask_of_read_components_single(ins->swizzle[i], qmask,
+                nir_alu_type_get_type_size(ins->src_types[i]));
 }
 
-unsigned
-mir_ubo_shift(midgard_load_store_op op)
+/* Returns the union of the per-byte read masks of every source slot of `ins`
+ * that refers to `node`, or 0 if no slot does. A `node` of ~0 always yields
+ * 0 (presumably ~0 marks an unused source slot -- confirm). */
+
+uint16_t
+mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node)
 {
-        switch (op) {
-        case midgard_op_ld_ubo_char:
+        uint16_t mask = 0;
+
+        if (node == ~0)
                 return 0;
-        case midgard_op_ld_ubo_char2:
-                return 1;
-        case midgard_op_ld_ubo_char4:
-                return 2;
-        case midgard_op_ld_ubo_short4:
-                return 3;
-        case midgard_op_ld_ubo_int4:
-                return 4;
-        default:
-                unreachable("Invalid op");
+
+        mir_foreach_src(ins, i) {
+                if (ins->src[i] != node) continue;
+                mask |= mir_bytemask_of_read_components_index(ins, i);
         }
+
+        return mask;
 }
 
 /* Register allocation occurs after instruction scheduling, which is fine until
@@ -506,6 +381,7 @@ mir_insert_instruction_before_scheduled(
         memcpy(bundles + before, &new, sizeof(new));
 
         list_addtail(&new.instructions[0]->link, &before_bundle->instructions[0]->link);
+        block->quadword_count += midgard_tag_props[new.tag].size;
 }
 
 void
@@ -515,17 +391,22 @@ mir_insert_instruction_after_scheduled(
         midgard_instruction *tag,
         midgard_instruction ins)
 {
-        unsigned after = mir_bundle_idx_for_ins(tag, block);
+        /* We need to grow the bundles array to add our new bundle */
         size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle);
         UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1);
 
+        /* Find the bundle that we want to insert after */
+        unsigned after = mir_bundle_idx_for_ins(tag, block);
+
+        /* All the bundles after that one, we move ahead by one */
         midgard_bundle *bundles = (midgard_bundle *) block->bundles.data;
         memmove(bundles + after + 2, bundles + after + 1, (count - after - 1) * sizeof(midgard_bundle));
-        midgard_bundle *after_bundle_1 = bundles + after + 2;
+        midgard_bundle *after_bundle = bundles + after;
 
         midgard_bundle new = mir_bundle_for_op(ctx, ins);
         memcpy(bundles + after + 1, &new, sizeof(new));
-        list_addtail(&new.instructions[0]->link, &after_bundle_1->instructions[0]->link);
+        list_add(&new.instructions[0]->link, &after_bundle->instructions[after_bundle->instruction_count - 1]->link);
+        block->quadword_count += midgard_tag_props[new.tag].size;
 }
 
 /* Flip the first-two arguments of a (binary) op. Currently ALU
@@ -543,6 +424,27 @@ mir_flip(midgard_instruction *ins)
         temp = ins->alu.src1;
         ins->alu.src1 = ins->alu.src2;
         ins->alu.src2 = temp;
+
+        temp = ins->src_types[0];
+        ins->src_types[0] = ins->src_types[1];
+        ins->src_types[1] = temp;
+
+        temp = ins->src_abs[0];
+        ins->src_abs[0] = ins->src_abs[1];
+        ins->src_abs[1] = temp;
+
+        temp = ins->src_neg[0];
+        ins->src_neg[0] = ins->src_neg[1];
+        ins->src_neg[1] = temp;
+
+        temp = ins->src_invert[0];
+        ins->src_invert[0] = ins->src_invert[1];
+        ins->src_invert[1] = temp;
+
+        unsigned temp_swizzle[16];
+        memcpy(temp_swizzle, ins->swizzle[0], sizeof(ins->swizzle[0]));
+        memcpy(ins->swizzle[0], ins->swizzle[1], sizeof(ins->swizzle[0]));
+        memcpy(ins->swizzle[1], temp_swizzle, sizeof(ins->swizzle[0]));
 }
 
 /* Before squashing, calculate ctx->temp_count just by observing the MIR */
@@ -557,7 +459,7 @@ mir_compute_temp_count(compiler_context *ctx)
 
         mir_foreach_instr_global(ctx, ins) {
                 if (ins->dest < SSA_FIXED_MINIMUM)
-                        max_dest = MAX2(max_dest, ins->dest);
+                        max_dest = MAX2(max_dest, ins->dest + 1);
         }
 
         ctx->temp_count = max_dest;