nir: rename nir_op_fne to nir_op_fneu
[mesa.git] / src/panfrost/midgard/mir_promote_uniforms.c
index e8da834b2fad57cd6130b45b8540f90460a58789..239020dd2192803a20d99f8b49ac4d838f015063 100644
  */
 
 #include "compiler.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 /* This pass promotes reads from uniforms from load/store ops to uniform
  * registers if it is beneficial to do so. Normally, this saves both
  * instructions and total register pressure, but it does take a toll on the
  * number of work registers that are available, so this is a balance.
  *
- * To cope, we take as an argument the maximum work register pressure in the
- * program so we allow that many registers through at minimum, to prevent
- * spilling. If we spill anyway, I mean, it's a lose-lose at that point. */
+ * We use a heuristic, implemented by mir_work_heuristic, to determine the
+ * ideal number of work registers to allow.
+ */
 
-void
-midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
+static bool
+mir_is_promoteable_ubo(midgard_instruction *ins)
 {
-        mir_foreach_instr_global_safe(ctx, ins) {
-                if (ins->type != TAG_LOAD_STORE_4) continue;
-                if (!OP_IS_UBO_READ(ins->load_store.op)) continue;
+        /* TODO: promote unaligned access via swizzle? */
+
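+        /* Promote only 16-byte-aligned reads of UBO 0 (arg_1 == 0) with a
+         * direct, immediate address (arg_2 == 0x1E) landing in the first 16
+         * vec4 slots, since only those can map to uniform registers */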
+        return (ins->type == TAG_LOAD_STORE_4) &&
+                (OP_IS_UBO_READ(ins->op)) &&
+                !(ins->constants.u32[0] & 0xF) &&
+                !(ins->load_store.arg_1) &&
+                (ins->load_store.arg_2 == 0x1E) &&
+                ((ins->constants.u32[0] / 16) < 16);
+}
+
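+/* Highest 16-byte uniform slot touched by a promoteable load, used as a
+ * proxy for how many uniform slots we would want to promote */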
+static unsigned
+mir_promoteable_uniform_count(compiler_context *ctx)
+{
+        unsigned count = 0;
+
+        mir_foreach_instr_global(ctx, ins) {
+                if (mir_is_promoteable_ubo(ins))
+                        count = MAX2(count, ins->constants.u32[0] / 16);
+        }
+
+        return count;
+}
+
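+/* Sum the live bytes across all temporaries, given per-temp liveness
+ * bytemasks (one bit per byte of a 128-bit vector) */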
+static unsigned
+mir_count_live(uint16_t *live, unsigned temp_count)
+{
+        unsigned count = 0;
+
+        for (unsigned i = 0; i < temp_count; ++i)
+                count += util_bitcount(live[i]);
+
+        return count;
+}
+
+static unsigned
+mir_estimate_pressure(compiler_context *ctx)
+{
+        mir_invalidate_liveness(ctx);
+        mir_compute_liveness(ctx);
+
+        unsigned max_live = 0;
+
+        mir_foreach_block(ctx, _block) {
+                midgard_block *block = (midgard_block *) _block;
+                uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t));
+
+                mir_foreach_instr_in_block_rev(block, ins) {
+                        unsigned count = mir_count_live(live, ctx->temp_count);
+                        max_live = MAX2(max_live, count);
+                        mir_liveness_ins_update(live, ins, ctx->temp_count);
+                }
+
+                free(live);
+        }
+
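+        /* max_live counts bytes; each 128-bit work register holds 16 bytes */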
+        return DIV_ROUND_UP(max_live, 16);
+}
+
+static unsigned
+mir_work_heuristic(compiler_context *ctx)
+{
+        unsigned uniform_count = mir_promoteable_uniform_count(ctx);
+
+        /* If there are 8 or fewer uniforms, it doesn't matter what we do, so
+         * allow as many work registers as needed */
+
+        if (uniform_count <= 8)
+                return 16;
+
+        /* Otherwise, estimate the register pressure */
+
+        unsigned pressure = mir_estimate_pressure(ctx);
+
+        /* Prioritize not spilling above all else. The relation between the
+         * pressure estimate and the actual register pressure is a little
+         * murkier than we might like (due to scheduling, pipeline registers,
+         * failure to pack vector registers, load/store registers, texture
+         * registers...), which is why this is a heuristic parameter */
+
+        if (pressure > 6)
+                return 16;
 
-                unsigned lo = ins->load_store.varying_parameters >> 7;
-                unsigned hi = ins->load_store.address;
+        /* If there's no chance of spilling, prioritize UBOs and thread count */
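+        /* Eight work registers leaves 24 - 8 = 16 slots for promoted uniforms
+         * while keeping per-core thread occupancy high */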
 
-                /* TODO: Combine fields logically */
-                unsigned address = (hi << 3) | lo;
+        return 8;
+}
+
+/* Bitset of indices that will be used as a special register -- inputs to a
+ * non-ALU op. We precompute this set so that testing is efficient, otherwise
+ * we end up with O(mn) behaviour for n instructions and m uniform reads */
+
+static BITSET_WORD *
+mir_special_indices(compiler_context *ctx)
+{
+        mir_compute_temp_count(ctx);
+        BITSET_WORD *bset = calloc(BITSET_WORDS(ctx->temp_count), sizeof(BITSET_WORD));
 
-                /* Check this is UBO 0 */
-                if (ins->load_store.arg_1) continue;
+        mir_foreach_instr_global(ctx, ins) {
+                /* Look for special instructions */
+                bool is_ldst = ins->type == TAG_LOAD_STORE_4;
+                bool is_tex = ins->type == TAG_TEXTURE_4;
+                bool is_writeout = ins->compact_branch && ins->writeout;
 
-                /* Check we're accessing directly */
-                if (ins->load_store.arg_2 != 0x1E) continue;
+                if (!(is_ldst || is_tex || is_writeout))
+                        continue;
+
+                /* Anything read by a special instruction is itself special */
+                mir_foreach_src(ins, i) {
+                        unsigned idx = ins->src[i];
+
+                        if (idx < ctx->temp_count)
+                                BITSET_SET(bset, idx);
+                }
+        }
+
+        return bset;
+}
+
+void
+midgard_promote_uniforms(compiler_context *ctx)
+{
+        unsigned work_count = mir_work_heuristic(ctx);
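+        /* Promoted uniforms count down from r23, so whatever the work
+         * registers do not use (out of 24) is free for promotion */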
+        unsigned promoted_count = 24 - work_count;
+
+        /* First, figure out special indices a priori so we don't recompute a lot */
+        BITSET_WORD *special = mir_special_indices(ctx);
+
+        mir_foreach_instr_global_safe(ctx, ins) {
+                if (!mir_is_promoteable_ubo(ins)) continue;
+
+                unsigned off = ins->constants.u32[0];
+                unsigned address = off / 16;
 
                 /* Check if it's a promotable range */
                 unsigned uniform_reg = 23 - address;
@@ -67,16 +186,26 @@ midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
                 /* We do need the move for safety for a non-SSA dest, or if
                  * we're being fed into a special class */
 
-                bool needs_move = ins->ssa_args.dest & IS_REG;
-                needs_move |= mir_special_index(ctx, ins->ssa_args.dest);
+                bool needs_move = ins->dest & PAN_IS_REG || ins->dest == ctx->blend_src1;
+
+                if (ins->dest < ctx->temp_count)
+                        needs_move |= BITSET_TEST(special, ins->dest);
 
                 if (needs_move) {
-                        midgard_instruction mov = v_mov(promoted, blank_alu_src, ins->ssa_args.dest);
-                        mir_insert_instruction_before(ins, mov);
+                        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
+                        midgard_instruction mov = v_mov(promoted, ins->dest);
+                        mov.dest_type = nir_type_uint | type_size;
+                        mov.src_types[0] = mov.dest_type;
+
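+                        /* Round the written byte mask up to whole components
+                         * so the mov covers everything the load wrote */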
+                        uint16_t rounded = mir_round_bytemask_up(mir_bytemask(ins), type_size);
+                        mir_set_bytemask(&mov, rounded);
+                        mir_insert_instruction_before(ctx, ins, mov);
                 } else {
-                        mir_rewrite_index_src(ctx, ins->ssa_args.dest, promoted);
+                        mir_rewrite_index_src(ctx, ins->dest, promoted);
                 }
 
                 mir_remove_instruction(ins);
         }
+
+        free(special);
 }