pan/bi: Set branch_constant if there is a branch
[mesa.git] / src / panfrost / bifrost / bi_schedule.c
index 669b4019d5cc09d5189803c790dd9f384db0eeb7..f6652aa11352551efbb5ba01affa14ce51935a09 100644 (file)
 
 #include "compiler.h"
 
+/* Helper for finding the clause type required by an instruction (see
+ * bi_clause_type_for_ins, which returns that type or none). Reports whether
+ * an instruction refers to the special FRAGZ constant: returns false when
+ * src[0] does not carry the BIR_INDEX_CONSTANT flag, and otherwise returns
+ * whether the instruction's 32-bit constant equals BIFROST_FRAGZ. */
+
+static bool
+bi_is_fragz(bi_instruction *ins)
+{
+        if (!(ins->src[0] & BIR_INDEX_CONSTANT))
+                return false;
+
+        return (ins->constant.u32 == BIFROST_FRAGZ);
+}
+/* Finds the clause type required by an instruction, or returns
+ * BIFROST_CLAUSE_NONE if the instruction does not impose one. The decision is
+ * made purely from ins->type, except for BI_LOAD_VAR, where bi_is_fragz()
+ * selects between the FRAGZ and regular varying-load clause types. A class
+ * that is flagged BI_SCHED_HI_LATENCY but not listed in the switch hits
+ * unreachable(). */
+static enum bifrost_clause_type
+bi_clause_type_for_ins(bi_instruction *ins)
+{
+        unsigned T = ins->type;
+
+        /* Only high latency ops impose clause types, so anything whose class
+         * lacks BI_SCHED_HI_LATENCY is answered here without consulting the
+         * switch below */
+        if (!(bi_class_props[T] & BI_SCHED_HI_LATENCY))
+                return BIFROST_CLAUSE_NONE;
+
+        switch (T) {
+        case BI_BRANCH:
+        case BI_DISCARD: /* listed explicitly so they do not reach the default */
+                return BIFROST_CLAUSE_NONE;
+
+        case BI_LOAD_VAR:
+                if (bi_is_fragz(ins)) /* FRAGZ gets its own clause type */
+                        return BIFROST_CLAUSE_FRAGZ;
+
+                return BIFROST_CLAUSE_LOAD_VARY;
+
+        case BI_LOAD_UNIFORM:
+        case BI_LOAD_ATTR:
+        case BI_LOAD_VAR_ADDRESS:
+                return BIFROST_CLAUSE_UBO;
+
+        case BI_TEX:
+                return BIFROST_CLAUSE_TEX;
+
+        case BI_LOAD:
+                return BIFROST_CLAUSE_SSBO_LOAD;
+
+        case BI_STORE:
+        case BI_STORE_VAR:
+                return BIFROST_CLAUSE_SSBO_STORE;
+
+        case BI_BLEND:
+                return BIFROST_CLAUSE_BLEND;
+
+        case BI_ATEST:
+                return BIFROST_CLAUSE_ATEST;
+
+        default:
+                unreachable("Invalid high-latency class");
+        }
+}
+
+/* There is an encoding restriction against FMA fp16 add/min/max
+ * having both sources with abs(..) with a duplicated source. This is
+ * due to the packing being order-sensitive, so the ports must end up distinct
+ * to handle both having abs(..). The swizzle doesn't matter here. Note
+ * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce
+ * this).
+ *
+ * Returns true when the instruction's class has BI_NO_ABS_ABS_FP16_FMA set,
+ * its destination type is nir_type_float16, and both src_abs[0] and
+ * src_abs[1] are set. bi_schedule() uses this to keep such an instruction off
+ * the FMA slot.
+ *
+ * NOTE(review): the two sources themselves are never compared, so this also
+ * returns true when they differ -- presumably a deliberately conservative
+ * approximation of the "duplicated source" condition; confirm.
+ */
+
+static bool
+bi_ambiguous_abs(bi_instruction *ins)
+{
+        bool classy = bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA;
+        bool typey = ins->dest_type == nir_type_float16;
+        bool absy = ins->src_abs[0] && ins->src_abs[1];
+
+        return classy && typey && absy;
+}
+
+/* Newer Bifrost (which models?) doesn't seem to have ICMP on FMA. Returns
+ * true for a BI_CMP instruction whose first source type has a base type
+ * other than nir_type_float; bi_schedule() uses this to keep such compares
+ * off the FMA slot. Only src_types[0] is inspected. */
+static bool
+bi_icmp(bi_instruction *ins)
+{
+        bool ic = nir_alu_type_get_base_type(ins->src_types[0]) != nir_type_float;
+        return ic && (ins->type == BI_CMP);
+}
+
+/* No 8/16-bit IADD/ISUB on FMA. Returns true for a BI_IMATH instruction
+ * whose first source type is narrower than 32 bits; bi_schedule() uses this
+ * to keep such instructions off the FMA slot. Only src_types[0] is
+ * inspected. */
+static bool
+bi_imath_small(bi_instruction *ins)
+{
+        bool sz = nir_alu_type_get_type_size(ins->src_types[0]) < 32;
+        return sz && (ins->type == BI_IMATH);
+}
+
+/* Lowers FMOV to ADD #0, since FMOV doesn't exist on the h/w and this is the
+ * latest time it's sane to lower (it's useful to distinguish before, but we'll
+ * need this handled during scheduling to ensure the ports get modeled
+ * correctly with respect to the new zero source).
+ *
+ * The instruction is rewritten in place: its type becomes BI_ADD, src[1] is
+ * set to BIR_INDEX_ZERO and src_types[1] is copied from src_types[0].
+ * Instructions of any other type are left untouched. */
+
+static void
+bi_lower_fmov(bi_instruction *ins)
+{
+        if (ins->type != BI_FMOV)
+                return;
+
+        ins->type = BI_ADD;
+        ins->src[1] = BIR_INDEX_ZERO;
+        ins->src_types[1] = ins->src_types[0];
+}
+
 /* Eventually, we'll need a proper scheduling, grouping instructions
  * into clauses and ordering/assigning grouped instructions to the
  * appropriate FMA/ADD slots. Right now we do the dumbest possible
@@ -45,12 +152,26 @@ bi_schedule(bi_context *ctx)
                 list_inithead(&bblock->clauses);
 
                 bi_foreach_instr_in_block(bblock, ins) {
+                        /* Convenient time to lower */
+                        bi_lower_fmov(ins);
+
                         unsigned props = bi_class_props[ins->type];
 
                         bi_clause *u = rzalloc(ctx, bi_clause);
                         u->bundle_count = 1;
 
-                        if (props & BI_SCHED_FMA)
+                        /* Check for scheduling restrictions */
+
+                        bool can_fma = props & BI_SCHED_FMA;
+                        bool can_add = props & BI_SCHED_ADD;
+
+                        can_fma &= !bi_ambiguous_abs(ins);
+                        can_fma &= !bi_icmp(ins);
+                        can_fma &= !bi_imath_small(ins);
+
+                        assert(can_fma || can_add);
+
+                        if (can_fma)
                                 u->bundles[0].fma = ins;
                         else
                                 u->bundles[0].add = ins;
@@ -59,17 +180,31 @@ bi_schedule(bi_context *ctx)
 
                         if (is_first)
                                 is_first = false;
-                        else
+                        else {
+                                /* Rule: first instructions cannot have write barriers */
                                 u->dependencies |= (1 << last_id);
+                                u->data_register_write_barrier = true;
+                        }
+
+                        if (ins->type == BI_ATEST)
+                                u->dependencies |= (1 << 6);
+
+                        if (ins->type == BI_BLEND)
+                                u->dependencies |= (1 << 6) | (1 << 7);
 
                         ids = ids & 1;
                         last_id = u->scoreboard_id;
-                        u->back_to_back = true;
-                        u->data_register_write_barrier = true;
+                        u->back_to_back = false;
 
                         u->constant_count = 1;
                         u->constants[0] = ins->constant.u64;
 
+                        /* No indirect jumps yet */
+                        if (ins->type == BI_BRANCH)
+                                u->branch_constant = true;
+
+                        u->clause_type = bi_clause_type_for_ins(ins);
+
                         list_addtail(&u->link, &bblock->clauses);
                 }