pan/bi: Set branch_constant if there is a branch
[mesa.git] / src / panfrost / bifrost / bi_schedule.c
index f112553c05a1065434d9f7a1437e89bfd1ed28d9..f6652aa11352551efbb5ba01affa14ce51935a09 100644 (file)
 
 /* Finds the clause type required, or returns none */
 
+static bool /* true iff src[0] is flagged constant and constant.u32 equals BIFROST_FRAGZ */
+bi_is_fragz(bi_instruction *ins)
+{
+        if (!(ins->src[0] & BIR_INDEX_CONSTANT)) /* src[0] lacks the constant flag: not FRAGZ */
+                return false;
+
+        return (ins->constant.u32 == BIFROST_FRAGZ); /* only the low 32-bit view of the constant is compared */
+}
+
 static enum bifrost_clause_type
 bi_clause_type_for_ins(bi_instruction *ins)
 {
@@ -43,6 +52,9 @@ bi_clause_type_for_ins(bi_instruction *ins)
                 return BIFROST_CLAUSE_NONE;
 
         case BI_LOAD_VAR:
+                if (bi_is_fragz(ins))
+                        return BIFROST_CLAUSE_FRAGZ;
+
                 return BIFROST_CLAUSE_LOAD_VARY;
 
         case BI_LOAD_UNIFORM:
@@ -71,6 +83,56 @@ bi_clause_type_for_ins(bi_instruction *ins)
         }
 }
 
+/* There is an encoding restriction against FMA fp16 add/min/max
+ * having both sources with abs(..) with a duplicated source. This is
+ * due to the packing being order-sensitive, so the ports must end up distinct
+ * to handle both having abs(..). The swizzle doesn't matter here. Note
+ * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce
+ * this).
+ *
+ * Returns true when all of the following hold: the instruction's class is
+ * flagged BI_NO_ABS_ABS_FP16_FMA in bi_class_props, its destination type is
+ * nir_type_float16, and both src_abs[0] and src_abs[1] are set.
+ *
+ * NOTE(review): the check does not compare src[0] against src[1], so it also
+ * fires when the two sources differ -- confirm this conservatism is intended.
+ */
+
+static bool
+bi_ambiguous_abs(bi_instruction *ins)
+{
+        bool classy = bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA;
+        bool typey = ins->dest_type == nir_type_float16;
+        bool absy = ins->src_abs[0] && ins->src_abs[1];
+
+        return classy && typey && absy;
+}
+
+/* New Bifrost (which?) don't seem to have ICMP on FMA.
+ *
+ * Returns true for a BI_CMP instruction whose first source type has a
+ * non-float base type. Only src_types[0] is inspected; the other source
+ * types are not checked here. */
+static bool
+bi_icmp(bi_instruction *ins)
+{
+        bool ic = nir_alu_type_get_base_type(ins->src_types[0]) != nir_type_float;
+        return ic && (ins->type == BI_CMP);
+}
+
+/* No 8/16-bit IADD/ISUB on FMA.
+ *
+ * Returns true for a BI_IMATH instruction whose first source type has a
+ * size (as reported by nir_alu_type_get_type_size) below 32. Only
+ * src_types[0] is inspected. */
+static bool
+bi_imath_small(bi_instruction *ins)
+{
+        bool sz = nir_alu_type_get_type_size(ins->src_types[0]) < 32;
+        return sz && (ins->type == BI_IMATH);
+}
+
+/* Lowers FMOV to ADD #0, since FMOV doesn't exist on the h/w and this is the
+ * latest time it's sane to lower (it's useful to distinguish before, but we'll
+ * need this handled during scheduling to ensure the ports get modeled
+ * correctly with respect to the new zero source).
+ *
+ * The instruction is rewritten in place: its type becomes BI_ADD, src[1] is
+ * set to BIR_INDEX_ZERO, and src_types[1] is copied from src_types[0].
+ * Instructions of any other type are left untouched. */
+
+static void
+bi_lower_fmov(bi_instruction *ins)
+{
+        if (ins->type != BI_FMOV)
+                return;
+
+        ins->type = BI_ADD;
+        ins->src[1] = BIR_INDEX_ZERO;
+        ins->src_types[1] = ins->src_types[0];
+}
+
 /* Eventually, we'll need a proper scheduling, grouping instructions
  * into clauses and ordering/assigning grouped instructions to the
  * appropriate FMA/ADD slots. Right now we do the dumbest possible
@@ -90,12 +152,26 @@ bi_schedule(bi_context *ctx)
                 list_inithead(&bblock->clauses);
 
                 bi_foreach_instr_in_block(bblock, ins) {
+                        /* Convenient time to lower */
+                        bi_lower_fmov(ins);
+
                         unsigned props = bi_class_props[ins->type];
 
                         bi_clause *u = rzalloc(ctx, bi_clause);
                         u->bundle_count = 1;
 
-                        if (props & BI_SCHED_FMA)
+                        /* Check for scheduling restrictions */
+
+                        bool can_fma = props & BI_SCHED_FMA;
+                        bool can_add = props & BI_SCHED_ADD;
+
+                        can_fma &= !bi_ambiguous_abs(ins);
+                        can_fma &= !bi_icmp(ins);
+                        can_fma &= !bi_imath_small(ins);
+
+                        assert(can_fma || can_add);
+
+                        if (can_fma)
                                 u->bundles[0].fma = ins;
                         else
                                 u->bundles[0].add = ins;
@@ -110,13 +186,23 @@ bi_schedule(bi_context *ctx)
                                 u->data_register_write_barrier = true;
                         }
 
+                        if (ins->type == BI_ATEST)
+                                u->dependencies |= (1 << 6);
+
+                        if (ins->type == BI_BLEND)
+                                u->dependencies |= (1 << 6) | (1 << 7);
+
                         ids = ids & 1;
                         last_id = u->scoreboard_id;
-                        u->back_to_back = true;
+                        u->back_to_back = false;
 
                         u->constant_count = 1;
                         u->constants[0] = ins->constant.u64;
 
+                        /* No indirect jumps yet */
+                        if (ins->type == BI_BRANCH)
+                                u->branch_constant = true;
+
                         u->clause_type = bi_clause_type_for_ins(ins);
 
                         list_addtail(&u->link, &bblock->clauses);