From bd19e7634027036dfc67633579750f1d45a45b74 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 30 Mar 2020 12:25:20 -0400 Subject: [PATCH] pan/bi: Handle fp16/abs scheduling restriction See previous commit for the packing side. Here we update the scheduler to accommodate this. Note we don't actually hit this path yet, but it's good to be proactive. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_schedule.c | 29 ++++++++++++++++++++++++++++- src/panfrost/bifrost/bi_tables.c | 4 ++-- src/panfrost/bifrost/compiler.h | 3 +++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index f112553c05a..0f72fc12713 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -71,6 +71,24 @@ bi_clause_type_for_ins(bi_instruction *ins) } } +/* There is an encoding restriction against FMA fp16 add/min/max + * having both sources with abs(..) with a duplicated source. This is + * due to the packing being order-sensitive, so the ports must end up distinct + * to handle both having abs(..). The swizzle doesn't matter here. Note + * BIR_INDEX_REGISTER generally should not be used pre-schedule (TODO: enforce + * this). + */ + +static bool +bi_ambiguous_abs(bi_instruction *ins) +{ + bool classy = bi_class_props[ins->type] & BI_NO_ABS_ABS_FP16_FMA; + bool typey = ins->dest_type == nir_type_float16; + bool absy = ins->src_abs[0] && ins->src_abs[1]; + + return classy && typey && absy; +} + /* Eventually, we'll need a proper scheduling, grouping instructions * into clauses and ordering/assigning grouped instructions to the * appropriate FMA/ADD slots. 
Right now we do the dumbest possible @@ -95,7 +113,16 @@ bi_schedule(bi_context *ctx) bi_clause *u = rzalloc(ctx, bi_clause); u->bundle_count = 1; - if (props & BI_SCHED_FMA) + /* Check for scheduling restrictions */ + + bool can_fma = props & BI_SCHED_FMA; + bool can_add = props & BI_SCHED_ADD; + + can_fma &= !bi_ambiguous_abs(ins); + + assert(can_fma || can_add); + + if (can_fma) u->bundles[0].fma = ins; else u->bundles[0].add = ins; diff --git a/src/panfrost/bifrost/bi_tables.c b/src/panfrost/bifrost/bi_tables.c index 3926afc4335..04beb55c71a 100644 --- a/src/panfrost/bifrost/bi_tables.c +++ b/src/panfrost/bifrost/bi_tables.c @@ -27,7 +27,7 @@ #include "compiler.h" unsigned bi_class_props[BI_NUM_CLASSES] = { - [BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL, + [BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA, [BI_ATEST] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD, [BI_BRANCH] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD, [BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL, @@ -45,7 +45,7 @@ unsigned bi_class_props[BI_NUM_CLASSES] = { [BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST, [BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST, [BI_LOAD_VAR_ADDRESS] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_DATA_REG_DEST, - [BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL, + [BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL | BI_NO_ABS_ABS_FP16_FMA, [BI_MOV] = BI_SCHED_ALL, [BI_FMOV] = BI_MODS | BI_SCHED_ALL, [BI_SHIFT] = BI_SCHED_ALL, diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 554cd4e93d7..9cfd0c67d15 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -121,6 +121,9 @@ extern unsigned bi_class_props[BI_NUM_CLASSES]; #define BI_DATA_REG_SRC (1 << 9) #define BI_DATA_REG_DEST (1 << 10) +/* Quirk: cannot encode multiple abs on FMA in fp16 mode */ +#define BI_NO_ABS_ABS_FP16_FMA (1 << 11) + /* It can't get any worse than csel4... can it? 
*/ #define BIR_SRC_COUNT 4 -- 2.30.2