From 611258d5782c9b1c4d5e5b26f544d199404a511f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 24 Oct 2019 10:22:33 -0700 Subject: [PATCH] freedreno/ir3: rename fanin/fanout to collect/split If I'm going to refactor a bit to use these meta instructions to also handle input/output, then might as well clean up the names first. Nouveau also uses collect/split for names of these meta instructions, and I like those names better. Signed-off-by: Rob Clark Reviewed-by: Kristian H. Kristensen Reviewed-by: Eric Anholt --- src/freedreno/ir3/instr-a3xx.h | 13 ++++++++----- src/freedreno/ir3/ir3.h | 6 +++--- src/freedreno/ir3/ir3_a6xx.c | 2 +- src/freedreno/ir3/ir3_context.c | 11 ++++++----- src/freedreno/ir3/ir3_cp.c | 8 ++++---- src/freedreno/ir3/ir3_depth.c | 4 ++-- src/freedreno/ir3/ir3_group.c | 2 +- src/freedreno/ir3/ir3_print.c | 8 ++++---- src/freedreno/ir3/ir3_ra.c | 22 +++++++++++----------- src/freedreno/ir3/ir3_sched.c | 16 ++++++++-------- 10 files changed, 48 insertions(+), 44 deletions(-) diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 30f5e37953c..4a2e9df64f3 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -214,13 +214,16 @@ typedef enum { /* meta instructions (category -1): */ /* placeholder instr to mark shader inputs: */ OPC_META_INPUT = _OPC(-1, 0), - /* The "fan-in" and "fan-out" instructions are used for keeping + /* The "collect" and "split" instructions are used for keeping * track of instructions that write to multiple dst registers - * (fan-out) like texture sample instructions, or read multiple - * consecutive scalar registers (fan-in) (bary.f, texture samp) + * (split) like texture sample instructions, or read multiple + * consecutive scalar registers (collect) (bary.f, texture samp) + * + * A "split" extracts a scalar component from a vecN, and a + * "collect" gathers multiple scalar components into a vecN */ - OPC_META_FO = _OPC(-1, 2), - OPC_META_FI = _OPC(-1, 3), + 
OPC_META_SPLIT = _OPC(-1, 2), + OPC_META_COLLECT = _OPC(-1, 3), /* placeholder for texture fetches that run before FS invocation * starts: diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index e1ec7579341..22802aecf18 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -267,7 +267,7 @@ struct ir3_instruction { */ struct { int off; /* component/offset */ - } fo; + } split; struct { unsigned samp, tex; unsigned input_offset; @@ -313,7 +313,7 @@ struct ir3_instruction { int sun; /* Sethi–Ullman number, used by sched */ int use_count; /* currently just updated/used by cp */ - /* Used during CP and RA stages. For fanin and shader inputs/ + /* Used during CP and RA stages. For collect and shader inputs/ * outputs where we need a sequence of consecutive registers, * keep track of each src instructions left (ie 'n-1') and right * (ie 'n+1') neighbor. The front-end must insert enough mov's @@ -333,7 +333,7 @@ struct ir3_instruction { * it should be overkill.. the problem is if, potentially after * already eliminating some mov's, if you have a single mov that * needs to be grouped with it's neighbors in two different - * places (ex. shader output and a fanin). + * places (ex. shader output and a collect). 
*/ struct { struct ir3_instruction *left, *right; diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 3f5127e72ee..4b2ecc00827 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -346,7 +346,7 @@ get_atomic_dest_mov(struct ir3_instruction *atomic) /* extract back out the 'dummy' which serves as stand-in for dest: */ struct ir3_instruction *src = ssa(atomic->regs[3]); - debug_assert(src->opc == OPC_META_FI); + debug_assert(src->opc == OPC_META_COLLECT); struct ir3_instruction *dummy = ssa(src->regs[1]); struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32); diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index d3f41025b92..c358b37a896 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -209,7 +209,7 @@ ir3_put_dst(struct ir3_context *ctx, nir_dest *dst) for (unsigned i = 0; i < ctx->last_dst_n; i++) { struct ir3_instruction *dst = ctx->last_dst[i]; dst->regs[0]->flags |= IR3_REG_HALF; - if (ctx->last_dst[i]->opc == OPC_META_FO) + if (ctx->last_dst[i]->opc == OPC_META_SPLIT) dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF; } } @@ -257,7 +257,7 @@ ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, unsigned flags = dest_flags(arr[0]); - collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); + collect = ir3_instr_create2(block, OPC_META_COLLECT, 1 + arrsz); __ssa_dst(collect)->flags |= flags; for (unsigned i = 0; i < arrsz; i++) { struct ir3_instruction *elem = arr[i]; @@ -301,7 +301,7 @@ ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, } /* helper for instructions that produce multiple consecutive scalar - * outputs which need to have a split/fanout meta instruction inserted + * outputs which need to have a split meta instruction inserted */ void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, @@ -317,10 +317,11 @@ ir3_split_dest(struct ir3_block 
*block, struct ir3_instruction **dst, unsigned flags = dest_flags(src); for (int i = 0, j = 0; i < n; i++) { - struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); + struct ir3_instruction *split = + ir3_instr_create(block, OPC_META_SPLIT); __ssa_dst(split)->flags |= flags; __ssa_src(split, src, flags); - split->fo.off = i + base; + split->split.off = i + base; if (prev) { split->cp.left = prev; diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 5cc3f833144..d07418419cd 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -85,10 +85,10 @@ static bool is_eligible_mov(struct ir3_instruction *instr, * We could possibly do a bit better, and copy-propagation if * we can CP all components that are being fanned out. */ - if (src_instr->opc == OPC_META_FO) { + if (src_instr->opc == OPC_META_SPLIT) { if (!dst_instr) return false; - if (dst_instr->opc == OPC_META_FI) + if (dst_instr->opc == OPC_META_COLLECT) return false; if (dst_instr->cp.left || dst_instr->cp.right) return false; @@ -706,12 +706,12 @@ instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) */ if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) && !(ir3_shader_debug & IR3_DBG_FORCES2EN)) { - /* The first src will be a fan-in (collect), if both of it's + /* The first src will be a collect, if both of it's * two sources are mov from imm, then we can */ struct ir3_instruction *samp_tex = ssa(instr->regs[1]); - debug_assert(samp_tex->opc == OPC_META_FI); + debug_assert(samp_tex->opc == OPC_META_COLLECT); struct ir3_instruction *samp = ssa(samp_tex->regs[1]); struct ir3_instruction *tex = ssa(samp_tex->regs[2]); diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c index 49feda7b195..601ab427fb1 100644 --- a/src/freedreno/ir3/ir3_depth.c +++ b/src/freedreno/ir3/ir3_depth.c @@ -175,7 +175,7 @@ remove_unused_by_block(struct ir3_block *block) if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK) 
continue; if (instr->flags & IR3_INSTR_UNUSED) { - if (instr->opc == OPC_META_FO) { + if (instr->opc == OPC_META_SPLIT) { struct ir3_instruction *src = ssa(instr->regs[1]); /* leave inputs alone.. we can't optimize out components of * an input, since the hw is still going to be writing all @@ -184,7 +184,7 @@ remove_unused_by_block(struct ir3_block *block) */ if ((src->opc != OPC_META_INPUT) && (src->regs[0]->wrmask > 1)) { - src->regs[0]->wrmask &= ~(1 << instr->fo.off); + src->regs[0]->wrmask &= ~(1 << instr->split.off); /* prune no-longer needed right-neighbors. We could * probably do the same for left-neighbors (ie. tex diff --git a/src/freedreno/ir3/ir3_group.c b/src/freedreno/ir3/ir3_group.c index bd7ef2b612e..e9eb1e58d47 100644 --- a/src/freedreno/ir3/ir3_group.c +++ b/src/freedreno/ir3/ir3_group.c @@ -185,7 +185,7 @@ instr_find_neighbors(struct ir3_instruction *instr) if (ir3_instr_check_mark(instr)) return; - if (instr->opc == OPC_META_FI) + if (instr->opc == OPC_META_COLLECT) group_n(&instr_ops, instr, instr->regs_count - 1); foreach_ssa_src(src, instr) diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index aa8ba95ce21..a4830ed0a17 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -95,8 +95,8 @@ static void print_instr_name(struct ir3_instruction *instr, bool flags) if (is_meta(instr)) { switch (instr->opc) { case OPC_META_INPUT: printf("_meta:in"); break; - case OPC_META_FO: printf("_meta:fo"); break; - case OPC_META_FI: printf("_meta:fi"); break; + case OPC_META_SPLIT: printf("_meta:split"); break; + case OPC_META_COLLECT: printf("_meta:collect"); break; case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break; /* shouldn't hit here.. 
just for debugging: */ @@ -237,8 +237,8 @@ print_instr(struct ir3_instruction *instr, int lvl) printf("]"); } - if (instr->opc == OPC_META_FO) { - printf(", off=%d", instr->fo.off); + if (instr->opc == OPC_META_SPLIT) { + printf(", off=%d", instr->split.off); } else if (instr->opc == OPC_META_TEX_PREFETCH) { printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex, instr->prefetch.samp, instr->prefetch.input_offset); diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 19596c73044..c3f04ff5aeb 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -421,10 +421,10 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, return id->defn; } - if (instr->opc == OPC_META_FI) { + if (instr->opc == OPC_META_COLLECT) { /* What about the case where collect is subset of array, we * need to find the distance between where actual array starts - * and fanin.. that probably doesn't happen currently. + * and collect.. that probably doesn't happen currently. */ struct ir3_register *src; int dsz, doff; @@ -454,7 +454,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, /* by definition, the entire sequence forms one linked list * of single scalar register nodes (even if some of them may - * be fanouts from a texture sample (for example) instr. We + * be splits from a texture sample (for example) instr. We * just need to walk the list finding the first element of * the group defined (lowest ip) */ @@ -480,7 +480,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, } else { /* second case is looking directly at the instruction which * produces multiple values (eg, texture sample), rather - * than the fanout nodes that point back to that instruction. + * than the split nodes that point back to that instruction. 
* This isn't quite right, because it may be part of a larger * group, such as: * @@ -500,7 +500,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, d = instr; } - if (d->opc == OPC_META_FO) { + if (d->opc == OPC_META_SPLIT) { struct ir3_instruction *dd; int dsz, doff; @@ -511,13 +511,13 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, *sz = MAX2(*sz, dsz); - if (instr->opc == OPC_META_FO) - *off = MAX2(*off, instr->fo.off); + if (instr->opc == OPC_META_SPLIT) + *off = MAX2(*off, instr->split.off); d = dd; } - debug_assert(d->opc != OPC_META_FO); + debug_assert(d->opc != OPC_META_SPLIT); id->defn = d; id->sz = *sz; @@ -707,16 +707,16 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) /* There are a couple special cases to deal with here: * - * fanout: used to split values from a higher class to a lower + * split: used to split values from a higher class to a lower * class, for example split the results of a texture fetch * into individual scalar values; We skip over these from * a 'def' perspective, and for a 'use' we walk the chain * up to the defining instruction. * - * fanin: used to collect values from lower class and assemble + * collect: used to collect values from lower class and assemble * them together into a higher class, for example arguments * to texture sample instructions; We consider these to be - * defined at the earliest fanin source. + * defined at the earliest collect source. * * Most of this is handled in the get_definer() helper. 
* diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 34c648a6748..f5b9d3bd6f5 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -79,7 +79,7 @@ unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) continue; if (instr->block != src->block) continue; - if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) { + if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) { unuse_each_src(ctx, src); } else { debug_assert(src->use_count > 0); @@ -133,7 +133,7 @@ use_each_src(struct ir3_instruction *instr) static void use_instr(struct ir3_instruction *instr) { - if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) { + if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) { use_each_src(instr); } else { instr->use_count++; @@ -143,7 +143,7 @@ use_instr(struct ir3_instruction *instr) static void update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { - if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) + if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) return; ctx->live_values += dest_regs(instr); @@ -161,7 +161,7 @@ update_use_count(struct ir3 *ir) list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) + if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) continue; use_each_src(instr); @@ -542,15 +542,15 @@ live_effect(struct ir3_instruction *instr) if (instr->block != src->block) continue; - /* for fanout/split, just pass things along to the real src: */ - if (src->opc == OPC_META_FO) + /* for split, just pass things along to the real src: */ + if (src->opc == OPC_META_SPLIT) src = ssa(src->regs[1]); - /* for fanin/collect, if this is the last use of *each* src, + /* for collect, if this is the last use of *each* 
src, * then it will decrease the live values, since RA treats * them as a whole: */ - if (src->opc == OPC_META_FI) { + if (src->opc == OPC_META_COLLECT) { struct ir3_instruction *src2; bool last_use = true; -- 2.30.2