/* meta instructions (category -1): */
/* placeholder instr to mark shader inputs: */
OPC_META_INPUT = _OPC(-1, 0),
- /* The "fan-in" and "fan-out" instructions are used for keeping
+ /* The "collect" and "split" instructions are used for keeping
* track of instructions that write to multiple dst registers
- * (fan-out) like texture sample instructions, or read multiple
- * consecutive scalar registers (fan-in) (bary.f, texture samp)
+ * (split) like texture sample instructions, or read multiple
+ * consecutive scalar registers (collect) (bary.f, texture samp)
+ *
+ * A "split" extracts a scalar component from a vecN, and a
+ * "collect" gathers multiple scalar components into a vecN
*/
- OPC_META_FO = _OPC(-1, 2),
- OPC_META_FI = _OPC(-1, 3),
+ OPC_META_SPLIT = _OPC(-1, 2),
+ OPC_META_COLLECT = _OPC(-1, 3),
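/* e.g. (illustrative IR, not from the patch): a collect feeding a
 * texture sample whose vec4 result is then split back out:
 *
 *   coord = collect(x, y)        ; vec2 src for the sam
 *   smp   = sam coord, ...       ; writes 4 consecutive scalars
 *   smp.x = split smp, off=0
 *   smp.w = split smp, off=3
 */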
/* placeholder for texture fetches that run before FS invocation
* starts:
*/
struct {
int off; /* component/offset */
- } fo;
+ } split;
struct {
unsigned samp, tex;
unsigned input_offset;
} prefetch;
int sun; /* Sethi–Ullman number, used by sched */
int use_count; /* currently just updated/used by cp */
- /* Used during CP and RA stages. For fanin and shader inputs/
+ /* Used during CP and RA stages. For collect and shader inputs/
* outputs where we need a sequence of consecutive registers,
* keep track of each src instruction's left (ie 'n-1') and right
* (ie 'n+1') neighbor. The front-end must insert enough mov's
* to ensure each instruction has at most one left and at most
* one right neighbor; refcounting the links seems like it should
* be overkill.. the problem is if, potentially after already
* eliminating some mov's, you have a single mov that needs to
* be grouped with its neighbors in two different
- * places (ex. shader output and a fanin).
+ * places (ex. shader output and a collect).
*/
struct {
struct ir3_instruction *left, *right;
} cp;
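/* sketch of what the grouping pass establishes for, e.g., a collect
 * with srcs {a, b, c} (see group_n() below):
 *
 *   a->cp.right = b;   b->cp.left = a;
 *   b->cp.right = c;   c->cp.left = b;
 *
 * so RA can allocate a/b/c as one run of consecutive registers.
 */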
/* extract back out the 'dummy' which serves as stand-in for dest: */
struct ir3_instruction *src = ssa(atomic->regs[3]);
- debug_assert(src->opc == OPC_META_FI);
+ debug_assert(src->opc == OPC_META_COLLECT);
struct ir3_instruction *dummy = ssa(src->regs[1]);
struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32);
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
struct ir3_instruction *dst = ctx->last_dst[i];
dst->regs[0]->flags |= IR3_REG_HALF;
- if (ctx->last_dst[i]->opc == OPC_META_FO)
+ if (ctx->last_dst[i]->opc == OPC_META_SPLIT)
dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
}
}
unsigned flags = dest_flags(arr[0]);
- collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
+ collect = ir3_instr_create2(block, OPC_META_COLLECT, 1 + arrsz);
__ssa_dst(collect)->flags |= flags;
for (unsigned i = 0; i < arrsz; i++) {
struct ir3_instruction *elem = arr[i];
/* append each scalar element as a src of the collect: */
__ssa_src(collect, elem, flags);
}
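/* usage sketch, assuming this loop is the body of a create-collect
 * style helper (names illustrative):
 *
 *   struct ir3_instruction *xyz[] = { x, y, z };
 *   struct ir3_instruction *coord = create_collect(block, xyz, 3);
 *   // 'coord' is now a single vec3 operand, e.g. a sam coordinate
 */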
/* helper for instructions that produce multiple consecutive scalar
- * outputs which need to have a split/fanout meta instruction inserted
+ * outputs which need to have a split meta instruction inserted
*/
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
		struct ir3_instruction *src, unsigned base, unsigned n)
{
	struct ir3_instruction *prev = NULL;
unsigned flags = dest_flags(src);
for (int i = 0, j = 0; i < n; i++) {
- struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
+ struct ir3_instruction *split =
+ ir3_instr_create(block, OPC_META_SPLIT);
__ssa_dst(split)->flags |= flags;
__ssa_src(split, src, flags);
- split->fo.off = i + base;
+ split->split.off = i + base;
if (prev) {
	split->cp.left = prev;
	split->cp.left_cnt++;
	prev->cp.right = split;
	prev->cp.right_cnt++;
}
prev = split;

if (src->regs[0]->wrmask & (1 << (i + base)))
	dst[j++] = split;
}
* We could possibly do a bit better, and do copy-propagation if
* we can CP all components that are being split out.
*/
- if (src_instr->opc == OPC_META_FO) {
+ if (src_instr->opc == OPC_META_SPLIT) {
if (!dst_instr)
return false;
- if (dst_instr->opc == OPC_META_FI)
+ if (dst_instr->opc == OPC_META_COLLECT)
return false;
if (dst_instr->cp.left || dst_instr->cp.right)
return false;
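/* i.e. once the consumer is already linked into a left/right group,
 * propagating a copy into it could break the consecutive-register
 * constraint that the group encodes, so we conservatively bail.
 */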
/* Handle converting a sam.s2en (taking samp/tex idx via a
 * register) into a normal sam (with immediate samp/tex idx)
 * when the indices turn out to be immediates:
 */
if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
- /* The first src will be a fan-in (collect), if both of it's
+ /* The first src will be a collect; if both of its
 * two sources are mov from imm, then we can use the immediate
 * samp/tex values directly and drop the S2EN indirection:
*/
struct ir3_instruction *samp_tex = ssa(instr->regs[1]);
- debug_assert(samp_tex->opc == OPC_META_FI);
+ debug_assert(samp_tex->opc == OPC_META_COLLECT);
struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
struct ir3_instruction *tex = ssa(samp_tex->regs[2]);
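/* A sketch of the lowering the comment above describes; cat5.samp,
 * cat5.tex and iim_val are the fields ir3 uses elsewhere for
 * immediate sampler state (illustrative, not the verbatim patch):
 */
if ((samp->opc == OPC_MOV) &&
		(samp->regs[1]->flags & IR3_REG_IMMED) &&
		(tex->opc == OPC_MOV) &&
		(tex->regs[1]->flags & IR3_REG_IMMED)) {
	/* both indices are immediates: encode them directly in the
	 * sam instruction and drop the S2EN indirection:
	 */
	instr->flags &= ~IR3_INSTR_S2EN;
	instr->cat5.samp = samp->regs[1]->iim_val;
	instr->cat5.tex = tex->regs[1]->iim_val;
}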
if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
continue;
if (instr->flags & IR3_INSTR_UNUSED) {
- if (instr->opc == OPC_META_FO) {
+ if (instr->opc == OPC_META_SPLIT) {
struct ir3_instruction *src = ssa(instr->regs[1]);
/* leave inputs alone.. we can't optimize out components of
* an input, since the hw is still going to be writing all
* of the input's components regardless:
*/
if ((src->opc != OPC_META_INPUT) &&
(src->regs[0]->wrmask > 1)) {
- src->regs[0]->wrmask &= ~(1 << instr->fo.off);
+ src->regs[0]->wrmask &= ~(1 << instr->split.off);
/* prune no-longer-needed right-neighbors. We could
* probably do the same for left-neighbors (ie. tex
* fetches that only need the later components), but that
* would make RA more confusing than it needs to be.
*/
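/* worked example: a vec4 sam whose '.y' split turns out dead:
 *   wrmask = 0b1111 & ~(1 << 1)  ->  0b1101
 * so RA no longer reserves the unused .y component.
 */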
if (ir3_instr_check_mark(instr))
return;
- if (instr->opc == OPC_META_FI)
+ if (instr->opc == OPC_META_COLLECT)
group_n(&instr_ops, instr, instr->regs_count - 1);
foreach_ssa_src(src, instr)
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT: printf("_meta:in"); break;
- case OPC_META_FO: printf("_meta:fo"); break;
- case OPC_META_FI: printf("_meta:fi"); break;
+ case OPC_META_SPLIT: printf("_meta:split"); break;
+ case OPC_META_COLLECT: printf("_meta:collect"); break;
case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break;
/* shouldn't hit here.. just for debugging: */
default: printf("_meta:%d", instr->opc); break;
}
printf("]");
}
- if (instr->opc == OPC_META_FO) {
- printf(", off=%d", instr->fo.off);
+ if (instr->opc == OPC_META_SPLIT) {
+ printf(", off=%d", instr->split.off);
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex,
instr->prefetch.samp, instr->prefetch.input_offset);
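/* with the rename, the meta instructions print as, e.g.
 * (illustrative):
 *
 *   _meta:split ..., off=2
 *   _meta:collect ...
 */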
return id->defn;
}
- if (instr->opc == OPC_META_FI) {
+ if (instr->opc == OPC_META_COLLECT) {
/* What about the case where the collect is a subset of the array?
* We'd need to find the distance between where the actual array starts
- * and fanin.. that probably doesn't happen currently.
+ * and collect.. that probably doesn't happen currently.
*/
struct ir3_register *src;
int dsz, doff;
/* by definition, the entire sequence forms one linked list
* of single scalar register nodes (even if some of them may
- * be fanouts from a texture sample (for example) instr. We
+ * be splits from a texture sample (for example) instr). We
* just need to walk the list finding the first element of
* the group defined (lowest ip)
*/
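/* sketch of that walk; ir3_neighbor_first() and ->ip are assumed
 * from the surrounding RA code (illustrative):
 */
struct ir3_instruction *f = ir3_neighbor_first(instr);
while (f) {
	if (!d || (f->ip < d->ip))
		d = f;
	f = f->cp.right;
}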
} else {
/* second case is looking directly at the instruction which
* produces multiple values (eg, texture sample), rather
- * than the fanout nodes that point back to that instruction.
+ * than the split nodes that point back to that instruction.
* This isn't quite right, because it may be part of a larger
* group, such as a texture sample whose split components also
* feed, together with other values, into the src of a later
* collect.
*/
d = instr;
}
- if (d->opc == OPC_META_FO) {
+ if (d->opc == OPC_META_SPLIT) {
struct ir3_instruction *dd;
int dsz, doff;
*sz = MAX2(*sz, dsz);
- if (instr->opc == OPC_META_FO)
- *off = MAX2(*off, instr->fo.off);
+ if (instr->opc == OPC_META_SPLIT)
+ *off = MAX2(*off, instr->split.off);
d = dd;
}
- debug_assert(d->opc != OPC_META_FO);
+ debug_assert(d->opc != OPC_META_SPLIT);
id->defn = d;
id->sz = *sz;
/* There are a couple special cases to deal with here:
*
- * fanout: used to split values from a higher class to a lower
+ * split: used to split values from a higher class to a lower
* class, for example split the results of a texture fetch
* into individual scalar values; We skip over these from
* a 'def' perspective, and for a 'use' we walk the chain
* up to the defining instruction.
*
- * fanin: used to collect values from lower class and assemble
+ * collect: used to collect values from lower class and assemble
* them together into a higher class, for example arguments
* to texture sample instructions; We consider these to be
- * defined at the earliest fanin source.
+ * defined at the earliest collect source.
*
* Most of this is handled in the get_definer() helper.
*
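 * A sketch of how the two cases resolve (hypothetical values):
 *
 *   c = collect(a, b)      ->  whole group defined at definer(a),
 *                              sz=2, off=0
 *   s = split c, off=1     ->  a use of s walks up to c's definer,
 *                              at off=1
 */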
continue;
if (instr->block != src->block)
continue;
- if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
+ if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
unuse_each_src(ctx, src);
} else {
debug_assert(src->use_count > 0);
static void
use_instr(struct ir3_instruction *instr)
{
- if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) {
+ if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
use_each_src(instr);
} else {
instr->use_count++;
static void
update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
- if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+ if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
return;
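/* (collect/split add no pressure of their own: RA treats the
 * grouped scalars as a single value, counted at the instruction
 * that actually produces them.)
 */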
ctx->live_values += dest_regs(instr);
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
+ if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
continue;
use_each_src(instr);
if (instr->block != src->block)
continue;
- /* for fanout/split, just pass things along to the real src: */
- if (src->opc == OPC_META_FO)
+ /* for split, just pass things along to the real src: */
+ if (src->opc == OPC_META_SPLIT)
src = ssa(src->regs[1]);
- /* for fanin/collect, if this is the last use of *each* src,
+ /* for collect, if this is the last use of *each* src,
* then it will decrease the live values, since RA treats
* them as a whole:
*/
- if (src->opc == OPC_META_FI) {
+ if (src->opc == OPC_META_COLLECT) {
struct ir3_instruction *src2;
bool last_use = true;
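/* plausible completion of this check (sketch): only when no src
 * of the collect has any remaining user does scheduling it free
 * the whole grouped value:
 */
foreach_ssa_src(src2, src) {
	if (src2->use_count > 0) {
		last_use = false;
		break;
	}
}
if (last_use)
	ctx->live_values -= dest_regs(src);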