ctx->hr0_xyz_nodes = ctx->alloc_count;
ctx->alloc_count += 3;
+ /* Add vreg name for prefetch-exclusion range: */
+ ctx->prefetch_exclude_node = ctx->alloc_count++;
+
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
*/
if (is_tex_or_prefetch(instr)) {
int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
- int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+ int r0_xyz = is_half(instr) ?
ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
for (int i = 0; i < writemask_skipped_regs; i++)
ra_add_node_interference(ctx->g, name, r0_xyz + i);
}
+
+ /* Pre-fetched textures have fewer bits available to encode the dst
+ * register, so add additional interference with registers above
+ * that limit.
+ */
+ if (instr->opc == OPC_META_TEX_PREFETCH) {
+ ra_add_node_interference(ctx->g, name,
+ ctx->prefetch_exclude_node);
+ }
}
foreach_use (name, ctx, instr) {
arr->end_ip = 0;
}
-
/* set up the r0.xyz (and hr0.xyz) precolor regs. */
for (int i = 0; i < 3; i++) {
ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
ra_set_node_reg(ctx->g, ctx->hr0_xyz_nodes + i,
ctx->set->first_half_reg + i);
}
+ /* pre-color the node that conflicts with half/full regs higher than
+ * what can be encoded for a tex-prefetch dst:
+ */
+ ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
+ ctx->set->prefetch_exclude_reg);
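+ /* Any node given an interference edge with this pre-colored node
+ * (ie. the OPC_META_TEX_PREFETCH dst's above) can then only be
+ * assigned a register below the prefetch-encoding limit.
+ */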
+
/* compute live ranges (use/def) on a block level, also updating
* block's def/use bitmasks (used below to calculate per-block
* livein/liveout):
unsigned int half_classes[half_class_count];
unsigned int high_classes[high_class_count];
+ /* The pre-fetched tex dst is limited, on current gens, to regs
+ * 0x3f and below. This is an additional register class, with a
+ * single vreg, set up to conflict with any regs above that limit.
+ */
+ unsigned prefetch_exclude_class;
+ unsigned prefetch_exclude_reg;
+
/* The virtual register space flattens out all the classes,
* starting with full, followed by half and then high, ie:
*
unsigned alloc_count;
unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
- unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
+ unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors */
+ unsigned prefetch_exclude_node; /* ra node pre-colored to the prefetch-exclude reg */
/* one per class, plus one slot for arrays: */
unsigned class_alloc_count[total_class_count + 1];
unsigned class_base[total_class_count + 1];
reg++;
}
}
+
+ /*
+ * Set up conflicts with registers over 0x3f for the special vreg
+ * that exists only to provide interference for tex-prefetch dst's:
+ */
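+ /* Note: ra_add_transitive_reg_conflict() also picks up conflicts
+ * with anything the base reg already conflicts with, so the wider
+ * vecN ra regs overlapping the excluded range are covered too.
+ */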
+
+ for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
+ ra_add_transitive_reg_conflict(set->regs, i,
+ set->prefetch_exclude_reg);
+ }
+
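+ /* and likewise for the half-reg space: */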
+ for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
+ ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
+ set->prefetch_exclude_reg);
+ }
}
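
For reference, a minimal standalone sketch of the same exclusion-vreg trick, using the older unsigned-class flavor of Mesa's util/register_allocate API that ir3 uses here. The register counts, indices, and the pick_reg_below_limit() helper are invented for illustration; only the pattern (an extra reg in its own class, transitive conflicts with the out-of-range regs, a pre-colored node, and an interference edge) mirrors what the patch does with prefetch_exclude_reg / prefetch_exclude_node:

    #include "util/register_allocate.h"

    static unsigned
    pick_reg_below_limit(void *mem_ctx)
    {
        /* 8 scalar regs plus one extra "exclude" reg: */
        struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, 9, true);

        unsigned scalar_class = ra_alloc_reg_class(regs);
        for (unsigned i = 0; i < 8; i++)
            ra_class_add_reg(regs, scalar_class, i);

        /* one class with a single vreg, conflicting with regs 4..7: */
        unsigned exclude_class = ra_alloc_reg_class(regs);
        ra_class_add_reg(regs, exclude_class, 8);
        for (unsigned i = 4; i < 8; i++)
            ra_add_transitive_reg_conflict(regs, i, 8);

        ra_set_finalize(regs, NULL);

        /* node 0 is the constrained dst, node 1 the exclude vreg: */
        struct ra_graph *g = ra_alloc_interference_graph(regs, 2);
        ra_set_node_class(g, 0, scalar_class);
        ra_set_node_class(g, 1, exclude_class);
        ra_set_node_reg(g, 1, 8);           /* pre-color the exclude node */
        ra_add_node_interference(g, 0, 1);  /* forces node 0 below reg 4 */

        if (!ra_allocate(g))
            return ~0u;

        return ra_get_node_reg(g, 0);       /* one of regs 0..3 */
    }

With the graph set up this way, the allocator can never hand node 0 a register that conflicts with the pre-colored exclude node, which is how the prefetch dst limit is enforced in the patch.
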
/* One-time setup of RA register-set, which describes all the possible
for (unsigned i = 0; i < high_class_count; i++)
ra_reg_count += HIGH_CLASS_REGS(i);
+ ra_reg_count += 1; /* for tex-prefetch excludes */
+
/* allocate the reg-set.. */
set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
}
}
- /* starting a6xx, half precision regs conflict w/ full precision regs: */
+ /*
+ * Set up an additional class, with one vreg, that simply conflicts
+ * with registers that are too high to encode as a tex-prefetch dst.
+ * This vreg is only used to set up additional conflicts so that RA
+ * knows to allocate prefetch dst regs below the limit:
+ */
+ set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
+ ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
+ set->prefetch_exclude_reg = reg++;
+
+ /*
+ * And finally set up conflicts. Starting with a6xx, half precision
+ * regs conflict w/ full precision regs (when using MERGEDREGS):
+ */
if (compiler->gpu_id >= 600) {
for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
unsigned freg = set->gpr_to_ra_reg[0][i];