#include "util/bitset.h"
#include "ir3.h"
-#include "ir3_compiler.h"
+#include "ir3_shader.h"
#include "ir3_ra.h"
ctx->hr0_xyz_nodes = ctx->alloc_count;
ctx->alloc_count += 3;
+ /* Add vreg name for prefetch-exclusion range: */
+ ctx->prefetch_exclude_node = ctx->alloc_count++;
+
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
/* TODO add selector callback for split (pre-a6xx) register file: */
- if (ctx->ir->compiler->gpu_id >= 600) {
+ if (ctx->v->mergedregs) {
ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx);
if (ctx->scalar_pass) {
*/
if (is_tex_or_prefetch(instr)) {
int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
- int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+ int r0_xyz = is_half(instr) ?
ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
for (int i = 0; i < writemask_skipped_regs; i++)
ra_add_node_interference(ctx->g, name, r0_xyz + i);
}
+
+	/* Pre-fetched textures have a lower limit on the number of bits
+	 * available to encode the dst register, so add additional interference
+	 * with registers above that limit.
+	 */
+ if (instr->opc == OPC_META_TEX_PREFETCH) {
+ ra_add_node_interference(ctx->g, name,
+ ctx->prefetch_exclude_node);
+ }
}
foreach_use (name, ctx, instr) {
arr->end_ip = 0;
}
-
/* set up the r0.xyz precolor regs. */
for (int i = 0; i < 3; i++) {
ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
ctx->set->first_half_reg + i);
}
+	/* Pre-color the node that conflicts with half/full regs higher than
+	 * what can be encoded for tex-prefetch:
+	 */
+ ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
+ ctx->set->prefetch_exclude_reg);
+
/* compute live ranges (use/def) on a block level, also updating
* block's def/use bitmasks (used below to calculate per-block
* livein/liveout):
}
}
-/* some instructions need fix-up if dst register is half precision: */
-static void fixup_half_instr_dst(struct ir3_instruction *instr)
-{
- switch (opc_cat(instr->opc)) {
- case 1: /* move instructions */
- instr->cat1.dst_type = half_type(instr->cat1.dst_type);
- break;
- case 4:
- switch (instr->opc) {
- case OPC_RSQ:
- instr->opc = OPC_HRSQ;
- break;
- case OPC_LOG2:
- instr->opc = OPC_HLOG2;
- break;
- case OPC_EXP2:
- instr->opc = OPC_HEXP2;
- break;
- default:
- break;
- }
- break;
- case 5:
- instr->cat5.type = half_type(instr->cat5.type);
- break;
- }
-}
-/* some instructions need fix-up if src register is half precision: */
-static void fixup_half_instr_src(struct ir3_instruction *instr)
-{
- switch (instr->opc) {
- case OPC_MOV:
- instr->cat1.src_type = half_type(instr->cat1.src_type);
- break;
- case OPC_MAD_F32:
- instr->opc = OPC_MAD_F16;
- break;
- case OPC_SEL_B32:
- instr->opc = OPC_SEL_B16;
- break;
- case OPC_SEL_S32:
- instr->opc = OPC_SEL_S16;
- break;
- case OPC_SEL_F32:
- instr->opc = OPC_SEL_F16;
- break;
- case OPC_SAD_S32:
- instr->opc = OPC_SAD_S16;
- break;
- default:
- break;
- }
-}
-
/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
* array access(es) which do not have any previous access to depend
* on from scheduling point of view
if (writes_gpr(instr)) {
if (should_assign(ctx, instr)) {
reg_assign(ctx, instr->regs[0], instr);
- if (instr->regs[0]->flags & IR3_REG_HALF)
- fixup_half_instr_dst(instr);
}
}
/* Note: reg->instr could be null for IR3_REG_ARRAY */
if (src || (reg->flags & IR3_REG_ARRAY))
reg_assign(ctx, instr->regs[n+1], src);
-
- if (instr->regs[n+1]->flags & IR3_REG_HALF)
- fixup_half_instr_src(instr);
}
}
debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
- /* only consider the first component: */
- if (id->off > 0)
- continue;
-
- if (ctx->scalar_pass && !should_assign(ctx, instr))
- continue;
-
/* 'base' is in scalar (class 0) but we need to map that
* the conflicting register of the appropriate class (ie.
* input could be vec2/vec3/etc)
* .. and so on..
*/
unsigned regid = instr->regs[0]->num;
+ assert(regid >= id->off);
+ regid -= id->off;
+
unsigned reg = ctx->set->gpr_to_ra_reg[id->cls][regid];
unsigned name = ra_name(ctx, id);
ra_set_node_reg(ctx->g, name, reg);
struct ir3_ra_ctx ctx = {
.v = v,
.ir = v->ir,
- .set = v->ir->compiler->set,
+ .set = v->mergedregs ?
+ v->ir->compiler->mergedregs_set : v->ir->compiler->set,
.scalar_pass = scalar_pass,
};
int ret;