* creating duplicate variants..
*/
- if (ir3_key_lowers_nir(&so->key)) {
- nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
- ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
- } else {
- /* fast-path for shader key that lowers nothing in NIR: */
- ctx->s = nir_shader_clone(ctx, so->shader->nir);
- }
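+ /* note: ir3_optimize_nir() now works in place on the clone (its
+ * result is no longer assigned), so a single nir_shader_clone()
+ * covers both paths:
+ */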
+ ctx->s = nir_shader_clone(ctx, so->shader->nir);
+ if (ir3_key_lowers_nir(&so->key))
+ ir3_optimize_nir(so->shader, ctx->s, &so->key);
/* we want to lower imul as late as possible, to also catch those
* generated by earlier lowering passes; so do this here instead of
* in ir3_optimize_nir().  However, we want a final swing of a few
* passes afterwards to have a chance at optimizing the result.
*/
- bool progress;
+ bool progress = false;
NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
if (progress) {
NIR_PASS_V(ctx->s, nir_opt_algebraic);
NIR_PASS_V(ctx->s, nir_opt_constant_folding);
}
- NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
+ /* The texture pre-fetch feature exists on a4xx onwards, but only
+ * enable it on generations where it has been tested:
+ */
+ if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gpu_id >= 600))
+ NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);
- if (ir3_shader_debug & IR3_DBG_DISASM) {
- DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}",
- so->shader->id, so->id, so->type,
- so->key.color_two_side, so->key.half_precision);
- nir_print_shader(ctx->s, stdout);
- }
+ NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
if (shader_debug_enabled(so->type)) {
- fprintf(stderr, "NIR (final form) for %s shader:\n",
- _mesa_shader_stage_to_string(so->type));
- nir_print_shader(ctx->s, stderr);
+ fprintf(stdout, "NIR (final form) for %s shader %s:\n",
+ ir3_shader_stage(so), so->shader->nir->info.name);
+ nir_print_shader(ctx->s, stdout);
}
ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
struct ir3_instruction *dst = ctx->last_dst[i];
dst->regs[0]->flags |= IR3_REG_HALF;
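+ /* for a split, the half flag also needs to propagate to the
+ * (vector) dst of the instruction being split:
+ */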
- if (ctx->last_dst[i]->opc == OPC_META_FO)
+ if (ctx->last_dst[i]->opc == OPC_META_SPLIT)
dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
}
}
ctx->last_dst_n = 0;
}
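+/* collect and split need the half/high flags of the dst to agree
+ * with those of each scalar element, so factor out fetching the
+ * relevant flags:
+ */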
+static unsigned
+dest_flags(struct ir3_instruction *instr)
+{
+ return instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH);
+}
+
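+/* A collect groups consecutive scalar values into a single virtual
+ * vecN src, ie. the counterpart of the split helper below:
+ */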
struct ir3_instruction *
ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
unsigned arrsz)
if (arrsz == 0)
return NULL;
- unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF;
+ unsigned flags = dest_flags(arr[0]);
- collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
- ir3_reg_create(collect, 0, flags); /* dst */
+ collect = ir3_instr_create2(block, OPC_META_COLLECT, 1 + arrsz);
+ __ssa_dst(collect)->flags |= flags;
for (unsigned i = 0; i < arrsz; i++) {
struct ir3_instruction *elem = arr[i];
elem = ir3_MOV(block, elem, type);
}
- compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags);
- ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem;
+ compile_assert(ctx, dest_flags(elem) == flags);
+ __ssa_src(collect, elem, flags);
}
collect->regs[0]->wrmask = MASK(arrsz);
}
/* helper for instructions that produce multiple consecutive scalar
- * outputs which need to have a split/fanout meta instruction inserted
+ * outputs which need to have a split meta instruction inserted
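+ * (eg. a sam instruction writing a vec4 dst gets four splits, one
+ * per scalar component that is consumed)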
*/
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
return;
}
- unsigned flags = src->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH);
+ unsigned flags = dest_flags(src);
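+ /* one split per scalar component; neighboring splits are linked
+ * via cp.left/cp.right so later passes can treat the components
+ * as a group:
+ */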
for (int i = 0, j = 0; i < n; i++) {
- struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
- ir3_reg_create(split, 0, IR3_REG_SSA | flags);
- ir3_reg_create(split, 0, IR3_REG_SSA | flags)->instr = src;
- split->fo.off = i + base;
+ struct ir3_instruction *split =
+ ir3_instr_create(block, OPC_META_SPLIT);
+ __ssa_dst(split)->flags |= flags;
+ __ssa_src(split, src, flags);
+ split->split.off = i + base;
if (prev) {
split->cp.left = prev;
instr = ir3_MOV(block, instr, TYPE_S16);
instr->regs[0]->num = regid(REG_A0, 0);
+ instr->regs[0]->flags &= ~IR3_REG_SSA;
instr->regs[0]->flags |= IR3_REG_HALF;
instr->regs[1]->flags |= IR3_REG_HALF;
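+ /* (the address is consumed as a half-precision (s16) value, hence
+ * the mov and flags above)
+ */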
/* condition always goes in predicate register: */
cond->regs[0]->num = regid(REG_P0, 0);
+ cond->regs[0]->flags &= ~IR3_REG_SSA;
return cond;
}
struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
- list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+ foreach_array (arr, &ctx->ir->array_list) {
if (arr->r == reg)
return arr;
}
mov->barrier_class = IR3_BARRIER_ARRAY_R;
mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
- ir3_reg_create(mov, 0, flags);
+ __ssa_dst(mov)->flags |= flags;
src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
COND(address, IR3_REG_RELATIV) | flags);
src->instr = arr->last_write;
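+ /* (the dependency on arr->last_write keeps this read ordered after
+ * the most recent write to the array)
+ */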
/* if not relative store, don't create an extra mov, since that
* ends up being difficult for cp to remove.
+ *
+ * Also, don't skip the mov if the src is meta (like fanout/split),
+ * since that creates a situation that RA can't really handle properly.
*/
- if (!address) {
+ if (!address && !is_meta(src)) {
dst = src->regs[0];
src->barrier_class |= IR3_BARRIER_ARRAY_W;