cat0->sync = !!(instr->flags & IR3_INSTR_SY);
cat0->opc_cat = 0;
- if (instr->opc == OPC_CONDEND || instr->opc == OPC_ENDPATCH)
+ switch (instr->opc) {
+ case OPC_IF:
+ case OPC_ELSE:
+ case OPC_ENDIF:
cat0->dummy4 = 16;
+ break;
+ default:
+ break;
+ }
return 0;
}
struct ir3_register *src2;
instr_cat5_t *cat5 = ptr;
- iassert((instr->regs_count == 2) ||
- (instr->regs_count == 3) || (instr->regs_count == 4));
+ iassert((instr->regs_count == 1) ||
+ (instr->regs_count == 2) ||
+ (instr->regs_count == 3) ||
+ (instr->regs_count == 4));
- switch (instr->opc) {
- case OPC_DSX:
- case OPC_DSXPP_1:
- case OPC_DSY:
- case OPC_DSYPP_1:
- case OPC_RGETPOS:
- case OPC_RGETINFO:
- iassert((instr->flags & IR3_INSTR_S2EN) == 0);
- src1 = instr->regs[1];
- src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
- break;
- default:
+ if (instr->flags & IR3_INSTR_S2EN) {
src1 = instr->regs[2];
src2 = instr->regs_count > 3 ? instr->regs[3] : NULL;
- break;
+ } else {
+ src1 = instr->regs_count > 1 ? instr->regs[1] : NULL;
+ src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
}
assume(src1 || !src2);
cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
}
+ if (src2) {
+ iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+ cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+ }
+
if (instr->flags & IR3_INSTR_S2EN) {
struct ir3_register *samp_tex = instr->regs[1];
- if (src2) {
- iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
- cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
- }
iassert(samp_tex->flags & IR3_REG_HALF);
- cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+ cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+ /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, as
+ * this is what the blob does and it is presumably faster, but first
+ * we should confirm it is actually nonuniform and figure out when the
+ * whole descriptor mode mechanism was introduced.
+ */
+ cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
iassert(!(instr->cat5.samp | instr->cat5.tex));
} else {
- if (src2) {
- iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
- cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
- }
cat5->norm.samp = instr->cat5.samp;
cat5->norm.tex = instr->cat5.tex;
}
cat5->is_3d = !!(instr->flags & IR3_INSTR_3D);
cat5->is_a = !!(instr->flags & IR3_INSTR_A);
cat5->is_s = !!(instr->flags & IR3_INSTR_S);
- cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN);
+ cat5->is_s2en_bindless = !!(instr->flags & IR3_INSTR_S2EN);
cat5->is_o = !!(instr->flags & IR3_INSTR_O);
cat5->is_p = !!(instr->flags & IR3_INSTR_P);
cat5->opc = instr->opc;
case OPC_ATOMIC_OR:
case OPC_ATOMIC_XOR:
cat6->pad1 = 0x1;
- cat6->pad2 = 0xc;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x3;
+ cat6->pad3 = 0xc;
+ cat6->pad5 = 0x3;
break;
case OPC_STIB:
cat6->pad1 = 0x0;
- cat6->pad2 = 0xc;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x2;
+ cat6->pad3 = 0xc;
+ cat6->pad5 = 0x2;
break;
case OPC_LDIB:
cat6->pad1 = 0x1;
- cat6->pad2 = 0xc;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x2;
+ cat6->pad3 = 0xc;
+ cat6->pad5 = 0x2;
break;
case OPC_LDC:
cat6->pad1 = 0x0;
- cat6->pad2 = 0x8;
- cat6->pad3 = 0x0;
- cat6->pad4 = 0x2;
+ cat6->pad3 = 0x8;
+ cat6->pad5 = 0x2;
break;
default:
iassert(0);
}
+ cat6->pad2 = 0x0;
+ cat6->pad4 = 0x0;
return 0;
}
info->sizedwords = 0;
info->ss = info->sy = 0;
- list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
- list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ foreach_block (block, &shader->block_list) {
+ foreach_instr (instr, &block->instr_list) {
info->sizedwords += 2;
}
}
ptr = dwords = calloc(4, info->sizedwords);
- list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
- list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ foreach_block (block, &shader->block_list) {
+ unsigned sfu_delay = 0;
+
+ foreach_instr (instr, &block->instr_list) {
int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
if (ret)
goto fail;
+
+ if ((instr->opc == OPC_BARY_F) && (instr->regs[0]->flags & IR3_REG_EI))
+ info->last_baryf = info->instrs_count;
+
info->instrs_count += 1 + instr->repeat + instr->nop;
info->nops_count += instr->nop;
if (instr->opc == OPC_NOP)
info->nops_count += 1 + instr->repeat;
dwords += 2;
- if (instr->flags & IR3_INSTR_SS)
+ if (instr->flags & IR3_INSTR_SS) {
info->ss++;
+ info->sstall += sfu_delay;
+ }
if (instr->flags & IR3_INSTR_SY)
info->sy++;
+
+ if (is_sfu(instr)) {
+ sfu_delay = 10;
+ } else if (sfu_delay > 0) {
+ sfu_delay--;
+ }
}
}
debug_assert(instr->block == addr->block);
instr->address = addr;
- array_insert(ir, ir->indirects, instr);
+ debug_assert(reg_num(addr->regs[0]) == REG_A0);
+ unsigned comp = reg_comp(addr->regs[0]);
+ if (comp == 0) {
+ array_insert(ir, ir->a0_users, instr);
+ } else {
+ debug_assert(comp == 1);
+ array_insert(ir, ir->a1_users, instr);
+ }
}
}
/* Clear the IR3_INSTR_MARK flag on every instruction in @block.
 * The mark bit is scratch state used by passes (e.g. scheduling/DCE
 * style traversals); callers reset it before a new traversal.
 */
void
ir3_block_clear_mark(struct ir3_block *block)
{
	foreach_instr (instr, &block->instr_list)
		instr->flags &= ~IR3_INSTR_MARK;
}
void
ir3_clear_mark(struct ir3 *ir)
{
- list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ foreach_block (block, &ir->block_list) {
ir3_block_clear_mark(block);
}
}
unsigned
ir3_count_instructions(struct ir3 *ir)
{
- unsigned cnt = 0;
- list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ unsigned cnt = 1;
+ foreach_block (block, &ir->block_list) {
block->start_ip = cnt;
block->end_ip = cnt;
- list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ foreach_instr (instr, &block->instr_list) {
instr->ip = cnt++;
block->end_ip = instr->ip;
}
/* Find the array with the given @id in @ir's array list.
 *
 * Returns the matching ir3_array, or NULL if no array with that
 * id exists.  Linear scan; array lists are expected to be short.
 */
struct ir3_array *
ir3_lookup_array(struct ir3 *ir, unsigned id)
{
	foreach_array (arr, &ir->array_list)
		if (arr->id == id)
			return arr;
	return NULL;
}
+
+void
+ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx)
+{
+ /* We could do this in a single pass if we can assume instructions
+ * are always sorted. Which currently might not always be true.
+ * (In particular after ir3_group pass, but maybe other places.)
+ */
+ foreach_block (block, &ir->block_list)
+ foreach_instr (instr, &block->instr_list)
+ instr->uses = NULL;
+
+ foreach_block (block, &ir->block_list) {
+ foreach_instr (instr, &block->instr_list) {
+ struct ir3_instruction *src;
+
+ foreach_ssa_src (src, instr) {
+ if (!src->uses)
+ src->uses = _mesa_pointer_set_create(mem_ctx);
+ _mesa_set_add(src->uses, instr);
+ }
+ }
+ }
+}