ir3: Plumb through support for a1.x
[mesa.git] / src / freedreno / ir3 / ir3.c
index b89e9b316a59cef1e1f600f0977bb5a90551c38c..7af46055c02d59c8181df400d98747f88dace163 100644 (file)
@@ -148,8 +148,15 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr,
        cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
        cat0->opc_cat  = 0;
 
-       if (instr->opc == OPC_CONDEND || instr->opc == OPC_ENDPATCH)
+       switch (instr->opc) {
+       case OPC_IF:
+       case OPC_ELSE:
+       case OPC_ENDIF:
                cat0->dummy4 = 16;
+               break;
+       default:
+               break;
+       }
 
        return 0;
 }
@@ -455,24 +462,17 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
        struct ir3_register *src2;
        instr_cat5_t *cat5 = ptr;
 
-       iassert((instr->regs_count == 2) ||
-                       (instr->regs_count == 3) || (instr->regs_count == 4));
+       iassert((instr->regs_count == 1) ||
+                       (instr->regs_count == 2) ||
+                       (instr->regs_count == 3) ||
+                       (instr->regs_count == 4));
 
-       switch (instr->opc) {
-       case OPC_DSX:
-       case OPC_DSXPP_1:
-       case OPC_DSY:
-       case OPC_DSYPP_1:
-       case OPC_RGETPOS:
-       case OPC_RGETINFO:
-               iassert((instr->flags & IR3_INSTR_S2EN) == 0);
-               src1 = instr->regs[1];
-               src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
-               break;
-       default:
+       if (instr->flags & IR3_INSTR_S2EN) {
                src1 = instr->regs[2];
                src2 = instr->regs_count > 3 ? instr->regs[3] : NULL;
-               break;
+       } else {
+               src1 = instr->regs_count > 1 ? instr->regs[1] : NULL;
+               src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
        }
 
        assume(src1 || !src2);
@@ -482,20 +482,23 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
                cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
        }
 
+       if (src2) {
+               iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+               cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+       }
+
        if (instr->flags & IR3_INSTR_S2EN) {
                struct ir3_register *samp_tex = instr->regs[1];
-               if (src2) {
-                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
-                       cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
-               }
                iassert(samp_tex->flags & IR3_REG_HALF);
-               cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+               cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+               /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, as
+                * this is what the blob does and it is presumably faster, but first
+                * we should confirm it is actually nonuniform and figure out when the
+                * whole descriptor mode mechanism was introduced.
+                */
+               cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
                iassert(!(instr->cat5.samp | instr->cat5.tex));
        } else {
-               if (src2) {
-                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
-                       cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
-               }
                cat5->norm.samp = instr->cat5.samp;
                cat5->norm.tex  = instr->cat5.tex;
        }
@@ -506,7 +509,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
        cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
        cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
        cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
-       cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
+       cat5->is_s2en_bindless = !!(instr->flags & IR3_INSTR_S2EN);
        cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
        cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
        cat5->opc      = instr->opc;
@@ -564,31 +567,29 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
        case OPC_ATOMIC_OR:
        case OPC_ATOMIC_XOR:
                cat6->pad1 = 0x1;
-               cat6->pad2 = 0xc;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x3;
+               cat6->pad3 = 0xc;
+               cat6->pad5 = 0x3;
                break;
        case OPC_STIB:
                cat6->pad1 = 0x0;
-               cat6->pad2 = 0xc;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x2;
+               cat6->pad3 = 0xc;
+               cat6->pad5 = 0x2;
                break;
        case OPC_LDIB:
                cat6->pad1 = 0x1;
-               cat6->pad2 = 0xc;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x2;
+               cat6->pad3 = 0xc;
+               cat6->pad5 = 0x2;
                break;
        case OPC_LDC:
                cat6->pad1 = 0x0;
-               cat6->pad2 = 0x8;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x2;
+               cat6->pad3 = 0x8;
+               cat6->pad5 = 0x2;
                break;
        default:
                iassert(0);
        }
+       cat6->pad2 = 0x0;
+       cat6->pad4 = 0x0;
 
        return 0;
 }
@@ -892,8 +893,8 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
        info->sizedwords    = 0;
        info->ss = info->sy = 0;
 
-       list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
-               list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+       foreach_block (block, &shader->block_list) {
+               foreach_instr (instr, &block->instr_list) {
                        info->sizedwords += 2;
                }
        }
@@ -910,22 +911,36 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
 
        ptr = dwords = calloc(4, info->sizedwords);
 
-       list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
-               list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+       foreach_block (block, &shader->block_list) {
+               unsigned sfu_delay = 0;
+
+               foreach_instr (instr, &block->instr_list) {
                        int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
                        if (ret)
                                goto fail;
+
+                       if ((instr->opc == OPC_BARY_F) && (instr->regs[0]->flags & IR3_REG_EI))
+                               info->last_baryf = info->instrs_count;
+
                        info->instrs_count += 1 + instr->repeat + instr->nop;
                        info->nops_count += instr->nop;
                        if (instr->opc == OPC_NOP)
                                info->nops_count += 1 + instr->repeat;
                        dwords += 2;
 
-                       if (instr->flags & IR3_INSTR_SS)
+                       if (instr->flags & IR3_INSTR_SS) {
                                info->ss++;
+                               info->sstall += sfu_delay;
+                       }
 
                        if (instr->flags & IR3_INSTR_SY)
                                info->sy++;
+
+                       if (is_sfu(instr)) {
+                               sfu_delay = 10;
+                       } else if (sfu_delay > 0) {
+                               sfu_delay--;
+                       }
                }
        }
 
@@ -1071,21 +1086,28 @@ ir3_instr_set_address(struct ir3_instruction *instr,
                debug_assert(instr->block == addr->block);
 
                instr->address = addr;
-               array_insert(ir, ir->indirects, instr);
+               debug_assert(reg_num(addr->regs[0]) == REG_A0);
+               unsigned comp = reg_comp(addr->regs[0]);
+               if (comp == 0) {
+                       array_insert(ir, ir->a0_users, instr);
+               } else {
+                       debug_assert(comp == 1);
+                       array_insert(ir, ir->a1_users, instr);
+               }
        }
 }
 
 void
 ir3_block_clear_mark(struct ir3_block *block)
 {
-       list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
+       foreach_instr (instr, &block->instr_list)
                instr->flags &= ~IR3_INSTR_MARK;
 }
 
 void
 ir3_clear_mark(struct ir3 *ir)
 {
-       list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+       foreach_block (block, &ir->block_list) {
                ir3_block_clear_mark(block);
        }
 }
@@ -1094,11 +1116,11 @@ ir3_clear_mark(struct ir3 *ir)
 unsigned
 ir3_count_instructions(struct ir3 *ir)
 {
-       unsigned cnt = 0;
-       list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+       unsigned cnt = 1;
+       foreach_block (block, &ir->block_list) {
                block->start_ip = cnt;
                block->end_ip = cnt;
-               list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+               foreach_instr (instr, &block->instr_list) {
                        instr->ip = cnt++;
                        block->end_ip = instr->ip;
                }
@@ -1109,8 +1131,32 @@ ir3_count_instructions(struct ir3 *ir)
 struct ir3_array *
 ir3_lookup_array(struct ir3 *ir, unsigned id)
 {
-       list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
+       foreach_array (arr, &ir->array_list)
                if (arr->id == id)
                        return arr;
        return NULL;
 }
+
+void
+ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx)
+{
+       /* We could do this in a single pass if we could assume instructions
+        * are always sorted, which currently might not always be true
+        * (in particular after the ir3_group pass, but maybe other places).
+        */
+       foreach_block (block, &ir->block_list)
+               foreach_instr (instr, &block->instr_list)
+                       instr->uses = NULL;
+
+       foreach_block (block, &ir->block_list) {
+               foreach_instr (instr, &block->instr_list) {
+                       struct ir3_instruction *src;
+
+                       foreach_ssa_src (src, instr) {
+                               if (!src->uses)
+                                       src->uses = _mesa_pointer_set_create(mem_ctx);
+                               _mesa_set_add(src->uses, instr);
+                       }
+               }
+       }
+}