radeonsi/gfx10: implement most performance counters
[mesa.git] / src / freedreno / ir3 / ir3.c
index 7bdf8a39ba88f3b238b5205e0203bbbd22d16e43..23f29664e82c093baba5f51c3802cbaa0a5dda2a 100644 (file)
@@ -141,23 +141,14 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr,
        }
        cat0->repeat   = instr->repeat;
        cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
-       cat0->inv      = instr->cat0.inv;
-       cat0->comp     = instr->cat0.comp;
+       cat0->inv0     = instr->cat0.inv;
+       cat0->comp0    = instr->cat0.comp;
        cat0->opc      = instr->opc;
+       cat0->opc_hi   = instr->opc >= 16;
        cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
        cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
        cat0->opc_cat  = 0;
 
-       switch (instr->opc) {
-       case OPC_IF:
-       case OPC_ELSE:
-       case OPC_ENDIF:
-               cat0->dummy4 = 16;
-               break;
-       default:
-               break;
-       }
-
        return 0;
 }
 
@@ -487,17 +478,38 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
                cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
        }
 
+       if (instr->flags & IR3_INSTR_B) {
+               cat5->s2en_bindless.base_hi = instr->cat5.tex_base >> 1;
+               cat5->base_lo = instr->cat5.tex_base & 1;
+       }
+
        if (instr->flags & IR3_INSTR_S2EN) {
                struct ir3_register *samp_tex = instr->regs[1];
                iassert(samp_tex->flags & IR3_REG_HALF);
-               cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
-               /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, as
-                * this is what the blob does and it is presumably faster, but first
-                * we should confirm it is actually nonuniform and figure out when the
-                * whole descriptor mode mechanism was introduced.
-                */
-               cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
+               cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat,
+                                                                          (instr->flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF);
+               if (instr->flags & IR3_INSTR_B) {
+                       if (instr->flags & IR3_INSTR_A1EN) {
+                               cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_A1_UNIFORM;
+                       } else {
+                               cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_UNIFORM;
+                       }
+               } else {
+                       /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx,
+                        * as this is what the blob does and it is presumably faster, but
+                        * first we should confirm it is actually nonuniform and figure
+                        * out when the whole descriptor mode mechanism was introduced.
+                        */
+                       cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
+               }
                iassert(!(instr->cat5.samp | instr->cat5.tex));
+       } else if (instr->flags & IR3_INSTR_B) {
+               cat5->s2en_bindless.src3 = instr->cat5.samp;
+               if (instr->flags & IR3_INSTR_A1EN) {
+                       cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_A1_IMM;
+               } else {
+                       cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_IMM;
+               }
        } else {
                cat5->norm.samp = instr->cat5.samp;
                cat5->norm.tex  = instr->cat5.tex;
@@ -509,7 +521,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
        cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
        cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
        cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
-       cat5->is_s2en_bindless = !!(instr->flags & IR3_INSTR_S2EN);
+       cat5->is_s2en_bindless = !!(instr->flags & (IR3_INSTR_S2EN | IR3_INSTR_B));
        cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
        cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
        cat5->opc      = instr->opc;
@@ -523,13 +535,11 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
 static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
                struct ir3_info *info)
 {
-       struct ir3_register *src1, *src2;
+       struct ir3_register *src1, *src2, *ssbo;
        instr_cat6_a6xx_t *cat6 = ptr;
-       bool has_dest = (instr->opc == OPC_LDIB);
-
-       /* first reg should be SSBO binding point: */
-       iassert(instr->regs[1]->flags & IR3_REG_IMMED);
+       bool has_dest = (instr->opc == OPC_LDIB || instr->opc == OPC_LDC);
 
+       ssbo = instr->regs[1];
        src1 = instr->regs[2];
 
        if (has_dest) {
@@ -542,7 +552,7 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
        }
 
        cat6->type      = instr->cat6.type;
-       cat6->d         = instr->cat6.d - 1;
+       cat6->d         = instr->cat6.d - (instr->opc == OPC_LDC ? 0 : 1);
        cat6->typed     = instr->cat6.typed;
        cat6->type_size = instr->cat6.iim_val - 1;
        cat6->opc       = instr->opc;
@@ -552,7 +562,21 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
 
        cat6->src1 = reg(src1, info, instr->repeat, 0);
        cat6->src2 = reg(src2, info, instr->repeat, 0);
-       cat6->ssbo = instr->regs[1]->iim_val;
+       cat6->ssbo = reg(ssbo, info, instr->repeat, IR3_REG_IMMED);
+
+       if (instr->flags & IR3_INSTR_B) {
+               if (ssbo->flags & IR3_REG_IMMED) {
+                       cat6->desc_mode = CAT6_BINDLESS_IMM;
+               } else {
+                       cat6->desc_mode = CAT6_BINDLESS_UNIFORM;
+               }
+               cat6->base = instr->cat6.base;
+       } else {
+               if (ssbo->flags & IR3_REG_IMMED)
+                       cat6->desc_mode = CAT6_IMM;
+               else
+                       cat6->desc_mode = CAT6_UNIFORM;
+       }
 
        switch (instr->opc) {
        case OPC_ATOMIC_ADD:
@@ -885,13 +909,11 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
 {
        uint32_t *ptr, *dwords;
 
+       memset(info, 0, sizeof(*info));
        info->gpu_id        = gpu_id;
        info->max_reg       = -1;
        info->max_half_reg  = -1;
        info->max_const     = -1;
-       info->instrs_count  = 0;
-       info->sizedwords    = 0;
-       info->ss = info->sy = 0;
 
        foreach_block (block, &shader->block_list) {
                foreach_instr (instr, &block->instr_list) {
@@ -926,6 +948,13 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
                        info->nops_count += instr->nop;
                        if (instr->opc == OPC_NOP)
                                info->nops_count += 1 + instr->repeat;
+                       if (instr->opc == OPC_MOV) {
+                               if (instr->cat1.src_type == instr->cat1.dst_type) {
+                                       info->mov_count += 1 + instr->repeat;
+                               } else {
+                                       info->cov_count += 1 + instr->repeat;
+                               }
+                       }
                        dwords += 2;
 
                        if (instr->flags & IR3_INSTR_SS) {
@@ -986,6 +1015,7 @@ struct ir3_block * ir3_block_create(struct ir3 *shader)
        block->shader = shader;
        list_inithead(&block->node);
        list_inithead(&block->instr_list);
+       block->predecessors = _mesa_pointer_set_create(block);
        return block;
 }
 
@@ -1086,7 +1116,14 @@ ir3_instr_set_address(struct ir3_instruction *instr,
                debug_assert(instr->block == addr->block);
 
                instr->address = addr;
-               array_insert(ir, ir->indirects, instr);
+               debug_assert(reg_num(addr->regs[0]) == REG_A0);
+               unsigned comp = reg_comp(addr->regs[0]);
+               if (comp == 0) {
+                       array_insert(ir, ir->a0_users, instr);
+               } else {
+                       debug_assert(comp == 1);
+                       array_insert(ir, ir->a1_users, instr);
+               }
        }
 }
 
@@ -1105,18 +1142,39 @@ ir3_clear_mark(struct ir3 *ir)
        }
 }
 
-/* note: this will destroy instr->depth, don't do it until after sched! */
 unsigned
 ir3_count_instructions(struct ir3 *ir)
 {
        unsigned cnt = 1;
        foreach_block (block, &ir->block_list) {
                block->start_ip = cnt;
+               foreach_instr (instr, &block->instr_list) {
+                       instr->ip = cnt++;
+               }
                block->end_ip = cnt;
+       }
+       return cnt;
+}
+
+/* When counting instructions for RA, we insert extra fake instructions at the
+ * beginning of each block, where values become live, and at the end where
+ * values die. This prevents problems where values live-in at the beginning or
+ * live-out at the end of a block from being treated as if they were
+ * live-in/live-out at the first/last instruction, which would be incorrect.
+ * In ir3_legalize these ip's are assumed to be actual ip's of the final
+ * program, so it would be incorrect to use this everywhere.
+ */
+
+unsigned
+ir3_count_instructions_ra(struct ir3 *ir)
+{
+       unsigned cnt = 1;
+       foreach_block (block, &ir->block_list) {
+               block->start_ip = cnt++;
                foreach_instr (instr, &block->instr_list) {
                        instr->ip = cnt++;
-                       block->end_ip = instr->ip;
                }
+               block->end_ip = cnt++;
        }
        return cnt;
 }
@@ -1131,7 +1189,7 @@ ir3_lookup_array(struct ir3 *ir, unsigned id)
 }
 
 void
-ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx)
+ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps)
 {
        /* We could do this in a single pass if we can assume instructions
         * are always sorted.  Which currently might not always be true.
@@ -1143,9 +1201,9 @@ ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx)
 
        foreach_block (block, &ir->block_list) {
                foreach_instr (instr, &block->instr_list) {
-                       struct ir3_instruction *src;
-
-                       foreach_ssa_src (src, instr) {
+                       foreach_ssa_src_n (src, n, instr) {
+                               if (__is_false_dep(instr, n) && !falsedeps)
+                                       continue;
                                if (!src->uses)
                                        src->uses = _mesa_pointer_set_create(mem_ctx);
                                _mesa_set_add(src->uses, instr);
@@ -1153,3 +1211,69 @@ ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx)
                }
        }
 }
+
+/**
+ * Set the destination type of an instruction, for example if a
+ * conversion is folded in, handling the special cases where the
+ * instruction's dest type or opcode needs to be fixed up.
+ */
+void
+ir3_set_dst_type(struct ir3_instruction *instr, bool half)
+{
+       if (half) {
+               instr->regs[0]->flags |= IR3_REG_HALF;
+       } else {
+               instr->regs[0]->flags &= ~IR3_REG_HALF;
+       }
+
+       switch (opc_cat(instr->opc)) {
+       case 1: /* move instructions */
+               if (half) {
+                       instr->cat1.dst_type = half_type(instr->cat1.dst_type);
+               } else {
+                       instr->cat1.dst_type = full_type(instr->cat1.dst_type);
+               }
+               break;
+       case 4:
+               if (half) {
+                       instr->opc = cat4_half_opc(instr->opc);
+               } else {
+                       instr->opc = cat4_full_opc(instr->opc);
+               }
+               break;
+       case 5:
+               if (half) {
+                       instr->cat5.type = half_type(instr->cat5.type);
+               } else {
+                       instr->cat5.type = full_type(instr->cat5.type);
+               }
+               break;
+       }
+}
+
+/**
+ * One-time fixup for instruction src-types.  Other than cov's that
+ * are folded, an instruction's src type does not change.
+ */
+void
+ir3_fixup_src_type(struct ir3_instruction *instr)
+{
+       bool half = !!(instr->regs[1]->flags & IR3_REG_HALF);
+
+       switch (opc_cat(instr->opc)) {
+       case 1: /* move instructions */
+               if (half) {
+                       instr->cat1.src_type = half_type(instr->cat1.src_type);
+               } else {
+                       instr->cat1.src_type = full_type(instr->cat1.src_type);
+               }
+               break;
+       case 3:
+               if (half) {
+                       instr->opc = cat3_half_opc(instr->opc);
+               } else {
+                       instr->opc = cat3_full_opc(instr->opc);
+               }
+               break;
+       }
+}