freedreno: small fix for flushing dependent batches
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3.c
index c5a030282d8f832ef172c7ede24eb22b358cfdfd..dd5fb2fbbe5ae483d5cb69f7601233883cf80d03 100644 (file)
@@ -106,7 +106,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
                        info->max_const = MAX2(info->max_const, max);
                } else if (val.num == 63) {
                        /* ignore writes to dummy register r63.x */
-               } else if ((max != REG_A0) && (max != REG_P0)) {
+               } else if (max < 48) {
                        if (reg->flags & IR3_REG_HALF) {
                                info->max_half_reg = MAX2(info->max_half_reg, max);
                        } else {
@@ -258,6 +258,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
        cat2->dst      = reg(dst, info, instr->repeat,
                        IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
        cat2->repeat   = instr->repeat;
+       cat2->sat      = !!(instr->flags & IR3_INSTR_SAT);
        cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
        cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
        cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
@@ -354,6 +355,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
 
        cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
        cat3->repeat   = instr->repeat;
+       cat3->sat      = !!(instr->flags & IR3_INSTR_SAT);
        cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
        cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
        cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
@@ -401,6 +403,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
 
        cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
        cat4->repeat   = instr->repeat;
+       cat4->sat      = !!(instr->flags & IR3_INSTR_SAT);
        cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
        cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
        cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
@@ -475,6 +478,13 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
        struct ir3_register *dst, *src1, *src2;
        instr_cat6_t *cat6 = ptr;
 
+       cat6->type     = instr->cat6.type;
+       cat6->opc      = instr->opc;
+       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat6->g        = !!(instr->flags & IR3_INSTR_G);
+       cat6->opc_cat  = 6;
+
        /* the "dst" for a store instruction is (from the perspective
         * of data flow in the shader, ie. register use/def, etc) in
         * fact a register that is read by the instruction, rather
@@ -494,13 +504,114 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
        }
 
-
        /* TODO we need a more comprehensive list about which instructions
         * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
         * indicate to use the src_off encoding even if offset is zero
         * (but then what to do about dst_off?)
         */
-       if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
+       if (is_atomic(instr->opc)) {
+               instr_cat6ldgb_t *ldgb = ptr;
+
+               /* maybe these two bits both determine the instruction encoding? */
+               cat6->src_off = false;
+
+               ldgb->d = instr->cat6.d - 1;
+               ldgb->typed = instr->cat6.typed;
+               ldgb->type_size = instr->cat6.iim_val - 1;
+
+               ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+               if (ldgb->g) {
+                       struct ir3_register *src3 = instr->regs[3];
+                       struct ir3_register *src4 = instr->regs[4];
+
+                       /* first src is src_ssbo: */
+                       iassert(src1->flags & IR3_REG_IMMED);
+                       ldgb->src_ssbo = src1->uim_val;
+
+                       ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
+                       ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
+
+                       ldgb->src3 = reg(src4, info, instr->repeat, 0);
+                       ldgb->pad0 = 0x1;
+                       ldgb->pad3 = 0x1;
+               } else {
+                       ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED);
+                       ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
+                       ldgb->pad0 = 0x1;
+                       ldgb->pad3 = 0x0;
+               }
+
+               return 0;
+       } else if (instr->opc == OPC_LDGB) {
+               struct ir3_register *src3 = instr->regs[3];
+               instr_cat6ldgb_t *ldgb = ptr;
+
+               /* maybe these two bits both determine the instruction encoding? */
+               cat6->src_off = false;
+
+               ldgb->d = instr->cat6.d - 1;
+               ldgb->typed = instr->cat6.typed;
+               ldgb->type_size = instr->cat6.iim_val - 1;
+
+               ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+               /* first src is src_ssbo: */
+               iassert(src1->flags & IR3_REG_IMMED);
+               ldgb->src_ssbo = src1->uim_val;
+
+               /* then next two are src1/src2: */
+               ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+               ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
+               ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
+               ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
+
+               ldgb->pad0 = 0x0;
+               ldgb->pad3 = 0x1;
+
+               return 0;
+       } else if (instr->opc == OPC_RESINFO) {
+               instr_cat6ldgb_t *ldgb = ptr;
+
+               ldgb->d = instr->cat6.d - 1;
+
+               ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+               /* first src is src_ssbo: */
+               iassert(src1->flags & IR3_REG_IMMED);
+               ldgb->src_ssbo = src1->uim_val;
+
+               return 0;
+       } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) {
+               struct ir3_register *src3 = instr->regs[4];
+               instr_cat6stgb_t *stgb = ptr;
+
+               /* maybe these two bits both determine the instruction encoding? */
+               cat6->src_off = true;
+               stgb->pad3 = 0x2;
+
+               stgb->d = instr->cat6.d - 1;
+               stgb->typed = instr->cat6.typed;
+               stgb->type_size = instr->cat6.iim_val - 1;
+
+               /* first src is dst_ssbo: */
+               iassert(dst->flags & IR3_REG_IMMED);
+               stgb->dst_ssbo = dst->uim_val;
+
+               /* then src1/src2/src3: */
+               stgb->src1 = reg(src1, info, instr->repeat, 0);
+               stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+               stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
+               stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
+               stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
+
+               return 0;
+       } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) ||
+                       (instr->opc == OPC_LDL)) {
                instr_cat6a_t *cat6a = ptr;
 
                cat6->src_off = true;
@@ -525,7 +636,8 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                }
        }
 
-       if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+       if (instr->cat6.dst_offset || (instr->opc == OPC_STG) ||
+                       (instr->opc == OPC_STL)) {
                instr_cat6c_t *cat6c = ptr;
                cat6->dst_off = true;
                cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
@@ -536,12 +648,23 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
        }
 
-       cat6->type     = instr->cat6.type;
-       cat6->opc      = instr->opc;
-       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat6->g        = !!(instr->flags & IR3_INSTR_G);
-       cat6->opc_cat  = 6;
+       return 0;
+}
+
+static int emit_cat7(struct ir3_instruction *instr, void *ptr,
+               struct ir3_info *info)          /* 'info' is not referenced in this function */
+{      /* Fill the instr_cat7_t at 'ptr' from 'instr'; always returns 0. */
+       instr_cat7_t *cat7 = ptr;
+
+       cat7->ss      = !!(instr->flags & IR3_INSTR_SS);        /* !! folds the masked flag to 0 or 1 */
+       cat7->w       = instr->cat7.w;  /* w, r, l, g are copied unchanged from instr->cat7 */
+       cat7->r       = instr->cat7.r;
+       cat7->l       = instr->cat7.l;
+       cat7->g       = instr->cat7.g;
+       cat7->opc     = instr->opc;
+       cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
+       cat7->sync    = !!(instr->flags & IR3_INSTR_SY);
+       cat7->opc_cat = 7;      /* category number written into the encoding */
 
        return 0;
 }
@@ -549,6 +672,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 static int (*emit[])(struct ir3_instruction *instr, void *ptr,
                struct ir3_info *info) = {
        emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
+       emit_cat7,      /* index 7, following emit_cat0..emit_cat6 so position matches category number */
 };
 
 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
@@ -562,6 +686,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
        info->max_const     = -1;
        info->instrs_count  = 0;
        info->sizedwords    = 0;
+       info->ss = info->sy = 0;
 
        list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
                list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
@@ -588,6 +713,12 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
                                goto fail;
                        info->instrs_count += 1 + instr->repeat;
                        dwords += 2;
+
+                       if (instr->flags & IR3_INSTR_SS)
+                               info->ss++;
+
+                       if (instr->flags & IR3_INSTR_SY)
+                               info->sy++;
                }
        }
 
@@ -614,8 +745,7 @@ static void insert_instr(struct ir3_block *block,
 {
        struct ir3 *shader = block->shader;
 #ifdef DEBUG
-       static uint32_t serialno = 0;
-       instr->serialno = ++serialno;
+       instr->serialno = ++shader->instr_count;
 #endif
        list_addtail(&instr->node, &block->instr_list);
 
@@ -627,8 +757,7 @@ struct ir3_block * ir3_block_create(struct ir3 *shader)
 {
        struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
 #ifdef DEBUG
-       static uint32_t serialno = 0;
-       block->serialno = ++serialno;
+       block->serialno = ++shader->block_count;
 #endif
        block->shader = shader;
        list_inithead(&block->node);
@@ -696,6 +825,12 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
        return new_instr;
 }
 
+/* Add a false dependency: record 'dep' in instr->deps, to ensure 'dep' is scheduled before 'instr': */
+void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
+{
+       array_insert(instr, instr->deps, dep);
+}
+
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
                int num, int flags)
 {