freedreno: small fix for flushing dependent batches
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3.c
index 3de8fdc11b3ea82f0b368649309ba81b3569bc94..dd5fb2fbbe5ae483d5cb69f7601233883cf80d03 100644 (file)
 #include <stdbool.h>
 #include <errno.h>
 
+#include "util/ralloc.h"
+
 #include "freedreno_util.h"
 #include "instr-a3xx.h"
 
-#define CHUNK_SZ 1020
-
-struct ir3_heap_chunk {
-       struct ir3_heap_chunk *next;
-       uint32_t heap[CHUNK_SZ];
-};
-
-static void grow_heap(struct ir3 *shader)
-{
-       struct ir3_heap_chunk *chunk = calloc(1, sizeof(*chunk));
-       chunk->next = shader->chunk;
-       shader->chunk = chunk;
-       shader->heap_idx = 0;
-}
-
 /* simple allocator to carve allocations out of an up-front allocated heap,
  * so that we can free everything easily in one shot.
  */
 void * ir3_alloc(struct ir3 *shader, int sz)
 {
-       void *ptr;
-
-       sz = align(sz, 4) / 4;
-
-       if ((shader->heap_idx + sz) > CHUNK_SZ)
-               grow_heap(shader);
-
-       ptr = &shader->chunk->heap[shader->heap_idx];
-       shader->heap_idx += sz;
-
-       return ptr;
+       return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */
 }
 
 struct ir3 * ir3_create(struct ir3_compiler *compiler,
                unsigned nin, unsigned nout)
 {
-       struct ir3 *shader = calloc(1, sizeof(struct ir3));
-
-       grow_heap(shader);
+       struct ir3 *shader = rzalloc(compiler, struct ir3);
 
        shader->compiler = compiler;
        shader->ninputs = nin;
@@ -88,15 +63,7 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler,
 
 void ir3_destroy(struct ir3 *shader)
 {
-       while (shader->chunk) {
-               struct ir3_heap_chunk *chunk = shader->chunk;
-               shader->chunk = chunk->next;
-               free(chunk);
-       }
-       free(shader->indirects);
-       free(shader->predicates);
-       free(shader->baryfs);
-       free(shader);
+       ralloc_free(shader);
 }
 
 #define iassert(cond) do { \
@@ -139,7 +106,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
                        info->max_const = MAX2(info->max_const, max);
                } else if (val.num == 63) {
                        /* ignore writes to dummy register r63.x */
-               } else if ((max != REG_A0) && (max != REG_P0)) {
+               } else if (max < 48) {
                        if (reg->flags & IR3_REG_HALF) {
                                info->max_half_reg = MAX2(info->max_half_reg, max);
                        } else {
@@ -156,7 +123,9 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr,
 {
        instr_cat0_t *cat0 = ptr;
 
-       if (info->gpu_id >= 400) {
+       if (info->gpu_id >= 500) {
+               cat0->a5xx.immed = instr->cat0.immed;
+       } else if (info->gpu_id >= 400) {
                cat0->a4xx.immed = instr->cat0.immed;
        } else {
                cat0->a3xx.immed = instr->cat0.immed;
@@ -289,6 +258,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
        cat2->dst      = reg(dst, info, instr->repeat,
                        IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
        cat2->repeat   = instr->repeat;
+       cat2->sat      = !!(instr->flags & IR3_INSTR_SAT);
        cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
        cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
        cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
@@ -385,6 +355,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
 
        cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
        cat3->repeat   = instr->repeat;
+       cat3->sat      = !!(instr->flags & IR3_INSTR_SAT);
        cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
        cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
        cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
@@ -432,6 +403,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
 
        cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
        cat4->repeat   = instr->repeat;
+       cat4->sat      = !!(instr->flags & IR3_INSTR_SAT);
        cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
        cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
        cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
@@ -455,12 +427,14 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
 
        iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
 
+       assume(src1 || !src2);
+       assume(src2 || !src3);
+
        if (src1) {
                cat5->full = ! (src1->flags & IR3_REG_HALF);
                cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
        }
 
-
        if (instr->flags & IR3_INSTR_S2EN) {
                if (src2) {
                        iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
@@ -504,6 +478,13 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
        struct ir3_register *dst, *src1, *src2;
        instr_cat6_t *cat6 = ptr;
 
+       cat6->type     = instr->cat6.type;
+       cat6->opc      = instr->opc;
+       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat6->g        = !!(instr->flags & IR3_INSTR_G);
+       cat6->opc_cat  = 6;
+
        /* the "dst" for a store instruction is (from the perspective
         * of data flow in the shader, ie. register use/def, etc) in
         * fact a register that is read by the instruction, rather
@@ -523,13 +504,114 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
        }
 
-
        /* TODO we need a more comprehensive list about which instructions
         * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
         * indicate to use the src_off encoding even if offset is zero
         * (but then what to do about dst_off?)
         */
-       if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
+       if (is_atomic(instr->opc)) {
+               instr_cat6ldgb_t *ldgb = ptr;
+
+               /* maybe these two bits both determine the instruction encoding? */
+               cat6->src_off = false;
+
+               ldgb->d = instr->cat6.d - 1;
+               ldgb->typed = instr->cat6.typed;
+               ldgb->type_size = instr->cat6.iim_val - 1;
+
+               ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+               if (ldgb->g) {
+                       struct ir3_register *src3 = instr->regs[3];
+                       struct ir3_register *src4 = instr->regs[4];
+
+                       /* first src is src_ssbo: */
+                       iassert(src1->flags & IR3_REG_IMMED);
+                       ldgb->src_ssbo = src1->uim_val;
+
+                       ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
+                       ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
+
+                       ldgb->src3 = reg(src4, info, instr->repeat, 0);
+                       ldgb->pad0 = 0x1;
+                       ldgb->pad3 = 0x1;
+               } else {
+                       ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED);
+                       ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+                       ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
+                       ldgb->pad0 = 0x1;
+                       ldgb->pad3 = 0x0;
+               }
+
+               return 0;
+       } else if (instr->opc == OPC_LDGB) {
+               struct ir3_register *src3 = instr->regs[3];
+               instr_cat6ldgb_t *ldgb = ptr;
+
+               /* maybe these two bits both determine the instruction encoding? */
+               cat6->src_off = false;
+
+               ldgb->d = instr->cat6.d - 1;
+               ldgb->typed = instr->cat6.typed;
+               ldgb->type_size = instr->cat6.iim_val - 1;
+
+               ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+               /* first src is src_ssbo: */
+               iassert(src1->flags & IR3_REG_IMMED);
+               ldgb->src_ssbo = src1->uim_val;
+
+               /* then next two are src1/src2: */
+               ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+               ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
+               ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
+               ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
+
+               ldgb->pad0 = 0x0;
+               ldgb->pad3 = 0x1;
+
+               return 0;
+       } else if (instr->opc == OPC_RESINFO) {
+               instr_cat6ldgb_t *ldgb = ptr;
+
+               ldgb->d = instr->cat6.d - 1;
+
+               ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+               /* first src is src_ssbo: */
+               iassert(src1->flags & IR3_REG_IMMED);
+               ldgb->src_ssbo = src1->uim_val;
+
+               return 0;
+       } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) {
+               struct ir3_register *src3 = instr->regs[4];
+               instr_cat6stgb_t *stgb = ptr;
+
+               /* maybe these two bits both determine the instruction encoding? */
+               cat6->src_off = true;
+               stgb->pad3 = 0x2;
+
+               stgb->d = instr->cat6.d - 1;
+               stgb->typed = instr->cat6.typed;
+               stgb->type_size = instr->cat6.iim_val - 1;
+
+               /* first src is dst_ssbo: */
+               iassert(dst->flags & IR3_REG_IMMED);
+               stgb->dst_ssbo = dst->uim_val;
+
+               /* then src1/src2/src3: */
+               stgb->src1 = reg(src1, info, instr->repeat, 0);
+               stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+               stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
+               stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
+               stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
+
+               return 0;
+       } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) ||
+                       (instr->opc == OPC_LDL)) {
                instr_cat6a_t *cat6a = ptr;
 
                cat6->src_off = true;
@@ -554,7 +636,8 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                }
        }
 
-       if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+       if (instr->cat6.dst_offset || (instr->opc == OPC_STG) ||
+                       (instr->opc == OPC_STL)) {
                instr_cat6c_t *cat6c = ptr;
                cat6->dst_off = true;
                cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
@@ -565,12 +648,23 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
        }
 
-       cat6->type     = instr->cat6.type;
-       cat6->opc      = instr->opc;
-       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat6->g        = !!(instr->flags & IR3_INSTR_G);
-       cat6->opc_cat  = 6;
+       return 0;
+}
+
+static int emit_cat7(struct ir3_instruction *instr, void *ptr,
+               struct ir3_info *info)
+{
+       instr_cat7_t *cat7 = ptr;
+
+       cat7->ss      = !!(instr->flags & IR3_INSTR_SS);
+       cat7->w       = instr->cat7.w;
+       cat7->r       = instr->cat7.r;
+       cat7->l       = instr->cat7.l;
+       cat7->g       = instr->cat7.g;
+       cat7->opc     = instr->opc;
+       cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
+       cat7->sync    = !!(instr->flags & IR3_INSTR_SY);
+       cat7->opc_cat = 7;
 
        return 0;
 }
@@ -578,6 +672,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 static int (*emit[])(struct ir3_instruction *instr, void *ptr,
                struct ir3_info *info) = {
        emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
+       emit_cat7,
 };
 
 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
@@ -591,6 +686,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
        info->max_const     = -1;
        info->instrs_count  = 0;
        info->sizedwords    = 0;
+       info->ss = info->sy = 0;
 
        list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
                list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
@@ -598,7 +694,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
                }
        }
 
-       /* need a integer number of instruction "groups" (sets of 16
+       /* need an integer number of instruction "groups" (sets of 16
         * instructions on a4xx or sets of 4 instructions on a3xx),
         * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
         */
@@ -617,6 +713,12 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
                                goto fail;
                        info->instrs_count += 1 + instr->repeat;
                        dwords += 2;
+
+                       if (instr->flags & IR3_INSTR_SS)
+                               info->ss++;
+
+                       if (instr->flags & IR3_INSTR_SY)
+                               info->sy++;
                }
        }
 
@@ -643,21 +745,19 @@ static void insert_instr(struct ir3_block *block,
 {
        struct ir3 *shader = block->shader;
 #ifdef DEBUG
-       static uint32_t serialno = 0;
-       instr->serialno = ++serialno;
+       instr->serialno = ++shader->instr_count;
 #endif
        list_addtail(&instr->node, &block->instr_list);
 
        if (is_input(instr))
-               array_insert(shader->baryfs, instr);
+               array_insert(shader, shader->baryfs, instr);
 }
 
 struct ir3_block * ir3_block_create(struct ir3 *shader)
 {
        struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
 #ifdef DEBUG
-       static uint32_t serialno = 0;
-       block->serialno = ++serialno;
+       block->serialno = ++shader->block_count;
 #endif
        block->shader = shader;
        list_inithead(&block->node);
@@ -725,6 +825,12 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
        return new_instr;
 }
 
+/* Add a false dependency on dep to instr, to ensure dep is scheduled first: */
+void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
+{
+       array_insert(instr, instr->deps, dep);
+}
+
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
                int num, int flags)
 {
@@ -752,7 +858,7 @@ ir3_instr_set_address(struct ir3_instruction *instr,
        if (instr->address != addr) {
                struct ir3 *ir = instr->block->shader;
                instr->address = addr;
-               array_insert(ir->indirects, instr);
+               array_insert(ir, ir->indirects, instr);
        }
 }