freedreno/ir3: updated cat6 encoding
author Rob Clark <robclark@freedesktop.org>
Thu, 23 Jul 2015 19:31:13 +0000 (15:31 -0400)
committer Rob Clark <robclark@freedesktop.org>
Mon, 27 Jul 2015 17:51:05 +0000 (13:51 -0400)
Sync updated cat6 encoding from freedreno.git, needed to properly encode
store instructions.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
src/gallium/drivers/freedreno/ir3/instr-a3xx.h
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c

index 48ae7c71b9f16e3d5b4f0b479cffe7985fe847f8..83ed5ffdca05fa37ae214e8aa72c9382fcfc6b58 100644 (file)
@@ -103,7 +103,7 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
        } else if ((reg.num == REG_P0) && !c) {
                printf("p0.%c", component[reg.comp]);
        } else {
-               printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
+               printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]);
        }
 }
 
@@ -122,6 +122,32 @@ static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
        print_reg(reg, full, r, c, im, neg, abs, addr_rel);
 }
 
+/* TODO switch to using reginfo struct everywhere, since more readable
+ * than passing a bunch of bools to print_reg_src
+ */
+
+struct reginfo {
+       reg_t reg;
+       bool full;
+       bool r;
+       bool c;
+       bool im;
+       bool neg;
+       bool abs;
+       bool addr_rel;
+};
+
+static void print_src(struct reginfo *info)
+{
+       print_reg_src(info->reg, info->full, info->r, info->c, info->im,
+                       info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct reginfo *info)
+//{
+//     print_reg_dst(info->reg, info->full, info->addr_rel);
+//}
+
 static void print_instr_cat0(instr_t *instr)
 {
        instr_cat0_t *cat0 = &instr->cat0;
@@ -454,10 +480,70 @@ static void print_instr_cat6(instr_t *instr)
 {
        instr_cat6_t *cat6 = &instr->cat6;
        char sd = 0, ss = 0;  /* dst/src address space */
-       bool full = type_size(cat6->type) == 32;
        bool nodst = false;
+       struct reginfo dst, src1, src2;
+       int src1off = 0, dstoff = 0;
 
-       printf(".%s ", type[cat6->type]);
+       memset(&dst, 0, sizeof(dst));
+       memset(&src1, 0, sizeof(src1));
+       memset(&src2, 0, sizeof(src2));
+
+       switch (cat6->opc) {
+       case OPC_RESINFO:
+       case OPC_RESFMT:
+               dst.full  = type_size(cat6->type) == 32;
+               src1.full = type_size(cat6->type) == 32;
+               src2.full = type_size(cat6->type) == 32;
+               break;
+       case OPC_L2G:
+       case OPC_G2L:
+               dst.full = true;
+               src1.full = true;
+               src2.full = true;
+               break;
+       case OPC_STG:
+       case OPC_STL:
+       case OPC_STP:
+       case OPC_STI:
+       case OPC_STLW:
+       case OPC_STGB_4D_4:
+       case OPC_STIB:
+               dst.full  = true;
+               src1.full = type_size(cat6->type) == 32;
+               src2.full = type_size(cat6->type) == 32;
+               break;
+       default:
+               dst.full  = type_size(cat6->type) == 32;
+               src1.full = true;
+               src2.full = true;
+               break;
+       }
+
+       switch (cat6->opc) {
+       case OPC_PREFETCH:
+       case OPC_RESINFO:
+               break;
+       case OPC_ATOMIC_ADD:
+       case OPC_ATOMIC_SUB:
+       case OPC_ATOMIC_XCHG:
+       case OPC_ATOMIC_INC:
+       case OPC_ATOMIC_DEC:
+       case OPC_ATOMIC_CMPXCHG:
+       case OPC_ATOMIC_MIN:
+       case OPC_ATOMIC_MAX:
+       case OPC_ATOMIC_AND:
+       case OPC_ATOMIC_OR:
+       case OPC_ATOMIC_XOR:
+               ss = cat6->g ? 'g' : 'l';
+               printf(".%c", ss);
+               printf(".%s", type[cat6->type]);
+               break;
+       default:
+               dst.im = cat6->g && !cat6->dst_off;
+               printf(".%s", type[cat6->type]);
+               break;
+       }
+       printf(" ");
 
        switch (cat6->opc) {
        case OPC_STG:
@@ -499,68 +585,65 @@ static void print_instr_cat6(instr_t *instr)
                break;
 
        case OPC_STI:
-               full = false;  // XXX or inverts??
+               dst.full = false;  // XXX or inverts??
                break;
        }
 
-       if (cat6->has_off) {
-               if (!nodst) {
-                       if (sd)
-                               printf("%c[", sd);
-                       print_reg_dst((reg_t)(cat6->a.dst), full, false);
-                       if (sd)
-                               printf("]");
-                       printf(", ");
-               }
-               if (ss)
-                       printf("%c[", ss);
-               print_reg_src((reg_t)(cat6->a.src1), true,
-                               false, false, cat6->a.src1_im, false, false, false);
-               if (cat6->a.off)
-                       printf("%+d", cat6->a.off);
-               if (ss)
-                       printf("]");
-               printf(", ");
-               print_reg_src((reg_t)(cat6->a.src2), full,
-                               false, false, cat6->a.src2_im, false, false, false);
+       if (cat6->dst_off) {
+               dst.reg = (reg_t)(cat6->c.dst);
+               dstoff  = cat6->c.off;
        } else {
-               if (!nodst) {
-                       if (sd)
-                               printf("%c[", sd);
-                       print_reg_dst((reg_t)(cat6->b.dst), full, false);
-                       if (sd)
-                               printf("]");
-                       printf(", ");
-               }
-               if (ss)
-                       printf("%c[", ss);
-               print_reg_src((reg_t)(cat6->b.src1), true,
-                               false, false, cat6->b.src1_im, false, false, false);
-               if (ss)
+               dst.reg = (reg_t)(cat6->d.dst);
+       }
+
+       if (cat6->src_off) {
+               src1.reg = (reg_t)(cat6->a.src1);
+               src1.im  = cat6->a.src1_im;
+               src2.reg = (reg_t)(cat6->a.src2);
+               src2.im  = cat6->a.src2_im;
+               src1off  = cat6->a.off;
+       } else {
+               src1.reg = (reg_t)(cat6->b.src1);
+               src1.im  = cat6->b.src1_im;
+               src2.reg = (reg_t)(cat6->b.src2);
+               src2.im  = cat6->b.src2_im;
+       }
+
+       if (!nodst) {
+               if (sd)
+                       printf("%c[", sd);
+               /* note: dst might actually be a src (ie. address to store to) */
+               print_src(&dst);
+               if (dstoff)
+                       printf("%+d", dstoff);
+               if (sd)
                        printf("]");
                printf(", ");
-               print_reg_src((reg_t)(cat6->b.src2), full,
-                               false, false, cat6->b.src2_im, false, false, false);
        }
 
-       if (debug & PRINT_VERBOSE) {
-               switch (cat6->opc) {
-               case OPC_LDG:
-               case OPC_LDP:
-                       /* load instructions: */
-                       if (cat6->a.dummy2|cat6->a.dummy3)
-                               printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
-                       break;
-               case OPC_STG:
-               case OPC_STP:
-               case OPC_STI:
-                       /* store instructions: */
-                       if (cat6->b.dummy2|cat6->b.dummy2)
-                               printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
-                       if (cat6->b.ignore0)
-                               printf("\t{?? %x}", cat6->b.ignore0);
-                       break;
-               }
+       if (ss)
+               printf("%c[", ss);
+
+       /* can have a larger than normal immed, so hack: */
+       if (src1.im) {
+               printf("%u", src1.reg.dummy13);
+       } else {
+               print_src(&src1);
+       }
+
+       if (src1off)
+               printf("%+d", src1off);
+       if (ss)
+               printf("]");
+
+       switch (cat6->opc) {
+       case OPC_RESINFO:
+       case OPC_RESFMT:
+               break;
+       default:
+               printf(", ");
+               print_src(&src2);
+               break;
        }
 }
 
@@ -711,19 +794,19 @@ struct opc_info {
        OPC(6, OPC_LDLW,         ldlw),
        OPC(6, OPC_STLW,         stlw),
        OPC(6, OPC_RESFMT,       resfmt),
-       OPC(6, OPC_RESINFO,      resinf),
-       OPC(6, OPC_ATOMIC_ADD_L,     atomic.add.l),
-       OPC(6, OPC_ATOMIC_SUB_L,     atomic.sub.l),
-       OPC(6, OPC_ATOMIC_XCHG_L,    atomic.xchg.l),
-       OPC(6, OPC_ATOMIC_INC_L,     atomic.inc.l),
-       OPC(6, OPC_ATOMIC_DEC_L,     atomic.dec.l),
-       OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
-       OPC(6, OPC_ATOMIC_MIN_L,     atomic.min.l),
-       OPC(6, OPC_ATOMIC_MAX_L,     atomic.max.l),
-       OPC(6, OPC_ATOMIC_AND_L,     atomic.and.l),
-       OPC(6, OPC_ATOMIC_OR_L,      atomic.or.l),
-       OPC(6, OPC_ATOMIC_XOR_L,     atomic.xor.l),
-       OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.4d),
+       OPC(6, OPC_RESINFO,      resinfo),
+       OPC(6, OPC_ATOMIC_ADD,     atomic.add),
+       OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
+       OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
+       OPC(6, OPC_ATOMIC_INC,     atomic.inc),
+       OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
+       OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+       OPC(6, OPC_ATOMIC_MIN,     atomic.min),
+       OPC(6, OPC_ATOMIC_MAX,     atomic.max),
+       OPC(6, OPC_ATOMIC_AND,     atomic.and),
+       OPC(6, OPC_ATOMIC_OR,      atomic.or),
+       OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
+       OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.3d),
        OPC(6, OPC_STGB_4D_4,    stgb.4d.4),
        OPC(6, OPC_STIB,         stib),
        OPC(6, OPC_LDC_4,        ldc.4),
index efb07ea479e0c8043199684c3644c70eb7d80ccb..c3fb68d511c83f0f9f16821d383218d440318f8a 100644 (file)
@@ -173,17 +173,17 @@ typedef enum {
        OPC_STLW = 11,
        OPC_RESFMT = 14,
        OPC_RESINFO = 15,
-       OPC_ATOMIC_ADD_L = 16,
-       OPC_ATOMIC_SUB_L = 17,
-       OPC_ATOMIC_XCHG_L = 18,
-       OPC_ATOMIC_INC_L = 19,
-       OPC_ATOMIC_DEC_L = 20,
-       OPC_ATOMIC_CMPXCHG_L = 21,
-       OPC_ATOMIC_MIN_L = 22,
-       OPC_ATOMIC_MAX_L = 23,
-       OPC_ATOMIC_AND_L = 24,
-       OPC_ATOMIC_OR_L = 25,
-       OPC_ATOMIC_XOR_L = 26,
+       OPC_ATOMIC_ADD = 16,
+       OPC_ATOMIC_SUB = 17,
+       OPC_ATOMIC_XCHG = 18,
+       OPC_ATOMIC_INC = 19,
+       OPC_ATOMIC_DEC = 20,
+       OPC_ATOMIC_CMPXCHG = 21,
+       OPC_ATOMIC_MIN = 22,
+       OPC_ATOMIC_MAX = 23,
+       OPC_ATOMIC_AND = 24,
+       OPC_ATOMIC_OR = 25,
+       OPC_ATOMIC_XOR = 26,
        OPC_LDGB_TYPED_4D = 27,
        OPC_STGB_4D_4 = 28,
        OPC_STIB = 29,
@@ -575,7 +575,7 @@ typedef struct PACKED {
        uint32_t opc_cat  : 3;
 } instr_cat5_t;
 
-/* [src1 + off], src2: */
+/* dword0 encoding for src_off: [src1 + off], src2: */
 typedef struct PACKED {
        /* dword0: */
        uint32_t mustbe1  : 1;
@@ -586,37 +586,50 @@ typedef struct PACKED {
        uint32_t src2     : 8;
 
        /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t dummy2   : 9;
-       uint32_t type     : 3;
-       uint32_t dummy3   : 2;
-       uint32_t opc      : 5;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
+       uint32_t dword1;
 } instr_cat6a_t;
 
-/* [src1], src2: */
+/* dword0 encoding for !src_off: [src1], src2 */
 typedef struct PACKED {
        /* dword0: */
        uint32_t mustbe0  : 1;
-       uint32_t src1     : 8;
-       uint32_t ignore0  : 13;
+       uint32_t src1     : 13;
+       uint32_t ignore0  : 8;
        uint32_t src1_im  : 1;
        uint32_t src2_im  : 1;
        uint32_t src2     : 8;
 
        /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t dummy2   : 9;
-       uint32_t type     : 3;
-       uint32_t dummy3   : 2;
-       uint32_t opc      : 5;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
+       uint32_t dword1;
 } instr_cat6b_t;
 
+/* dword1 encoding for dst_off: */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t dword0;
+
+       /* note: there is some weird stuff going on where sometimes
+        * cat6->a.off is involved.. but that seems like a bug in
+        * the blob, since it is used even if !cat6->src_off
+        * It would make sense for there to be some more bits to
+        * bring us to 11 bits worth of offset, but not sure..
+        */
+       int32_t off       : 8;
+       uint32_t mustbe1  : 1;
+       uint32_t dst      : 8;
+       uint32_t pad1     : 15;
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t dword0;
+
+       uint32_t dst      : 8;
+       uint32_t mustbe0  : 1;
+       uint32_t pad0     : 23;
+} instr_cat6d_t;
+
 /* I think some of the other cat6 instructions use additional
  * sub-encodings..
  */
@@ -624,16 +637,20 @@ typedef struct PACKED {
 typedef union PACKED {
        instr_cat6a_t a;
        instr_cat6b_t b;
+       instr_cat6c_t c;
+       instr_cat6d_t d;
        struct PACKED {
                /* dword0: */
-               uint32_t has_off  : 1;
+               uint32_t src_off  : 1;
                uint32_t pad1     : 31;
 
                /* dword1: */
-               uint32_t dst      : 8;
-               uint32_t dummy2   : 9;
+               uint32_t pad2     : 8;
+               uint32_t dst_off  : 1;
+               uint32_t pad3     : 8;
                uint32_t type     : 3;
-               uint32_t dummy3   : 2;
+               uint32_t g        : 1;  /* or in some cases it means dst immed */
+               uint32_t pad4     : 1;
                uint32_t opc      : 5;
                uint32_t jmp_tgt  : 1;
                uint32_t sync     : 1;
index a0cb74498eceeac13ec9db442dd8f74a25d0e75d..6d19a29275b86004e4e504f790f2c7d2bbbb931a 100644 (file)
@@ -506,25 +506,28 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 
        iassert(instr->regs_count >= 2);
 
-       if (instr->cat6.offset || instr->opc == OPC_LDG) {
+       /* TODO we need a more comprehensive list about which instructions
+        * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
+        * indicate to use the src_off encoding even if offset is zero
+        * (but then what to do about dst_off?)
+        */
+       if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
                instr_cat6a_t *cat6a = ptr;
 
-               cat6->has_off = true;
+               cat6->src_off = true;
 
-               cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
                cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
                cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
                if (src2) {
                        cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
                        cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
                }
-               cat6a->off = instr->cat6.offset;
+               cat6a->off = instr->cat6.src_offset;
        } else {
                instr_cat6b_t *cat6b = ptr;
 
-               cat6->has_off = false;
+               cat6->src_off = false;
 
-               cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
                cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
                cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
                if (src2) {
@@ -533,10 +536,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                }
        }
 
+       if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+               instr_cat6c_t *cat6c = ptr;
+               cat6->dst_off = true;
+               cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+               cat6c->off = instr->cat6.dst_offset;
+       } else {
+               instr_cat6d_t *cat6d = ptr;
+               cat6->dst_off = false;
+               cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+       }
+
        cat6->type     = instr->cat6.type;
        cat6->opc      = instr->opc;
        cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
        cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat6->g        = !!(instr->flags & IR3_INSTR_G);
        cat6->opc_cat  = 6;
 
        return 0;
index f11d8eda5f255dbbe173bd1ddd5b04157da4162e..c3b61a0fe0111d1f1755bbdb661edfeb5c772d96 100644 (file)
@@ -172,6 +172,7 @@ struct ir3_instruction {
                IR3_INSTR_P     = 0x080,
                IR3_INSTR_S     = 0x100,
                IR3_INSTR_S2EN  = 0x200,
+               IR3_INSTR_G     = 0x400,
                /* meta-flags, for intermediate stages of IR, ie.
                 * before register assignment is done:
                 */
@@ -209,7 +210,8 @@ struct ir3_instruction {
                } cat5;
                struct {
                        type_t type;
-                       int offset;
+                       int src_offset;
+                       int dst_offset;
                        int iim_val;
                } cat6;
                /* for meta-instructions, just used to hold extra data
index 22885ff85f3539ba705fb66ee4ba906a37b8e3a4..bdba3aae36f34b14bcb40a235d18a3ec74be1958 100644 (file)
@@ -1215,7 +1215,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
                struct ir3_instruction *load =
                                ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
                load->cat6.type = TYPE_U32;
-               load->cat6.offset = off + i * 4;    /* byte offset */
+               load->cat6.src_offset = off + i * 4;     /* byte offset */
                dst[i] = load;
        }
 }