ir3: Add bindless instruction encoding
author Connor Abbott <cwabbott0@gmail.com>
Fri, 6 Mar 2020 17:06:06 +0000 (18:06 +0100)
committer Marge Bot <eric+marge@anholt.net>
Thu, 9 Apr 2020 15:56:55 +0000 (15:56 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>

src/freedreno/ir3/disasm-a3xx.c
src/freedreno/ir3/instr-a3xx.h
src/freedreno/ir3/ir3.c

index cd45e912703e0326add99ab36405065e842d0282..d957e77f853ae49a3e475039ceac0584e0fa01aa 100644 (file)
@@ -117,7 +117,10 @@ static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r,
                else
                        fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
        } else if ((reg.num == REG_A0) && !c) {
-               fprintf(ctx->out, "a0.%c", component[reg.comp]);
+               /* This matches libllvm output, the second (scalar) address register
+                * seems to be called a1.x instead of a0.y.
+                */
+               fprintf(ctx->out, "a%d.x", reg.comp);
        } else if ((reg.num == REG_P0) && !c) {
                fprintf(ctx->out, "p0.%c", component[reg.comp]);
        } else {
@@ -448,15 +451,70 @@ static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
                        [opc_op(OPC_RGETPOS)]  = { true,  false, false, false, },
                        [opc_op(OPC_RGETINFO)] = { false, false, false, false, },
        };
+
+       static const struct {
+               bool indirect;
+               bool bindless;
+               bool use_a1;
+               bool uniform;
+       } desc_features[8] = {
+               [CAT5_NONUNIFORM] = { .indirect = true, },
+               [CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
+               [CAT5_BINDLESS_IMM] = { .bindless = true, },
+               [CAT5_BINDLESS_UNIFORM] = {
+                       .bindless = true,
+                       .indirect = true,
+                       .uniform = true,
+               },
+               [CAT5_BINDLESS_NONUNIFORM] = {
+                       .bindless = true,
+                       .indirect = true,
+               },
+               [CAT5_BINDLESS_A1_IMM] = {
+                       .bindless = true,
+                       .use_a1 = true,
+               },
+               [CAT5_BINDLESS_A1_UNIFORM] = {
+                       .bindless = true,
+                       .indirect = true,
+                       .uniform = true,
+                       .use_a1 = true,
+               },
+               [CAT5_BINDLESS_A1_NONUNIFORM] = {
+                       .bindless = true,
+                       .indirect = true,
+                       .use_a1 = true,
+               },
+       };
+
        instr_cat5_t *cat5 = &instr->cat5;
        int i;
 
+       bool desc_indirect =
+               cat5->is_s2en_bindless &&
+               desc_features[cat5->s2en_bindless.desc_mode].indirect;
+       bool bindless =
+               cat5->is_s2en_bindless &&
+               desc_features[cat5->s2en_bindless.desc_mode].bindless;
+       bool use_a1 =
+               cat5->is_s2en_bindless &&
+               desc_features[cat5->s2en_bindless.desc_mode].use_a1;
+       bool uniform =
+               cat5->is_s2en_bindless &&
+               desc_features[cat5->s2en_bindless.desc_mode].uniform;
+
        if (cat5->is_3d)   fprintf(ctx->out, ".3d");
        if (cat5->is_a)    fprintf(ctx->out, ".a");
        if (cat5->is_o)    fprintf(ctx->out, ".o");
        if (cat5->is_p)    fprintf(ctx->out, ".p");
        if (cat5->is_s)    fprintf(ctx->out, ".s");
-       if (cat5->is_s2en) fprintf(ctx->out, ".s2en");
+       if (desc_indirect) fprintf(ctx->out, ".s2en");
+       if (uniform)       fprintf(ctx->out, ".uniform");
+
+       if (bindless) {
+               unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
+               fprintf(ctx->out, ".base%d", base);
+       }
 
        fprintf(ctx->out, " ");
 
@@ -483,34 +541,47 @@ static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
                                false, false, false);
        }
 
-       if (cat5->is_s2en) {
-               if (cat5->is_o || info[cat5->opc].src2) {
-                       fprintf(ctx->out, ", ");
-                       print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full,
-                                       false, false, false, false, false, false);
-               }
+       if (cat5->is_o || info[cat5->opc].src2) {
                fprintf(ctx->out, ", ");
-               print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false,
-                               false, false, false);
-       } else {
-               if (cat5->is_o || info[cat5->opc].src2) {
-                       fprintf(ctx->out, ", ");
-                       print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full,
-                                       false, false, false, false, false, false);
+               print_reg_src(ctx, (reg_t)(cat5->src2), cat5->full,
+                               false, false, false, false, false, false);
+       }
+       if (cat5->is_s2en_bindless) {
+               if (!desc_indirect) {
+                       if (info[cat5->opc].samp) {
+                               if (use_a1)
+                                       fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
+                               else
+                                       fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
+                       }
+
+                       if (info[cat5->opc].tex && !use_a1) {
+                               fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
+                       }
                }
+       } else {
                if (info[cat5->opc].samp)
                        fprintf(ctx->out, ", s#%d", cat5->norm.samp);
                if (info[cat5->opc].tex)
                        fprintf(ctx->out, ", t#%d", cat5->norm.tex);
        }
 
+       if (desc_indirect) {
+               fprintf(ctx->out, ", ");
+               print_reg_src(ctx, (reg_t)(cat5->s2en_bindless.src3), bindless,
+                                         false, false, false, false, false, false);
+       }
+
+       if (use_a1)
+               fprintf(ctx->out, ", a1.x");
+
        if (debug & PRINT_VERBOSE) {
-               if (cat5->is_s2en) {
-                       if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2))
-                               fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
+               if (cat5->is_s2en_bindless) {
+                       if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1)
+                               fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
                } else {
-                       if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2))
-                               fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
+                       if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1)
+                               fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
                }
        }
 }
@@ -833,46 +904,66 @@ static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
 static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
 {
        instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
-       struct reginfo src1, src2;
-       bool has_dest = _OPC(6, cat6->opc) == OPC_LDIB;
-       char ss = 0;
+       struct reginfo src1, src2, ssbo;
+       bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
 
-       memset(&src1, 0, sizeof(src1));
-       memset(&src2, 0, sizeof(src2));
+       static const struct {
+               bool indirect;
+               bool bindless;
+               bool uniform;
+       } desc_features[8] = {
+               [CAT6_IMM] = { false },
+               [CAT6_BINDLESS_IMM] = { .bindless = true, },
+               [CAT6_BINDLESS_UNIFORM] = {
+                       .bindless = true,
+                       .indirect = true,
+                       .uniform = true,
+               },
+               [CAT6_BINDLESS_NONUNIFORM] = {
+                       .bindless = true,
+                       .indirect = true,
+               },
+       };
 
-       fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
-       fprintf(ctx->out, ".%dd", cat6->d + 1);
-       fprintf(ctx->out, ".%s", type[cat6->type]);
-       fprintf(ctx->out, ".%u ", cat6->type_size + 1);
+       bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
+       bool bindless = desc_features[cat6->desc_mode].bindless;
+       bool uniform = desc_features[cat6->desc_mode].uniform;
 
-       if (has_dest) {
-               src2.reg = (reg_t)(cat6->src2);
-               src2.full = true; // XXX
-               print_src(ctx, &src2);
 
-               fprintf(ctx->out, ", ");
+       memset(&src1, 0, sizeof(src1));
+       memset(&src2, 0, sizeof(src2));
+       memset(&ssbo, 0, sizeof(ssbo));
+
+       if (uses_type) {
+               fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
+               fprintf(ctx->out, ".%dd", cat6->d + 1);
+               fprintf(ctx->out, ".%s", type[cat6->type]);
        }
+       fprintf(ctx->out, ".%u", cat6->type_size + 1);
 
-       /* NOTE: blob seems to use old encoding for ldl/stl (local memory) */
-       ss = 'g';
+       if (bindless)
+               fprintf(ctx->out, ".base%d", cat6->base);
+       if (uniform)
+               fprintf(ctx->out, ".uniform");
+       fprintf(ctx->out, " ");
+
+       src2.reg = (reg_t)(cat6->src2);
+       src2.full = true; // XXX
+       print_src(ctx, &src2);
+       fprintf(ctx->out, ", ");
 
-       fprintf(ctx->out, "%c[%u", ss, cat6->ssbo);
-       fprintf(ctx->out, "] + ");
        src1.reg = (reg_t)(cat6->src1);
        src1.full = true; // XXX
        print_src(ctx, &src1);
-
-       if (!has_dest) {
-               fprintf(ctx->out, ", ");
-
-               src2.reg = (reg_t)(cat6->src2);
-               src2.full = true; // XXX
-               print_src(ctx, &src2);
-       }
+       fprintf(ctx->out, ", ");
+       ssbo.reg = (reg_t)(cat6->ssbo);
+       ssbo.im = !indirect_ssbo;
+       ssbo.full = true;
+       print_src(ctx, &ssbo);
 
        if (debug & PRINT_VERBOSE) {
-               fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x)", cat6->pad1,
-                               cat6->pad2, cat6->pad3, cat6->pad4);
+               fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
+                               cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
        }
 }
 
index b3649f24bdf43f034aaafc8fccd793ff48285b95..f36c73b88e243bf806613c7a4f445338a3538abb 100644 (file)
@@ -567,6 +567,57 @@ typedef struct PACKED {
        uint32_t opc_cat  : 3;
 } instr_cat4_t;
 
+/* With is_s2en_bindless = 1, this determines whether bindless is enabled and
+ * if so, how to get the (base, index) pair for both sampler and texture.
+ * There is a single base embedded in the instruction, which is always used
+ * for the texture.
+ */
+typedef enum {
+       /* Use traditional GL binding model, get texture and sampler index
+        * from src3 which is not presumed to be uniform. This is
+        * backwards-compatible with earlier generations, where this field was
+        * always 0 and nonuniform-indexed sampling always worked.
+        */
+       CAT5_NONUNIFORM = 0,
+
+       /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+        * and texture index come from src3 which is presumed to be uniform.
+        */
+       CAT5_BINDLESS_A1_UNIFORM = 1,
+
+       /* The texture and sampler share the same base, and the sampler and
+        * texture index come from src3 which is *not* presumed to be uniform.
+        */
+       CAT5_BINDLESS_NONUNIFORM = 2,
+
+       /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+        * and texture index come from src3 which is *not* presumed to be
+        * uniform.
+        */
+       CAT5_BINDLESS_A1_NONUNIFORM = 3,
+
+       /* Use traditional GL binding model, get texture and sampler index
+        * from src3 which is presumed to be uniform.
+        */
+       CAT5_UNIFORM = 4,
+
+       /* The texture and sampler share the same base, and the sampler and
+        * texture index come from src3 which is presumed to be uniform.
+        */
+       CAT5_BINDLESS_UNIFORM = 5,
+
+       /* The texture and sampler share the same base, get sampler index from low
+        * 4 bits of src3 and texture index from high 4 bits.
+        */
+       CAT5_BINDLESS_IMM = 6,
+
+       /* The sampler base comes from the low 3 bits of a1.x, and the texture
+        * index comes from the next 8 bits of a1.x. The sampler index is an
+        * immediate in src3.
+        */
+       CAT5_BINDLESS_A1_IMM = 7,
+} cat5_desc_mode_t;
+
 typedef struct PACKED {
        /* dword0: */
        union PACKED {
@@ -581,39 +632,41 @@ typedef struct PACKED {
                } norm;
                /* s2en case: */
                struct PACKED {
-                       uint32_t full     : 1;   /* not half */
-                       uint32_t src1     : 8;
-                       uint32_t src2     : 11;
-                       uint32_t dummy1   : 1;
-                       uint32_t src3     : 8;
-                       uint32_t dummy2   : 3;
-               } s2en;
+                       uint32_t full         : 1;   /* not half */
+                       uint32_t src1         : 8;
+                       uint32_t src2         : 8;
+                       uint32_t dummy1       : 2;
+                       uint32_t base_hi      : 2;
+                       uint32_t src3         : 8;
+                       uint32_t desc_mode    : 3;
+               } s2en_bindless;
                /* same in either case: */
                // XXX I think, confirm this
                struct PACKED {
                        uint32_t full     : 1;   /* not half */
                        uint32_t src1     : 8;
-                       uint32_t pad      : 23;
+                       uint32_t src2     : 8;
+                       uint32_t pad      : 15;
                };
        };
 
        /* dword1: */
-       uint32_t dst      : 8;
-       uint32_t wrmask   : 4;   /* write-mask */
-       uint32_t type     : 3;
-       uint32_t dummy2   : 1;   /* seems to be ignored */
-       uint32_t is_3d    : 1;
-
-       uint32_t is_a     : 1;
-       uint32_t is_s     : 1;
-       uint32_t is_s2en  : 1;
-       uint32_t is_o     : 1;
-       uint32_t is_p     : 1;
-
-       uint32_t opc      : 5;
-       uint32_t jmp_tgt  : 1;
-       uint32_t sync     : 1;
-       uint32_t opc_cat  : 3;
+       uint32_t dst              : 8;
+       uint32_t wrmask           : 4;   /* write-mask */
+       uint32_t type             : 3;
+       uint32_t base_lo          : 1;   /* used with bindless */
+       uint32_t is_3d            : 1;
+
+       uint32_t is_a             : 1;
+       uint32_t is_s             : 1;
+       uint32_t is_s2en_bindless : 1;
+       uint32_t is_o             : 1;
+       uint32_t is_p             : 1;
+
+       uint32_t opc              : 5;
+       uint32_t jmp_tgt          : 1;
+       uint32_t sync             : 1;
+       uint32_t opc_cat          : 3;
 } instr_cat5_t;
 
 /* dword0 encoding for src_off: [src1 + off], src2: */
@@ -748,43 +801,72 @@ typedef union PACKED {
        };
 } instr_cat6_t;
 
+/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
+ */
+typedef enum {
+       /* Use old GL binding model with an immediate index.
+        * TODO: find CAT6_UNIFORM and CAT6_NONUNIFORM
+        */
+       CAT6_IMM = 0,
+
+       /* Use the bindless model, with an immediate index.
+        */
+       CAT6_BINDLESS_IMM = 4,
+
+       /* Use the bindless model, with a uniform register index.
+        */
+       CAT6_BINDLESS_UNIFORM = 5,
+
+       /* Use the bindless model, with a register index that isn't guaranteed
+        * to be uniform. This presumably checks if the indices are equal and
+        * splits up the load/store, because it works the way you would
+        * expect.
+        */
+       CAT6_BINDLESS_NONUNIFORM = 6,
+} cat6_desc_mode_t;
+
 /**
  * For atomic ops (which return a value):
  *
- *    pad1=1, pad2=c, pad3=0, pad4=3
+ *    pad1=1, pad3=c, pad5=3
  *    src1    - vecN offset/coords
  *    src2.x  - is actually dest register
  *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
  *              and src2.z is 'data'
  *
  * For stib (which does not return a value):
- *    pad1=0, pad2=c, pad3=0, pad4=2
+ *    pad1=0, pad3=c, pad5=2
  *    src1    - vecN offset/coords
  *    src2    - value to store
  *
  * For ldib:
- *    pad1=1, pad2=c, pad3=0, pad4=2
+ *    pad1=1, pad3=c, pad5=2
  *    src1    - vecN offset/coords
  *
  * for ldc (load from UBO using descriptor):
- *    pad1=0, pad2=8, pad3=0, pad4=2
+ *    pad1=0, pad3=8, pad5=2
+ *
+ * pad2 and pad4 are only observed to be 0.
  */
 typedef struct PACKED {
        /* dword0: */
-       uint32_t pad1     : 9;
+       uint32_t pad1     : 1;
+       uint32_t base     : 3;
+       uint32_t pad2     : 2;
+       uint32_t desc_mode : 3;
        uint32_t d        : 2;
        uint32_t typed    : 1;
        uint32_t type_size : 2;
        uint32_t opc      : 5;
-       uint32_t pad2     : 5;
+       uint32_t pad3     : 5;
        uint32_t src1     : 8;  /* coordinate/offset */
 
        /* dword1: */
        uint32_t src2     : 8;  /* or the dst for load instructions */
-       uint32_t pad3     : 1;  //mustbe0 ?? or zero means imm vs reg for ssbo??
+       uint32_t pad4     : 1;  //mustbe0 ??
        uint32_t ssbo     : 8;  /* ssbo/image binding point */
        uint32_t type     : 3;
-       uint32_t pad4     : 7;
+       uint32_t pad5     : 7;
        uint32_t jmp_tgt  : 1;
        uint32_t sync     : 1;
        uint32_t opc_cat  : 3;
@@ -869,7 +951,7 @@ static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
         * cmdstream traces I have indicates that the pad bit is zero
         * in all cases.  So we can use this to detect new encoding:
         */
-       if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
+       if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
                assert(gpu_id >= 600);
                assert(instr->cat6.opc == 0);
                return false;
index 4ac50aec0a3e86219ab50c69af3974c3723b7144..7bdf8a39ba88f3b238b5205e0203bbbd22d16e43 100644 (file)
@@ -482,20 +482,23 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
                cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
        }
 
+       if (src2) {
+               iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+               cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+       }
+
        if (instr->flags & IR3_INSTR_S2EN) {
                struct ir3_register *samp_tex = instr->regs[1];
-               if (src2) {
-                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
-                       cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
-               }
                iassert(samp_tex->flags & IR3_REG_HALF);
-               cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+               cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
+               /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, as
+                * this is what the blob does and it is presumably faster, but first
+                * we should confirm it is actually nonuniform and figure out when the
+                * whole descriptor mode mechanism was introduced.
+                */
+               cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
                iassert(!(instr->cat5.samp | instr->cat5.tex));
        } else {
-               if (src2) {
-                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
-                       cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
-               }
                cat5->norm.samp = instr->cat5.samp;
                cat5->norm.tex  = instr->cat5.tex;
        }
@@ -506,7 +509,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
        cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
        cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
        cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
-       cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
+       cat5->is_s2en_bindless = !!(instr->flags & IR3_INSTR_S2EN);
        cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
        cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
        cat5->opc      = instr->opc;
@@ -564,31 +567,29 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
        case OPC_ATOMIC_OR:
        case OPC_ATOMIC_XOR:
                cat6->pad1 = 0x1;
-               cat6->pad2 = 0xc;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x3;
+               cat6->pad3 = 0xc;
+               cat6->pad5 = 0x3;
                break;
        case OPC_STIB:
                cat6->pad1 = 0x0;
-               cat6->pad2 = 0xc;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x2;
+               cat6->pad3 = 0xc;
+               cat6->pad5 = 0x2;
                break;
        case OPC_LDIB:
                cat6->pad1 = 0x1;
-               cat6->pad2 = 0xc;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x2;
+               cat6->pad3 = 0xc;
+               cat6->pad5 = 0x2;
                break;
        case OPC_LDC:
                cat6->pad1 = 0x0;
-               cat6->pad2 = 0x8;
-               cat6->pad3 = 0x0;
-               cat6->pad4 = 0x2;
+               cat6->pad3 = 0x8;
+               cat6->pad5 = 0x2;
                break;
        default:
                iassert(0);
        }
+       cat6->pad2 = 0x0;
+       cat6->pad4 = 0x0;
 
        return 0;
 }