From: Rob Clark Date: Thu, 23 Jul 2015 19:31:13 +0000 (-0400) Subject: freedreno/ir3: updated cat6 encoding X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bc5e2bec303acd7fd962996bf369be5ce0e15cd2;p=mesa.git freedreno/ir3: updated cat6 encoding Sync updated cat6 encoding from freedreno.git, needed to properly encode store instructions. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index 48ae7c71b9f..83ed5ffdca0 100644 --- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c @@ -103,7 +103,7 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im, } else if ((reg.num == REG_P0) && !c) { printf("p0.%c", component[reg.comp]); } else { - printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); + printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]); } } @@ -122,6 +122,32 @@ static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im, print_reg(reg, full, r, c, im, neg, abs, addr_rel); } +/* TODO switch to using reginfo struct everywhere, since more readable + * than passing a bunch of bools to print_reg_src + */ + +struct reginfo { + reg_t reg; + bool full; + bool r; + bool c; + bool im; + bool neg; + bool abs; + bool addr_rel; +}; + +static void print_src(struct reginfo *info) +{ + print_reg_src(info->reg, info->full, info->r, info->c, info->im, + info->neg, info->abs, info->addr_rel); +} + +//static void print_dst(struct reginfo *info) +//{ +// print_reg_dst(info->reg, info->full, info->addr_rel); +//} + static void print_instr_cat0(instr_t *instr) { instr_cat0_t *cat0 = &instr->cat0; @@ -454,10 +480,70 @@ static void print_instr_cat6(instr_t *instr) { instr_cat6_t *cat6 = &instr->cat6; char sd = 0, ss = 0; /* dst/src address space */ - bool full = type_size(cat6->type) == 32; bool nodst = false; + struct reginfo dst, src1, src2; + int src1off = 0, dstoff = 0; - printf(".%s ", type[cat6->type]); + memset(&dst, 0, sizeof(dst)); + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + + switch (cat6->opc) { + case OPC_RESINFO: + case OPC_RESFMT: + dst.full = type_size(cat6->type) == 32; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + case OPC_L2G: + case OPC_G2L: + dst.full = true; + src1.full = true; + src2.full = true; + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STI: + case OPC_STLW: + case OPC_STGB_4D_4: + case OPC_STIB: + dst.full = true; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + default: + dst.full = type_size(cat6->type) == 32; + src1.full = true; + src2.full = true; + break; + } + + switch (cat6->opc) { + case OPC_PREFETCH: + case OPC_RESINFO: + break; + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + ss = cat6->g ? 'g' : 'l'; + printf(".%c", ss); + printf(".%s", type[cat6->type]); + break; + default: + dst.im = cat6->g && !cat6->dst_off; + printf(".%s", type[cat6->type]); + break; + } + printf(" "); switch (cat6->opc) { case OPC_STG: @@ -499,68 +585,65 @@ static void print_instr_cat6(instr_t *instr) break; case OPC_STI: - full = false; // XXX or inverts?? + dst.full = false; // XXX or inverts?? break; } - if (cat6->has_off) { - if (!nodst) { - if (sd) - printf("%c[", sd); - print_reg_dst((reg_t)(cat6->a.dst), full, false); - if (sd) - printf("]"); - printf(", "); - } - if (ss) - printf("%c[", ss); - print_reg_src((reg_t)(cat6->a.src1), true, - false, false, cat6->a.src1_im, false, false, false); - if (cat6->a.off) - printf("%+d", cat6->a.off); - if (ss) - printf("]"); - printf(", "); - print_reg_src((reg_t)(cat6->a.src2), full, - false, false, cat6->a.src2_im, false, false, false); + if (cat6->dst_off) { + dst.reg = (reg_t)(cat6->c.dst); + dstoff = cat6->c.off; } else { - if (!nodst) { - if (sd) - printf("%c[", sd); - print_reg_dst((reg_t)(cat6->b.dst), full, false); - if (sd) - printf("]"); - printf(", "); - } - if (ss) - printf("%c[", ss); - print_reg_src((reg_t)(cat6->b.src1), true, - false, false, cat6->b.src1_im, false, false, false); - if (ss) + dst.reg = (reg_t)(cat6->d.dst); + } + + if (cat6->src_off) { + src1.reg = (reg_t)(cat6->a.src1); + src1.im = cat6->a.src1_im; + src2.reg = (reg_t)(cat6->a.src2); + src2.im = cat6->a.src2_im; + src1off = cat6->a.off; + } else { + src1.reg = (reg_t)(cat6->b.src1); + src1.im = cat6->b.src1_im; + src2.reg = (reg_t)(cat6->b.src2); + src2.im = cat6->b.src2_im; + } + + if (!nodst) { + if (sd) + printf("%c[", sd); + /* note: dst might actually be a src (ie. address to store to) */ + print_src(&dst); + if (dstoff) + printf("%+d", dstoff); + if (sd) printf("]"); printf(", "); - print_reg_src((reg_t)(cat6->b.src2), full, - false, false, cat6->b.src2_im, false, false, false); } - if (debug & PRINT_VERBOSE) { - switch (cat6->opc) { - case OPC_LDG: - case OPC_LDP: - /* load instructions: */ - if (cat6->a.dummy2|cat6->a.dummy3) - printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3); - break; - case OPC_STG: - case OPC_STP: - case OPC_STI: - /* store instructions: */ - if (cat6->b.dummy2|cat6->b.dummy2) - printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3); - if (cat6->b.ignore0) - printf("\t{?? %x}", cat6->b.ignore0); - break; - } + if (ss) + printf("%c[", ss); + + /* can have a larger than normal immed, so hack: */ + if (src1.im) { + printf("%u", src1.reg.dummy13); + } else { + print_src(&src1); + } + + if (src1off) + printf("%+d", src1off); + if (ss) + printf("]"); + + switch (cat6->opc) { + case OPC_RESINFO: + case OPC_RESFMT: + break; + default: + printf(", "); + print_src(&src2); + break; } } @@ -711,19 +794,19 @@ struct opc_info { OPC(6, OPC_LDLW, ldlw), OPC(6, OPC_STLW, stlw), OPC(6, OPC_RESFMT, resfmt), - OPC(6, OPC_RESINFO, resinf), - OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l), - OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l), - OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l), - OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l), - OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l), - OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l), - OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l), - OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l), - OPC(6, OPC_ATOMIC_AND_L, atomic.and.l), - OPC(6, OPC_ATOMIC_OR_L, atomic.or.l), - OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l), - OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d), + OPC(6, OPC_RESINFO, resinfo), + OPC(6, OPC_ATOMIC_ADD, atomic.add), + OPC(6, OPC_ATOMIC_SUB, atomic.sub), + OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), + OPC(6, OPC_ATOMIC_INC, atomic.inc), + OPC(6, OPC_ATOMIC_DEC, atomic.dec), + OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), + OPC(6, OPC_ATOMIC_MIN, atomic.min), + OPC(6, OPC_ATOMIC_MAX, atomic.max), + OPC(6, OPC_ATOMIC_AND, atomic.and), + OPC(6, OPC_ATOMIC_OR, atomic.or), + OPC(6, OPC_ATOMIC_XOR, atomic.xor), + OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.3d), OPC(6, OPC_STGB_4D_4, stgb.4d.4), OPC(6, OPC_STIB, stib), OPC(6, OPC_LDC_4, ldc.4), diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index efb07ea479e..c3fb68d511c 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -173,17 +173,17 @@ typedef enum { OPC_STLW = 11, OPC_RESFMT = 14, OPC_RESINFO = 15, - OPC_ATOMIC_ADD_L = 16, - OPC_ATOMIC_SUB_L = 17, - OPC_ATOMIC_XCHG_L = 18, - OPC_ATOMIC_INC_L = 19, - OPC_ATOMIC_DEC_L = 20, - OPC_ATOMIC_CMPXCHG_L = 21, - OPC_ATOMIC_MIN_L = 22, - OPC_ATOMIC_MAX_L = 23, - OPC_ATOMIC_AND_L = 24, - OPC_ATOMIC_OR_L = 25, - OPC_ATOMIC_XOR_L = 26, + OPC_ATOMIC_ADD = 16, + OPC_ATOMIC_SUB = 17, + OPC_ATOMIC_XCHG = 18, + OPC_ATOMIC_INC = 19, + OPC_ATOMIC_DEC = 20, + OPC_ATOMIC_CMPXCHG = 21, + OPC_ATOMIC_MIN = 22, + OPC_ATOMIC_MAX = 23, + OPC_ATOMIC_AND = 24, + OPC_ATOMIC_OR = 25, + OPC_ATOMIC_XOR = 26, OPC_LDGB_TYPED_4D = 27, OPC_STGB_4D_4 = 28, OPC_STIB = 29, @@ -575,7 +575,7 @@ typedef struct PACKED { uint32_t opc_cat : 3; } instr_cat5_t; -/* [src1 + off], src2: */ +/* dword0 encoding for src_off: [src1 + off], src2: */ typedef struct PACKED { /* dword0: */ uint32_t mustbe1 : 1; @@ -586,37 +586,50 @@ typedef struct PACKED { uint32_t src2 : 8; /* dword1: */ - uint32_t dst : 8; - uint32_t dummy2 : 9; - uint32_t type : 3; - uint32_t dummy3 : 2; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; + uint32_t dword1; } instr_cat6a_t; -/* [src1], src2: */ +/* dword0 encoding for !src_off: [src1], src2 */ typedef struct PACKED { /* dword0: */ uint32_t mustbe0 : 1; - uint32_t src1 : 8; - uint32_t ignore0 : 13; + uint32_t src1 : 13; + uint32_t ignore0 : 8; uint32_t src1_im : 1; uint32_t src2_im : 1; uint32_t src2 : 8; /* dword1: */ - uint32_t dst : 8; - uint32_t dummy2 : 9; - uint32_t type : 3; - uint32_t dummy3 : 2; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; + uint32_t dword1; } instr_cat6b_t; +/* dword1 encoding for dst_off: */ +typedef struct PACKED { + /* dword0: */ + uint32_t dword0; + + /* note: there is some weird stuff going on where sometimes + * cat6->a.off is involved.. but that seems like a bug in + * the blob, since it is used even if !cat6->src_off + * It would make sense for there to be some more bits to + * bring us to 11 bits worth of offset, but not sure.. + */ + int32_t off : 8; + uint32_t mustbe1 : 1; + uint32_t dst : 8; + uint32_t pad1 : 15; +} instr_cat6c_t; + +/* dword1 encoding for !dst_off: */ +typedef struct PACKED { + /* dword0: */ + uint32_t dword0; + + uint32_t dst : 8; + uint32_t mustbe0 : 1; + uint32_t pad0 : 23; +} instr_cat6d_t; + /* I think some of the other cat6 instructions use additional * sub-encodings.. */ @@ -624,16 +637,20 @@ typedef struct PACKED { typedef union PACKED { instr_cat6a_t a; instr_cat6b_t b; + instr_cat6c_t c; + instr_cat6d_t d; struct PACKED { /* dword0: */ - uint32_t has_off : 1; + uint32_t src_off : 1; uint32_t pad1 : 31; /* dword1: */ - uint32_t dst : 8; - uint32_t dummy2 : 9; + uint32_t pad2 : 8; + uint32_t dst_off : 1; + uint32_t pad3 : 8; uint32_t type : 3; - uint32_t dummy3 : 2; + uint32_t g : 1; /* or in some cases it means dst immed */ + uint32_t pad4 : 1; uint32_t opc : 5; uint32_t jmp_tgt : 1; uint32_t sync : 1; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index a0cb74498ec..6d19a29275b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -506,25 +506,28 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, iassert(instr->regs_count >= 2); - if (instr->cat6.offset || instr->opc == OPC_LDG) { + /* TODO we need a more comprehensive list about which instructions + * can be encoded which way. Or possibly use IR3_INSTR_0 flag to + * indicate to use the src_off encoding even if offset is zero + * (but then what to do about dst_off?) + */ + if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) { instr_cat6a_t *cat6a = ptr; - cat6->has_off = true; + cat6->src_off = true; - cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); if (src2) { cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); } - cat6a->off = instr->cat6.offset; + cat6a->off = instr->cat6.src_offset; } else { instr_cat6b_t *cat6b = ptr; - cat6->has_off = false; + cat6->src_off = false; - cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); if (src2) { @@ -533,10 +536,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, } } + if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) { + instr_cat6c_t *cat6c = ptr; + cat6->dst_off = true; + cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat6c->off = instr->cat6.dst_offset; + } else { + instr_cat6d_t *cat6d = ptr; + cat6->dst_off = false; + cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + } + cat6->type = instr->cat6.type; cat6->opc = instr->opc; cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); cat6->sync = !!(instr->flags & IR3_INSTR_SY); + cat6->g = !!(instr->flags & IR3_INSTR_G); cat6->opc_cat = 6; return 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f11d8eda5f2..c3b61a0fe01 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -172,6 +172,7 @@ struct ir3_instruction { IR3_INSTR_P = 0x080, IR3_INSTR_S = 0x100, IR3_INSTR_S2EN = 0x200, + IR3_INSTR_G = 0x400, /* meta-flags, for intermediate stages of IR, ie. * before register assignment is done: */ @@ -209,7 +210,8 @@ struct ir3_instruction { } cat5; struct { type_t type; - int offset; + int src_offset; + int dst_offset; int iim_val; } cat6; /* for meta-instructions, just used to hold extra data diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 22885ff85f3..bdba3aae36f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1215,7 +1215,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, struct ir3_instruction *load = ir3_LDG(b, addr, 0, create_immed(b, 1), 0); load->cat6.type = TYPE_U32; - load->cat6.offset = off + i * 4; /* byte offset */ + load->cat6.src_offset = off + i * 4; /* byte offset */ dst[i] = load; } }