From 0038deb256dd17d2c1cc61d7def422a08fef9fd1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 8 Nov 2017 17:51:40 -0500 Subject: [PATCH] freedreno/ir3: cat6 encoding fixes Instruction encoding/decoding fixes needed for images, shared variables, etc. Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/disasm-a3xx.c | 74 ++++++++++++++--- .../drivers/freedreno/ir3/instr-a3xx.h | 24 +++++- src/gallium/drivers/freedreno/ir3/ir3.c | 80 ++++++++++++++----- src/gallium/drivers/freedreno/ir3/ir3.h | 5 +- 4 files changed, 151 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index 96b34649c37..4cb174cb2a4 100644 --- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c @@ -520,7 +520,9 @@ static void print_instr_cat6(instr_t *instr) switch (_OPC(6, cat6->opc)) { case OPC_PREFETCH: + break; case OPC_RESINFO: + printf(".%dd", cat6->ldgb.d + 1); break; case OPC_LDGB: printf(".%s", cat6->ldgb.typed ? "typed" : "untyped"); @@ -529,6 +531,7 @@ static void print_instr_cat6(instr_t *instr) printf(".%d", cat6->ldgb.type_size + 1); break; case OPC_STGB: + case OPC_STIB: printf(".%s", cat6->stgb.typed ? "typed" : "untyped"); printf(".%dd", cat6->stgb.d + 1); printf(".%s", type[cat6->type]); @@ -546,8 +549,11 @@ static void print_instr_cat6(instr_t *instr) case OPC_ATOMIC_OR: case OPC_ATOMIC_XOR: ss = cat6->g ? 'g' : 'l'; - printf(".%c", ss); + printf(".%s", cat6->ldgb.typed ? "typed" : "untyped"); + printf(".%dd", cat6->ldgb.d + 1); printf(".%s", type[cat6->type]); + printf(".%d", cat6->ldgb.type_size + 1); + printf(".%c", ss); break; default: dst.im = cat6->g && !cat6->dst_off; @@ -601,7 +607,7 @@ static void print_instr_cat6(instr_t *instr) break; } - if (_OPC(6, cat6->opc) == OPC_STGB) { + if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { struct reginfo src3; memset(&src3, 0, sizeof(src3)); @@ -626,7 +632,7 @@ static void print_instr_cat6(instr_t *instr) return; } - if ((_OPC(6, cat6->opc) == OPC_LDGB) || is_atomic(_OPC(6, cat6->opc))) { + if (is_atomic(_OPC(6, cat6->opc))) { src1.reg = (reg_t)(cat6->ldgb.src1); src1.im = cat6->ldgb.src1_im; @@ -636,21 +642,69 @@ static void print_instr_cat6(instr_t *instr) print_src(&dst); printf(", "); - printf("g[%u], ", cat6->ldgb.src_ssbo); - print_src(&src1); - printf(", "); - print_src(&src2); - - if (is_atomic(_OPC(6, cat6->opc))) { + if (ss == 'g') { struct reginfo src3; memset(&src3, 0, sizeof(src3)); + src3.reg = (reg_t)(cat6->ldgb.src3); src3.full = true; + /* For images, the ".typed" variant is used and src2 is + * the ivecN coordinates, ie ivec2 for 2d. + * + * For SSBOs, the ".untyped" variant is used and src2 is + * a simple dword offset.. src3 appears to be + * uvec2(offset * 4, 0). Not sure the point of that. + */ + + printf("g[%u], ", cat6->ldgb.src_ssbo); + print_src(&src1); /* value */ printf(", "); - print_src(&src3); + print_src(&src2); /* offset/coords */ + printf(", "); + print_src(&src3); /* 64b byte offset.. */ + + if (debug & PRINT_VERBOSE) { + printf(" (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } else { /* ss == 'l' */ + printf("l["); + print_src(&src1); /* simple byte offset */ + printf("], "); + print_src(&src2); /* value */ + + if (debug & PRINT_VERBOSE) { + printf(" (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", + cat6->ldgb.src3, cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } } + return; + } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(&dst); + printf(", "); + printf("g[%u]", cat6->ldgb.src_ssbo); + + return; + } else if (_OPC(6, cat6->opc) == OPC_LDGB) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(&dst); + printf(", "); + printf("g[%u], ", cat6->ldgb.src_ssbo); + print_src(&src1); + printf(", "); + print_src(&src2); + if (debug & PRINT_VERBOSE) printf(" (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 9edcc58ce9c..55198cc053e 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -647,8 +647,11 @@ typedef struct PACKED { uint32_t pad0 : 15; } instr_cat6d_t; -/* ldgb and atomics.. atomics use 3rd src and pad0=1, pad3=3. For - * ldgb pad0=0, pad3=2 +/* ldgb and atomics.. + * + * ldgb: pad0=0, pad3=1 + * atomic .g: pad0=1, pad3=1 + * .l: pad0=1, pad3=0 */ typedef struct PACKED { /* dword0: */ @@ -667,7 +670,8 @@ typedef struct PACKED { uint32_t mustbe0 : 1; uint32_t src_ssbo : 8; uint32_t pad2 : 3; // type - uint32_t pad3 : 2; + uint32_t g : 1; + uint32_t pad3 : 1; uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat } instr_cat6ldgb_t; @@ -822,4 +826,18 @@ static inline bool is_atomic(opc_t opc) } } +static inline bool is_ssbo(opc_t opc) +{ + switch (opc) { + case OPC_RESFMT: + case OPC_RESINFO: + case OPC_LDGB: + case OPC_STGB: + case OPC_STIB: + return true; + default: + return false; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 61bc3b5ad47..6db0a2a20cd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -501,21 +501,58 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; } - /* TODO we need a more comprehensive list about which instructions * can be encoded which way. Or possibly use IR3_INSTR_0 flag to * indicate to use the src_off encoding even if offset is zero * (but then what to do about dst_off?) */ - if ((instr->opc == OPC_LDGB) || is_atomic(instr->opc)) { + if (is_atomic(instr->opc)) { + instr_cat6ldgb_t *ldgb = ptr; + + /* maybe these two bits both determine the instruction encoding? */ + cat6->src_off = false; + + ldgb->d = instr->cat6.d - 1; + ldgb->typed = instr->cat6.typed; + ldgb->type_size = instr->cat6.iim_val - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + if (ldgb->g) { + struct ir3_register *src3 = instr->regs[3]; + struct ir3_register *src4 = instr->regs[4]; + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); + + ldgb->src3 = reg(src4, info, instr->repeat, 0); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x1; + } else { + ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x0; + } + + return 0; + } else if (instr->opc == OPC_LDGB) { struct ir3_register *src3 = instr->regs[3]; instr_cat6ldgb_t *ldgb = ptr; /* maybe these two bits both determine the instruction encoding? */ cat6->src_off = false; - ldgb->d = 4 - 1; /* always .4d ? */ - ldgb->typed = false; /* TODO true for images */ + ldgb->d = instr->cat6.d - 1; + ldgb->typed = instr->cat6.typed; ldgb->type_size = instr->cat6.iim_val - 1; ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); @@ -530,18 +567,23 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); - if (is_atomic(instr->opc)) { - struct ir3_register *src4 = instr->regs[4]; - ldgb->src3 = reg(src4, info, instr->repeat, 0); - ldgb->pad0 = 0x1; - ldgb->pad3 = 0x3; - } else { - ldgb->pad0 = 0x0; - ldgb->pad3 = 0x2; - } + ldgb->pad0 = 0x0; + ldgb->pad3 = 0x1; + + return 0; + } else if (instr->opc == OPC_RESINFO) { + instr_cat6ldgb_t *ldgb = ptr; + + ldgb->d = instr->cat6.d - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; return 0; - } else if (instr->opc == OPC_STGB) { + } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { struct ir3_register *src3 = instr->regs[4]; instr_cat6stgb_t *stgb = ptr; @@ -549,8 +591,8 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, cat6->src_off = true; stgb->pad3 = 0x2; - stgb->d = 4 - 1; /* always .4d ? */ - stgb->typed = false; + stgb->d = instr->cat6.d - 1; + stgb->typed = instr->cat6.typed; stgb->type_size = instr->cat6.iim_val - 1; /* first src is dst_ssbo: */ @@ -565,7 +607,8 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); return 0; - } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) { + } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || + (instr->opc == OPC_LDL)) { instr_cat6a_t *cat6a = ptr; cat6->src_off = true; @@ -590,7 +633,8 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, } } - if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) { + if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || + (instr->opc == OPC_STL)) { instr_cat6c_t *cat6c = ptr; cat6->dst_off = true; cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 0ff8aba63bd..4658674488a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -226,7 +226,9 @@ struct ir3_instruction { type_t type; int src_offset; int dst_offset; - int iim_val; /* for ldgb/stgb, # of components */ + int iim_val : 3; /* for ldgb/stgb, # of components */ + int d : 3; + bool typed : 1; } cat6; struct { unsigned w : 1; /* write */ @@ -631,6 +633,7 @@ is_store(struct ir3_instruction *instr) switch (instr->opc) { case OPC_STG: case OPC_STGB: + case OPC_STIB: case OPC_STP: case OPC_STL: case OPC_STLW: -- 2.30.2