From 1bce7fdecd86601a300be9a58a346b8c110d9587 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 20 Dec 2019 13:48:24 -0500 Subject: [PATCH] pan/midgard: Do witchcraft on texture offsets My latest divination spell has uncovered a pattern in the aether. Although the swizzle is unaligned, its format is otherwise standard. Document this, removing the old incorrect understanding of the swizzle (which coincided on common special swizzles only). Fixes dEQP-GLES3.functional.shaders.texture_functions.texelfetchoffset.sampler2d_fixed_fragment Signed-off-by: Alyssa Rosenzweig --- src/panfrost/midgard/disassemble.c | 40 ++++++++++-------------------- src/panfrost/midgard/midgard.h | 26 ++++++++++++++----- src/panfrost/midgard/midgard_ra.c | 32 ++++++++++++------------ 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/panfrost/midgard/disassemble.c b/src/panfrost/midgard/disassemble.c index cfce1110318..d4354388959 100644 --- a/src/panfrost/midgard/disassemble.c +++ b/src/panfrost/midgard/disassemble.c @@ -1366,9 +1366,9 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned if (texture->offset_register) { printf(" + "); - bool full = texture->offset_x & 1; - bool select = texture->offset_x & 2; - bool upper = texture->offset_x & 4; + bool full = texture->offset & 1; + bool select = texture->offset & 2; + bool upper = texture->offset & 4; printf("%sr%u", full ? "" : "h", in_reg_base + select); assert(!(texture->out_full && texture->out_upper)); @@ -1377,30 +1377,19 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned if (upper) printf("'"); - /* The less questions you ask, the better. 
*/ - - unsigned swizzle_lo, swizzle_hi; - unsigned orig_y = texture->offset_y; - unsigned orig_z = texture->offset_z; - - memcpy(&swizzle_lo, &orig_y, sizeof(unsigned)); - memcpy(&swizzle_hi, &orig_z, sizeof(unsigned)); - - /* Duplicate hi swizzle over */ - assert(swizzle_hi < 4); - swizzle_hi = (swizzle_hi << 2) | swizzle_hi; - - unsigned swiz = (swizzle_lo << 4) | swizzle_hi; - unsigned reversed = util_bitreverse(swiz) >> 24; - print_swizzle_vec4(reversed, false, false); + print_swizzle_vec4(texture->offset >> 3, false, false); printf(", "); - } else if (texture->offset_x || texture->offset_y || texture->offset_z) { + } else if (texture->offset) { /* Only select ops allow negative immediate offsets, verify */ - bool neg_x = texture->offset_x < 0; - bool neg_y = texture->offset_y < 0; - bool neg_z = texture->offset_z < 0; + signed offset_x = (texture->offset & 0xF); + signed offset_y = ((texture->offset >> 4) & 0xF); + signed offset_z = ((texture->offset >> 8) & 0xF); + + bool neg_x = offset_x < 0; + bool neg_y = offset_y < 0; + bool neg_z = offset_z < 0; bool any_neg = neg_x || neg_y || neg_z; if (any_neg && texture->op != TEXTURE_OP_TEXEL_FETCH) @@ -1408,10 +1397,7 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned /* Regardless, just print the immediate offset */ - printf(" + <%d, %d, %d>, ", - texture->offset_x, - texture->offset_y, - texture->offset_z); + printf(" + <%d, %d, %d>, ", offset_x, offset_y, offset_z); } else { printf(", "); } diff --git a/src/panfrost/midgard/midgard.h b/src/panfrost/midgard/midgard.h index ea026621db3..807f8f1ede9 100644 --- a/src/panfrost/midgard/midgard.h +++ b/src/panfrost/midgard/midgard.h @@ -691,16 +691,30 @@ __attribute__((__packed__)) /* In immediate mode, each offset field is an immediate range [0, 7]. * * In register mode, offset_x becomes a register full / select / upper - * triplet and a vec3 swizzle is splattered across offset_y/offset_z in - * a genuinely bizarre way. 
+ * triplet followed by a vec3 swizzle splattered across + * offset_y/offset_z in a genuinely bizarre way. * * For texel fetches in immediate mode, the range is the full [-8, 7], * but for normal texturing the top bit must be zero and a register - * used instead. It's not clear where this limitation is from. */ + * used instead. It's not clear where this limitation is from. + * + * union { + * struct { + * signed offset_x : 4; + * signed offset_y : 4; + * signed offset_z : 4; + * } immediate; + * struct { + * bool full : 1; + * bool select : 1; + * bool upper : 1; + * unsigned swizzle : 8; + * unsigned zero : 1; + * } register; + * } + */ - signed offset_x : 4; - signed offset_y : 4; - signed offset_z : 4; + unsigned offset : 12; /* In immediate bias mode, for a normal texture op, this is * texture bias, computed as int(2^8 * frac(biasf)), with diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 7d116e176bc..65e4d246282 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -518,6 +518,13 @@ allocate_registers(compiler_context *ctx, bool *spilled) set_class(l->class, ins->src[1], REG_CLASS_TEXR); set_class(l->class, ins->src[2], REG_CLASS_TEXR); set_class(l->class, ins->src[3], REG_CLASS_TEXR); + + /* Texture offsets need to be aligned to vec4, since + * the swizzle for x is forced to x in hardware, while + * the other components are free. TODO: Relax to 8 for + * half-registers if that ever occurs.
*/ + + //lcra_restrict_range(l, ins->src[3], 16); } } @@ -549,13 +556,6 @@ allocate_registers(compiler_context *ctx, bool *spilled) return l; } -/* Reverses 2 bits, used to pack swizzles of offsets for some reason */ - -static unsigned -mir_reverse2(unsigned in) -{ - return (in >> 1) | ((in & 1) << 1); -} /* Once registers have been decided via register allocation * (allocate_registers), we need to rewrite the MIR to use registers instead of @@ -694,20 +694,20 @@ install_registers_instr( /* If there is an offset register, install it */ if (ins->src[3] != ~0) { - ins->texture.offset_x = - (1) | /* full */ - (offset.reg & 1) << 1 | /* select */ - 0 << 2; /* upper */ - unsigned x = offset.offset / 4; unsigned y = x + 1; unsigned z = x + 2; - ins->texture.offset_y = - mir_reverse2(y) | (mir_reverse2(x) << 2); + /* Check range, TODO: half-registers */ + assert(z < 4); - ins->texture.offset_z = - mir_reverse2(z); + ins->texture.offset = + (1) | /* full */ + (offset.reg & 1) << 1 | /* select */ + (0 << 2) | /* upper */ + (x << 3) | /* swizzle */ + (y << 5) | /* swizzle */ + (z << 7); /* swizzle */ } break; -- 2.30.2