pan/midgard: Do witchcraft on texture offsets
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 20 Dec 2019 18:48:24 +0000 (13:48 -0500)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tue, 24 Dec 2019 23:46:23 +0000 (23:46 +0000)
My latest divination spell has uncovered a pattern in the aether.
Although the swizzle is unaligned, its format is otherwise standard.
Document this, removing the old incorrect understanding of the swizzle
(which coincided on common special swizzles only).

Fixes dEQP-GLES3.functional.shaders.texture_functions.texelfetchoffset.sampler2d_fixed_fragment

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/disassemble.c
src/panfrost/midgard/midgard.h
src/panfrost/midgard/midgard_ra.c

index cfce1110318ec29f502df90c9b84a3d31a84370b..d4354388959f58fa2156fb305a04aea7d382b389 100644 (file)
@@ -1366,9 +1366,9 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned
         if (texture->offset_register) {
                 printf(" + ");
 
-                bool full = texture->offset_x & 1;
-                bool select = texture->offset_x & 2;
-                bool upper = texture->offset_x & 4;
+                bool full = texture->offset & 1;
+                bool select = texture->offset & 2;
+                bool upper = texture->offset & 4;
 
                 printf("%sr%u", full ? "" : "h", in_reg_base + select);
                 assert(!(texture->out_full && texture->out_upper));
@@ -1377,30 +1377,19 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned
                 if (upper)
                         printf("'");
 
-                /* The less questions you ask, the better. */
-
-                unsigned swizzle_lo, swizzle_hi;
-                unsigned orig_y = texture->offset_y;
-                unsigned orig_z = texture->offset_z;
-
-                memcpy(&swizzle_lo, &orig_y, sizeof(unsigned));
-                memcpy(&swizzle_hi, &orig_z, sizeof(unsigned));
-
-                /* Duplicate hi swizzle over */
-                assert(swizzle_hi < 4);
-                swizzle_hi = (swizzle_hi << 2) | swizzle_hi;
-
-                unsigned swiz = (swizzle_lo << 4) | swizzle_hi;
-                unsigned reversed = util_bitreverse(swiz) >> 24;
-                print_swizzle_vec4(reversed, false, false);
+                print_swizzle_vec4(texture->offset >> 3, false, false);
 
                 printf(", ");
-        } else if (texture->offset_x || texture->offset_y || texture->offset_z) {
+        } else if (texture->offset) {
                 /* Only select ops allow negative immediate offsets, verify */
 
-                bool neg_x = texture->offset_x < 0;
-                bool neg_y = texture->offset_y < 0;
-                bool neg_z = texture->offset_z < 0;
+                signed offset_x = (texture->offset & 0xF);
+                signed offset_y = ((texture->offset >> 4) & 0xF);
+                signed offset_z = ((texture->offset >> 8) & 0xF);
+
+                bool neg_x = offset_x < 0;
+                bool neg_y = offset_y < 0;
+                bool neg_z = offset_z < 0;
                 bool any_neg = neg_x || neg_y || neg_z;
 
                 if (any_neg && texture->op != TEXTURE_OP_TEXEL_FETCH)
@@ -1408,10 +1397,7 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned
 
                 /* Regardless, just print the immediate offset */
 
-                printf(" + <%d, %d, %d>, ",
-                       texture->offset_x,
-                       texture->offset_y,
-                       texture->offset_z);
+                printf(" + <%d, %d, %d>, ", offset_x, offset_y, offset_z);
         } else {
                 printf(", ");
         }
index ea026621db35c390f2d0f42516fabe412dde56ff..807f8f1ede945beaa68664833255e8be4ef91d3f 100644 (file)
@@ -691,16 +691,30 @@ __attribute__((__packed__))
         /* In immediate mode, each offset field is an immediate range [0, 7].
          *
          * In register mode, offset_x becomes a register full / select / upper
-         * triplet and a vec3 swizzle is splattered across offset_y/offset_z in
-         * a genuinely bizarre way.
+         * triplet, followed by a swizzle in the standard format that is
+         * merely unaligned, straddling the old offset_y/offset_z fields.
          *
          * For texel fetches in immediate mode, the range is the full [-8, 7],
          * but for normal texturing the top bit must be zero and a register
-         * used instead. It's not clear where this limitation is from. */
+         * used instead. It's not clear where this limitation is from.
+         *
+         * union {
+         *      struct {
+         *              signed offset_x  : 4;
+         *              signed offset_y  : 4;
+         *              signed offset_z  : 4;
+         *      } immediate;
+         *      struct {
+         *              bool full        : 1;
+         *              bool select      : 1;
+         *              bool upper       : 1;
+         *              unsigned swizzle : 8;
+         *              unsigned zero    : 1;
+         *      } register;
+         * }
+         */
 
-        signed offset_x : 4;
-        signed offset_y : 4;
-        signed offset_z : 4;
+        unsigned offset : 12;
 
         /* In immediate bias mode, for a normal texture op, this is
          * texture bias, computed as int(2^8 * frac(biasf)), with
index 7d116e176bc985bf1703f29cc328391f8c8ce3fa..65e4d2462823c162e4820a7deb50034b4ec23fc6 100644 (file)
@@ -518,6 +518,13 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                         set_class(l->class, ins->src[1], REG_CLASS_TEXR);
                         set_class(l->class, ins->src[2], REG_CLASS_TEXR);
                         set_class(l->class, ins->src[3], REG_CLASS_TEXR);
+
+                        /* Texture offsets need to be aligned to vec4, since
+                         * the swizzle for x is forced to x in hardware, while
+                         * the other components are free. TODO: Relax to 8 for
+                         * half-registers if that ever occurs. */
+
+                        //lcra_restrict_range(l, ins->src[3], 16);
                 }
         }
 
@@ -549,13 +556,6 @@ allocate_registers(compiler_context *ctx, bool *spilled)
         return l;
 }
 
-/* Reverses 2 bits, used to pack swizzles of offsets for some reason */
-
-static unsigned
-mir_reverse2(unsigned in)
-{
-        return (in >> 1) | ((in & 1) << 1);
-}
 
 /* Once registers have been decided via register allocation
  * (allocate_registers), we need to rewrite the MIR to use registers instead of
@@ -694,20 +694,20 @@ install_registers_instr(
 
                 /* If there is an offset register, install it */
                 if (ins->src[3] != ~0) {
-                        ins->texture.offset_x = 
-                                (1)                   | /* full */
-                                (offset.reg & 1) << 1 | /* select */
-                                0 << 2;                 /* upper */
-
                         unsigned x = offset.offset / 4;
                         unsigned y = x + 1;
                         unsigned z = x + 2;
 
-                        ins->texture.offset_y =
-                                mir_reverse2(y) | (mir_reverse2(x) << 2);
+                        /* Check range, TODO: half-registers */
+                        assert(z < 4);
 
-                        ins->texture.offset_z =
-                                mir_reverse2(z);
+                        ins->texture.offset =
+                                (1)                   | /* full */
+                                (offset.reg & 1) << 1 | /* select */
+                                (0 << 2)              | /* upper */
+                                (x << 3)              | /* swizzle */
+                                (y << 5)              | /* swizzle */
+                                (z << 7);               /* swizzle */
                 }
 
                 break;