freedreno/a6xx: document LRZ flag buffer
authorRob Clark <robdclark@chromium.org>
Wed, 27 May 2020 20:50:05 +0000 (13:50 -0700)
committerMarge Bot <eric+marge@anholt.net>
Fri, 29 May 2020 00:38:28 +0000 (00:38 +0000)
Doesn't seem to be a big win, although I could still be missing
something in my implementation.  But might as well add the
documentation.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5217>

src/freedreno/registers/a6xx.xml
src/gallium/drivers/freedreno/a6xx/fd6_gmem.c

index a718b24149f4c64ae810596d8728c19910943bb1..72c0c384fca9e985214c7c610d9e1e1daea292e7 100644 (file)
@@ -1973,7 +1973,7 @@ to upconvert to 32b float internally?
                <bitfield name="LRZ_WRITE" pos="1" type="boolean"/>
                <doc>update MAX instead of MIN value, ie. GL_GREATER/GL_GEQUAL</doc>
                <bitfield name="GREATER" pos="2" type="boolean"/>
-               <bitfield name="UNK3" pos="3" type="boolean"/>
+               <bitfield name="FC_ENABLE" pos="3" type="boolean"/>
                <!-- set when depth-test + depth-write enabled -->
                <bitfield name="Z_TEST_ENABLE" pos="4" type="boolean"/>
        </reg32>
@@ -1988,6 +1988,37 @@ to upconvert to 32b float internally?
                <bitfield name="PITCH" low="0" high="10" shr="5" type="uint"/>
                <bitfield name="ARRAY_PITCH" low="11" high="21" shr="5" type="uint"/> <!-- ??? -->
        </reg32>
+
+       <!--
+       The LRZ "fast clear" buffer is initialized to zero's by blob, and
+       read/written when GRAS_LRZ_CNTL.FC_ENABLE (b3) is set.  It appears
+       to store 1b/block.  It appears that '0' means block has original
+       depth clear value, and '1' means that the corresponding block in
+       LRZ has been modified.  Ignoring alignment/padding, the size is
+       given by the formula:
+
+               // calculate LRZ size from depth size:
+               if (nr_samples == 4) {
+                       width *= 2;
+                       height *= 2;
+               } else if (nr_samples == 2) {
+                       height *= 2;
+               }
+
+               lrz_width = div_round_up(width, 8);
+               lrz_heigh = div_round_up(height, 8);
+
+               // calculate # of blocks:
+               nblocksx = div_round_up(lrz_width, 16);
+               nblocksy = div_round_up(lrz_height, 4);
+
+               // fast-clear buffer is 1bit/block:
+               fc_sz = div_round_up(nblocksx * nblocksy, 8);
+
+       In practice the blob seems to switch off FC_ENABLE once the size
+       increases beyond 1 page.  Not sure if that is an actual limit or
+       not.
+        -->
        <reg32 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO"/>
        <reg32 offset="0x8107" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI"/>
        <reg32 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" type="waddress"/>
index b71c77ddc6cb046e5cbc800c287d850fd888e9d5..790215dbc0bab0f7a7bb44cbdca53d96301ccb63 100644 (file)
@@ -1381,7 +1381,7 @@ fd6_emit_tile_fini(struct fd_batch *batch)
                fd6_emit_ib(batch->gmem, batch->epilogue);
 
        OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
-       OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
+       OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_FC_ENABLE);
 
        fd6_emit_lrz_flush(ring);