freedreno/fdl6: rework layout code a bit (reduce linear align to 64 bytes)
author: Jonathan Marek <jonathan@marek.ca>
Tue, 9 Jun 2020 22:48:34 +0000 (18:48 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 18 Jun 2020 02:26:43 +0000 (02:26 +0000)
Reduce linear alignment, and rework the layout code a bit.

This rework has a side effect of also increasing the alignment on linear
levels of tiled (non-ubwc) cpp=1 and cpp=2 layouts. Since we should be
UBWC for those cases anyway, it's not a big loss.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5013>

src/freedreno/fdl/fd6_layout.c
src/freedreno/fdl/fd6_layout_test.c
src/freedreno/vulkan/tu_clear_blit.c
src/gallium/drivers/freedreno/freedreno_resource.c

index 146e86900aa8360facfc6cb9678845984549e310..b5ffde5f8fe7e8956867300e51227b4ff3a8cd83 100644 (file)
 
 #include "freedreno_layout.h"
 
-/* indexed by cpp, including msaa 2x and 4x:
- * TODO:
- * cpp=1 UBWC needs testing at larger texture sizes
- * missing UBWC blockwidth/blockheight for npot+64 cpp
- * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
- */
-static const struct tile_alignment {
-       unsigned basealign;
-       unsigned pitchalign;
-       unsigned heightalign;
-       /* UBWC block width/height.  Used in size alignment, and calculating a
-        * descriptor's FLAG_BUFFER_LOG2W/H for mipmapping.
-        */
-       uint8_t ubwc_blockwidth;
-       uint8_t ubwc_blockheight;
-} tile_alignment[] = {
-       [1]  = {  64, 128, 32, 16, 4 },
-       [2]  = { 128, 128, 16, 16, 4 },
-       [3]  = { 256,  64, 32 },
-       [4]  = { 256,  64, 16, 16, 4 },
-       [6]  = { 256,  64, 16 },
-       [8]  = { 256,  64, 16, 8, 4, },
-       [12] = { 256,  64, 16 },
-       [16] = { 256,  64, 16, 4, 4, },
-       [24] = { 256,  64, 16 },
-       [32] = { 256,  64, 16, 4, 2 },
-       [48] = { 256,  64, 16 },
-       [64] = { 256,  64, 16 },
-
-       /* special cases for r8g8: */
-       [0]  = { 256, 64, 32, 16, 8 },
-};
-
 #define RGB_TILE_WIDTH_ALIGNMENT 64
 #define RGB_TILE_HEIGHT_ALIGNMENT 16
 #define UBWC_PLANE_SIZE_ALIGNMENT 4096
 
-static const struct tile_alignment *
-fdl6_tile_alignment(struct fdl_layout *layout)
+static bool
+is_r8g8(struct fdl_layout *layout)
 {
-       debug_assert(layout->cpp < ARRAY_SIZE(tile_alignment));
+       return layout->cpp == 2 &&
+                  util_format_get_nr_components(layout->format) == 2;
+}
 
-       if ((layout->cpp == 2) && (util_format_get_nr_components(layout->format) == 2))
-               return &tile_alignment[0];
-       else
-               return &tile_alignment[layout->cpp];
+void
+fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
+               uint32_t *blockwidth, uint32_t *blockheight)
+{
+       static const struct {
+               uint8_t width;
+               uint8_t height;
+       } blocksize[] = {
+               { 16, 4 }, /* cpp = 1 */
+               { 16, 4 }, /* cpp = 2 */
+               { 16, 4 }, /* cpp = 4 */
+               { 8, 4, }, /* cpp = 8 */
+               { 4, 4, }, /* cpp = 16 */
+               { 4, 2 },  /* cpp = 32 */
+               { 0, 0 },  /* cpp = 64 (TODO) */
+       };
+
+       /* special case for r8g8: */
+       if (is_r8g8(layout)) {
+               *blockwidth = 16;
+               *blockheight = 8;
+               return;
+       }
+
+       uint32_t cpp = fdl_cpp_shift(layout);
+       assert(cpp < ARRAY_SIZE(blocksize));
+       *blockwidth = blocksize[cpp].width;
+       *blockheight = blocksize[cpp].height;
 }
 
-static int
-fdl6_pitchalign(struct fdl_layout *layout, int level)
+static void
+fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
 {
-       uint32_t pitchalign = 64;
-       if (fdl_tile_mode(layout, level))
-               pitchalign = fdl6_tile_alignment(layout)->pitchalign;
+       layout->pitchalign = fdl_cpp_shift(layout);
+       *heightalign = 16;
+
+       if (is_r8g8(layout) || layout->cpp == 1) {
+               layout->pitchalign = 1;
+               *heightalign = 32;
+       } else if (layout->cpp == 2) {
+               layout->pitchalign = 2;
+       }
 
-       return pitchalign;
+       /* note: this base_align is *probably* not always right,
+        * it doesn't really get tested. for example with UBWC we might
+        * want 4k alignment, since we align UBWC levels to 4k
+        */
+       if (layout->cpp == 1)
+               layout->base_align = 64;
+       else if (layout->cpp == 2)
+               layout->base_align = 128;
+       else
+               layout->base_align = 256;
 }
 
 /* NOTE: good way to test this is:  (for example)
@@ -97,8 +105,9 @@ fdl6_layout(struct fdl_layout *layout,
                uint32_t mip_levels, uint32_t array_size, bool is_3d,
                struct fdl_slice *plane_layout)
 {
-       uint32_t offset;
-       uint32_t pitch0;
+       uint32_t offset, pitch0;
+       uint32_t pitchalign, heightalign;
+       uint32_t ubwc_blockwidth, ubwc_blockheight;
 
        assert(nr_samples > 0);
        layout->width0 = width0;
@@ -113,37 +122,54 @@ fdl6_layout(struct fdl_layout *layout,
        layout->nr_samples = nr_samples;
        layout->layer_first = !is_3d;
 
-       if (depth0 > 1)
-               layout->ubwc = false;
-       if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
-               layout->ubwc = false;
+       fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
 
-       const struct tile_alignment *ta = fdl6_tile_alignment(layout);
+       if (depth0 > 1 || ubwc_blockwidth == 0)
+               layout->ubwc = false;
 
        /* in layer_first layout, the level (slice) contains just one
         * layer (since in fact the layer contains the slices)
         */
        uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
 
-       debug_assert(ta->pitchalign);
-
+       /* note: for tiled+noubwc layouts, we can use a lower pitchalign
+        * which will affect the linear levels only, (the hardware will still
+        * expect the tiled alignment on the tiled levels)
+        */
        if (layout->tile_mode) {
-               layout->base_align = ta->basealign;
+               fdl6_tile_alignment(layout, &heightalign);
        } else {
                layout->base_align = 64;
+               layout->pitchalign = 0;
+               /* align pitch to at least 16 pixels:
+                * both turnip and galium assume there is enough alignment for 16x4
+                * aligned gmem store. turnip can use CP_BLIT to work without this
+                * extra alignment, but gallium driver doesn't implement it yet
+                */
+               if (layout->cpp > 4)
+                       layout->pitchalign = fdl_cpp_shift(layout) - 2;
+
+               /* when possible, use a bit more alignment than necessary
+                * presumably this is better for performance?
+                */
+               if (!plane_layout)
+                       layout->pitchalign = fdl_cpp_shift(layout);
+
+               /* not used, avoid "may be used uninitialized" warning */
+               heightalign = 1;
        }
 
+       pitchalign = 64 << layout->pitchalign;
+
        if (plane_layout) {
                offset = plane_layout->offset;
                pitch0 = plane_layout->pitch;
-               if (align(pitch0, fdl6_pitchalign(layout, 0) * layout->cpp) != pitch0)
-                       return false;
-               pitch0 /= layout->cpp; /* explicit pitch is in bytes */
-               if (pitch0 < width0 && height0 > 1)
+               if (align(pitch0, pitchalign) != pitch0)
                        return false;
        } else {
+               uint32_t nblocksx = util_format_get_nblocksx(format, width0);
                offset = 0;
-               pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, 0));
+               pitch0 = util_align_npot(nblocksx * layout->cpp, pitchalign);
        }
 
        uint32_t ubwc_width0 = width0;
@@ -159,15 +185,11 @@ fdl6_layout(struct fdl_layout *layout,
                ubwc_height0 = util_next_power_of_two(height0);
                ubwc_tile_height_alignment = 64;
        }
-       ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ta->ubwc_blockwidth),
+       ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
                        RGB_TILE_WIDTH_ALIGNMENT);
-       ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0,
-                                       ta->ubwc_blockheight),
+       ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
                        ubwc_tile_height_alignment);
 
-       layout->pitchalign =
-               util_logbase2_ceil(fdl6_pitchalign(layout, mip_levels - 1) * layout->cpp >> 6);
-
        for (uint32_t level = 0; level < mip_levels; level++) {
                uint32_t depth = u_minify(depth0, level);
                struct fdl_slice *slice = &layout->slices[level];
@@ -184,7 +206,7 @@ fdl6_layout(struct fdl_layout *layout,
 
                uint32_t nblocksy = util_format_get_nblocksy(format, height);
                if (tile_mode)
-                       nblocksy = align(nblocksy, ta->heightalign);
+                       nblocksy = align(nblocksy, heightalign);
 
                /* The blits used for mem<->gmem work at a granularity of
                 * 16x4, which can cause faults due to over-fetch on the
@@ -196,14 +218,8 @@ fdl6_layout(struct fdl_layout *layout,
                if (level == mip_levels - 1)
                        height = align(nblocksy, 4);
 
-               uint32_t nblocksx =
-                       util_align_npot(util_format_get_nblocksx(format, u_minify(pitch0, level)),
-                                       fdl6_pitchalign(layout, level));
-
-               slice->offset = offset + layout->size;
-               uint32_t blocks = nblocksx * nblocksy;
-
-               slice->pitch = nblocksx * layout->cpp;
+               slice->offset = layout->size;
+               slice->pitch = align(u_minify(pitch0, level), pitchalign);
 
                /* 1d array and 2d array textures must all have the same layer size
                 * for each miplevel on a6xx. 3d textures can have different layer
@@ -213,12 +229,12 @@ fdl6_layout(struct fdl_layout *layout,
                 */
                if (is_3d) {
                        if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
-                               slice->size0 = align(blocks * layout->cpp, 4096);
+                               slice->size0 = align(nblocksy * slice->pitch, 4096);
                        } else {
                                slice->size0 = layout->slices[level - 1].size0;
                        }
                } else {
-                       slice->size0 = blocks * layout->cpp;
+                       slice->size0 = nblocksy * slice->pitch;
                }
 
                layout->size += slice->size0 * depth * layers_in_level;
@@ -260,12 +276,3 @@ fdl6_layout(struct fdl_layout *layout,
 
        return true;
 }
-
-void
-fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
-               uint32_t *blockwidth, uint32_t *blockheight)
-{
-       const struct tile_alignment *ta = fdl6_tile_alignment(layout);
-       *blockwidth = ta->ubwc_blockwidth;
-       *blockheight = ta->ubwc_blockheight;
-}
index c5b693a931d47215dab2d7d4c53093c3205737be..2a8083a866c4bad9488e2e9cf04e220ee533217d 100644 (file)
@@ -356,10 +356,10 @@ static const struct testcase testcases[] = {
                                { .offset = 8192, .pitch = 128 },
                                { .offset = 12288, .pitch = 128 },
                                { .offset = 16384, .pitch = 128 },
-                               { .offset = 20480, .pitch = 64 },
-                               { .offset = 20544, .pitch = 64 },
-                               { .offset = 20608, .pitch = 64 },
-                               { .offset = 20672, .pitch = 64 },
+                               { .offset = 20480, .pitch = 128 },
+                               { .offset = 20608, .pitch = 128 },
+                               { .offset = 20736, .pitch = 128 },
+                               { .offset = 20864, .pitch = 128 },
                        },
                },
        },
index 0ebe3ba2c90ed01ef5430e455e162d1f65189f03..df2359f3bb0888d25c3ef676a3c33014a925218d 100644 (file)
@@ -1129,10 +1129,6 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
    uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
    uint32_t layer_size = src_height * pitch;
 
-   /* note: the src_va/pitch alignment of 64 is for 2D engine,
-    * it is also valid for 1cpp format with shader path (stencil aspect path)
-    */
-
    ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
 
    struct tu_image_view dst;
@@ -1212,10 +1208,6 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
    uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
    uint32_t layer_size = pitch * dst_height;
 
-   /* note: the dst_va/pitch alignment of 64 is for 2D engine,
-    * it is also valid for 1cpp format with shader path (stencil aspect)
-    */
-
    ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);
 
    struct tu_image_view src;
index f7111a499e7b682a38e2c3427f42974197098728..7343dbcc9c61f9c9b976ab3abb658c4bbead8ae1 100644 (file)
@@ -1032,9 +1032,11 @@ fd_resource_from_handle(struct pipe_screen *pscreen,
 
        uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw * rsc->layout.cpp;
 
-       /* use 64 pitchalign on a6xx where gmem_alignw is not right */
+       /* pitchalign is 64-bytes for linear formats on a6xx
+        * layout_resource_for_modifier will validate tiled pitch
+        */
        if (is_a6xx(screen))
-               pitchalign = 64 * rsc->layout.cpp;
+               pitchalign = 64;
 
        if ((slice->pitch < align(prsc->width0 * rsc->layout.cpp, pitchalign)) ||
                        (slice->pitch & (pitchalign - 1)))