src/freedreno/fdl/fd6_layout.c

   1 /*
   2  * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
   3  * Copyright © 2018-2019 Google, Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  * Authors:
  25  *    Rob Clark <robclark@freedesktop.org>
  26  */
  27
  28 #include <stdio.h>
  29
  30 #include "freedreno_layout.h"
  31
  32 /* indexed by cpp, including msaa 2x and 4x:
  33  * TODO:
  34  * cpp=1 UBWC needs testing at larger texture sizes
  35  * missing UBWC blockwidth/blockheight for npot+64 cpp
  36  * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
  37  */
  38 static const struct tile_alignment {
  39         unsigned basealign;
  40         unsigned pitchalign;
  41         unsigned heightalign;
  42         /* UBWC block width/height.  Used in size alignment, and calculating a
  43          * descriptor's FLAG_BUFFER_LOG2W/H for mipmapping.
  44          */
  45         uint8_t ubwc_blockwidth;
  46         uint8_t ubwc_blockheight;
  47 } tile_alignment[] = {
  48         [1]  = {  64, 128, 32, 16, 4 },
  49         [2]  = { 128, 128, 16, 16, 4 },
  50         [3]  = { 256,  64, 32 },
  51         [4]  = { 256,  64, 16, 16, 4 },
  52         [6]  = { 256,  64, 16 },
  53         [8]  = { 256,  64, 16, 8, 4, },
  54         [12] = { 256,  64, 16 },
  55         [16] = { 256,  64, 16, 4, 4, },
  56         [24] = { 256,  64, 16 },
  57         [32] = { 256,  64, 16, 4, 2 },
  58         [48] = { 256,  64, 16 },
  59         [64] = { 256,  64, 16 },
  60
  61         /* special cases for r8g8: */
  62         [0]  = { 256, 64, 32, 16, 8 },
  63 };
  64
  65 #define RGB_TILE_WIDTH_ALIGNMENT 64
  66 #define RGB_TILE_HEIGHT_ALIGNMENT 16
  67 #define UBWC_PLANE_SIZE_ALIGNMENT 4096
  68
  69 static const struct tile_alignment *
  70 fdl6_tile_alignment(struct fdl_layout *layout)
  71 {
  72         debug_assert(layout->cpp < ARRAY_SIZE(tile_alignment));
  73
  74         if ((layout->cpp == 2) && (util_format_get_nr_components(layout->format) == 2))
  75                 return &tile_alignment[0];
  76         else
  77                 return &tile_alignment[layout->cpp];
  78 }
  79
  80 static int
  81 fdl6_pitchalign(struct fdl_layout *layout, int level)
  82 {
  83         uint32_t pitchalign = 64;
  84         if (fdl_tile_mode(layout, level))
  85                 pitchalign = fdl6_tile_alignment(layout)->pitchalign;
  86
  87         return pitchalign;
  88 }
  89
  90 /* NOTE: good way to test this is:  (for example)
  91  *  piglit/bin/texelFetch fs sampler3D 100x100x8
  92  */
  93 void
  94 fdl6_layout(struct fdl_layout *layout,
  95                 enum pipe_format format, uint32_t nr_samples,
  96                 uint32_t width0, uint32_t height0, uint32_t depth0,
  97                 uint32_t mip_levels, uint32_t array_size, bool is_3d)
  98 {
  99         assert(nr_samples > 0);
 100         layout->width0 = width0;
 101         layout->height0 = height0;
 102         layout->depth0 = depth0;
 103
 104         layout->cpp = util_format_get_blocksize(format);
 105         layout->cpp *= nr_samples;
 106         layout->cpp_shift = ffs(layout->cpp) - 1;
 107
 108         layout->format = format;
 109         layout->nr_samples = nr_samples;
 110         layout->layer_first = !is_3d;
 111
 112         if (depth0 > 1)
 113                 layout->ubwc = false;
 114         if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
 115                 layout->ubwc = false;
 116
 117         const struct tile_alignment *ta = fdl6_tile_alignment(layout);
 118
 119         /* in layer_first layout, the level (slice) contains just one
 120          * layer (since in fact the layer contains the slices)
 121          */
 122         uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
 123
 124         debug_assert(ta->pitchalign);
 125
 126         if (layout->tile_mode) {
 127                 layout->base_align = ta->basealign;
 128         } else {
 129                 layout->base_align = 64;
 130         }
 131
 132         uint32_t pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, 0));
 133
 134         uint32_t ubwc_width0 = width0;
 135         uint32_t ubwc_height0 = height0;
 136         if (mip_levels > 1) {
 137                 /* With mipmapping enabled, UBWC layout is power-of-two sized,
 138                  * specified in log2 width/height in the descriptors.
 139                  */
 140                 ubwc_width0 = util_next_power_of_two(width0);
 141                 ubwc_height0 = util_next_power_of_two(height0);
 142         }
 143         ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ta->ubwc_blockwidth),
 144                         RGB_TILE_WIDTH_ALIGNMENT);
 145         ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ta->ubwc_blockheight),
 146                         RGB_TILE_HEIGHT_ALIGNMENT);
 147
 148         for (uint32_t level = 0; level < mip_levels; level++) {
 149                 uint32_t depth = u_minify(depth0, level);
 150                 struct fdl_slice *slice = &layout->slices[level];
 151                 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
 152                 uint32_t tile_mode = fdl_tile_mode(layout, level);
 153                 uint32_t height;
 154
 155                 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
 156                 if (is_3d && tile_mode) {
 157                         height = u_minify(util_next_power_of_two(height0), level);
 158                 } else {
 159                         height = u_minify(height0, level);
 160                 }
 161
 162                 uint32_t nblocksy = util_format_get_nblocksy(format, height);
 163                 if (tile_mode)
 164                         nblocksy = align(nblocksy, ta->heightalign);
 165
 166                 /* The blits used for mem<->gmem work at a granularity of
 167                  * 32x32, which can cause faults due to over-fetch on the
 168                  * last level.  The simple solution is to over-allocate a
 169                  * bit the last level to ensure any over-fetch is harmless.
 170                  * The pitch is already sufficiently aligned, but height
 171                  * may not be:
 172                  */
 173                 if (level == mip_levels - 1)
 174                         nblocksy = align(nblocksy, 32);
 175
 176                 uint32_t nblocksx =
 177                         util_align_npot(util_format_get_nblocksx(format, u_minify(pitch0, level)),
 178                                         fdl6_pitchalign(layout, level));
 179
 180                 slice->offset = layout->size;
 181                 uint32_t blocks = nblocksx * nblocksy;
 182
 183                 slice->pitch = nblocksx * layout->cpp;
 184
 185                 /* 1d array and 2d array textures must all have the same layer size
 186                  * for each miplevel on a6xx. 3d textures can have different layer
 187                  * sizes for high levels, but the hw auto-sizer is buggy (or at least
 188                  * different than what this code does), so as soon as the layer size
 189                  * range gets into range, we stop reducing it.
 190                  */
 191                 if (is_3d) {
 192                         if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
 193                                 slice->size0 = align(blocks * layout->cpp, 4096);
 194                         } else {
 195                                 slice->size0 = layout->slices[level - 1].size0;
 196                         }
 197                 } else {
 198                         slice->size0 = blocks * layout->cpp;
 199                 }
 200
 201                 layout->size += slice->size0 * depth * layers_in_level;
 202
 203                 if (layout->ubwc) {
 204                         /* with UBWC every level is aligned to 4K */
 205                         layout->size = align(layout->size, 4096);
 206
 207                         uint32_t meta_pitch = align(u_minify(ubwc_width0, level),
 208                                         RGB_TILE_WIDTH_ALIGNMENT);
 209                         uint32_t meta_height = align(u_minify(ubwc_height0, level),
 210                                         RGB_TILE_HEIGHT_ALIGNMENT);
 211
 212                         ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
 213                         ubwc_slice->pitch = meta_pitch;
 214                         ubwc_slice->offset = layout->ubwc_layer_size;
 215                         layout->ubwc_layer_size += ubwc_slice->size0;
 216                 }
 217         }
 218
 219         if (layout->layer_first) {
 220                 layout->layer_size = align(layout->size, 4096);
 221                 layout->size = layout->layer_size * array_size;
 222         }
 223
 224         /* Place the UBWC slices before the uncompressed slices, because the
 225          * kernel expects UBWC to be at the start of the buffer.  In the HW, we
 226          * get to program the UBWC and non-UBWC offset/strides
 227          * independently.
 228          */
 229         if (layout->ubwc) {
 230                 for (uint32_t level = 0; level < mip_levels; level++)
 231                         layout->slices[level].offset += layout->ubwc_layer_size * array_size;
 232                 layout->size += layout->ubwc_layer_size * array_size;
 233         }
 234 }
 235
 236 void
 237 fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
 238                 uint32_t *blockwidth, uint32_t *blockheight)
 239 {
 240         const struct tile_alignment *ta = fdl6_tile_alignment(layout);
 241         *blockwidth = ta->ubwc_blockwidth;
 242         *blockheight = ta->ubwc_blockheight;
 243 }