#include "freedreno_layout.h"
-/* indexed by cpp, including msaa 2x and 4x:
- * TODO:
- * cpp=1 UBWC needs testing at larger texture sizes
- * missing UBWC blockwidth/blockheight for npot+64 cpp
- * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
- */
-static const struct {
- unsigned basealign;
- unsigned pitchalign;
- unsigned heightalign;
- uint8_t ubwc_blockwidth;
- uint8_t ubwc_blockheight;
-} tile_alignment[] = {
- [1] = { 64, 128, 32, 16, 4 },
- [2] = { 128, 128, 16, 16, 4 },
- [3] = { 256, 64, 32 },
- [4] = { 256, 64, 16, 16, 4 },
- [6] = { 256, 64, 16 },
- [8] = { 256, 64, 16, 8, 4, },
- [12] = { 256, 64, 16 },
- [16] = { 256, 64, 16, 4, 4, },
- [24] = { 256, 64, 16 },
- [32] = { 256, 64, 16, 4, 2 },
- [48] = { 256, 64, 16 },
- [64] = { 256, 64, 16 },
-
- /* special cases for r8g8: */
- [0] = { 256, 64, 32, 16, 4 },
-};
-
-#define RGB_TILE_WIDTH_ALIGNMENT 64
-#define RGB_TILE_HEIGHT_ALIGNMENT 16
-#define UBWC_PLANE_SIZE_ALIGNMENT 4096
-
-static int
-fdl6_pitchalign(struct fdl_layout *layout, int ta, int level)
+static bool
+is_r8g8(struct fdl_layout *layout)
{
- const struct util_format_description *format_desc =
- util_format_description(layout->format);
-
- uint32_t pitchalign = 64;
- if (fdl_tile_mode(layout, level))
- pitchalign = tile_alignment[ta].pitchalign;
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC)
- pitchalign *= util_format_get_blockwidth(layout->format);
- return pitchalign;
+ return layout->cpp == 2 &&
+ util_format_get_nr_components(layout->format) == 2;
+}
+
+void
+fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
+ uint32_t *blockwidth, uint32_t *blockheight)
+{
+ static const struct {
+ uint8_t width;
+ uint8_t height;
+ } blocksize[] = {
+ { 16, 4 }, /* cpp = 1 */
+ { 16, 4 }, /* cpp = 2 */
+ { 16, 4 }, /* cpp = 4 */
+ { 8, 4, }, /* cpp = 8 */
+ { 4, 4, }, /* cpp = 16 */
+ { 4, 2 }, /* cpp = 32 */
+ { 0, 0 }, /* cpp = 64 (TODO) */
+ };
+
+ /* special case for r8g8: */
+ if (is_r8g8(layout)) {
+ *blockwidth = 16;
+ *blockheight = 8;
+ return;
+ }
+
+ uint32_t cpp = fdl_cpp_shift(layout);
+ assert(cpp < ARRAY_SIZE(blocksize));
+ *blockwidth = blocksize[cpp].width;
+ *blockheight = blocksize[cpp].height;
+}
+
+static void
+fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
+{
+ layout->pitchalign = fdl_cpp_shift(layout);
+ *heightalign = 16;
+
+ if (is_r8g8(layout) || layout->cpp == 1) {
+ layout->pitchalign = 1;
+ *heightalign = 32;
+ } else if (layout->cpp == 2) {
+ layout->pitchalign = 2;
+ }
+
+ /* note: this base_align is *probably* not always right,
+ * it doesn't really get tested. for example with UBWC we might
+ * want 4k alignment, since we align UBWC levels to 4k
+ */
+ if (layout->cpp == 1)
+ layout->base_align = 64;
+ else if (layout->cpp == 2)
+ layout->base_align = 128;
+ else
+ layout->base_align = 256;
}
/* NOTE: good way to test this is: (for example)
* piglit/bin/texelFetch fs sampler3D 100x100x8
*/
-void
+bool
fdl6_layout(struct fdl_layout *layout,
enum pipe_format format, uint32_t nr_samples,
uint32_t width0, uint32_t height0, uint32_t depth0,
- uint32_t mip_levels, uint32_t array_size, bool is_3d)
+ uint32_t mip_levels, uint32_t array_size, bool is_3d,
+ struct fdl_explicit_layout *explicit_layout)
{
+ uint32_t offset = 0, heightalign;
+ uint32_t ubwc_blockwidth, ubwc_blockheight;
+
assert(nr_samples > 0);
layout->width0 = width0;
layout->height0 = height0;
layout->nr_samples = nr_samples;
layout->layer_first = !is_3d;
- if (depth0 > 1)
- layout->ubwc = false;
- if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
- layout->ubwc = false;
-
- int ta = layout->cpp;
+ fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
- /* The z16/r16 formats seem to not play by the normal tiling rules: */
- if ((layout->cpp == 2) && (util_format_get_nr_components(format) == 2))
- ta = 0;
+ if (depth0 > 1 || ubwc_blockwidth == 0)
+ layout->ubwc = false;
/* in layer_first layout, the level (slice) contains just one
* layer (since in fact the layer contains the slices)
*/
uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
- debug_assert(ta < ARRAY_SIZE(tile_alignment));
- debug_assert(tile_alignment[ta].pitchalign);
-
+ /* note: for tiled+noubwc layouts, we can use a lower pitchalign
+ * which will affect the linear levels only, (the hardware will still
+ * expect the tiled alignment on the tiled levels)
+ */
if (layout->tile_mode) {
- layout->base_align = tile_alignment[ta].basealign;
+ fdl6_tile_alignment(layout, &heightalign);
} else {
layout->base_align = 64;
+ layout->pitchalign = 0;
+ /* align pitch to at least 16 pixels:
+ * both turnip and galium assume there is enough alignment for 16x4
+ * aligned gmem store. turnip can use CP_BLIT to work without this
+ * extra alignment, but gallium driver doesn't implement it yet
+ */
+ if (layout->cpp > 4)
+ layout->pitchalign = fdl_cpp_shift(layout) - 2;
+
+ /* when possible, use a bit more alignment than necessary
+ * presumably this is better for performance?
+ */
+ if (!explicit_layout)
+ layout->pitchalign = fdl_cpp_shift(layout);
+
+ /* not used, avoid "may be used uninitialized" warning */
+ heightalign = 1;
+ }
+
+ fdl_set_pitchalign(layout, layout->pitchalign + 6);
+
+ if (explicit_layout) {
+ offset = explicit_layout->offset;
+ layout->pitch0 = explicit_layout->pitch;
+ if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
+ return false;
}
- uint32_t pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, ta, 0));
+ uint32_t ubwc_width0 = width0;
+ uint32_t ubwc_height0 = height0;
+ uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
+ if (mip_levels > 1) {
+ /* With mipmapping enabled, UBWC layout is power-of-two sized,
+ * specified in log2 width/height in the descriptors. The height
+ * alignment is 64 for mipmapping, but for buffer sharing (always
+ * single level) other participants expect 16.
+ */
+ ubwc_width0 = util_next_power_of_two(width0);
+ ubwc_height0 = util_next_power_of_two(height0);
+ ubwc_tile_height_alignment = 64;
+ }
+ layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
+ RGB_TILE_WIDTH_ALIGNMENT);
+ ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
+ ubwc_tile_height_alignment);
for (uint32_t level = 0; level < mip_levels; level++) {
uint32_t depth = u_minify(depth0, level);
struct fdl_slice *slice = &layout->slices[level];
struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
uint32_t tile_mode = fdl_tile_mode(layout, level);
- uint32_t width, height;
+ uint32_t pitch = fdl_pitch(layout, level);
+ uint32_t height;
/* tiled levels of 3D textures are rounded up to PoT dimensions: */
if (is_3d && tile_mode) {
- width = u_minify(util_next_power_of_two(width0), level);
height = u_minify(util_next_power_of_two(height0), level);
} else {
- width = u_minify(width0, level);
height = u_minify(height0, level);
}
+ uint32_t nblocksy = util_format_get_nblocksy(format, height);
if (tile_mode)
- height = align(height, tile_alignment[ta].heightalign);
+ nblocksy = align(nblocksy, heightalign);
/* The blits used for mem<->gmem work at a granularity of
- * 32x32, which can cause faults due to over-fetch on the
+ * 16x4, which can cause faults due to over-fetch on the
* last level. The simple solution is to over-allocate a
* bit the last level to ensure any over-fetch is harmless.
* The pitch is already sufficiently aligned, but height
- * may not be:
+ * may not be. note this only matters if last level is linear
*/
if (level == mip_levels - 1)
- height = align(height, 32);
-
- slice->pitch = util_align_npot(u_minify(pitch0, level),
- fdl6_pitchalign(layout, ta, level));
+ height = align(nblocksy, 4);
- slice->offset = layout->size;
- uint32_t blocks = util_format_get_nblocks(format,
- slice->pitch, height);
+ slice->offset = offset + layout->size;
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a6xx. 3d textures can have different layer
*/
if (is_3d) {
if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
- slice->size0 = align(blocks * layout->cpp, 4096);
+ slice->size0 = align(nblocksy * pitch, 4096);
} else {
slice->size0 = layout->slices[level - 1].size0;
}
} else {
- slice->size0 = blocks * layout->cpp;
+ slice->size0 = nblocksy * pitch;
}
layout->size += slice->size0 * depth * layers_in_level;
/* with UBWC every level is aligned to 4K */
layout->size = align(layout->size, 4096);
- uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
- uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
- uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width),
- RGB_TILE_WIDTH_ALIGNMENT);
- uint32_t meta_height = align(DIV_ROUND_UP(height, block_height),
- RGB_TILE_HEIGHT_ALIGNMENT);
-
- /* it looks like mipmaps need alignment to power of two
- * TODO: needs testing with large npot textures
- * (needed for the first level?)
- */
- if (mip_levels > 1) {
- meta_pitch = util_next_power_of_two(meta_pitch);
- meta_height = util_next_power_of_two(meta_height);
- }
+ uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
+ uint32_t meta_height = align(u_minify(ubwc_height0, level),
+ ubwc_tile_height_alignment);
ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
- ubwc_slice->pitch = meta_pitch;
- ubwc_slice->offset = layout->ubwc_layer_size;
+ ubwc_slice->offset = offset + layout->ubwc_layer_size;
layout->ubwc_layer_size += ubwc_slice->size0;
}
}
layout->slices[level].offset += layout->ubwc_layer_size * array_size;
layout->size += layout->ubwc_layer_size * array_size;
}
-}
-void
-fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
- uint32_t *blockwidth, uint32_t *blockheight)
-{
- *blockwidth = tile_alignment[layout->cpp].ubwc_blockwidth;
- *blockheight = tile_alignment[layout->cpp].ubwc_blockheight;
+ /* include explicit offset in size */
+ layout->size += offset;
+
+ return true;
}