X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa6xx%2Ffd6_resource.c;h=430044aebbb243ab4243314088092696c85c16b1;hb=8d9f5a28e3879523fbdd018a2b87223313333379;hp=fb24d230c446fd9130dc561e82d8fa7307767e56;hpb=51a44c3aac7642a4b26dcfb3a80d3dd87892aa6f;p=mesa.git

diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
index fb24d230c44..430044aebbb 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
@@ -26,19 +26,30 @@
  */
 
 #include "fd6_resource.h"
+#include "fd6_format.h"
 
-/* indexed by cpp: */
+#include "a6xx.xml.h"
+
+/* indexed by cpp, including msaa 2x and 4x: */
 static const struct {
 	unsigned pitchalign;
 	unsigned heightalign;
 } tile_alignment[] = {
 	[1]  = { 128, 32 },
-	[2]  = { 128, 16 },
-	[3]  = { 128, 16 },
+	[2]  = {  64, 32 },
+	[3]  = {  64, 32 },
 	[4]  = {  64, 16 },
+	[6]  = {  64, 16 },
 	[8]  = {  64, 16 },
 	[12] = {  64, 16 },
 	[16] = {  64, 16 },
+	[24] = {  64, 16 },
+	[32] = {  64, 16 },
+	[48] = {  64, 16 },
+	[64] = {  64, 16 },
+
+	/* special cases for r16: */
+	[0]  = { 128, 16 },
 };
 
 /* NOTE: good way to test this is:  (for example)
@@ -51,53 +62,61 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma
 	struct fd_screen *screen = fd_screen(prsc->screen);
 	enum util_format_layout layout = util_format_description(format)->layout;
 	uint32_t pitchalign = screen->gmem_alignw;
-	uint32_t heightalign;
 	uint32_t level, size = 0;
-	uint32_t width = prsc->width0;
-	uint32_t height = prsc->height0;
 	uint32_t depth = prsc->depth0;
+	/* linear dimensions: */
+	uint32_t lwidth = prsc->width0;
+	uint32_t lheight = prsc->height0;
+	/* tile_mode dimensions: */
+	uint32_t twidth = util_next_power_of_two(lwidth);
+	uint32_t theight = util_next_power_of_two(lheight);
 	/* in layer_first layout, the level (slice) contains just one
 	 * layer (since in fact the layer contains the slices)
 	 */
-	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
+	uint32_t layers_in_level = rsc->layout.layer_first ? 1 : prsc->array_size;
+	int ta = rsc->layout.cpp;
+
+	/* The z16/r16 formats seem to not play by the normal tiling rules: */
+	if ((rsc->layout.cpp == 2) && (util_format_get_nr_components(format) == 1))
+		ta = 0;
 
-	heightalign = tile_alignment[rsc->cpp].heightalign;
+	debug_assert(ta < ARRAY_SIZE(tile_alignment));
+	debug_assert(tile_alignment[ta].pitchalign);
 
 	for (level = 0; level <= prsc->last_level; level++) {
-		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
-		bool linear_level = fd_resource_level_linear(prsc, level);
+		struct fdl_slice *slice = fd_resource_slice(rsc, level);
+		uint32_t tile_mode = fd_resource_tile_mode(prsc, level);
+		uint32_t width, height;
+
+		/* tiled levels of 3D textures are rounded up to PoT dimensions: */
+		if ((prsc->target == PIPE_TEXTURE_3D) && tile_mode) {
+			width = twidth;
+			height = theight;
+		} else {
+			width = lwidth;
+			height = lheight;
+		}
 		uint32_t aligned_height = height;
 		uint32_t blocks;
 
-		if (rsc->tile_mode && !linear_level) {
-			pitchalign = tile_alignment[rsc->cpp].pitchalign;
-			aligned_height = align(aligned_height, heightalign);
+		if (tile_mode) {
+			pitchalign = tile_alignment[ta].pitchalign;
+			aligned_height = align(aligned_height,
+					tile_alignment[ta].heightalign);
 		} else {
-			if (prsc->target == PIPE_TEXTURE_3D) {
-				unsigned a;
-				if (width >= 64) {
-					a = util_next_power_of_two(MAX2(width, height));
-				} else {
-					a = 16;
-				}
-
-				pitchalign = align(a, 64);
-				aligned_height = align(aligned_height, a);
-			} else {
-				pitchalign = 64;
-			}
-
-			/* The blits used for mem<->gmem work at a granularity of
-			 * 32x32, which can cause faults due to over-fetch on the
-			 * last level.  The simple solution is to over-allocate a
-			 * bit the last level to ensure any over-fetch is harmless.
-			 * The pitch is already sufficiently aligned, but height
-			 * may not be:
-			 */
-			if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER))
-				aligned_height = align(aligned_height, 32);
+			pitchalign = 64;
 		}
 
+		/* The blits used for mem<->gmem work at a granularity of
+		 * 32x32, which can cause faults due to over-fetch on the
+		 * last level.  The simple solution is to over-allocate a
+		 * bit the last level to ensure any over-fetch is harmless.
+		 * The pitch is already sufficiently aligned, but height
+		 * may not be:
+		 */
+		if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER))
+			aligned_height = align(aligned_height, 32);
+
 		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
 			slice->pitch =
 				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
@@ -114,33 +133,162 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma
 		 * range gets into range, we stop reducing it.
 		 */
 		if (prsc->target == PIPE_TEXTURE_3D) {
-			if (level <= 1 || (rsc->slices[level - 1].size0 > 0xf000)) {
-				slice->size0 = align(blocks * rsc->cpp, alignment);
+			if (level < 1 || fd_resource_slice(rsc, level - 1)->size0 > 0xf000) {
+				slice->size0 = align(blocks * rsc->layout.cpp, alignment);
 			} else {
-				slice->size0 = rsc->slices[level - 1].size0;
+				slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
 			}
 		} else {
-			slice->size0 = align(blocks * rsc->cpp, alignment);
+			slice->size0 = align(blocks * rsc->layout.cpp, alignment);
 		}
 
 		size += slice->size0 * depth * layers_in_level;
 
 #if 0
-		debug_printf("%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u\n",
+		fprintf(stderr, "%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u, offset=0x%x tiling=%d\n",
 				util_format_name(prsc->format),
-				width, height, depth, rsc->cpp,
-				level, slice->pitch * rsc->cpp,
-				slice->size0, size, aligned_height, blocks);
+				width, height, depth, rsc->layout.cpp,
+				level, slice->pitch * rsc->layout.cpp,
+				slice->size0, size, aligned_height, blocks,
+				slice->offset, fd_resource_tile_mode(prsc, level));
 #endif
 
-		width = u_minify(width, 1);
-		height = u_minify(height, 1);
 		depth = u_minify(depth, 1);
+		lwidth = u_minify(lwidth, 1);
+		lheight = u_minify(lheight, 1);
+		twidth = u_minify(twidth, 1);
+		theight = u_minify(theight, 1);
 	}
 
 	return size;
 }
 
+/* A subset of the valid tiled formats can be compressed.  We do
+ * already require tiled in order to be compressed, but just because
+ * it can be tiled doesn't mean it can be compressed.
+ */
+static bool
+ok_ubwc_format(enum pipe_format pfmt)
+{
+	/* NOTE: both x24s8 and z24s8 map to RB6_X8Z24_UNORM, but UBWC
+	 * does not seem to work properly when sampling x24s8.. possibly
+	 * because we sample it as TFMT6_8_8_8_8_UINT.
+	 *
+	 * This could possibly be a hw limitation, or maybe something
+	 * else wrong somewhere (although z24s8 blits and sampling with
+	 * UBWC seem fine).  Recheck on a later revision of a6xx
+	 */
+	if (pfmt == PIPE_FORMAT_X24S8_UINT)
+		return false;
+
+	switch (fd6_pipe2color(pfmt)) {
+	case RB6_R10G10B10A2_UINT:
+	case RB6_R10G10B10A2_UNORM:
+	case RB6_R11G11B10_FLOAT:
+	case RB6_R16_FLOAT:
+	case RB6_R16G16B16A16_FLOAT:
+	case RB6_R16G16B16A16_SINT:
+	case RB6_R16G16B16A16_UINT:
+	case RB6_R16G16_FLOAT:
+	case RB6_R16G16_SINT:
+	case RB6_R16G16_UINT:
+	case RB6_R16_SINT:
+	case RB6_R16_UINT:
+	case RB6_R32G32B32A32_SINT:
+	case RB6_R32G32B32A32_UINT:
+	case RB6_R32G32_SINT:
+	case RB6_R32G32_UINT:
+	case RB6_R5G6B5_UNORM:
+	case RB6_R8G8B8A8_SINT:
+	case RB6_R8G8B8A8_UINT:
+	case RB6_R8G8B8A8_UNORM:
+	case RB6_R8G8B8_UNORM:
+	case RB6_R8G8_SINT:
+	case RB6_R8G8_UINT:
+	case RB6_R8G8_UNORM:
+	case RB6_Z24_UNORM_S8_UINT:
+	case RB6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
+		return true;
+	default:
+		return false;
+	}
+}
+
+uint32_t
+fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
+{
+#define RBG_TILE_WIDTH_ALIGNMENT 64
+#define RGB_TILE_HEIGHT_ALIGNMENT 16
+#define UBWC_PLANE_SIZE_ALIGNMENT 4096
+
+	struct pipe_resource *prsc = &rsc->base;
+	uint32_t width = prsc->width0;
+	uint32_t height = prsc->height0;
+
+	if (!ok_ubwc_format(prsc->format))
+		return 0;
+
+	/* limit things to simple single level 2d for now: */
+	if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
+		return 0;
+
+	uint32_t block_width, block_height;
+	switch (rsc->layout.cpp) {
+	case 2:
+	case 4:
+		block_width = 16;
+		block_height = 4;
+		break;
+	case 8:
+		block_width = 8;
+		block_height = 4;
+		break;
+	case 16:
+		block_width = 4;
+		block_height = 4;
+		break;
+	default:
+		return 0;
+	}
+
+	uint32_t meta_stride =
+		ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT);
+	uint32_t meta_height =
+		ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
+	uint32_t meta_size =
+		ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
+
+	/* UBWC goes first, then color data.. this constraint is mainly only
+	 * because it is what the kernel expects for scanout.  For non-2D we
+	 * could just use a separate UBWC buffer..
+	 */
+	rsc->layout.ubwc_offset = 0;
+	rsc->layout.offset = meta_size;
+	rsc->layout.ubwc_pitch = meta_stride;
+	rsc->layout.ubwc_size = meta_size >> 2;   /* in dwords??? */
+	rsc->layout.tile_mode = TILE6_3;
+
+	return meta_size;
+}
+
+/**
+ * Ensure the rsc is in an ok state to be used with the specified format.
+ * This handles the case of UBWC buffers used with non-UBWC compatible
+ * formats, by triggering an uncompress.
+ */
+void
+fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
+		enum pipe_format format)
+{
+	if (!rsc->layout.ubwc_size)
+		return;
+
+	if (ok_ubwc_format(format))
+		return;
+
+	fd_resource_uncompress(ctx, rsc);
+}
+
 uint32_t
 fd6_setup_slices(struct fd_resource *rsc)
 {
@@ -148,11 +296,11 @@ fd6_setup_slices(struct fd_resource *rsc)
 
 	switch (rsc->base.target) {
 	case PIPE_TEXTURE_3D:
-		rsc->layer_first = false;
+		rsc->layout.layer_first = false;
 		alignment = 4096;
 		break;
 	default:
-		rsc->layer_first = true;
+		rsc->layout.layer_first = true;
 		alignment = 1;
 		break;
 	}