freedreno/a5xx: texture tiling
author Rob Clark <robdclark@gmail.com>
Sun, 31 Dec 2017 20:01:51 +0000 (15:01 -0500)
committer Rob Clark <robdclark@gmail.com>
Sun, 14 Jan 2018 21:13:39 +0000 (16:13 -0500)
Overall a nice 5-10% gain for most games.  And more for things like
glmark2 texture benchmark.

There are some rough edges.  In particular, the hardware seems to only
support tiling or component swap.  (I.e. from the hw PoV, ARGB/ABGR/RGBA/
BGRA are all the same format but with different component swap.)  For
tiled formats, only ARGB is possible.  This isn't a big problem for
*sampling* since we also have swizzle state there (and since
util_format_compose_swizzles() already takes into account the component
order, we didn't use COLOR_SWAP for sampling).  But it is a problem if
you try to render to a tiled BGRA (for example) surface.

The next patch introduces a workaround for blitter, so we can generate
tiled textures in ABGR/RGBA/BGRA, but that doesn't help the render-
target case.  To handle that, I think we'd need to keep track that the
tiled format is different from the linear format, which seems like it
would get extra fun with sampler views/etc.

So for now, disabled by default, enable with FD_MESA_DEBUG=ttile.  In
practice it works fine for all the games I've tried, but makes piglit
grumpy.

Signed-off-by: Rob Clark <robdclark@gmail.com>
16 files changed:
src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a5xx/fd5_blitter.c
src/gallium/drivers/freedreno/a5xx/fd5_blitter.h
src/gallium/drivers/freedreno/a5xx/fd5_emit.c
src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
src/gallium/drivers/freedreno/a5xx/fd5_resource.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_resource.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a5xx/fd5_screen.c
src/gallium/drivers/freedreno/a5xx/fd5_texture.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_resource.c
src/gallium/drivers/freedreno/freedreno_resource.h
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/freedreno_screen.h
src/gallium/drivers/freedreno/freedreno_util.h
src/gallium/drivers/freedreno/meson.build

index 447a6f990e057659f00ca916a9c7f87ed70b985d..18e65fbbfac575024a0aa24791dbf26970f6c718 100644 (file)
@@ -150,6 +150,8 @@ a5xx_SOURCES := \
        a5xx/fd5_query.h \
        a5xx/fd5_rasterizer.c \
        a5xx/fd5_rasterizer.h \
+       a5xx/fd5_resource.c \
+       a5xx/fd5_resource.h \
        a5xx/fd5_screen.c \
        a5xx/fd5_screen.h \
        a5xx/fd5_texture.c \
index 5769b7eb8f37531a226827b0e72de0afeaa2f60d..6d4fa2c684cfcaebb9610720a32dca92f6759139 100644 (file)
@@ -52,6 +52,9 @@ ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
 static bool
 ok_format(enum pipe_format fmt)
 {
+       if (util_format_is_compressed(fmt))
+               return false;
+
        switch (fmt) {
        case PIPE_FORMAT_R10G10B10A2_SSCALED:
        case PIPE_FORMAT_R10G10B10A2_SNORM:
@@ -66,8 +69,13 @@ ok_format(enum pipe_format fmt)
        case PIPE_FORMAT_R10G10B10A2_UINT:
                return false;
        default:
-               return true;
+               break;
        }
+
+       if (fd5_pipe2color(fmt) == ~0)
+               return false;
+
+       return true;
 }
 
 static bool
@@ -128,12 +136,6 @@ can_do_blit(const struct pipe_blit_info *info)
        if (info->mask != util_format_get_mask(info->dst.format))
                return false;
 
-       if (util_format_is_compressed(info->dst.format))
-               return false;
-
-       if (util_format_is_compressed(info->src.format))
-               return false;
-
        return true;
 }
 
@@ -254,6 +256,7 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                 */
                OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
                OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
+                               A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |
                                A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));
                OUT_RELOC(ring, src->bo, soff, 0, 0);    /* RB_2D_SRC_LO/HI */
                OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |
@@ -273,6 +276,7 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                 */
                OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
                OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |
+                               A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |
                                A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
                OUT_RELOCW(ring, dst->bo, doff, 0, 0);   /* RB_2D_DST_LO/HI */
                OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |
@@ -312,6 +316,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
        struct fd_resource *src, *dst;
        struct fd_resource_slice *sslice, *dslice;
        enum a5xx_color_fmt sfmt, dfmt;
+       enum a5xx_tile_mode stile, dtile;
        enum a3xx_color_swap sswap, dswap;
        unsigned ssize, dsize, spitch, dpitch;
        unsigned sx1, sy1, sx2, sy2;
@@ -326,6 +331,11 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
        sfmt = fd5_pipe2color(info->src.format);
        dfmt = fd5_pipe2color(info->dst.format);
 
+       stile = fd_resource_level_linear(info->src.resource, info->src.level) ?
+                       TILE5_LINEAR : src->tile_mode;
+       dtile = fd_resource_level_linear(info->dst.resource, info->dst.level) ?
+                       TILE5_LINEAR : dst->tile_mode;
+
        sswap = fd5_pipe2swap(info->src.format);
        dswap = fd5_pipe2swap(info->dst.format);
 
@@ -367,6 +377,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                 */
                OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);
                OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+                               A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |
                                A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));
                OUT_RELOC(ring, src->bo, soff, 0, 0);    /* RB_2D_SRC_LO/HI */
                OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |
@@ -379,6 +390,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
 
                OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);
                OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
+                               A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |
                                A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));
 
                /*
@@ -386,6 +398,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
                 */
                OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);
                OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
+                               A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
                                A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
                OUT_RELOCW(ring, dst->bo, doff, 0, 0);   /* RB_2D_DST_LO/HI */
                OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |
@@ -398,6 +411,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
 
                OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);
                OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |
+                               A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |
                                A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));
 
                /*
@@ -434,6 +448,8 @@ fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 
        if ((info->src.resource->target == PIPE_BUFFER) &&
                        (info->dst.resource->target == PIPE_BUFFER)) {
+               assert(fd_resource(info->src.resource)->tile_mode == TILE5_LINEAR);
+               assert(fd_resource(info->dst.resource)->tile_mode == TILE5_LINEAR);
                emit_blit_buffer(batch->draw, info);
        } else {
                /* I don't *think* we need to handle blits between buffer <-> !buffer */
@@ -441,8 +457,21 @@ fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
                debug_assert(info->dst.resource->target != PIPE_BUFFER);
                emit_blit(batch->draw, info);
        }
+
        fd_resource(info->dst.resource)->valid = true;
        batch->needs_flush = true;
 
        fd_batch_flush(batch, false, false);
 }
+
+unsigned
+fd5_tile_mode(const struct pipe_resource *tmpl)
+{
+       /* Pick the tile mode for a resource described by 'tmpl': TILE5_3 when
+        * the format passes ok_format() (basically it just has to be a format
+        * we can blit, so uploads/downloads via linear staging buffer work),
+        * otherwise TILE5_LINEAR:
+        */
+       if (ok_format(tmpl->format))
+               return TILE5_3;
+
+       return TILE5_LINEAR;
+}
index c5688a89474051a2475070af6e43758a06e58ce7..b03c77eb1abe420ed478e03656330a9eaa75c724 100644 (file)
@@ -32,5 +32,6 @@
 #include "freedreno_context.h"
 
 void fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info);
+unsigned fd5_tile_mode(const struct pipe_resource *tmpl);
 
 #endif /* FD5_BLIT_H_ */
index b7ce084a8271bb47c0e8533e05070254f3f17fe9..b2809d6464677f6f64164c133f5438f0da2a5e38 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "fd5_emit.h"
 #include "fd5_blend.h"
+#include "fd5_blitter.h"
 #include "fd5_context.h"
 #include "fd5_image.h"
 #include "fd5_program.h"
@@ -337,8 +338,13 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        const struct fd5_pipe_sampler_view *view = tex->textures[i] ?
                                        fd5_pipe_sampler_view(tex->textures[i]) :
                                        &dummy_view;
+                       enum a5xx_tile_mode tile_mode = TILE5_LINEAR;
 
-                       OUT_RING(ring, view->texconst0);
+                       if (view->base.texture)
+                               tile_mode = fd_resource(view->base.texture)->tile_mode;
+
+                       OUT_RING(ring, view->texconst0 |
+                                       A5XX_TEX_CONST_0_TILE_MODE(tile_mode));
                        OUT_RING(ring, view->texconst1);
                        OUT_RING(ring, view->texconst2);
                        OUT_RING(ring, view->texconst3);
index e16ed1afa111361f3794f7bda6848dc58744bb74..ae73bc181ba3549bd852b1bb88fefab3af4757a3 100644 (file)
@@ -49,12 +49,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
        enum a5xx_tile_mode tile_mode;
        unsigned i;
 
-       if (gmem) {
-               tile_mode = TILE5_2;
-       } else {
-               tile_mode = TILE5_LINEAR;
-       }
-
        for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
                enum a5xx_color_fmt format = 0;
                enum a3xx_color_swap swap = WZYX;
@@ -66,6 +60,12 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
                uint32_t base = 0;
                uint32_t offset = 0;
 
+               if (gmem) {
+                       tile_mode = TILE5_2;
+               } else {
+                       tile_mode = TILE5_LINEAR;
+               }
+
                if ((i < nr_bufs) && bufs[i]) {
                        struct pipe_surface *psurf = bufs[i];
                        enum pipe_format pformat = psurf->format;
@@ -91,6 +91,9 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
                        } else {
                                stride = slice->pitch * rsc->cpp;
                                size = slice->size0;
+
+                               if (!fd_resource_level_linear(psurf->texture, psurf->u.tex.level))
+                                       tile_mode = rsc->tile_mode;
                        }
                }
 
@@ -488,7 +491,7 @@ emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
 
                OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
                OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
-                               A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) |
+                               A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) |
                                A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
                OUT_RING(ring, A5XX_RB_MRT_PITCH(slice->pitch * rsc->cpp));
                OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
@@ -606,6 +609,7 @@ emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
        struct fd_ringbuffer *ring = batch->gmem;
        struct fd_resource *rsc = fd_resource(psurf->texture);
        struct fd_resource_slice *slice;
+       bool tiled;
        uint32_t offset;
 
        if (buf == BLIT_S)
@@ -623,8 +627,12 @@ emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
        OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
        OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
 
+       tiled = rsc->tile_mode &&
+               !fd_resource_level_linear(psurf->texture, psurf->u.tex.level);
+
        OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
-       OUT_RING(ring, 0x00000004);   /* XXX RB_RESOLVE_CNTL_3 */
+       OUT_RING(ring, 0x00000004 |   /* XXX RB_RESOLVE_CNTL_3 */
+                       COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
        OUT_RELOCW(ring, rsc->bo, offset, 0, 0);     /* RB_BLIT_DST_LO/HI */
        OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp));
        OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_resource.c b/src/gallium/drivers/freedreno/a5xx/fd5_resource.c
new file mode 100644 (file)
index 0000000..12ee6d7
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "fd5_resource.h"
+
+/* Alignment used for tiled miplevels, indexed by cpp (rsc->cpp).  In
+ * setup_slices() pitchalign is applied to the level width and heightalign
+ * to the level height.  cpp values with no entry listed here are
+ * implicitly zero-initialized:
+ */
+static const struct {
+       unsigned pitchalign;
+       unsigned heightalign;
+} tile_alignment[] = {
+       [1]  = { 128, 32 },
+       [2]  = { 128, 16 },
+       [3]  = { 128, 16 },
+       [4]  = {  64, 16 },
+       [8]  = {  64, 16 },
+       [12] = {  64, 16 },
+       [16] = {  64, 16 },
+};
+
+/* Lay out the miplevels of 'rsc': fills in pitch, offset and size0 of each
+ * slice and returns the accumulated size of all levels.  Levels that are
+ * tiled (rsc->tile_mode set and the level not reported linear by
+ * fd_resource_level_linear()) use the tile_alignment[] pitch/height
+ * alignment for rsc->cpp; other levels use a pitch alignment of 64 and no
+ * height alignment.  'alignment' is the alignment applied to size0.
+ *
+ * The initial pitchalign value (screen->gmem_alignw) is overwritten on
+ * every loop iteration before it is used.
+ *
+ * NOTE(review): tile_alignment[] is indexed by rsc->cpp without a bounds
+ * check -- presumably cpp never exceeds 16 here; confirm.
+ *
+ * NOTE: good way to test this is:  (for example)
+ *  piglit/bin/texelFetch fs sampler2D 100x100x1-100x300x1
+ */
+static uint32_t
+setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
+{
+       struct pipe_resource *prsc = &rsc->base;
+       struct fd_screen *screen = fd_screen(prsc->screen);
+       enum util_format_layout layout = util_format_description(format)->layout;
+       uint32_t pitchalign = screen->gmem_alignw;
+       uint32_t heightalign;
+       uint32_t level, size = 0;
+       uint32_t width = prsc->width0;
+       uint32_t height = prsc->height0;
+       uint32_t depth = prsc->depth0;
+       /* in layer_first layout, the level (slice) contains just one
+        * layer (since in fact the layer contains the slices)
+        */
+       uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
+
+       heightalign = tile_alignment[rsc->cpp].heightalign;
+
+       for (level = 0; level <= prsc->last_level; level++) {
+               struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
+               bool linear_level = fd_resource_level_linear(prsc, level);
+               uint32_t aligned_height = height;
+               uint32_t blocks;
+
+               if (rsc->tile_mode && !linear_level) {
+                       pitchalign = tile_alignment[rsc->cpp].pitchalign;
+                       aligned_height = align(aligned_height, heightalign);
+               } else {
+                       pitchalign = 64;
+               }
+
+               if (layout == UTIL_FORMAT_LAYOUT_ASTC)
+                       slice->pitch =
+                               util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
+               else
+                       slice->pitch = align(width, pitchalign);
+
+               slice->offset = size;
+               blocks = util_format_get_nblocks(format, slice->pitch, aligned_height);
+
+               /* 1d array and 2d array textures must all have the same layer size
+                * for each miplevel on a3xx. 3d textures can have different layer
+                * sizes for high levels, but the hw auto-sizer is buggy (or at least
+                * different than what this code does), so as soon as the layer size
+                * range gets into range, we stop reducing it.
+                *
+                * Note the first two branches below compute size0 identically;
+                * only the last one reuses the previous level's size0.
+                *
+                * NOTE(review): the "a3xx" wording is from this comment as written;
+                * presumably the same applies to a5xx -- confirm.
+                */
+               if (prsc->target == PIPE_TEXTURE_3D && (
+                                       level == 1 ||
+                                       (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
+                       slice->size0 = align(blocks * rsc->cpp, alignment);
+               else if (level == 0 || rsc->layer_first || alignment == 1)
+                       slice->size0 = align(blocks * rsc->cpp, alignment);
+               else
+                       slice->size0 = rsc->slices[level - 1].size0;
+
+#if 0
+               debug_printf("%s: %ux%ux%u@%u: %2u: stride=%4u, size=%7u, aligned_height=%3u\n",
+                               util_format_name(prsc->format),
+                               prsc->width0, prsc->height0, prsc->depth0, rsc->cpp,
+                               level, slice->pitch * rsc->cpp,
+                               slice->size0 * depth * layers_in_level,
+                               aligned_height);
+#endif
+
+               size += slice->size0 * depth * layers_in_level;
+
+               width = u_minify(width, 1);
+               height = u_minify(height, 1);
+               depth = u_minify(depth, 1);
+       }
+
+       return size;
+}
+
+/* a5xx slice-layout entry point (hooked up as screen->setup_slices in
+ * fd5_screen_init()).  3D textures are laid out with layer_first = false
+ * and a size0 alignment of 4096; every other target uses
+ * layer_first = true with an alignment of 1.  Returns the total size
+ * computed by setup_slices() for the resource's own format.
+ */
+uint32_t
+fd5_setup_slices(struct fd_resource *rsc)
+{
+       uint32_t alignment;
+
+       switch (rsc->base.target) {
+       case PIPE_TEXTURE_3D:
+               rsc->layer_first = false;
+               alignment = 4096;
+               break;
+       default:
+               rsc->layer_first = true;
+               alignment = 1;
+               break;
+       }
+
+       return setup_slices(rsc, alignment, rsc->base.format);
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_resource.h b/src/gallium/drivers/freedreno/a5xx/fd5_resource.h
new file mode 100644 (file)
index 0000000..f24fe4e
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD5_RESOURCE_H_
+#define FD5_RESOURCE_H_
+
+#include "freedreno_resource.h"
+
+uint32_t fd5_setup_slices(struct fd_resource *rsc);
+
+#endif /* FD5_RESOURCE_H_ */
index 2816c3615493ee5eafdb5b4b7f6e6d633781c23b..8feb279feaad9a4524f96b0d316587a0d068b353 100644 (file)
 #include "util/u_format.h"
 
 #include "fd5_screen.h"
+#include "fd5_blitter.h"
 #include "fd5_context.h"
 #include "fd5_format.h"
+#include "fd5_resource.h"
+
 #include "ir3_compiler.h"
 
 static boolean
@@ -106,4 +109,8 @@ fd5_screen_init(struct pipe_screen *pscreen)
        screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id);
        pscreen->context_create = fd5_context_create;
        pscreen->is_format_supported = fd5_screen_is_format_supported;
+
+       screen->setup_slices = fd5_setup_slices;
+       if (fd_mesa_debug & FD_DBG_TTILE)
+               screen->tile_mode = fd5_tile_mode;
 }
index 2501b7a341c142e6c244fb0fa76cf93931134571..9795189b6efc94d211df38a4db9144a0eb0b1b32 100644 (file)
@@ -272,6 +272,7 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
        so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));
 
        switch (cso->target) {
+       case PIPE_TEXTURE_RECT:
        case PIPE_TEXTURE_1D:
        case PIPE_TEXTURE_2D:
                so->texconst3 =
index a4e1e4bb86c511a59b22143f63e2986decfb104a..1653e8a3b85b4f76c6655ac39858fc5b57a76ccd 100644 (file)
@@ -329,7 +329,7 @@ struct fd_context {
                        struct fd_ringbuffer *ring);
        void (*query_set_stage)(struct fd_batch *batch, enum fd_render_stage stage);
 
-       /* blit: */
+       /* blitter: */
        void (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info);
 
        /* simple gpu "memcpy": */
index eaaf2e8b2292472c9e9b3a3e880b0455631cae83..7e6de8c057f3ea74fef43b5b79b2f1dad3be0602 100644 (file)
@@ -290,6 +290,7 @@ fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
        tmpl.depth0  = box->depth;
        tmpl.array_size = 1;
        tmpl.last_level = 0;
+       tmpl.bind |= PIPE_BIND_LINEAR;
 
        struct pipe_resource *pstaging =
                pctx->screen->resource_create(pctx->screen, &tmpl);
@@ -300,7 +301,7 @@ fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
 }
 
 static void
-fd_blit_staging(struct fd_context *ctx, struct fd_transfer *trans)
+fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
 {
        struct pipe_resource *dst = trans->base.resource;
        struct pipe_blit_info blit = {0};
@@ -317,7 +318,26 @@ fd_blit_staging(struct fd_context *ctx, struct fd_transfer *trans)
        blit.filter = PIPE_TEX_FILTER_NEAREST;
 
        do_blit(ctx, &blit, false);
-       pipe_resource_reference(&trans->staging_prsc, NULL);
+}
+
+/* Blit the transfer's region (trans->base.level / trans->base.box) of the
+ * transfer's resource into level 0 of the staging resource at
+ * trans->staging_box.  Counterpart of fd_blit_from_staging(), which copies
+ * in the opposite direction.
+ */
+static void
+fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
+{
+       struct pipe_resource *src = trans->base.resource;
+       struct pipe_blit_info blit = {0};
+
+       blit.src.resource = src;
+       blit.src.format   = src->format;
+       blit.src.level    = trans->base.level;
+       blit.src.box      = trans->base.box;
+       blit.dst.resource = trans->staging_prsc;
+       blit.dst.format   = trans->staging_prsc->format;
+       blit.dst.level    = 0;
+       blit.dst.box      = trans->staging_box;
+       blit.mask = util_format_get_mask(trans->staging_prsc->format);
+       blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+       do_blit(ctx, &blit, false);
 }
 
 static unsigned
@@ -396,8 +416,11 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
        struct fd_resource *rsc = fd_resource(ptrans->resource);
        struct fd_transfer *trans = fd_transfer(ptrans);
 
-       if (trans->staging_prsc)
-               fd_blit_staging(ctx, trans);
+       if (trans->staging_prsc) {
+               if (ptrans->usage & PIPE_TRANSFER_WRITE)
+                       fd_blit_from_staging(ctx, trans);
+               pipe_resource_reference(&trans->staging_prsc, NULL);
+       }
 
        if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                fd_bo_cpu_fini(rsc->bo);
@@ -447,6 +470,44 @@ fd_resource_transfer_map(struct pipe_context *pctx,
        ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
        ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
 
+       /* we always need a staging texture for tiled buffers:
+        *
+        * TODO we might sometimes want to *also* shadow the resource to avoid
+        * splitting a batch.. for ex, mid-frame texture uploads to a tiled
+        * texture.
+        */
+       if (rsc->tile_mode) {
+               struct fd_resource *staging_rsc;
+
+               staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
+               if (staging_rsc) {
+                       // TODO for PIPE_TRANSFER_READ, need to do untiling blit..
+                       trans->staging_prsc = &staging_rsc->base;
+                       trans->base.stride = util_format_get_nblocksx(format,
+                               staging_rsc->slices[0].pitch) * staging_rsc->cpp;
+                       trans->base.layer_stride = staging_rsc->layer_first ?
+                               staging_rsc->layer_size : staging_rsc->slices[0].size0;
+                       trans->staging_box = *box;
+                       trans->staging_box.x = 0;
+                       trans->staging_box.y = 0;
+                       trans->staging_box.z = 0;
+
+                       if (usage & PIPE_TRANSFER_READ) {
+                               fd_blit_to_staging(ctx, trans);
+                               fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ);
+                       }
+
+                       buf = fd_bo_map(staging_rsc->bo);
+                       offset = 0;
+
+                       *pptrans = ptrans;
+
+                       ctx->stats.staging_uploads++;
+
+                       return buf;
+               }
+       }
+
        if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
                usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 
@@ -616,9 +677,6 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma
         */
        uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
 
-       if (is_a5xx(screen) && (rsc->base.target >= PIPE_TEXTURE_2D))
-               height = align(height, screen->gmem_alignh);
-
        for (level = 0; level <= prsc->last_level; level++) {
                struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
                uint32_t blocks;
@@ -686,7 +744,7 @@ fd_setup_slices(struct fd_resource *rsc)
        alignment = slice_alignment(rsc->base.target);
 
        struct fd_screen *screen = fd_screen(rsc->base.screen);
-       if (is_a4xx(screen) || is_a5xx(screen)) {
+       if (is_a4xx(screen)) {
                switch (rsc->base.target) {
                case PIPE_TEXTURE_3D:
                        rsc->layer_first = false;
@@ -759,6 +817,18 @@ fd_resource_create(struct pipe_screen *pscreen,
 
        *prsc = *tmpl;
 
+#define LINEAR \
+       (PIPE_BIND_SCANOUT | \
+        PIPE_BIND_LINEAR  | \
+        PIPE_BIND_DISPLAY_TARGET)
+
+       if (screen->tile_mode &&
+                       (tmpl->target != PIPE_BUFFER) &&
+                       (tmpl->bind & PIPE_BIND_SAMPLER_VIEW) &&
+                       !(tmpl->bind & LINEAR)) {
+               rsc->tile_mode = screen->tile_mode(tmpl);
+       }
+
        pipe_reference_init(&prsc->reference, 1);
 
        prsc->screen = pscreen;
index 0b41a13be334d69118fd553e8ddb94c31222b1ea..2834969110bfad06dbcbd18a44587698609f336e 100644 (file)
@@ -97,6 +97,9 @@ struct fd_resource {
         */
        uint32_t bc_batch_mask;
 
+       unsigned tile_mode : 2;
+       unsigned preferred_tile_mode : 2;
+
        /*
         * LRZ
         */
@@ -164,6 +167,16 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
        return offset;
 }
 
+/* This might be a5xx specific, but higher mipmap levels are always linear.
+ * A level is reported as linear once its minified width drops below 16;
+ * only the width is considered, not the height:
+ */
+static inline bool
+fd_resource_level_linear(struct pipe_resource *prsc, int level)
+{
+       unsigned w = u_minify(prsc->width0, level);
+       if (w < 16)
+               return true;
+       return false;
+}
+
 void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
                enum fd_render_stage stage);
 void fd_blitter_pipe_end(struct fd_context *ctx);
index 1fca3c768bf82bce59f5d6aac2113e8e77d6acbb..e0a9048031c55599071548b0520435df852daf24 100644 (file)
@@ -82,6 +82,7 @@ static const struct debug_named_value debug_options[] = {
                {"noindirect",FD_DBG_NOINDR, "Disable hw indirect draws (emulate on CPU)"},
                {"noblit",    FD_DBG_NOBLIT, "Disable blitter (fallback to generic blit path)"},
                {"hiprio",    FD_DBG_HIPRIO, "Force high-priority context"},
+               {"ttile",     FD_DBG_TTILE,  "Enable texture tiling (a5xx)"},
                DEBUG_NAMED_VALUE_END
 };
 
index 5a825763f243d552783e2073e68165e6c7b26ff2..55537f2d1c47f049d76d85a527b50f2ee372f594 100644 (file)
@@ -81,6 +81,7 @@ struct fd_screen {
        struct fd_pipe *pipe;
 
        uint32_t (*setup_slices)(struct fd_resource *rsc);
+       unsigned (*tile_mode)(const struct pipe_resource *prsc);
 
        int64_t cpu_gpu_time_delta;
 
index 687d874b007b4353ed395263caa79891850eddfe..bfeec4c17dd93af006e9c7af013eb62f3e38da33 100644 (file)
@@ -83,6 +83,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
 #define FD_DBG_NOINDR  0x40000
 #define FD_DBG_NOBLIT  0x80000
 #define FD_DBG_HIPRIO 0x100000
+#define FD_DBG_TTILE  0x200000
 
 extern int fd_mesa_debug;
 extern bool fd_binning_enabled;
index 33a64d73aa49fb8ce0cd78380a7986ed669bbf06..ef70cc08c74dfc38ea8267865ea575caedd6913e 100644 (file)
@@ -174,6 +174,8 @@ files_libfreedreno = files(
   'a5xx/fd5_query.h',
   'a5xx/fd5_rasterizer.c',
   'a5xx/fd5_rasterizer.h',
+  'a5xx/fd5_resource.c',
+  'a5xx/fd5_resource.h',
   'a5xx/fd5_screen.c',
   'a5xx/fd5_screen.h',
   'a5xx/fd5_texture.c',