From 39b63c18f1a838d57458ceb99ca58d3c0257c402 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 31 Dec 2017 15:01:51 -0500 Subject: [PATCH] freedreno/a5xx: texture tiling Overall a nice 5-10% gain for most games. And more for things like glmark2 texture benchmark. There are some rough edges. In particular, the hardware seems to support either tiling or component swap, but not both. (I.e. from hw PoV, ARGB/ABGR/RGBA/BGRA are all the same format but with different component swap.) For tiled formats, only ARGB is possible. This isn't a big problem for *sampling* since we also have swizzle state there (and since util_format_compose_swizzles() already takes into account the component order, we didn't use COLOR_SWAP for sampling). But it is a problem if you try to render to a tiled BGRA (for example) surface. The next patch introduces a workaround for blitter, so we can generate tiled textures in ABGR/RGBA/BGRA, but that doesn't help the render-target case. To handle that, I think we'd need to keep track that the tiled format is different from the linear format, which seems like it would get extra fun with sampler views/etc. So for now, disabled by default, enable with FD_MESA_DEBUG=ttile. In practice it works fine for all the games I've tried, but makes piglit grumpy. 
Signed-off-by: Rob Clark --- .../drivers/freedreno/Makefile.sources | 2 + .../drivers/freedreno/a5xx/fd5_blitter.c | 43 +++++- .../drivers/freedreno/a5xx/fd5_blitter.h | 1 + src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 8 +- src/gallium/drivers/freedreno/a5xx/fd5_gmem.c | 24 ++- .../drivers/freedreno/a5xx/fd5_resource.c | 138 ++++++++++++++++++ .../drivers/freedreno/a5xx/fd5_resource.h | 34 +++++ .../drivers/freedreno/a5xx/fd5_screen.c | 7 + .../drivers/freedreno/a5xx/fd5_texture.c | 1 + .../drivers/freedreno/freedreno_context.h | 2 +- .../drivers/freedreno/freedreno_resource.c | 86 ++++++++++- .../drivers/freedreno/freedreno_resource.h | 13 ++ .../drivers/freedreno/freedreno_screen.c | 1 + .../drivers/freedreno/freedreno_screen.h | 1 + .../drivers/freedreno/freedreno_util.h | 1 + src/gallium/drivers/freedreno/meson.build | 2 + 16 files changed, 339 insertions(+), 25 deletions(-) create mode 100644 src/gallium/drivers/freedreno/a5xx/fd5_resource.c create mode 100644 src/gallium/drivers/freedreno/a5xx/fd5_resource.h diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 447a6f990e0..18e65fbbfac 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -150,6 +150,8 @@ a5xx_SOURCES := \ a5xx/fd5_query.h \ a5xx/fd5_rasterizer.c \ a5xx/fd5_rasterizer.h \ + a5xx/fd5_resource.c \ + a5xx/fd5_resource.h \ a5xx/fd5_screen.c \ a5xx/fd5_screen.h \ a5xx/fd5_texture.c \ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c index 5769b7eb8f3..6d4fa2c684c 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c @@ -52,6 +52,9 @@ ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl) static bool ok_format(enum pipe_format fmt) { + if (util_format_is_compressed(fmt)) + return false; + switch (fmt) { case 
PIPE_FORMAT_R10G10B10A2_SSCALED: case PIPE_FORMAT_R10G10B10A2_SNORM: @@ -66,8 +69,13 @@ ok_format(enum pipe_format fmt) case PIPE_FORMAT_R10G10B10A2_UINT: return false; default: - return true; + break; } + + if (fd5_pipe2color(fmt) == ~0) + return false; + + return true; } static bool @@ -128,12 +136,6 @@ can_do_blit(const struct pipe_blit_info *info) if (info->mask != util_format_get_mask(info->dst.format)) return false; - if (util_format_is_compressed(info->dst.format)) - return false; - - if (util_format_is_compressed(info->src.format)) - return false; - return true; } @@ -254,6 +256,7 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) */ OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9); OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) | + A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) | A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX)); OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */ OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) | @@ -273,6 +276,7 @@ emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) */ OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9); OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) | + A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) | A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) | @@ -312,6 +316,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) struct fd_resource *src, *dst; struct fd_resource_slice *sslice, *dslice; enum a5xx_color_fmt sfmt, dfmt; + enum a5xx_tile_mode stile, dtile; enum a3xx_color_swap sswap, dswap; unsigned ssize, dsize, spitch, dpitch; unsigned sx1, sy1, sx2, sy2; @@ -326,6 +331,11 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) sfmt = fd5_pipe2color(info->src.format); dfmt = fd5_pipe2color(info->dst.format); + stile = fd_resource_level_linear(info->src.resource, info->src.level) ? 
+ TILE5_LINEAR : src->tile_mode; + dtile = fd_resource_level_linear(info->dst.resource, info->dst.level) ? + TILE5_LINEAR : dst->tile_mode; + sswap = fd5_pipe2swap(info->src.format); dswap = fd5_pipe2swap(info->dst.format); @@ -367,6 +377,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) */ OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9); OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) | + A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) | A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap)); OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */ OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) | @@ -379,6 +390,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1); OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) | + A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) | A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap)); /* @@ -386,6 +398,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) */ OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9); OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) | + A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) | A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap)); OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) | @@ -398,6 +411,7 @@ emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info) OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1); OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) | + A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) | A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap)); /* @@ -434,6 +448,8 @@ fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) if ((info->src.resource->target == PIPE_BUFFER) && (info->dst.resource->target == PIPE_BUFFER)) { + assert(fd_resource(info->src.resource)->tile_mode == TILE5_LINEAR); + assert(fd_resource(info->dst.resource)->tile_mode == TILE5_LINEAR); emit_blit_buffer(batch->draw, info); } else { /* I don't *think* we need to handle blits 
between buffer <-> !buffer */ @@ -441,8 +457,21 @@ fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) debug_assert(info->dst.resource->target != PIPE_BUFFER); emit_blit(batch->draw, info); } + fd_resource(info->dst.resource)->valid = true; batch->needs_flush = true; fd_batch_flush(batch, false, false); } + +unsigned +fd5_tile_mode(const struct pipe_resource *tmpl) +{ + /* basically just has to be a format we can blit, so uploads/downloads + * via linear staging buffer works: + */ + if (ok_format(tmpl->format)) + return TILE5_3; + + return TILE5_LINEAR; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h index c5688a89474..b03c77eb1ab 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h @@ -32,5 +32,6 @@ #include "freedreno_context.h" void fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); +unsigned fd5_tile_mode(const struct pipe_resource *tmpl); #endif /* FD5_BLIT_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index b7ce084a827..b2809d64646 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -36,6 +36,7 @@ #include "fd5_emit.h" #include "fd5_blend.h" +#include "fd5_blitter.h" #include "fd5_context.h" #include "fd5_image.h" #include "fd5_program.h" @@ -337,8 +338,13 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct fd5_pipe_sampler_view *view = tex->textures[i] ? 
fd5_pipe_sampler_view(tex->textures[i]) : &dummy_view; + enum a5xx_tile_mode tile_mode = TILE5_LINEAR; - OUT_RING(ring, view->texconst0); + if (view->base.texture) + tile_mode = fd_resource(view->base.texture)->tile_mode; + + OUT_RING(ring, view->texconst0 | + A5XX_TEX_CONST_0_TILE_MODE(tile_mode)); OUT_RING(ring, view->texconst1); OUT_RING(ring, view->texconst2); OUT_RING(ring, view->texconst3); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c index e16ed1afa11..ae73bc181ba 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -49,12 +49,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, enum a5xx_tile_mode tile_mode; unsigned i; - if (gmem) { - tile_mode = TILE5_2; - } else { - tile_mode = TILE5_LINEAR; - } - for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { enum a5xx_color_fmt format = 0; enum a3xx_color_swap swap = WZYX; @@ -66,6 +60,12 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, uint32_t base = 0; uint32_t offset = 0; + if (gmem) { + tile_mode = TILE5_2; + } else { + tile_mode = TILE5_LINEAR; + } + if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; enum pipe_format pformat = psurf->format; @@ -91,6 +91,9 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } else { stride = slice->pitch * rsc->cpp; size = slice->size0; + + if (!fd_resource_level_linear(psurf->texture, psurf->u.tex.level)) + tile_mode = rsc->tile_mode; } } @@ -488,7 +491,7 @@ emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | - A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE5_LINEAR) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) | A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); OUT_RING(ring, A5XX_RB_MRT_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0)); @@ -606,6 +609,7 @@ 
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); struct fd_resource_slice *slice; + bool tiled; uint32_t offset; if (buf == BLIT_S) @@ -623,8 +627,12 @@ emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */ + tiled = rsc->tile_mode && + !fd_resource_level_linear(psurf->texture, psurf->u.tex.level); + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5); - OUT_RING(ring, 0x00000004); /* XXX RB_RESOLVE_CNTL_3 */ + OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */ + COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED)); OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0)); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_resource.c b/src/gallium/drivers/freedreno/a5xx/fd5_resource.c new file mode 100644 index 00000000000..12ee6d76fc2 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_resource.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "fd5_resource.h" + +/* indexed by cpp: */ +static const struct { + unsigned pitchalign; + unsigned heightalign; +} tile_alignment[] = { + [1] = { 128, 32 }, + [2] = { 128, 16 }, + [3] = { 128, 16 }, + [4] = { 64, 16 }, + [8] = { 64, 16 }, + [12] = { 64, 16 }, + [16] = { 64, 16 }, +}; + +/* NOTE: good way to test this is: (for example) + * piglit/bin/texelFetch fs sampler2D 100x100x1-100x300x1 + */ +static uint32_t +setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) +{ + struct pipe_resource *prsc = &rsc->base; + struct fd_screen *screen = fd_screen(prsc->screen); + enum util_format_layout layout = util_format_description(format)->layout; + uint32_t pitchalign = screen->gmem_alignw; + uint32_t heightalign; + uint32_t level, size = 0; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t depth = prsc->depth0; + /* in layer_first layout, the level (slice) contains just one + * layer (since in fact the layer contains the slices) + */ + uint32_t layers_in_level = rsc->layer_first ? 
1 : prsc->array_size; + + heightalign = tile_alignment[rsc->cpp].heightalign; + + for (level = 0; level <= prsc->last_level; level++) { + struct fd_resource_slice *slice = fd_resource_slice(rsc, level); + bool linear_level = fd_resource_level_linear(prsc, level); + uint32_t aligned_height = height; + uint32_t blocks; + + if (rsc->tile_mode && !linear_level) { + pitchalign = tile_alignment[rsc->cpp].pitchalign; + aligned_height = align(aligned_height, heightalign); + } else { + pitchalign = 64; + } + + if (layout == UTIL_FORMAT_LAYOUT_ASTC) + slice->pitch = + util_align_npot(width, pitchalign * util_format_get_blockwidth(format)); + else + slice->pitch = align(width, pitchalign); + + slice->offset = size; + blocks = util_format_get_nblocks(format, slice->pitch, aligned_height); + + /* 1d array and 2d array textures must all have the same layer size + * for each miplevel on a3xx. 3d textures can have different layer + * sizes for high levels, but the hw auto-sizer is buggy (or at least + * different than what this code does), so as soon as the layer size + * range gets into range, we stop reducing it. 
+ */ + if (prsc->target == PIPE_TEXTURE_3D && ( + level == 1 || + (level > 1 && rsc->slices[level - 1].size0 > 0xf000))) + slice->size0 = align(blocks * rsc->cpp, alignment); + else if (level == 0 || rsc->layer_first || alignment == 1) + slice->size0 = align(blocks * rsc->cpp, alignment); + else + slice->size0 = rsc->slices[level - 1].size0; + +#if 0 + debug_printf("%s: %ux%ux%u@%u: %2u: stride=%4u, size=%7u, aligned_height=%3u\n", + util_format_name(prsc->format), + prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, + level, slice->pitch * rsc->cpp, + slice->size0 * depth * layers_in_level, + aligned_height); +#endif + + size += slice->size0 * depth * layers_in_level; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + return size; +} + +uint32_t +fd5_setup_slices(struct fd_resource *rsc) +{ + uint32_t alignment; + + switch (rsc->base.target) { + case PIPE_TEXTURE_3D: + rsc->layer_first = false; + alignment = 4096; + break; + default: + rsc->layer_first = true; + alignment = 1; + break; + } + + return setup_slices(rsc, alignment, rsc->base.format); +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_resource.h b/src/gallium/drivers/freedreno/a5xx/fd5_resource.h new file mode 100644 index 00000000000..f24fe4e9837 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_resource.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all 
copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD5_RESOURCE_H_ +#define FD5_RESOURCE_H_ + +#include "freedreno_resource.h" + +uint32_t fd5_setup_slices(struct fd_resource *rsc); + +#endif /* FD5_RESOURCE_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/src/gallium/drivers/freedreno/a5xx/fd5_screen.c index 2816c361549..8feb279feaa 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_screen.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_screen.c @@ -28,8 +28,11 @@ #include "util/u_format.h" #include "fd5_screen.h" +#include "fd5_blitter.h" #include "fd5_context.h" #include "fd5_format.h" +#include "fd5_resource.h" + #include "ir3_compiler.h" static boolean @@ -106,4 +109,8 @@ fd5_screen_init(struct pipe_screen *pscreen) screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id); pscreen->context_create = fd5_context_create; pscreen->is_format_supported = fd5_screen_is_format_supported; + + screen->setup_slices = fd5_setup_slices; + if (fd_mesa_debug & FD_DBG_TTILE) + screen->tile_mode = fd5_tile_mode; } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.c b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c index 2501b7a341c..9795189b6ef 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_texture.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c @@ -272,6 +272,7 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->texconst2 |= 
A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target)); switch (cso->target) { + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: so->texconst3 = diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index a4e1e4bb86c..1653e8a3b85 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -329,7 +329,7 @@ struct fd_context { struct fd_ringbuffer *ring); void (*query_set_stage)(struct fd_batch *batch, enum fd_render_stage stage); - /* blit: */ + /* blitter: */ void (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info); /* simple gpu "memcpy": */ diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index eaaf2e8b229..7e6de8c057f 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -290,6 +290,7 @@ fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc, tmpl.depth0 = box->depth; tmpl.array_size = 1; tmpl.last_level = 0; + tmpl.bind |= PIPE_BIND_LINEAR; struct pipe_resource *pstaging = pctx->screen->resource_create(pctx->screen, &tmpl); @@ -300,7 +301,7 @@ fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc, } static void -fd_blit_staging(struct fd_context *ctx, struct fd_transfer *trans) +fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans) { struct pipe_resource *dst = trans->base.resource; struct pipe_blit_info blit = {0}; @@ -317,7 +318,26 @@ fd_blit_staging(struct fd_context *ctx, struct fd_transfer *trans) blit.filter = PIPE_TEX_FILTER_NEAREST; do_blit(ctx, &blit, false); - pipe_resource_reference(&trans->staging_prsc, NULL); +} + +static void +fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans) +{ + struct pipe_resource *src = trans->base.resource; + struct pipe_blit_info blit = {0}; + + blit.src.resource = src; + 
blit.src.format = src->format; + blit.src.level = trans->base.level; + blit.src.box = trans->base.box; + blit.dst.resource = trans->staging_prsc; + blit.dst.format = trans->staging_prsc->format; + blit.dst.level = 0; + blit.dst.box = trans->staging_box; + blit.mask = util_format_get_mask(trans->staging_prsc->format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + do_blit(ctx, &blit, false); } static unsigned @@ -396,8 +416,11 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, struct fd_resource *rsc = fd_resource(ptrans->resource); struct fd_transfer *trans = fd_transfer(ptrans); - if (trans->staging_prsc) - fd_blit_staging(ctx, trans); + if (trans->staging_prsc) { + if (ptrans->usage & PIPE_TRANSFER_WRITE) + fd_blit_from_staging(ctx, trans); + pipe_resource_reference(&trans->staging_prsc, NULL); + } if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { fd_bo_cpu_fini(rsc->bo); @@ -447,6 +470,44 @@ fd_resource_transfer_map(struct pipe_context *pctx, ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp; ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0; + /* we always need a staging texture for tiled buffers: + * + * TODO we might sometimes want to *also* shadow the resource to avoid + * splitting a batch.. for ex, mid-frame texture uploads to a tiled + * texture. + */ + if (rsc->tile_mode) { + struct fd_resource *staging_rsc; + + staging_rsc = fd_alloc_staging(ctx, rsc, level, box); + if (staging_rsc) { + // TODO for PIPE_TRANSFER_READ, need to do untiling blit.. + trans->staging_prsc = &staging_rsc->base; + trans->base.stride = util_format_get_nblocksx(format, + staging_rsc->slices[0].pitch) * staging_rsc->cpp; + trans->base.layer_stride = staging_rsc->layer_first ? 
+ staging_rsc->layer_size : staging_rsc->slices[0].size0; + trans->staging_box = *box; + trans->staging_box.x = 0; + trans->staging_box.y = 0; + trans->staging_box.z = 0; + + if (usage & PIPE_TRANSFER_READ) { + fd_blit_to_staging(ctx, trans); + fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ); + } + + buf = fd_bo_map(staging_rsc->bo); + offset = 0; + + *pptrans = ptrans; + + ctx->stats.staging_uploads++; + + return buf; + } + } + if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ)) usage |= PIPE_TRANSFER_UNSYNCHRONIZED; @@ -616,9 +677,6 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma */ uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size; - if (is_a5xx(screen) && (rsc->base.target >= PIPE_TEXTURE_2D)) - height = align(height, screen->gmem_alignh); - for (level = 0; level <= prsc->last_level; level++) { struct fd_resource_slice *slice = fd_resource_slice(rsc, level); uint32_t blocks; @@ -686,7 +744,7 @@ fd_setup_slices(struct fd_resource *rsc) alignment = slice_alignment(rsc->base.target); struct fd_screen *screen = fd_screen(rsc->base.screen); - if (is_a4xx(screen) || is_a5xx(screen)) { + if (is_a4xx(screen)) { switch (rsc->base.target) { case PIPE_TEXTURE_3D: rsc->layer_first = false; @@ -759,6 +817,18 @@ fd_resource_create(struct pipe_screen *pscreen, *prsc = *tmpl; +#define LINEAR \ + (PIPE_BIND_SCANOUT | \ + PIPE_BIND_LINEAR | \ + PIPE_BIND_DISPLAY_TARGET) + + if (screen->tile_mode && + (tmpl->target != PIPE_BUFFER) && + (tmpl->bind & PIPE_BIND_SAMPLER_VIEW) && + !(tmpl->bind & LINEAR)) { + rsc->tile_mode = screen->tile_mode(tmpl); + } + pipe_reference_init(&prsc->reference, 1); prsc->screen = pscreen; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 0b41a13be33..2834969110b 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -97,6 +97,9 @@ struct 
fd_resource { */ uint32_t bc_batch_mask; + unsigned tile_mode : 2; + unsigned preferred_tile_mode : 2; + /* * LRZ */ @@ -164,6 +167,16 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer) return offset; } +/* This might be a5xx specific, but higher mipmap levels are always linear: */ +static inline bool +fd_resource_level_linear(struct pipe_resource *prsc, int level) +{ + unsigned w = u_minify(prsc->width0, level); + if (w < 16) + return true; + return false; +} + void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard, enum fd_render_stage stage); void fd_blitter_pipe_end(struct fd_context *ctx); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1fca3c768bf..e0a9048031c 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -82,6 +82,7 @@ static const struct debug_named_value debug_options[] = { {"noindirect",FD_DBG_NOINDR, "Disable hw indirect draws (emulate on CPU)"}, {"noblit", FD_DBG_NOBLIT, "Disable blitter (fallback to generic blit path)"}, {"hiprio", FD_DBG_HIPRIO, "Force high-priority context"}, + {"ttile", FD_DBG_TTILE, "Enable texture tiling (a5xx)"}, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 5a825763f24..55537f2d1c4 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -81,6 +81,7 @@ struct fd_screen { struct fd_pipe *pipe; uint32_t (*setup_slices)(struct fd_resource *rsc); + unsigned (*tile_mode)(const struct pipe_resource *prsc); int64_t cpu_gpu_time_delta; diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 687d874b007..bfeec4c17dd 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ 
b/src/gallium/drivers/freedreno/freedreno_util.h @@ -83,6 +83,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_NOINDR 0x40000 #define FD_DBG_NOBLIT 0x80000 #define FD_DBG_HIPRIO 0x100000 +#define FD_DBG_TTILE 0x200000 extern int fd_mesa_debug; extern bool fd_binning_enabled; diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 33a64d73aa4..ef70cc08c74 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -174,6 +174,8 @@ files_libfreedreno = files( 'a5xx/fd5_query.h', 'a5xx/fd5_rasterizer.c', 'a5xx/fd5_rasterizer.h', + 'a5xx/fd5_resource.c', + 'a5xx/fd5_resource.h', 'a5xx/fd5_screen.c', 'a5xx/fd5_screen.h', 'a5xx/fd5_texture.c', -- 2.30.2