<bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/>
<bitfield name="TILE_MODE" low="8" high="9" type="a6xx_tile_mode"/>
<bitfield name="COLOR_SWAP" low="10" high="11" type="a3xx_color_swap"/>
- <!-- b12 seems to be set when UBWC "FLAGS" buffer enabled -->
<bitfield name="FLAGS" pos="12" type="boolean"/>
<bitfield name="SRGB" pos="13" type="boolean"/>
<!-- the rest is only for src -->
<bitfield name="SAMPLES" low="14" high="15" type="a3xx_msaa_samples"/>
<bitfield name="FILTER" pos="16" type="boolean"/>
<bitfield name="SAMPLES_AVERAGE" pos="18" type="boolean"/>
+ <bitfield name="UNK20" pos="20" type="boolean"/>
+ <bitfield name="UNK22" pos="22" type="boolean"/>
</bitset>
<reg32 offset="0x8c17" name="RB_2D_DST_INFO" type="a6xx_2d_surf_info"/>
<reg32 offset="0x8c18" name="RB_2D_DST_LO"/>
<reg32 offset="0x8c19" name="RB_2D_DST_HI"/>
+ <reg64 offset="0x8c18" name="RB_2D_DST" type="waddress"/>
<reg32 offset="0x8c1a" name="RB_2D_DST_SIZE">
<bitfield name="PITCH" low="0" high="15" shr="6" type="uint"/>
</reg32>
<reg32 offset="0x8c20" name="RB_2D_DST_FLAGS_LO"/>
<reg32 offset="0x8c21" name="RB_2D_DST_FLAGS_HI"/>
+ <reg64 offset="0x8c20" name="RB_2D_DST_FLAGS" type="waddress"/>
<reg32 offset="0x8c22" name="RB_2D_DST_FLAGS_PITCH">
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
<bitfield name="ARRAY_PITCH" low="11" high="21" shr="7" type="uint"/>
</reg32>
<reg32 offset="0xb4c2" name="SP_PS_2D_SRC_LO"/>
<reg32 offset="0xb4c3" name="SP_PS_2D_SRC_HI"/>
+ <reg64 offset="0xb4c2" name="SP_PS_2D_SRC" type="waddress"/>
<reg32 offset="0xb4c4" name="SP_PS_2D_SRC_PITCH">
<bitfield name="PITCH" low="9" high="24" shr="6" type="uint"/>
</reg32>
<reg32 offset="0xb4ca" name="SP_PS_2D_SRC_FLAGS_LO"/>
<reg32 offset="0xb4cb" name="SP_PS_2D_SRC_FLAGS_HI"/>
+ <reg64 offset="0xb4ca" name="SP_PS_2D_SRC_FLAGS" type="waddress"/>
<reg32 offset="0xb4cc" name="SP_PS_2D_SRC_FLAGS_PITCH">
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
<bitfield name="ARRAY_PITCH" low="11" high="21" shr="7" type="uint"/>
)
libtu_files = files(
- 'tu_blit.c',
- 'tu_blit.h',
+ 'tu_clear_blit.c',
'tu_cmd_buffer.c',
'tu_cs.c',
'tu_cs.h',
'tu_fence.c',
'tu_formats.c',
'tu_image.c',
- 'tu_meta_blit.c',
- 'tu_meta_buffer.c',
- 'tu_meta_clear.c',
- 'tu_meta_copy.c',
- 'tu_meta_resolve.c',
'tu_pass.c',
'tu_pipeline.c',
'tu_pipeline_cache.c',
+++ /dev/null
-/*
- * Copyright © 2019 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Jonathan Marek <jonathan@marek.ca>
- *
- */
-
-#include "tu_blit.h"
-
-#include "a6xx.xml.h"
-#include "adreno_common.xml.h"
-#include "adreno_pm4.xml.h"
-
-#include "vk_format.h"
-
-#include "tu_cs.h"
-
-/* TODO:
- * - Avoid disabling tiling for swapped formats
- * (image_to_image copy doesn't deal with it)
- * - Fix d24_unorm_s8_uint support & aspects
- * - UBWC
- */
-
-static VkFormat
-blit_copy_format(VkFormat format)
-{
- switch (vk_format_get_blocksizebits(format)) {
- case 8: return VK_FORMAT_R8_UINT;
- case 16: return VK_FORMAT_R16_UINT;
- case 32: return VK_FORMAT_R32_UINT;
- case 64: return VK_FORMAT_R32G32_UINT;
- case 96: return VK_FORMAT_R32G32B32_UINT;
- case 128:return VK_FORMAT_R32G32B32A32_UINT;
- default:
- unreachable("unhandled format size");
- }
-}
-
-static uint32_t
-blit_image_info(const struct tu_blit_surf *img, struct tu_native_format fmt, bool stencil_read)
-{
- if (fmt.fmt == FMT6_Z24_UNORM_S8_UINT)
- fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
-
- if (stencil_read)
- fmt.swap = XYZW;
-
- return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt.fmt) |
- A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
- A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(fmt.swap) |
- COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
- COND(img->ubwc_size, A6XX_SP_PS_2D_SRC_INFO_FLAGS);
-}
-
-static void
-emit_blit_step(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
- const struct tu_blit *blt)
-{
- struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;
-
- struct tu_native_format dfmt = tu6_format_color(blt->dst.fmt, blt->dst.image_tile_mode);
- struct tu_native_format sfmt = tu6_format_texture(blt->src.fmt, blt->src.image_tile_mode);
-
- if (dfmt.fmt == FMT6_Z24_UNORM_S8_UINT)
- dfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
-
- enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(dfmt.fmt);
-
- if (vk_format_is_srgb(blt->dst.fmt)) {
- assert(ifmt == R2D_UNORM8);
- ifmt = R2D_UNORM8_SRGB;
- }
-
- uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
- COND(blt->type == TU_BLIT_CLEAR, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
- A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt.fmt) | /* not required? */
- COND(dfmt.fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8,
- A6XX_RB_2D_BLIT_CNTL_D24S8) |
- A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
- A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
- tu_cs_emit(cs, blit_cntl);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
- tu_cs_emit(cs, blit_cntl);
-
- /*
- * Emit source:
- */
- if (blt->type == TU_BLIT_CLEAR) {
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
- tu_cs_emit(cs, blt->clear_value[0]);
- tu_cs_emit(cs, blt->clear_value[1]);
- tu_cs_emit(cs, blt->clear_value[2]);
- tu_cs_emit(cs, blt->clear_value[3]);
- } else {
- tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
- tu_cs_emit(cs, blit_image_info(&blt->src, sfmt, blt->stencil_read) |
- A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
- /* TODO: should disable this bit for integer formats ? */
- COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
- COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
- 0x500000);
- tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
- A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
- tu_cs_emit_qw(cs, blt->src.va);
- tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));
-
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
-
- if (blt->src.ubwc_size) {
- tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
- tu_cs_emit_qw(cs, blt->src.ubwc_va);
- tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) |
- A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2));
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- }
- }
-
- /*
- * Emit destination:
- */
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
- tu_cs_emit(cs, blit_image_info(&blt->dst, dfmt, false));
- tu_cs_emit_qw(cs, blt->dst.va);
- tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
-
- if (blt->dst.ubwc_size) {
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
- tu_cs_emit_qw(cs, blt->dst.ubwc_va);
- tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) |
- A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2));
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- tu_cs_emit(cs, 0x00000000);
- }
-
- tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
- tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
- tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
- tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
- tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));
-
- tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
- tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
- A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
- tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
- A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));
-
- tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
- tu_cs_emit(cs, 0x3f);
- tu_cs_emit_wfi(cs);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
- tu_cs_emit(cs, 0);
-
- if (dfmt.fmt == FMT6_10_10_10_2_UNORM_DEST)
- dfmt.fmt = FMT6_16_16_16_16_FLOAT;
-
- tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
- tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
- COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
- A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(dfmt.fmt) |
- COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
- A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
- tu_cs_emit(cs, phys_dev->magic.RB_UNKNOWN_8E04_blit);
-
- tu_cs_emit_pkt7(cs, CP_BLIT, 1);
- tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- tu_cs_emit_wfi(cs);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
- tu_cs_emit(cs, 0);
-}
-
-void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
- struct tu_blit *blt)
-{
- struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;
-
- switch (blt->type) {
- case TU_BLIT_COPY:
- blt->stencil_read =
- blt->dst.fmt == VK_FORMAT_R8_UNORM &&
- blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
-
- assert(vk_format_get_blocksize(blt->dst.fmt) ==
- vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
- assert(blt->src.samples == blt->dst.samples);
-
- if (vk_format_is_compressed(blt->src.fmt)) {
- unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
- unsigned block_height = vk_format_get_blockheight(blt->src.fmt);
-
- blt->src.pitch /= block_width;
- blt->src.x /= block_width;
- blt->src.y /= block_height;
- blt->src.fmt = blit_copy_format(blt->src.fmt);
-
- /* for image_to_image copy, width/height is on the src format */
- blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
- blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
- }
-
- if (vk_format_is_compressed(blt->dst.fmt)) {
- unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
- unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);
-
- blt->dst.pitch /= block_width;
- blt->dst.x /= block_width;
- blt->dst.y /= block_height;
- blt->dst.fmt = blit_copy_format(blt->dst.fmt);
- }
-
- if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- blt->dst.fmt = blit_copy_format(blt->dst.fmt);
-
- if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- blt->src.fmt = blit_copy_format(blt->src.fmt);
-
- /* TODO: multisample image copy does not work correctly with tiling/UBWC */
- blt->src.x *= blt->src.samples;
- blt->dst.x *= blt->dst.samples;
- blt->src.width *= blt->src.samples;
- blt->dst.width *= blt->dst.samples;
- blt->src.samples = 1;
- blt->dst.samples = 1;
- break;
- case TU_BLIT_CLEAR:
- /* unsupported format cleared as UINT32 */
- if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- blt->dst.fmt = VK_FORMAT_R32_UINT;
- /* TODO: multisample image clearing also seems not to work with certain
- * formats. The blob uses a shader-based clear in these cases.
- */
- blt->dst.x *= blt->dst.samples;
- blt->dst.width *= blt->dst.samples;
- blt->dst.samples = 1;
- blt->src = blt->dst;
- break;
- default:
- assert(blt->dst.samples == 1);
- }
-
- tu6_emit_event_write(cmdbuf, cs, LRZ_FLUSH, false);
- tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
- tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
- tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_COLOR, false);
- tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_DEPTH, false);
-
- tu_cs_emit_wfi(cs);
- tu_cs_emit_regs(cs,
- A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
-
- /* buffer copy setup */
- tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
- tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
-
- for (unsigned layer = 0; layer < blt->layers; layer++) {
- if (blt->buffer) {
- struct tu_blit line_blt = *blt;
- uint64_t dst_va = line_blt.dst.va, src_va = line_blt.src.va;
- unsigned blocksize = vk_format_get_blocksize(blt->src.fmt);
- uint32_t size = line_blt.src.width, tmp;
-
- while (size) {
- line_blt.src.x = (src_va & 63) / blocksize;
- line_blt.src.va = src_va & ~63;
- tmp = MIN2(size, 0x4000 - line_blt.src.x);
-
- line_blt.dst.x = (dst_va & 63) / blocksize;
- line_blt.dst.va = dst_va & ~63;
- tmp = MIN2(tmp, 0x4000 - line_blt.dst.x);
-
- line_blt.src.width = line_blt.dst.width = tmp;
-
- emit_blit_step(cmdbuf, cs, &line_blt);
-
- src_va += tmp * blocksize;
- dst_va += tmp * blocksize;
- size -= tmp;
- }
- } else if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
- /* per line copy path (buffer_to_image) */
- assert(blt->type == TU_BLIT_COPY && !blt->src.image_tile_mode);
- struct tu_blit line_blt = *blt;
- uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y;
-
- line_blt.src.y = 0;
- line_blt.src.pitch = 0;
- line_blt.src.height = 1;
- line_blt.dst.height = 1;
-
- for (unsigned y = 0; y < blt->src.height; y++) {
- line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
- line_blt.src.va = src_va & ~63;
-
- emit_blit_step(cmdbuf, cs, &line_blt);
-
- line_blt.dst.y++;
- src_va += blt->src.pitch;
- }
- } else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
- /* per line copy path (image_to_buffer) */
- assert(blt->type == TU_BLIT_COPY && !blt->dst.image_tile_mode);
- struct tu_blit line_blt = *blt;
- uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y;
-
- line_blt.dst.y = 0;
- line_blt.dst.pitch = 0;
- line_blt.src.height = 1;
- line_blt.dst.height = 1;
-
- for (unsigned y = 0; y < blt->src.height; y++) {
- line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
- line_blt.dst.va = dst_va & ~63;
-
- emit_blit_step(cmdbuf, cs, &line_blt);
-
- line_blt.src.y++;
- dst_va += blt->dst.pitch;
- }
- } else {
- emit_blit_step(cmdbuf, cs, blt);
- }
- blt->dst.va += blt->dst.layer_size;
- blt->src.va += blt->src.layer_size;
- blt->dst.ubwc_va += blt->dst.ubwc_size;
- blt->src.ubwc_va += blt->src.ubwc_size;
- }
-
- tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
- tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
- tu6_emit_event_write(cmdbuf, cs, CACHE_FLUSH_TS, true);
- tu6_emit_event_write(cmdbuf, cs, CACHE_INVALIDATE, false);
-}
+++ /dev/null
-/*
- * Copyright © 2019 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Jonathan Marek <jonathan@marek.ca>
- *
- */
-
-#ifndef TU_BLIT_H
-#define TU_BLIT_H
-
-#include "tu_private.h"
-
-#include "vk_format.h"
-
-struct tu_blit_surf {
- VkFormat fmt;
- enum a6xx_tile_mode tile_mode;
- enum a6xx_tile_mode image_tile_mode;
- uint64_t va;
- uint32_t pitch, layer_size;
- uint32_t x, y;
- uint32_t width, height;
- unsigned samples;
- uint64_t ubwc_va;
- uint32_t ubwc_pitch;
- uint32_t ubwc_size;
-};
-
-static inline struct tu_blit_surf
-tu_blit_surf(struct tu_image *image,
- VkImageSubresourceLayers subres,
- const VkOffset3D *offsets)
-{
- unsigned layer = subres.baseArrayLayer;
- if (image->type == VK_IMAGE_TYPE_3D) {
- assert(layer == 0);
- layer = MIN2(offsets[0].z, offsets[1].z);
- }
-
- return (struct tu_blit_surf) {
- .fmt = image->vk_format,
- .tile_mode = tu6_get_image_tile_mode(image, subres.mipLevel),
- .image_tile_mode = image->layout.tile_mode,
- .va = tu_image_base(image, subres.mipLevel, layer),
- .pitch = tu_image_stride(image, subres.mipLevel),
- .layer_size = tu_layer_size(image, subres.mipLevel),
- .x = MIN2(offsets[0].x, offsets[1].x),
- .y = MIN2(offsets[0].y, offsets[1].y),
- .width = abs(offsets[1].x - offsets[0].x),
- .height = abs(offsets[1].y - offsets[0].y),
- .samples = image->samples,
- .ubwc_va = tu_image_ubwc_base(image, subres.mipLevel, layer),
- .ubwc_pitch = tu_image_ubwc_pitch(image, subres.mipLevel),
- .ubwc_size = tu_image_ubwc_size(image, subres.mipLevel),
- };
-}
-
-static inline struct tu_blit_surf
-tu_blit_surf_ext(struct tu_image *image,
- VkImageSubresourceLayers subres,
- VkOffset3D offset,
- VkExtent3D extent)
-{
- return tu_blit_surf(image, subres, (VkOffset3D[]) {
- offset, {.x = offset.x + extent.width,
- .y = offset.y + extent.height,
- .z = offset.z}
- });
-}
-
-static inline struct tu_blit_surf
-tu_blit_surf_whole(struct tu_image *image, int level, int layer)
-{
- return tu_blit_surf(image, (VkImageSubresourceLayers){
- .mipLevel = level,
- .baseArrayLayer = layer,
- }, (VkOffset3D[]) {
- {}, {
- u_minify(image->extent.width, level),
- u_minify(image->extent.height, level),
- }
- });
-}
-
-static inline struct tu_blit_surf
-sysmem_attachment_surf(const struct tu_image_view *view, uint32_t base_layer,
- const VkRect2D *rect)
-{
- return tu_blit_surf_ext(view->image, (VkImageSubresourceLayers) {
- .mipLevel = view->base_mip,
- .baseArrayLayer = base_layer,
- }, (VkOffset3D) {
- .x = rect->offset.x,
- .y = rect->offset.y,
- .z = 0,
- }, (VkExtent3D) {
- .width = rect->extent.width,
- .height = rect->extent.height,
- .depth = 1,
- });
-}
-
-
-enum tu_blit_type {
- TU_BLIT_DEFAULT,
- TU_BLIT_COPY,
- TU_BLIT_CLEAR,
-};
-
-struct tu_blit {
- struct tu_blit_surf dst;
- struct tu_blit_surf src;
- uint32_t layers;
- bool filter;
- bool stencil_read;
- bool buffer; /* 1d copy/clear */
- enum a6xx_rotation rotation;
- uint32_t clear_value[4];
- enum tu_blit_type type;
-};
-
-void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
- struct tu_blit *blt);
-
-#endif /* TU_BLIT_H */
--- /dev/null
+/*
+ * Copyright 2019-2020 Valve Corporation
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors:
+ * Jonathan Marek <jonathan@marek.ca>
+ */
+
+#include "tu_private.h"
+
+#include "tu_cs.h"
+#include "vk_format.h"
+
+#include "util/format_r11g11b10f.h"
+#include "util/format_rgb9e5.h"
+#include "util/format_srgb.h"
+#include "util/u_half.h"
+
+/* helper functions previously in tu_formats.c */
+/* Return a 32-bit mask with the low `bits` bits set.
+ * `bits` is asserted to be at most 32.
+ */
+static uint32_t
+tu_pack_mask(int bits)
+{
+ assert(bits <= 32);
+ return (1ull << bits) - 1; /* shift is done on an unsigned long long so bits == 32 is a valid shift count */
+}
+/* Convert a float to an unsigned normalized integer of `bits` bits.
+ * Inputs below 0.0 give 0, inputs above 1.0 give the all-ones maximum, and
+ * anything else is scaled by the maximum and rounded by _mesa_lroundevenf().
+ */
+static uint32_t
+tu_pack_float32_for_unorm(float val, int bits)
+{
+ const uint32_t max = tu_pack_mask(bits);
+ if (val < 0.0f)
+ return 0;
+ else if (val > 1.0f)
+ return max;
+ else
+ return _mesa_lroundevenf(val * (float) max); /* NOTE(review): a NaN input fails both comparisons above and ends up here - confirm callers never pass NaN */
+}
+/* Convert a float to a signed normalized integer of `bits` bits.
+ * The input is clamped to [-1.0, 1.0], scaled by the largest positive value
+ * (a mask of bits - 1 ones) and rounded by _mesa_lroundevenf(); the signed
+ * result is then truncated to its low `bits` bits for the return value.
+ */
+static uint32_t
+tu_pack_float32_for_snorm(float val, int bits)
+{
+ const int32_t max = tu_pack_mask(bits - 1);
+ int32_t tmp;
+ if (val < -1.0f)
+ tmp = -max; /* clamps to -max, so the value -max - 1 is never produced */
+ else if (val > 1.0f)
+ tmp = max;
+ else
+ tmp = _mesa_lroundevenf(val * (float) max);
+
+ return tmp & tu_pack_mask(bits); /* drop the sign-extension bits above `bits` */
+}
+/* Convert a float to an unsigned scaled integer of `bits` bits.
+ * Negative inputs give 0, inputs above the all-ones maximum give that
+ * maximum, and anything else is converted with a plain cast to uint32_t.
+ */
+static uint32_t
+tu_pack_float32_for_uscaled(float val, int bits)
+{
+ const uint32_t max = tu_pack_mask(bits);
+ if (val < 0.0f)
+ return 0;
+ else if (val > (float) max)
+ return max;
+ else
+ return (uint32_t) val; /* the cast discards the fractional part */
+}
+/* Convert a float to a signed scaled integer of `bits` bits.
+ * The input is clamped to [min, max] where max is a mask of bits - 1 ones
+ * and min is -max - 1, converted with a plain cast to int32_t, and then
+ * truncated to its low `bits` bits for the return value.
+ */
+static uint32_t
+tu_pack_float32_for_sscaled(float val, int bits)
+{
+ const int32_t max = tu_pack_mask(bits - 1);
+ const int32_t min = -max - 1;
+ int32_t tmp;
+ if (val < (float) min)
+ tmp = min;
+ else if (val > (float) max)
+ tmp = max;
+ else
+ tmp = (int32_t) val; /* the cast discards the fractional part */
+
+ return tmp & tu_pack_mask(bits); /* drop the sign-extension bits above `bits` */
+}
+/* Pack an unsigned integer into `bits` bits by keeping only its low `bits`
+ * bits; larger values are truncated, not clamped.
+ */
+static uint32_t
+tu_pack_uint32_for_uint(uint32_t val, int bits)
+{
+ return val & tu_pack_mask(bits);
+}
+/* Pack a signed integer into `bits` bits by keeping only its low `bits`
+ * bits; out-of-range values are truncated, not clamped.
+ */
+static uint32_t
+tu_pack_int32_for_sint(int32_t val, int bits)
+{
+ return val & tu_pack_mask(bits);
+}
+/* Pack a float into a 16- or 32-bit floating point field.
+ * `bits` is asserted to be 16 or 32: 16 goes through util_float_to_half(),
+ * 32 goes through fui() (presumably the raw bit pattern of the float -
+ * confirm against the helper's definition).
+ */
+static uint32_t
+tu_pack_float32_for_sfloat(float val, int bits)
+{
+ assert(bits == 16 || bits == 32);
+ return bits == 16 ? util_float_to_half(val) : fui(val);
+}
+/* One 32-bit component of a clear value, viewable as float, signed integer
+ * or unsigned integer; the packing helpers pick the member to read based on
+ * the channel type.
+ */
+union tu_clear_component_value {
+ float float32;
+ int32_t int32;
+ uint32_t uint32;
+};
+/* Pack one clear component according to channel description `ch`.
+ * The channel type (unsigned / signed / float) together with the normalized
+ * and pure_integer flags selects which union member is read and which
+ * tu_pack_* helper performs the conversion. The result is asserted to fit
+ * in ch->size bits and is returned in the low bits of the return value.
+ */
+static uint32_t
+tu_pack_clear_component_value(union tu_clear_component_value val,
+ const struct util_format_channel_description *ch)
+{
+ uint32_t packed;
+
+ switch (ch->type) {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* normalized, scaled, or pure integer */
+ if (ch->normalized)
+ packed = tu_pack_float32_for_unorm(val.float32, ch->size);
+ else if (ch->pure_integer)
+ packed = tu_pack_uint32_for_uint(val.uint32, ch->size);
+ else
+ packed = tu_pack_float32_for_uscaled(val.float32, ch->size);
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ /* normalized, scaled, or pure integer */
+ if (ch->normalized)
+ packed = tu_pack_float32_for_snorm(val.float32, ch->size);
+ else if (ch->pure_integer)
+ packed = tu_pack_int32_for_sint(val.int32, ch->size);
+ else
+ packed = tu_pack_float32_for_sscaled(val.float32, ch->size);
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ packed = tu_pack_float32_for_sfloat(val.float32, ch->size);
+ break;
+ default:
+ unreachable("unexpected channel type");
+ packed = 0; /* keeps `packed` initialized on this path */
+ break;
+ }
+
+ assert((packed & tu_pack_mask(ch->size)) == packed);
+ return packed;
+}
+/* Return the channel description that component `comp` of `desc` is
+ * swizzled from: PIPE_SWIZZLE_X/Y/Z/W select channel 0/1/2/3. Any other
+ * swizzle value yields NULL.
+ */
+static const struct util_format_channel_description *
+tu_get_format_channel_description(const struct util_format_description *desc,
+ int comp)
+{
+ switch (desc->swizzle[comp]) {
+ case PIPE_SWIZZLE_X:
+ return &desc->channel[0];
+ case PIPE_SWIZZLE_Y:
+ return &desc->channel[1];
+ case PIPE_SWIZZLE_Z:
+ return &desc->channel[2];
+ case PIPE_SWIZZLE_W:
+ return &desc->channel[3];
+ default:
+ return NULL;
+ }
+}
+/* Fetch component `comp` (asserted < 4) of a VkClearValue as a 32-bit union.
+ * ZS colorspace: component 0 is the depth float, component 1 the stencil
+ * integer (comp is asserted < 2). sRGB colorspace: components 0-2 are run
+ * through util_format_linear_to_srgb_float(). Component 3 of an sRGB format
+ * and every component of any other colorspace copy the raw 32-bit color
+ * word unchanged.
+ */
+static union tu_clear_component_value
+tu_get_clear_component_value(const VkClearValue *val, int comp,
+ enum util_format_colorspace colorspace)
+{
+ assert(comp < 4);
+
+ union tu_clear_component_value tmp;
+ switch (colorspace) {
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ assert(comp < 2);
+ if (comp == 0)
+ tmp.float32 = val->depthStencil.depth;
+ else
+ tmp.uint32 = val->depthStencil.stencil;
+ break;
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ if (comp < 3) {
+ tmp.float32 = util_format_linear_to_srgb_float(val->color.float32[comp]);
+ break;
+ } /* fallthrough: comp 3 is copied raw by the default case */
+ default:
+ assert(comp < 4);
+ tmp.uint32 = val->color.uint32[comp];
+ break;
+ }
+
+ return tmp;
+}
+
+/* r2d_ = BLIT_OP_SCALE operations */
+/* Map an a6xx hardware color format to the a6xx_2d_ifmt value that is
+ * written to the IFMT field of the 2D blit control registers (presumably the
+ * blitter's intermediate format - confirm against the register docs).
+ * Note the groupings: 16-bit UNORM/SNORM formats share R2D_FLOAT32 with the
+ * 32-bit floats, and 10_10_10_2_UNORM / 11_11_10_FLOAT share R2D_FLOAT16
+ * with the 16-bit floats. Formats not listed hit unreachable().
+ */
+static enum a6xx_2d_ifmt
+format_to_ifmt(enum a6xx_format fmt)
+{
+ switch (fmt) {
+ case FMT6_A8_UNORM:
+ case FMT6_8_UNORM:
+ case FMT6_8_SNORM:
+ case FMT6_8_8_UNORM:
+ case FMT6_8_8_SNORM:
+ case FMT6_8_8_8_8_UNORM:
+ case FMT6_8_8_8_X8_UNORM:
+ case FMT6_8_8_8_8_SNORM:
+ case FMT6_4_4_4_4_UNORM:
+ case FMT6_5_5_5_1_UNORM:
+ case FMT6_5_6_5_UNORM:
+ case FMT6_Z24_UNORM_S8_UINT:
+ case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
+ return R2D_UNORM8;
+
+ case FMT6_32_UINT:
+ case FMT6_32_SINT:
+ case FMT6_32_32_UINT:
+ case FMT6_32_32_SINT:
+ case FMT6_32_32_32_32_UINT:
+ case FMT6_32_32_32_32_SINT:
+ return R2D_INT32;
+
+ case FMT6_16_UINT:
+ case FMT6_16_SINT:
+ case FMT6_16_16_UINT:
+ case FMT6_16_16_SINT:
+ case FMT6_16_16_16_16_UINT:
+ case FMT6_16_16_16_16_SINT:
+ case FMT6_10_10_10_2_UINT:
+ return R2D_INT16;
+
+ case FMT6_8_UINT:
+ case FMT6_8_SINT:
+ case FMT6_8_8_UINT:
+ case FMT6_8_8_SINT:
+ case FMT6_8_8_8_8_UINT:
+ case FMT6_8_8_8_8_SINT:
+ return R2D_INT8;
+
+ case FMT6_16_UNORM:
+ case FMT6_16_SNORM:
+ case FMT6_16_16_UNORM:
+ case FMT6_16_16_SNORM:
+ case FMT6_16_16_16_16_UNORM:
+ case FMT6_16_16_16_16_SNORM:
+ case FMT6_32_FLOAT:
+ case FMT6_32_32_FLOAT:
+ case FMT6_32_32_32_32_FLOAT:
+ return R2D_FLOAT32;
+
+ case FMT6_16_FLOAT:
+ case FMT6_16_16_FLOAT:
+ case FMT6_16_16_16_16_FLOAT:
+ case FMT6_11_11_10_FLOAT:
+ case FMT6_10_10_10_2_UNORM:
+ case FMT6_10_10_10_2_UNORM_DEST:
+ return R2D_FLOAT16;
+
+ default:
+ unreachable("bad format");
+ return 0; /* value returned if unreachable() does not stop execution */
+ }
+}
+/* Emit the 2D blit rectangle registers.
+ * The destination rectangle (GRAS_2D_DST_TL/BR) is always written; the
+ * bottom-right corner is origin + extent - 1. When `src` is NULL nothing
+ * else is emitted; otherwise the source rectangle (GRAS_2D_SRC_TL/BR X and
+ * Y) is written with the same extent, so both rectangles have equal size.
+ */
+static void
+r2d_coords(struct tu_cs *cs,
+ const VkOffset2D *dst,
+ const VkOffset2D *src,
+ const VkExtent2D *extent)
+{
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y),
+ A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1));
+
+ if (!src) /* no source rectangle requested by the caller */
+ return;
+
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_2D_SRC_TL_X(.x = src->x),
+ A6XX_GRAS_2D_SRC_BR_X(.x = src->x + extent->width - 1),
+ A6XX_GRAS_2D_SRC_TL_Y(.y = src->y),
+ A6XX_GRAS_2D_SRC_BR_Y(.y = src->y + extent->height - 1));
+}
+/* Convert a VkClearValue to the four dwords written to
+ * RB_2D_SRC_SOLID_C0..C3 for a solid-color 2D blit of `format`.
+ * Depth/stencil formats and E5B9G9R9 are special-cased; every other format
+ * is packed per channel according to the ifmt returned by format_to_ifmt():
+ * 8-bit unorm/snorm (with sRGB encoding of the first three channels where
+ * the format is sRGB), half float, or the raw 32-bit color word.
+ * Unused dwords stay zero.
+ */
+static void
+r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
+{
+ uint32_t clear_value[4] = {};
+
+ switch (format) {
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ /* cleared as r8g8b8a8_unorm using special format */
+ clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
+ clear_value[1] = clear_value[0] >> 8; /* not masked to 8 bits - presumably only the low byte of each dword is consumed; confirm */
+ clear_value[2] = clear_value[0] >> 16;
+ clear_value[3] = val->depthStencil.stencil;
+ break;
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_D32_SFLOAT:
+ /* R2D_FLOAT32 */
+ clear_value[0] = fui(val->depthStencil.depth);
+ break;
+ case VK_FORMAT_S8_UINT:
+ clear_value[0] = val->depthStencil.stencil;
+ break;
+ case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ /* cleared as UINT32 */
+ clear_value[0] = float3_to_rgb9e5(val->color.float32);
+ break;
+ default:
+ assert(!vk_format_is_depth_or_stencil(format));
+ const struct util_format_description *desc = vk_format_description(format);
+ enum a6xx_2d_ifmt ifmt = format_to_ifmt(tu6_base_format(format));
+
+ assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
+ format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));
+
+ for (unsigned i = 0; i < desc->nr_channels; i++) { /* color component i is paired with channel i; desc->swizzle is not consulted here */
+ const struct util_format_channel_description *ch = &desc->channel[i];
+ if (ifmt == R2D_UNORM8) {
+ float linear = val->color.float32[i];
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
+ linear = util_format_linear_to_srgb_float(val->color.float32[i]); /* despite its name, `linear` now holds the sRGB-encoded value */
+
+ if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
+ clear_value[i] = tu_pack_float32_for_snorm(linear, 8);
+ else
+ clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
+ } else if (ifmt == R2D_FLOAT16) {
+ clear_value[i] = util_float_to_half(val->color.float32[i]);
+ } else {
+ assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
+ ifmt == R2D_INT16 || ifmt == R2D_INT8);
+ clear_value[i] = val->color.uint32[i]; /* raw 32-bit word, no conversion */
+ }
+ }
+ break;
+ }
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
+ tu_cs_emit_array(cs, clear_value, 4);
+}
+/* Program the 2D blit source registers (SP_PS_2D_SRC_INFO, _SIZE, address
+ * and _PITCH) for mip `level` / array `layer` of `image`, with the data
+ * interpreted as `vk_format`. When the image has a non-zero ubwc_layer_size
+ * the SP_PS_2D_SRC_FLAGS address and pitch are emitted as well.
+ * `linear_filter` sets the FILTER bit; `stencil_read` forces the XYZW swap
+ * and is asserted to be used only on images without UBWC.
+ * `cmd` is not referenced in this body.
+ */
+static void
+r2d_src(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ struct tu_image *image,
+ VkFormat vk_format,
+ uint32_t level,
+ uint32_t layer,
+ bool linear_filter,
+ bool stencil_read)
+{
+ struct tu_native_format format = tu6_format_image_src(image, vk_format, level);
+
+ /* stencil readout path fails with UBWC enabled (why?) */
+ assert(!stencil_read || !image->layout.ubwc_layer_size);
+
+ if (stencil_read)
+ format.swap = XYZW;
+
+ tu_cs_emit_regs(cs,
+ A6XX_SP_PS_2D_SRC_INFO(
+ .color_format = format.fmt,
+ .tile_mode = format.tile_mode,
+ .color_swap = format.swap,
+ .flags = image->layout.ubwc_layer_size != 0,
+ .srgb = vk_format_is_srgb(vk_format),
+ .samples = tu_msaa_samples(image->samples),
+ .filter = linear_filter,
+ .samples_average = image->samples > 1 && /* averaging only for multisampled, non-integer, non-depth/stencil formats */
+ !vk_format_is_int(vk_format) &&
+ !vk_format_is_depth_or_stencil(vk_format),
+ .unk20 = 1, /* bits 20 and 22 are always set; their meaning is unknown */
+ .unk22 = 1),
+ A6XX_SP_PS_2D_SRC_SIZE(
+ .width = tu_minify(image->extent.width, level),
+ .height = tu_minify(image->extent.height, level)),
+ A6XX_SP_PS_2D_SRC(tu_image_base_ref(image, level, layer)),
+ A6XX_SP_PS_2D_SRC_PITCH(.pitch = tu_image_pitch(image, level)));
+
+ if (image->layout.ubwc_layer_size) {
+ tu_cs_emit_regs(cs,
+ A6XX_SP_PS_2D_SRC_FLAGS(tu_image_ubwc_base_ref(image, level, layer)),
+ A6XX_SP_PS_2D_SRC_FLAGS_PITCH(.pitch = tu_image_ubwc_pitch(image, level)));
+ }
+}
+/* Program the 2D blit source registers for a linear buffer at GPU address
+ * `va`, interpreted as a `width` x `height` surface of `vk_format` with the
+ * given `pitch`. Tile mode, flags, samples and filter fields are left at
+ * zero. `cmd` is not referenced in this body.
+ */
+static void
+r2d_src_buffer(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat vk_format,
+ uint64_t va, uint32_t pitch,
+ uint32_t width, uint32_t height)
+{
+ struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);
+
+ tu_cs_emit_regs(cs,
+ A6XX_SP_PS_2D_SRC_INFO(
+ .color_format = format.fmt,
+ .color_swap = format.swap,
+ .srgb = vk_format_is_srgb(vk_format),
+ .unk20 = 1, /* bits 20 and 22 are always set; their meaning is unknown */
+ .unk22 = 1),
+ A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
+ A6XX_SP_PS_2D_SRC_LO((uint32_t) va), /* 64-bit address split into low and high dwords */
+ A6XX_SP_PS_2D_SRC_HI(va >> 32),
+ A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch));
+}
+/* Program the 2D blit destination registers (RB_2D_DST_INFO, address and
+ * _SIZE pitch) for mip `level` / array `layer` of `image`, with the data
+ * written as `vk_format`. The image is asserted to be single-sampled. When
+ * the image has a non-zero ubwc_layer_size the RB_2D_DST_FLAGS address and
+ * pitch are emitted as well.
+ */
+static void
+r2d_dst(struct tu_cs *cs,
+ struct tu_image *image,
+ VkFormat vk_format,
+ uint32_t level,
+ uint32_t layer)
+{
+ struct tu_native_format format = tu6_format_image(image, vk_format, level);
+
+ assert(image->samples == 1);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_2D_DST_INFO(
+ .color_format = format.fmt,
+ .tile_mode = format.tile_mode,
+ .color_swap = format.swap,
+ .flags = image->layout.ubwc_layer_size != 0,
+ .srgb = vk_format_is_srgb(image->vk_format)), /* NOTE(review): uses image->vk_format, while r2d_src tests the vk_format argument - confirm this is intended */
+ A6XX_RB_2D_DST(tu_image_base_ref(image, level, layer)),
+ A6XX_RB_2D_DST_SIZE(.pitch = tu_image_pitch(image, level)));
+
+ if (image->layout.ubwc_layer_size) {
+ tu_cs_emit_regs(cs,
+ A6XX_RB_2D_DST_FLAGS(tu_image_ubwc_base_ref(image, level, layer)),
+ A6XX_RB_2D_DST_FLAGS_PITCH(.pitch = tu_image_ubwc_pitch(image, level)));
+ }
+}
+/* Program the 2D blit destination registers for a linear buffer at GPU
+ * address `va` written as `vk_format` with the given `pitch`. Tile mode and
+ * flags fields are left at zero.
+ */
+static void
+r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
+{
+ struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_2D_DST_INFO(
+ .color_format = format.fmt,
+ .color_swap = format.swap,
+ .srgb = vk_format_is_srgb(vk_format)),
+ A6XX_RB_2D_DST_LO((uint32_t) va), /* 64-bit address split into low and high dwords */
+ A6XX_RB_2D_DST_HI(va >> 32),
+ A6XX_RB_2D_DST_SIZE(.pitch = pitch));
+}
+/* Emit the format-dependent 2D blit state for `vk_format`:
+ * RB_UNKNOWN_8C01, the blit control word (written identically to
+ * RB_2D_BLIT_CNTL and GRAS_2D_BLIT_CNTL) and SP_2D_SRC_FORMAT.
+ * `rotation`, `clear` (solid-color mode) and `scissor` go into the blit
+ * control word. `mask` only selects the RB_UNKNOWN_8C01 value for the
+ * Z24S8-as-RGBA8 format (0x8 or 0x7); the MASK fields themselves are always
+ * written as 0xf. `cmd` is not referenced in this body.
+ */
+static void
+r2d_setup_common(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat vk_format,
+ enum a6xx_rotation rotation,
+ bool clear,
+ uint8_t mask,
+ bool scissor)
+{
+ enum a6xx_format format = tu6_base_format(vk_format);
+ enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
+ uint32_t unknown_8c01 = 0;
+
+ if (format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
+ /* preserve depth channels */
+ if (mask == 0x8)
+ unknown_8c01 = 0x00084001;
+ /* preserve stencil channel */
+ if (mask == 0x7)
+ unknown_8c01 = 0x08000041;
+ }
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
+ tu_cs_emit(cs, unknown_8c01);
+
+ uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
+ .scissor = scissor,
+ .rotate = rotation,
+ .solid_color = clear,
+ .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, /* the D24S8 bit is not set for clears */
+ .color_format = format,
+ .mask = 0xf,
+ .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt,
+ ).value;
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+ tu_cs_emit(cs, blit_cntl);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+ tu_cs_emit(cs, blit_cntl);
+
+ if (format == FMT6_10_10_10_2_UNORM_DEST) /* only SP_2D_SRC_FORMAT sees the substituted format; blit_cntl was built above */
+ format = FMT6_16_16_16_16_FLOAT;
+
+ tu_cs_emit_regs(cs, A6XX_SP_2D_SRC_FORMAT(
+ .sint = vk_format_is_sint(vk_format),
+ .uint = vk_format_is_uint(vk_format),
+ .color_format = format,
+ .srgb = vk_format_is_srgb(vk_format),
+ .mask = 0xf));
+}
+
+/* blit_ops.setup for the 2D path.
+ *
+ * Emits color/depth CCU flush and invalidate events plus a cache invalidate,
+ * waits for idle, writes RB_CCU_CNTL with the device's ccu_offset_bypass and
+ * then emits the common 2D state with the scissor disabled.
+ */
+static void
+r2d_setup(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat vk_format,
+ enum a6xx_rotation rotation,
+ bool clear,
+ uint8_t mask)
+{
+ const struct tu_physical_device *phys_dev = cmd->device->physical_device;
+
+ /* TODO: flushing with barriers instead of blindly always flushing */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR, false);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH, false);
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
+ tu_cs_emit_wfi(cs);
+ tu_cs_emit_regs(cs,
+ A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
+
+ /* last argument is the scissor flag */
+ r2d_setup_common(cmd, cs, vk_format, rotation, clear, mask, false);
+}
+
+/* blit_ops.run for the 2D path: emit a CP_BLIT packet with BLIT_OP_SCALE,
+ * followed by color/depth CCU flush events and a cache invalidate.
+ */
+static void
+r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+ tu_cs_emit_pkt7(cs, CP_BLIT, 1);
+ tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ /* TODO: flushing with barriers instead of blindly always flushing */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+}
+
+/* r3d_ = shader path operations */
+
+/* Emit the fixed VS/FS pipeline state used by the shader (r3d) path and
+ * upload both shaders into memory allocated from cmd->sub_cs.
+ *
+ * blit:    when true, one varying (the blit coords in r1.xy) is enabled and
+ *          the FS gets a single texture prefetch; when false, the FS has no
+ *          inputs.
+ * num_rts: number of mov instructions generated for the FS; instruction i
+ *          copies constants c[i] into registers starting at component i * 4.
+ *          Must leave room for the trailing END within the 16-instruction
+ *          slot reserved per shader.
+ *
+ * The VS is hand-assembled below; the FS is generated at run time.
+ */
+static void
+r3d_pipeline(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts)
+{
+ static const instr_t vs_code[] = {
+ /* r0.xyz = r0.w ? c1.xyz : c0.xyz
+ * r1.xy = r0.w ? c1.zw : c0.zw
+ * r0.w = 1.0f
+ */
+ { .cat3 = {
+ .opc_cat = 3, .opc = OPC_SEL_B32 & 63, .repeat = 2, .dst = 0,
+ .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
+ .src2 = 3,
+ .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0},
+ } },
+ { .cat3 = {
+ .opc_cat = 3, .opc = OPC_SEL_B32 & 63, .repeat = 1, .dst = 4,
+ .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
+ .src2 = 3,
+ .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2},
+ } },
+ { .cat1 = { .opc_cat = 1, .src_type = TYPE_F32, .dst_type = TYPE_F32, .dst = 3,
+ .src_im = 1, .fim_val = 1.0f } },
+ { .cat0 = { .opc = OPC_END } },
+ };
+ /* byte offset of the FS within the shader allocation; the VS must fit
+ * in front of it
+ */
+#define FS_OFFSET (16 * sizeof(instr_t))
+ STATIC_ASSERT(sizeof(vs_code) <= FS_OFFSET);
+
+ /* vs inputs: only vtx id in r0.w */
+ tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 7);
+ tu_cs_emit(cs, 0x00000000);
+ tu_cs_emit(cs, 0xfcfcfc00 | A6XX_VFD_CONTROL_1_REGID4VTX(3));
+ tu_cs_emit(cs, 0x0000fcfc);
+ tu_cs_emit(cs, 0xfcfcfcfc);
+ tu_cs_emit(cs, 0x000000fc);
+ tu_cs_emit(cs, 0x0000fcfc);
+ tu_cs_emit(cs, 0x00000000);
+
+ /* vs outputs: position in r0.xyzw, blit coords in r1.xy */
+ tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
+ tu_cs_emit(cs, blit ? 0xffffffcf : 0xffffffff);
+ tu_cs_emit(cs, 0xffffffff);
+ tu_cs_emit(cs, 0xffffffff);
+ tu_cs_emit(cs, 0xffffffff);
+
+ tu_cs_emit_regs(cs, A6XX_SP_VS_OUT_REG(0,
+ .a_regid = 0, .a_compmask = 0xf,
+ .b_regid = 4, .b_compmask = 0x3));
+ tu_cs_emit_regs(cs, A6XX_SP_VS_VPC_DST_REG(0, .outloc0 = 0, .outloc1 = 4));
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
+ tu_cs_emit(cs, 0xff00ff00 |
+ COND(blit, A6XX_VPC_CNTL_0_VARYING) |
+ A6XX_VPC_CNTL_0_NUMNONPOSVAR(blit ? 8 : 0));
+
+ /* the same stride (6 with the blit varying, 4 without) is written to
+ * both VPC_PACK and PC_PRIMITIVE_CNTL_1
+ */
+ tu_cs_emit_regs(cs, A6XX_VPC_PACK(
+ .positionloc = 0,
+ .psizeloc = 0xff,
+ .stride_in_vpc = blit ? 6 : 4));
+ tu_cs_emit_regs(cs, A6XX_SP_PRIMITIVE_CNTL(.vsout = blit ? 2 : 1));
+ tu_cs_emit_regs(cs,
+ A6XX_PC_PRIMITIVE_CNTL_0(),
+ A6XX_PC_PRIMITIVE_CNTL_1(.stride_in_vpc = blit ? 6 : 4));
+
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
+ tu_cs_emit(cs, blit ? 0xe000 : 0); // I think this can just be 0
+ for (uint32_t i = 1; i < 8; i++)
+ tu_cs_emit(cs, 0);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+ for (uint32_t i = 0; i < 8; i++)
+ tu_cs_emit(cs, 0x99999999);
+
+ /* fs inputs: none, prefetch in blit case */
+ /* `blit` is used as an integer here: it is both the prefetch count and
+ * the number of extra dwords in the packet
+ */
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + blit);
+ tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(blit) |
+ A6XX_SP_FS_PREFETCH_CNTL_UNK4(0xfc) |
+ 0x7000);
+ if (blit) {
+ tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(4) |
+ A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(0) |
+ A6XX_SP_FS_PREFETCH_CMD_TEX_ID(0) |
+ A6XX_SP_FS_PREFETCH_CMD_DST(0) |
+ A6XX_SP_FS_PREFETCH_CMD_WRMASK(0xf) |
+ A6XX_SP_FS_PREFETCH_CMD_CMD(0x4));
+ }
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
+ tu_cs_emit(cs, 0x3); // XXX blob uses 3 in blit path
+ tu_cs_emit(cs, 0xfcfcfcfc);
+ tu_cs_emit(cs, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(blit ? 0 : 0xfc) |
+ A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(0xfc) |
+ 0xfc00fc00);
+ tu_cs_emit(cs, 0xfcfcfcfc);
+ tu_cs_emit(cs, 0xfcfc);
+
+ tu_cs_emit_regs(cs, A6XX_HLSQ_UNKNOWN_B980(blit ? 3 : 1));
+ tu_cs_emit_regs(cs, A6XX_GRAS_CNTL(.varying = blit));
+ tu_cs_emit_regs(cs,
+ A6XX_RB_RENDER_CONTROL0(.varying = blit, .unk10 = blit),
+ A6XX_RB_RENDER_CONTROL1());
+
+ tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_CNTL());
+ tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8101());
+ tu_cs_emit_regs(cs, A6XX_GRAS_SAMPLE_CNTL());
+
+ /* shaders */
+ /* NOTE(review): presumably this allocates 2 units so that the VS sits at
+ * offset 0 and the FS at FS_OFFSET - confirm the units of tu_cs_alloc's
+ * size argument. The result is only checked by assert(), so with NDEBUG a
+ * failed allocation is not detected before the memcpy below.
+ */
+ struct ts_cs_memory shaders = { };
+ VkResult result = tu_cs_alloc(&cmd->sub_cs, 2, 16 * sizeof(instr_t), &shaders);
+ assert(result == VK_SUCCESS);
+
+ memcpy(shaders.map, vs_code, sizeof(vs_code));
+
+ instr_t *fs = (instr_t*) ((uint8_t*) shaders.map + FS_OFFSET);
+ for (uint32_t i = 0; i < num_rts; i++) {
+ /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
+ fs[i] = (instr_t) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32,
+ .repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4 } };
+ }
+ fs[num_rts] = (instr_t) { .cat0 = { .opc = OPC_END } };
+ /* note: assumed <= 16 instructions (MAX_RTS is 8) */
+
+ tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
+ tu_cs_emit_regs(cs,
+ A6XX_HLSQ_VS_CNTL(.constlen = 8, .enabled = true),
+ A6XX_HLSQ_HS_CNTL(),
+ A6XX_HLSQ_DS_CNTL(),
+ A6XX_HLSQ_GS_CNTL());
+ tu_cs_emit_regs(cs, A6XX_HLSQ_FS_CNTL(.constlen = 4 * num_rts, .enabled = true));
+
+ /* only VS and FS are enabled; HS/DS/GS configs are written with defaults */
+ tu_cs_emit_regs(cs,
+ A6XX_SP_VS_CONFIG(.enabled = true),
+ A6XX_SP_VS_INSTRLEN(1));
+ tu_cs_emit_regs(cs, A6XX_SP_HS_CONFIG());
+ tu_cs_emit_regs(cs, A6XX_SP_DS_CONFIG());
+ tu_cs_emit_regs(cs, A6XX_SP_GS_CONFIG());
+ tu_cs_emit_regs(cs,
+ A6XX_SP_FS_CONFIG(.enabled = true, .ntex = blit, .nsamp = blit),
+ A6XX_SP_FS_INSTRLEN(1));
+
+ tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
+ .threadsize = FOUR_QUADS,
+ .fullregfootprint = 2,
+ .mergedregs = true));
+ tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
+ .varying = blit,
+ .threadsize = FOUR_QUADS,
+ /* could this be 0 in !blit && !num_rts case ? */
+ .fullregfootprint = MAX2(1, num_rts),
+ .mergedregs = true)); /* note: tu_pipeline also sets 0x1000000 bit */
+
+ tu_cs_emit_regs(cs, A6XX_SP_IBO_COUNT(0));
+
+ /* VS: load from the start of the allocation and point OBJ_START at it */
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(1));
+ tu_cs_emit_qw(cs, shaders.iova);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OBJ_START_LO, 2);
+ tu_cs_emit_qw(cs, shaders.iova);
+
+ /* FS: same, at FS_OFFSET bytes into the allocation */
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(1));
+ tu_cs_emit_qw(cs, shaders.iova + FS_OFFSET);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OBJ_START_LO, 2);
+ tu_cs_emit_qw(cs, shaders.iova + FS_OFFSET);
+
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_CL_CNTL(
+ .persp_division_disable = 1,
+ .vp_xform_disable = 1,
+ .vp_clip_code_ignore = 1,
+ .clip_disable = 1),
+ A6XX_GRAS_UNKNOWN_8001(0));
+ tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
+
+ /* viewport and screen scissors both span (0,0)..(0x7fff,0x7fff) */
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0(.x = 0, .y = 0),
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = 0, .y = 0),
+ A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
+}
+
+/* Upload 8 floats from `coords` as VS constants at offset 0 (two units),
+ * inline in the command stream (SS6_DIRECT, so the external source address
+ * dwords are zero).
+ *
+ * Callers in this file lay the array out as
+ *   { dst x0, dst y0, src x0, src y0, dst x1, dst y1, src x1, src y1 }.
+ */
+static void
+r3d_coords_raw(struct tu_cs *cs, const float *coords)
+{
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(2));
+ tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+ tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+ /* the float bit patterns are emitted unchanged as dwords */
+ tu_cs_emit_array(cs, (const uint32_t *) coords, 8);
+}
+
+/* blit_ops.coords for the shader path: turn a dst offset, an optional src
+ * offset (NULL means 0,0) and an extent into the two rectangle corners
+ * consumed by r3d_coords_raw().
+ */
+static void
+r3d_coords(struct tu_cs *cs,
+           const VkOffset2D *dst,
+           const VkOffset2D *src,
+           const VkExtent2D *extent)
+{
+   int32_t sx = 0;
+   int32_t sy = 0;
+
+   if (src) {
+      sx = src->x;
+      sy = src->y;
+   }
+
+   /* first corner (dst, src), then the opposite corner (dst, src) */
+   float rect[] = {
+      dst->x,                 dst->y,
+      sx,                     sy,
+      dst->x + extent->width, dst->y + extent->height,
+      sx + extent->width,     sy + extent->height,
+   };
+
+   r3d_coords_raw(cs, rect);
+}
+
+/* blit_ops.clear_value for the shader path: upload the clear value as one
+ * unit (4 dwords) of FS constants at offset 0, inline in the command stream.
+ *
+ * format: selects how `val` is interpreted (depth/stencil formats read
+ *         val->depthStencil, everything else reads val->color)
+ * val:    clear value
+ */
+static void
+r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
+{
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(1));
+ tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+ tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+ switch (format) {
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D24_UNORM_S8_UINT: {
+ /* cleared as r8g8b8a8_unorm using special format */
+ /* the packed 24-bit depth is split into three bytes, each emitted as a
+ * float in [0, 1]; the stencil byte goes in the fourth component
+ */
+ uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
+ tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
+ tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f));
+ tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
+ tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
+ } break;
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_D32_SFLOAT:
+ /* depth as float bits in the first component, the rest zero */
+ tu_cs_emit(cs, fui(val->depthStencil.depth));
+ tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, 0);
+ break;
+ case VK_FORMAT_S8_UINT:
+ /* stencil as a raw integer in the first component, the rest zero */
+ tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
+ tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, 0);
+ break;
+ default:
+ /* as color formats use clear value as-is */
+ assert(!vk_format_is_depth_or_stencil(format));
+ tu_cs_emit_array(cs, val->color.uint32, 4);
+ break;
+ }
+}
+
+/* Copy a texture descriptor followed by a sampler into memory allocated
+ * from cmd->sub_cs and point the FS texture and sampler state at it.
+ *
+ * tex_const:     descriptor to copy; A6XX_TEX_CONST_DWORDS dwords are read
+ * linear_filter: selects A6XX_TEX_LINEAR instead of A6XX_TEX_NEAREST for
+ *                both the mag and min filters
+ *
+ * The sampler clamps to edge on all three axes and uses unnormalized coords.
+ */
+static void
+r3d_src_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t *tex_const, bool linear_filter)
+{
+ /* NOTE(review): the result is only checked by assert(); with NDEBUG a
+ * failed allocation is not detected before the memcpy below.
+ */
+ struct ts_cs_memory texture = { };
+ VkResult result = tu_cs_alloc(&cmd->sub_cs,
+ 2, /* allocate space for a sampler too */
+ A6XX_TEX_CONST_DWORDS, &texture);
+ assert(result == VK_SUCCESS);
+
+ memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4);
+
+ /* the sampler's 4 dwords are written right after the descriptor */
+ texture.map[A6XX_TEX_CONST_DWORDS + 0] =
+ A6XX_TEX_SAMP_0_XY_MAG(linear_filter ? A6XX_TEX_LINEAR : A6XX_TEX_NEAREST) |
+ A6XX_TEX_SAMP_0_XY_MIN(linear_filter ? A6XX_TEX_LINEAR : A6XX_TEX_NEAREST) |
+ A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) |
+ A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) |
+ A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) |
+ 0x60000; /* XXX used by blob, doesn't seem necessary */
+ texture.map[A6XX_TEX_CONST_DWORDS + 1] =
+ 0x1 | /* XXX used by blob, doesn't seem necessary */
+ A6XX_TEX_SAMP_1_UNNORM_COORDS |
+ A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR;
+ texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0;
+ texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0;
+
+ /* sampler state: loaded from the address just past the descriptor
+ * (A6XX_TEX_CONST_DWORDS * 4 bytes into the allocation)
+ */
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
+ CP_LOAD_STATE6_0_NUM_UNIT(1));
+ tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_SAMP_LO, 2);
+ tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);
+
+ /* texture descriptor: loaded from the start of the allocation */
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
+ CP_LOAD_STATE6_0_NUM_UNIT(1));
+ tu_cs_emit_qw(cs, texture.iova);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_CONST_LO, 2);
+ tu_cs_emit_qw(cs, texture.iova);
+
+ tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1));
+}
+
+/* blit_ops.src for the shader path: build a temporary single-level,
+ * single-layer 2D image view of `image` in `format` and use its descriptor
+ * as the FS texture.
+ *
+ * level, layer:  subresource to sample from
+ * linear_filter: forwarded to r3d_src_common()
+ * stencil_read:  when true, the view's R component is swizzled from A
+ */
+static void
+r3d_src(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ struct tu_image *image,
+ VkFormat format,
+ uint32_t level,
+ uint32_t layer,
+ bool linear_filter,
+ bool stencil_read)
+{
+ struct tu_image_view view;
+
+ /* use tu_image_view_init to fill out a view descriptor */
+ tu_image_view_init(&view, cmd->device, &(VkImageViewCreateInfo) {
+ .image = tu_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = format,
+ /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
+ .components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = level,
+ .levelCount = 1,
+ .baseArrayLayer = layer,
+ .layerCount = 1,
+ },
+ });
+ /* only the descriptor is kept; the view itself is a stack temporary */
+ r3d_src_common(cmd, cs, view.descriptor, linear_filter);
+}
+
+/* blit_ops.src_buffer for the shader path: hand-build a 2D texture
+ * descriptor for a buffer at `va` and use it as the FS texture with nearest
+ * filtering.
+ *
+ * vk_format:     texel format; the native format is looked up for
+ *                TILE6_LINEAR
+ * va, pitch:     buffer address and the value for the descriptor PITCH field
+ * width, height: texture dimensions written to the descriptor
+ */
+static void
+r3d_src_buffer(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat vk_format,
+ uint64_t va, uint32_t pitch,
+ uint32_t width, uint32_t height)
+{
+ uint32_t desc[A6XX_TEX_CONST_DWORDS];
+
+ struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);
+
+ /* for VK_FORMAT_R8_UNORM every component reads X; other formats use the
+ * identity swizzle
+ */
+ desc[0] =
+ COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) |
+ A6XX_TEX_CONST_0_FMT(format.fmt) |
+ A6XX_TEX_CONST_0_SWAP(format.swap) |
+ A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
+ // XXX to swizzle into .w for stencil buffer_to_image
+ A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
+ A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
+ A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
+ desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
+ desc[2] =
+ A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(vk_format)) |
+ A6XX_TEX_CONST_2_PITCH(pitch) |
+ A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
+ desc[3] = 0;
+ /* 64-bit address: low dword (implicit truncation), then high dword */
+ desc[4] = va;
+ desc[5] = va >> 32;
+ for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
+ desc[i] = 0;
+
+ r3d_src_common(cmd, cs, desc, false);
+}
+
+/* blit_ops.dst for the shader path: program render target 0 (and its flag
+ * buffer) to point at the given level/layer of `image`, viewed as
+ * `vk_format`. Also emits the MSAA state for the image's sample count.
+ */
+static void
+r3d_dst(struct tu_cs *cs,
+ struct tu_image *image,
+ VkFormat vk_format,
+ uint32_t level,
+ uint32_t layer)
+{
+ tu6_emit_msaa(cs, image->samples); /* TODO: move to setup */
+
+ struct tu_native_format format = tu6_format_image(image, vk_format, level);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_MRT_BUF_INFO(0,
+ .color_tile_mode = format.tile_mode,
+ .color_format = format.fmt,
+ .color_swap = format.swap),
+ A6XX_RB_MRT_PITCH(0, tu_image_pitch(image, level)),
+ A6XX_RB_MRT_ARRAY_PITCH(0, image->layout.layer_size),
+ A6XX_RB_MRT_BASE(0, tu_image_base_ref(image, level, layer)),
+ A6XX_RB_MRT_BASE_GMEM(0, 0));
+
+ /* NOTE(review): the flag buffer registers are written unconditionally
+ * here, whereas r2d_dst only writes its flag registers when
+ * ubwc_layer_size != 0 - confirm the ubwc helpers are valid for images
+ * without UBWC.
+ */
+ tu_cs_emit_regs(cs,
+ A6XX_RB_MRT_FLAG_BUFFER_ADDR(0, tu_image_ubwc_base_ref(image, level, layer)),
+ A6XX_RB_MRT_FLAG_BUFFER_PITCH(0, .pitch = tu_image_ubwc_pitch(image, level)));
+
+ /* flag_mrts is set only when the image has a UBWC layer size */
+ tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = image->layout.ubwc_layer_size != 0));
+}
+
+/* blit_ops.dst_buffer for the shader path: program render target 0 to write
+ * to a buffer at `va` with the given pitch. The native format is looked up
+ * for TILE6_LINEAR, MSAA state is emitted for a single sample and
+ * RB_RENDER_CNTL is written with its defaults.
+ */
+static void
+r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
+{
+   const struct tu_native_format native = tu6_format_color(vk_format, TILE6_LINEAR);
+
+   tu6_emit_msaa(cs, 1); /* TODO: move to setup */
+
+   tu_cs_emit_regs(cs,
+                   A6XX_RB_MRT_BUF_INFO(0,
+                                        .color_format = native.fmt,
+                                        .color_swap = native.swap),
+                   A6XX_RB_MRT_PITCH(0, pitch),
+                   A6XX_RB_MRT_ARRAY_PITCH(0, 0),
+                   /* 64-bit address split into its low and high dwords */
+                   A6XX_RB_MRT_BASE_LO(0, (uint32_t) va),
+                   A6XX_RB_MRT_BASE_HI(0, va >> 32),
+                   A6XX_RB_MRT_BASE_GMEM(0, 0));
+
+   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
+}
+
+/* blit_ops.setup for the shader path.
+ *
+ * Outside of a render pass (cmd->state.pass == NULL) this first flushes and
+ * invalidates the CCU, switches RB_CCU_CNTL to ccu_offset_bypass and resets
+ * the window scissor. It then emits the r3d pipeline and the fixed
+ * output/blend/depth/stencil state for a single render target.
+ *
+ * vk_format: render target format
+ * rotation:  not referenced by this function
+ * clear:     true selects the clear pipeline (no varying, one FS mov);
+ *            false selects the blit pipeline (varying + texture prefetch)
+ * mask:      component write mask for render target 0
+ */
+static void
+r3d_setup(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat vk_format,
+ enum a6xx_rotation rotation,
+ bool clear,
+ uint8_t mask)
+{
+ const struct tu_physical_device *phys_dev = cmd->device->physical_device;
+
+ if (!cmd->state.pass) {
+ /* TODO: flushing with barriers instead of blindly always flushing */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR, false);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH, false);
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
+ /* NOTE(review): unlike r2d_setup, no tu_cs_emit_wfi() precedes this
+ * RB_CCU_CNTL write - confirm that is intentional.
+ */
+ tu_cs_emit_regs(cs,
+ A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
+
+ tu6_emit_window_scissor(cs, 0, 0, 0x7fff, 0x7fff);
+ }
+ tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
+ tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));
+
+ /* blit = !clear; the clear shader gets one mov, the blit shader none */
+ r3d_pipeline(cmd, cs, !clear, clear ? 1 : 0);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
+ tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
+ A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
+ 0xfc000000);
+ tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
+ tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(0));
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_FS_OUTPUT_CNTL0(),
+ A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1));
+
+ /* blend, alpha, depth and stencil registers are written with their
+ * defaults apart from the blend sample mask
+ */
+ tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
+ tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff));
+ tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());
+
+ tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
+ tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL());
+ tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
+ tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL());
+ tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK());
+ tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK());
+ tu_cs_emit_regs(cs, A6XX_RB_STENCILREF());
+
+ tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf));
+ tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));
+
+ tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
+ .color_format = tu6_base_format(vk_format),
+ .color_sint = vk_format_is_sint(vk_format),
+ .color_uint = vk_format_is_uint(vk_format)));
+
+ /* `mask` is applied here, through the MRT component enable */
+ tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, .component_enable = mask));
+ tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
+ tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
+}
+
+/* blit_ops.run for the shader path: draw one instance of a 2-vertex
+ * RECTLIST with auto-generated indices, ignoring visibility. Outside of a
+ * render pass the draw is followed by color/depth CCU flush events and a
+ * cache invalidate.
+ */
+static void
+r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+ tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
+ tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
+ CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
+ CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY));
+ tu_cs_emit(cs, 1); /* instance count */
+ tu_cs_emit(cs, 2); /* vertex count */
+
+ if (!cmd->state.pass) {
+ /* TODO: flushing with barriers instead of blindly always flushing */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+ }
+}
+
+/* blit ops - common interface for 2d/shader paths */
+
+/* Table of hooks implemented once for the 2D engine (r2d_*) and once for
+ * the shader path (r3d_*). The callers in this file invoke setup once and
+ * then, per layer or row, a src hook, a dst hook, coords and run.
+ */
+struct blit_ops {
+ /* set the dst/src rectangle; the r3d implementation accepts src == NULL */
+ void (*coords)(struct tu_cs *cs,
+ const VkOffset2D *dst,
+ const VkOffset2D *src,
+ const VkExtent2D *extent);
+ /* upload the clear value, interpreted according to format */
+ void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val);
+ /* select one level/layer of an image as the source */
+ void (*src)(
+ struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ struct tu_image *image,
+ VkFormat format,
+ uint32_t level,
+ uint32_t layer,
+ bool linear_filter,
+ bool stencil_read);
+ /* select a buffer region as the source */
+ void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
+ VkFormat vk_format,
+ uint64_t va, uint32_t pitch,
+ uint32_t width, uint32_t height);
+ /* select one level/layer of an image as the destination */
+ void (*dst)(struct tu_cs *cs,
+ struct tu_image *image,
+ VkFormat format,
+ uint32_t level,
+ uint32_t layer);
+ /* select a buffer as the destination */
+ void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch);
+ /* emit per-operation state; called before any of the hooks above */
+ void (*setup)(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat vk_format,
+ enum a6xx_rotation rotation,
+ bool clear,
+ uint8_t mask);
+ /* kick off the blit or draw with the state programmed so far */
+ void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
+};
+
+/* blit_ops implementation backed by the 2D engine (r2d_* functions) */
+static const struct blit_ops r2d_ops = {
+   /* per-operation state and kick */
+   .setup = r2d_setup,
+   .run = r2d_run,
+   /* source selection */
+   .src = r2d_src,
+   .src_buffer = r2d_src_buffer,
+   /* destination selection */
+   .dst = r2d_dst,
+   .dst_buffer = r2d_dst_buffer,
+   /* rectangle and clear value */
+   .coords = r2d_coords,
+   .clear_value = r2d_clear_value,
+};
+
+/* blit_ops implementation backed by the shader path (r3d_* functions) */
+static const struct blit_ops r3d_ops = {
+   /* per-operation state and kick */
+   .setup = r3d_setup,
+   .run = r3d_run,
+   /* source selection */
+   .src = r3d_src,
+   .src_buffer = r3d_src_buffer,
+   /* destination selection */
+   .dst = r3d_dst,
+   .dst_buffer = r3d_dst_buffer,
+   /* rectangle and clear value */
+   .coords = r3d_coords,
+   .clear_value = r3d_clear_value,
+};
+
+/* Forward 3D offsets/extent to ops->coords by reinterpreting each pointer as
+ * its 2D counterpart; only the leading x/y (width/height) members are seen
+ * by the hook.
+ */
+static void
+coords(const struct blit_ops *ops,
+       struct tu_cs *cs,
+       const VkOffset3D *dst,
+       const VkOffset3D *src,
+       const VkExtent3D *extent)
+{
+   const VkOffset2D *dst_xy = (const VkOffset2D*) dst;
+   const VkOffset2D *src_xy = (const VkOffset2D*) src;
+   const VkExtent2D *size_wh = (const VkExtent2D*) extent;
+
+   ops->coords(cs, dst_xy, src_xy, size_wh);
+}
+
+/* Record one vkCmdBlitImage region.
+ *
+ * The 2D engine is used unless the destination is multisampled, in which
+ * case the shader path is used. X/Y mirroring is detected from the relative
+ * direction of the src and dst offsets. Mirroring or scaling along Z is not
+ * implemented and makes the region a no-op (reported through tu_finishme).
+ *
+ * cmd:       command buffer being recorded; commands go to cmd->cs
+ * src_image: image to read from
+ * dst_image: image to write to
+ * info:      region to blit
+ * filter:    VK_FILTER_LINEAR selects linear filtering, anything else nearest
+ */
+static void
+tu6_blit_image(struct tu_cmd_buffer *cmd,
+               struct tu_image *src_image,
+               struct tu_image *dst_image,
+               const VkImageBlit *info,
+               VkFilter filter)
+{
+   const struct blit_ops *ops = &r2d_ops;
+   struct tu_cs *cs = &cmd->cs;
+   uint32_t layers;
+
+   /* 2D blit can't do rotation mirroring from just coordinates */
+   static const enum a6xx_rotation rotate[2][2] = {
+      {ROTATE_0, ROTATE_HFLIP},
+      {ROTATE_VFLIP, ROTATE_180},
+   };
+
+   /* an axis is mirrored when src and dst run in opposite directions */
+   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
+                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
+   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
+                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
+   bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
+                   (info->dstOffsets[1].z < info->dstOffsets[0].z);
+
+   if (mirror_z) {
+      tu_finishme("blit z mirror\n");
+      return;
+   }
+
+   const int32_t src_depth = info->srcOffsets[1].z - info->srcOffsets[0].z;
+   const int32_t dst_depth = info->dstOffsets[1].z - info->dstOffsets[0].z;
+
+   if (src_depth != dst_depth) {
+      tu_finishme("blit z filter\n");
+      return;
+   }
+
+   /* Without a z mirror both ranges run in the same direction, but that
+    * direction may be descending (offsets[1].z < offsets[0].z). Use the
+    * absolute extent and walk up from the lower bound of each range, so a
+    * negative difference does not wrap around to a huge unsigned count.
+    */
+   const int32_t src_z = MIN2(info->srcOffsets[0].z, info->srcOffsets[1].z);
+   const int32_t dst_z = MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z);
+
+   layers = src_depth < 0 ? -src_depth : src_depth;
+   if (info->dstSubresource.layerCount > 1) {
+      assert(layers <= 1);
+      layers = info->dstSubresource.layerCount;
+   }
+
+   /* for D24S8, restrict writes to the channels of the selected aspect */
+   uint8_t mask = 0xf;
+   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+      assert(info->srcSubresource.aspectMask == info->dstSubresource.aspectMask);
+      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
+         mask = 0x7;
+      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+         mask = 0x8;
+   }
+
+   if (dst_image->samples > 1)
+      ops = &r3d_ops;
+
+   /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
+    * figure out why (should be able to pass all tests with only shader path)
+    */
+
+   ops->setup(cmd, cs, dst_image->vk_format, rotate[mirror_y][mirror_x], false, mask);
+
+   if (ops == &r3d_ops) {
+      /* the shader path takes the raw corners, so mirroring falls out of
+       * the coordinates themselves
+       */
+      r3d_coords_raw(cs, (float[]) {
+         info->dstOffsets[0].x, info->dstOffsets[0].y,
+         info->srcOffsets[0].x, info->srcOffsets[0].y,
+         info->dstOffsets[1].x, info->dstOffsets[1].y,
+         info->srcOffsets[1].x, info->srcOffsets[1].y
+      });
+   } else {
+      /* the 2D engine takes normalized, inclusive rectangles; mirroring is
+       * expressed through the rotation passed to setup above
+       */
+      tu_cs_emit_regs(cs,
+         A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x),
+                             .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)),
+         A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
+                             .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
+      tu_cs_emit_regs(cs,
+         A6XX_GRAS_2D_SRC_TL_X(.x = MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
+         A6XX_GRAS_2D_SRC_BR_X(.x = MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
+         A6XX_GRAS_2D_SRC_TL_Y(.y = MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
+         A6XX_GRAS_2D_SRC_BR_Y(.y = MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
+   }
+
+   /* one blit per array layer / depth slice */
+   for (uint32_t i = 0; i < layers; i++) {
+      ops->src(cmd, cs, src_image, src_image->vk_format,
+               info->srcSubresource.mipLevel,
+               info->srcSubresource.baseArrayLayer + src_z + i,
+               filter == VK_FILTER_LINEAR, false);
+      ops->dst(cs, dst_image, dst_image->vk_format,
+               info->dstSubresource.mipLevel,
+               info->dstSubresource.baseArrayLayer + dst_z + i);
+      ops->run(cmd, cs);
+   }
+}
+
+/* vkCmdBlitImage entry point: add both images' BOs to the command buffer's
+ * BO list (src for read, dst for write) and record every region in order.
+ * The image layout arguments are not used.
+ */
+void
+tu_CmdBlitImage(VkCommandBuffer commandBuffer,
+                VkImage srcImage,
+                VkImageLayout srcImageLayout,
+                VkImage dstImage,
+                VkImageLayout dstImageLayout,
+                uint32_t regionCount,
+                const VkImageBlit *pRegions,
+                VkFilter filter)
+
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, src_image, srcImage);
+   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
+
+   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   const VkImageBlit *region = pRegions;
+   for (uint32_t remaining = regionCount; remaining > 0; remaining--, region++)
+      tu6_blit_image(cmd, src_image, dst_image, region, filter);
+}
+
+/* Pick an unsigned-integer format with the same block size in bits as
+ * `format`. Block sizes other than 8, 16, 32, 64, 96 and 128 bits are not
+ * handled.
+ */
+static VkFormat
+copy_format(VkFormat format)
+{
+   VkFormat uint_format;
+
+   switch (vk_format_get_blocksizebits(format)) {
+   case 8:
+      uint_format = VK_FORMAT_R8_UINT;
+      break;
+   case 16:
+      uint_format = VK_FORMAT_R16_UINT;
+      break;
+   case 32:
+      uint_format = VK_FORMAT_R32_UINT;
+      break;
+   case 64:
+      uint_format = VK_FORMAT_R32G32_UINT;
+      break;
+   case 96:
+      uint_format = VK_FORMAT_R32G32B32_UINT;
+      break;
+   case 128:
+      uint_format = VK_FORMAT_R32G32B32A32_UINT;
+      break;
+   default:
+      unreachable("unhandled format size");
+   }
+
+   return uint_format;
+}
+
+/* Convert copy parameters from texel units to block units for a compressed
+ * format; a no-op for uncompressed formats.
+ *
+ * offset:     required; x/y are divided by the block dimensions
+ * extent:     optional; width/height are divided rounding up
+ * pitch:      optional; divided by the block width
+ * layer_size: optional; divided by the block area
+ */
+static void
+copy_compressed(VkFormat format,
+                VkOffset3D *offset,
+                VkExtent3D *extent,
+                uint32_t *pitch,
+                uint32_t *layer_size)
+{
+   if (vk_format_is_compressed(format)) {
+      const uint32_t bw = vk_format_get_blockwidth(format);
+      const uint32_t bh = vk_format_get_blockheight(format);
+
+      offset->x /= bw;
+      offset->y /= bh;
+
+      if (extent) {
+         extent->width = DIV_ROUND_UP(extent->width, bw);
+         extent->height = DIV_ROUND_UP(extent->height, bh);
+      }
+
+      if (pitch)
+         *pitch /= bw;
+
+      if (layer_size)
+         *layer_size /= (bw * bh);
+   }
+}
+
+/* Record one vkCmdCopyBufferToImage region.
+ *
+ * The 2D engine is used except when writing only the stencil aspect of a
+ * D24S8 image, which goes through the shader path with the buffer read as
+ * R8_UNORM. E5B9G9R9 and compressed formats are copied as a same-sized
+ * unsigned-integer format, with compressed parameters converted to block
+ * units first.
+ *
+ * cmd:        command buffer being recorded; commands go to cmd->cs
+ * src_buffer: buffer to read from
+ * dst_image:  image to write to
+ * info:       region to copy
+ */
+static void
+tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
+ struct tu_buffer *src_buffer,
+ struct tu_image *dst_image,
+ const VkBufferImageCopy *info)
+{
+ struct tu_cs *cs = &cmd->cs;
+ /* number of slices to copy: depth for 3D regions, layerCount for arrays */
+ uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
+ VkFormat dst_format = dst_image->vk_format;
+ VkFormat src_format = dst_image->vk_format;
+ const struct blit_ops *ops = &r2d_ops;
+ uint8_t mask = 0xf;
+
+ if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ switch (info->imageSubresource.aspectMask) {
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ src_format = VK_FORMAT_R8_UNORM; /* changes how src buffer is interpreted */
+ mask = 0x8;
+ ops = &r3d_ops;
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ mask = 0x7;
+ break;
+ }
+ }
+
+ VkOffset3D offset = info->imageOffset;
+ VkExtent3D extent = info->imageExtent;
+ /* a zero bufferRowLength / bufferImageHeight falls back to the extent */
+ uint32_t pitch =
+ (info->bufferRowLength ?: extent.width) * vk_format_get_blocksize(src_format);
+ uint32_t layer_size = (info->bufferImageHeight ?: extent.height) * pitch;
+
+ if (dst_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 || vk_format_is_compressed(dst_format)) {
+ assert(src_format == dst_format);
+ copy_compressed(dst_format, &offset, &extent, &pitch, &layer_size);
+ src_format = dst_format = copy_format(dst_format);
+ }
+
+ /* note: the src_va/pitch alignment of 64 is for 2D engine,
+ * it is also valid for 1cpp format with shader path (stencil aspect path)
+ */
+
+ ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
+
+ for (uint32_t i = 0; i < layers; i++) {
+ ops->dst(cs, dst_image, dst_format,
+ info->imageSubresource.mipLevel,
+ info->imageSubresource.baseArrayLayer + info->imageOffset.z + i);
+
+ uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i;
+ if ((src_va & 63) || (pitch & 63)) {
+ /* unaligned address or pitch: copy one row per blit. The source base is
+ * aligned down to 64 bytes and the skipped bytes are made up for by
+ * starting at texel x within a 1-row source of width x + extent.width.
+ */
+ for (uint32_t y = 0; y < extent.height; y++) {
+ uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format);
+ ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
+ x + extent.width, 1);
+ ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x},
+ &(VkExtent2D) {extent.width, 1});
+ ops->run(cmd, cs);
+ src_va += pitch;
+ }
+ } else {
+ /* aligned: the whole slice is copied with a single blit */
+ ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height);
+ coords(ops, cs, &offset, &(VkOffset3D){}, &extent);
+ ops->run(cmd, cs);
+ }
+ }
+}
+
+/* vkCmdCopyBufferToImage entry point: add the buffer's BO (read) and the
+ * image's BO (write) to the command buffer's BO list and record every region
+ * in order. The image layout argument is not used.
+ */
+void
+tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
+                        VkBuffer srcBuffer,
+                        VkImage dstImage,
+                        VkImageLayout dstImageLayout,
+                        uint32_t regionCount,
+                        const VkBufferImageCopy *pRegions)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
+   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
+
+   tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   const VkBufferImageCopy *region = pRegions;
+   for (unsigned remaining = regionCount; remaining > 0; remaining--, region++)
+      tu_copy_buffer_to_image(cmd, src_buffer, dst_image, region);
+}
+
+/* Record one vkCmdCopyImageToBuffer region.
+ *
+ * The 2D engine is used except when reading only the stencil aspect of a
+ * D24S8 image, which goes through the shader path and writes R8_UNORM.
+ * E5B9G9R9 and compressed formats are copied as a same-sized
+ * unsigned-integer format, with compressed parameters converted to block
+ * units first.
+ *
+ * cmd:        command buffer being recorded; commands go to cmd->cs
+ * src_image:  image to read from
+ * dst_buffer: buffer to write to
+ * info:       region to copy
+ */
+static void
+tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
+ struct tu_image *src_image,
+ struct tu_buffer *dst_buffer,
+ const VkBufferImageCopy *info)
+{
+ struct tu_cs *cs = &cmd->cs;
+ /* number of slices to copy: depth for 3D regions, layerCount for arrays */
+ uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
+ VkFormat src_format = src_image->vk_format;
+ VkFormat dst_format = src_image->vk_format;
+ bool stencil_read = false;
+
+ if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
+ info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ dst_format = VK_FORMAT_R8_UNORM;
+ stencil_read = true;
+ }
+
+ const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops;
+ VkOffset3D offset = info->imageOffset;
+ VkExtent3D extent = info->imageExtent;
+ /* a zero bufferRowLength / bufferImageHeight falls back to the extent */
+ uint32_t pitch = (info->bufferRowLength ?: extent.width) * vk_format_get_blocksize(dst_format);
+ uint32_t layer_size = (info->bufferImageHeight ?: extent.height) * pitch;
+
+ if (src_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 || vk_format_is_compressed(src_format)) {
+ assert(src_format == dst_format);
+ copy_compressed(dst_format, &offset, &extent, &pitch, &layer_size);
+ src_format = dst_format = copy_format(dst_format);
+ }
+
+ /* note: the dst_va/pitch alignment of 64 is for 2D engine,
+ * it is also valid for 1cpp format with shader path (stencil aspect)
+ */
+
+ /* the full 0xf mask is always used: the destination is a plain buffer */
+ ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);
+
+ for (uint32_t i = 0; i < layers; i++) {
+ ops->src(cmd, cs, src_image, src_format,
+ info->imageSubresource.mipLevel,
+ info->imageSubresource.baseArrayLayer + info->imageOffset.z + i,
+ false, stencil_read);
+
+ uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i;
+ if ((dst_va & 63) || (pitch & 63)) {
+ /* unaligned address or pitch: copy one row per blit. The destination
+ * base is aligned down to 64 bytes, its pitch is passed as 0 and the
+ * skipped bytes are made up for by writing at texel x.
+ */
+ for (uint32_t y = 0; y < extent.height; y++) {
+ uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format);
+ ops->dst_buffer(cs, dst_format, dst_va & ~63, 0);
+ ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
+ &(VkExtent2D) {extent.width, 1});
+ ops->run(cmd, cs);
+ dst_va += pitch;
+ }
+ } else {
+ /* aligned: the whole slice is copied with a single blit */
+ ops->dst_buffer(cs, dst_format, dst_va, pitch);
+ coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent);
+ ops->run(cmd, cs);
+ }
+ }
+}
+
+/* vkCmdCopyImageToBuffer entry point: add the image's BO (read) and the
+ * buffer's BO (write) to the command buffer's BO list and record every
+ * region in order. The image layout argument is not used.
+ */
+void
+tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
+                        VkImage srcImage,
+                        VkImageLayout srcImageLayout,
+                        VkBuffer dstBuffer,
+                        uint32_t regionCount,
+                        const VkBufferImageCopy *pRegions)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, src_image, srcImage);
+   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);
+
+   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
+
+   const VkBufferImageCopy *region = pRegions;
+   for (unsigned remaining = regionCount; remaining > 0; remaining--, region++)
+      tu_copy_image_to_buffer(cmd, src_image, dst_buffer, region);
+}
+
+/* Record one vkCmdCopyImage region.
+ *
+ * The 2D engine is used unless the destination is multisampled. The copy is
+ * done in the format of whichever image is tiled (E5B9G9R9 excepted); when
+ * neither is, a same-sized unsigned-integer format is used. Copies between
+ * two tiled images of different formats are not implemented and make the
+ * region a no-op (reported through tu_finishme).
+ *
+ * cmd:       command buffer being recorded; commands go to cmd->cs
+ * src_image: image to read from
+ * dst_image: image to write to
+ * info:      region to copy
+ */
+static void
+tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
+                       struct tu_image *src_image,
+                       struct tu_image *dst_image,
+                       const VkImageCopy *info)
+{
+   const struct blit_ops *ops = &r2d_ops;
+   struct tu_cs *cs = &cmd->cs;
+
+   /* for D24S8, restrict writes to the channels of the selected aspect */
+   uint8_t mask = 0xf;
+   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
+         mask = 0x7;
+      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+         mask = 0x8;
+   }
+
+   if (dst_image->samples > 1)
+      ops = &r3d_ops;
+
+   assert(info->srcSubresource.aspectMask == info->dstSubresource.aspectMask);
+
+   VkFormat format = VK_FORMAT_UNDEFINED;
+   VkOffset3D src_offset = info->srcOffset;
+   VkOffset3D dst_offset = info->dstOffset;
+   VkExtent3D extent = info->extent;
+
+   /* Array copies keep extent.depth at 1 and express the slice count through
+    * layerCount, while 3D copies use extent.depth; take the larger of the
+    * two, as the buffer<->image copies in this file do. Looping over
+    * extent.depth alone would copy only the first layer of an array region.
+    */
+   const uint32_t layers =
+      MAX2(info->extent.depth, info->srcSubresource.layerCount);
+
+   /* TODO: should check (ubwc || (tile_mode && swap)) instead */
+   if (src_image->layout.tile_mode && src_image->vk_format != VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+      format = src_image->vk_format;
+
+   if (dst_image->layout.tile_mode && dst_image->vk_format != VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+      if (format != VK_FORMAT_UNDEFINED && format != dst_image->vk_format) {
+         /* can be clever in some cases but in some cases we need an
+          * intermediate linear buffer
+          */
+         tu_finishme("image copy between two tiled/ubwc images\n");
+         return;
+      }
+      format = dst_image->vk_format;
+   }
+
+   if (format == VK_FORMAT_UNDEFINED)
+      format = copy_format(src_image->vk_format);
+
+   /* the extent is converted using the source format's block size only */
+   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
+   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
+
+   ops->setup(cmd, cs, format, ROTATE_0, false, mask);
+   coords(ops, cs, &dst_offset, &src_offset, &extent);
+
+   /* one blit per array layer / depth slice */
+   for (uint32_t i = 0; i < layers; i++) {
+      ops->src(cmd, cs, src_image, format,
+               info->srcSubresource.mipLevel,
+               info->srcSubresource.baseArrayLayer + info->srcOffset.z + i,
+               false, false);
+      ops->dst(cs, dst_image, format,
+               info->dstSubresource.mipLevel,
+               info->dstSubresource.baseArrayLayer + info->dstOffset.z + i);
+      ops->run(cmd, cs);
+   }
+}
+
+/* vkCmdCopyImage entry point.
+ *
+ * Adds the source image BO (read) and destination image BO (write) to the
+ * command buffer's BO list, then emits one image-to-image copy per region.
+ */
+void
+tu_CmdCopyImage(VkCommandBuffer commandBuffer,
+                VkImage srcImage,
+                VkImageLayout srcImageLayout,
+                VkImage destImage,
+                VkImageLayout destImageLayout,
+                uint32_t regionCount,
+                const VkImageCopy *pRegions)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, src_image, srcImage);
+   TU_FROM_HANDLE(tu_image, dst_image, destImage);
+
+   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (uint32_t region = 0; region < regionCount; region++) {
+      tu_copy_image_to_image(cmd, src_image, dst_image, &pRegions[region]);
+   }
+}
+
+/* Copy `size` bytes from src_va to dst_va with the 2D engine, treating the
+ * data as a sequence of single-row blits of `block_size`-byte texels.
+ *
+ * block_size 4 selects R32_UINT, anything else R8_UNORM. Base addresses
+ * handed to the engine are rounded down to 64 bytes; the remainder becomes
+ * the starting x coordinate. Each blit is limited so that neither the source
+ * nor the destination x range goes past 0x4000 texels.
+ */
+static void
+copy_buffer(struct tu_cmd_buffer *cmd,
+            uint64_t dst_va,
+            uint64_t src_va,
+            uint64_t size,
+            uint32_t block_size)
+{
+   const struct blit_ops *ops = &r2d_ops;
+   struct tu_cs *cs = &cmd->cs;
+
+   VkFormat format = VK_FORMAT_R8_UNORM;
+   if (block_size == 4)
+      format = VK_FORMAT_R32_UINT;
+
+   ops->setup(cmd, cs, format, ROTATE_0, false, 0xf);
+
+   for (uint64_t remaining = size / block_size; remaining; ) {
+      /* texel offset of each address within its 64-byte aligned base */
+      uint32_t src_x = (src_va & 63) / block_size;
+      uint32_t dst_x = (dst_va & 63) / block_size;
+      uint32_t width = MIN2(MIN2(remaining, 0x4000 - src_x), 0x4000 - dst_x);
+
+      ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1);
+      ops->dst_buffer(cs, format, dst_va & ~63, 0);
+      ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1});
+      ops->run(cmd, cs);
+
+      const uint32_t advance = width * block_size;
+      src_va += advance;
+      dst_va += advance;
+      remaining -= width;
+   }
+}
+
+/* vkCmdCopyBuffer entry point.
+ *
+ * Adds both buffer BOs to the command buffer's BO list and copies every
+ * region byte-wise (block size 1) through copy_buffer().
+ */
+void
+tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
+                 VkBuffer srcBuffer,
+                 VkBuffer dstBuffer,
+                 uint32_t regionCount,
+                 const VkBufferCopy *pRegions)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
+   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);
+
+   tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (uint32_t r = 0; r < regionCount; r++) {
+      const VkBufferCopy *region = &pRegions[r];
+
+      copy_buffer(cmd,
+                  tu_buffer_iova(dst_buffer) + region->dstOffset,
+                  tu_buffer_iova(src_buffer) + region->srcOffset,
+                  region->size, 1);
+   }
+}
+
+/* vkCmdUpdateBuffer entry point.
+ *
+ * Copies pData into memory allocated from the command buffer's sub_cs and
+ * then blits it into the destination buffer in 4-byte blocks. If the
+ * allocation fails, the error is stored in cmd->record_result and nothing
+ * is emitted.
+ */
+void
+tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
+                   VkBuffer dstBuffer,
+                   VkDeviceSize dstOffset,
+                   VkDeviceSize dataSize,
+                   const void *pData)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
+
+   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
+
+   struct ts_cs_memory staging;
+   VkResult result =
+      tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &staging);
+
+   if (result == VK_SUCCESS) {
+      memcpy(staging.map, pData, dataSize);
+      copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, staging.iova, dataSize, 4);
+   } else {
+      cmd->record_result = result;
+   }
+}
+
+/* vkCmdFillBuffer entry point.
+ *
+ * Fills the destination range with the 32-bit pattern `data`, using 2D
+ * engine clears of R32_UINT rows. VK_WHOLE_SIZE is resolved to the rest of
+ * the buffer after dstOffset; the size is then rounded down to a whole
+ * number of 32-bit words by the integer division below.
+ */
+void
+tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
+                 VkBuffer dstBuffer,
+                 VkDeviceSize dstOffset,
+                 VkDeviceSize fillSize,
+                 uint32_t data)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
+   const struct blit_ops *ops = &r2d_ops;
+   struct tu_cs *cs = &cmd->cs;
+
+   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
+
+   if (fillSize == VK_WHOLE_SIZE)
+      fillSize = buffer->size - dstOffset;
+
+   uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
+   /* fillSize is 64-bit: keep the word count 64-bit as well (as copy_buffer
+    * does) so that very large fills are not silently truncated.
+    */
+   uint64_t blocks = fillSize / 4;
+
+   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, ROTATE_0, true, 0xf);
+   ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});
+
+   while (blocks) {
+      /* word offset of dst_va within its 64-byte aligned base */
+      uint32_t dst_x = (dst_va & 63) / 4;
+      /* the MIN2 result is at most 0x4000, so it always fits in 32 bits */
+      uint32_t width = MIN2(blocks, 0x4000 - dst_x);
+
+      ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0);
+      ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
+      ops->run(cmd, cs);
+
+      dst_va += width * 4;
+      blocks -= width;
+   }
+}
+
+/* vkCmdResolveImage entry point.
+ *
+ * Sets up the 2D engine once with the destination image format, then for
+ * every region programs the coordinates and runs one blit per layer (or
+ * depth slice, whichever count is larger).
+ */
+void
+tu_CmdResolveImage(VkCommandBuffer commandBuffer,
+                   VkImage srcImage,
+                   VkImageLayout srcImageLayout,
+                   VkImage dstImage,
+                   VkImageLayout dstImageLayout,
+                   uint32_t regionCount,
+                   const VkImageResolve *pRegions)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, src_image, srcImage);
+   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
+   const struct blit_ops *ops = &r2d_ops;
+   struct tu_cs *cs = &cmd->cs;
+
+   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   ops->setup(cmd, cs, dst_image->vk_format, ROTATE_0, false, 0xf);
+
+   for (uint32_t r = 0; r < regionCount; r++) {
+      const VkImageResolve *info = pRegions + r;
+      const uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount);
+
+      assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount);
+      /* TODO: aspect masks possible ? */
+
+      coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent);
+
+      for (uint32_t layer = 0; layer < layers; layer++) {
+         const uint32_t src_layer =
+            info->srcSubresource.baseArrayLayer + info->srcOffset.z + layer;
+         const uint32_t dst_layer =
+            info->dstSubresource.baseArrayLayer + info->dstOffset.z + layer;
+
+         ops->src(cmd, cs, src_image, src_image->vk_format,
+                  info->srcSubresource.mipLevel, src_layer,
+                  false, false);
+         ops->dst(cs, dst_image, dst_image->vk_format,
+                  info->dstSubresource.mipLevel, dst_layer);
+         ops->run(cmd, cs);
+      }
+   }
+}
+
+/* Resolve `src` into `dst` over `rect` for `layers` layers, emitting 2D
+ * engine blits into `cs`.
+ *
+ * Both views must have the same format (asserted). The same rectangle is
+ * used as source and destination coordinates.
+ */
+void
+tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
+                  struct tu_cs *cs,
+                  struct tu_image_view *src,
+                  struct tu_image_view *dst,
+                  uint32_t layers,
+                  const VkRect2D *rect)
+{
+   const struct blit_ops *ops = &r2d_ops;
+
+   tu_bo_list_add(&cmd->bo_list, src->image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmd->bo_list, dst->image->bo, MSM_SUBMIT_BO_WRITE);
+
+   assert(src->vk_format == dst->vk_format);
+
+   ops->setup(cmd, cs, dst->vk_format, ROTATE_0, false, 0xf);
+   ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
+
+   for (uint32_t layer = 0; layer != layers; layer++) {
+      ops->src(cmd, cs, src->image, src->vk_format,
+               src->base_mip, src->base_layer + layer,
+               false, false);
+      ops->dst(cs, dst->image, dst->vk_format,
+               dst->base_mip, dst->base_layer + layer);
+      ops->run(cmd, cs);
+   }
+}
+
+/* Clear every mip level and layer selected by `range` to `clear_value`.
+ *
+ * E5B9G9R9 images are set up and written as R32_UINT, while the clear value
+ * is still packed using the image's own format. For D24S8 the component
+ * mask is built from the aspects in the range (0x7 depth, 0x8 stencil).
+ * Multisampled images use the 3D ops, everything else the 2D ops. For 3D
+ * images the per-level layer count is the minified depth.
+ */
+static void
+clear_image(struct tu_cmd_buffer *cmd,
+            struct tu_image *image,
+            const VkClearValue *clear_value,
+            const VkImageSubresourceRange *range)
+{
+   uint32_t level_count = tu_get_levelCount(image, range);
+   uint32_t layer_count = tu_get_layerCount(image, range);
+   struct tu_cs *cs = &cmd->cs;
+   const bool is_3d = image->type == VK_IMAGE_TYPE_3D;
+
+   const VkFormat format =
+      image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 ?
+      VK_FORMAT_R32_UINT : image->vk_format;
+
+   if (is_3d) {
+      assert(layer_count == 1);
+      assert(range->baseArrayLayer == 0);
+   }
+
+   uint8_t mask = 0xf;
+   if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+      const uint8_t depth_bits =
+         (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? 0x7 : 0;
+      const uint8_t stencil_bits =
+         (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0x8 : 0;
+      mask = depth_bits | stencil_bits;
+   }
+
+   const struct blit_ops *ops = &r2d_ops;
+   if (image->samples > 1)
+      ops = &r3d_ops;
+
+   ops->setup(cmd, cs, format, ROTATE_0, true, mask);
+   ops->clear_value(cs, image->vk_format, clear_value);
+
+   for (unsigned lvl = 0; lvl < level_count; lvl++) {
+      const uint32_t mip = range->baseMipLevel + lvl;
+
+      if (is_3d)
+         layer_count = u_minify(image->extent.depth, mip);
+
+      ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) {
+                     u_minify(image->extent.width, mip),
+                     u_minify(image->extent.height, mip)
+                  });
+
+      for (uint32_t layer = 0; layer < layer_count; layer++) {
+         ops->dst(cs, image, format, mip, range->baseArrayLayer + layer);
+         ops->run(cmd, cs);
+      }
+   }
+}
+
+/* vkCmdClearColorImage entry point: adds the image BO (write) to the BO
+ * list and clears each subresource range with the given color.
+ */
+void
+tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
+                      VkImage image_h,
+                      VkImageLayout imageLayout,
+                      const VkClearColorValue *pColor,
+                      uint32_t rangeCount,
+                      const VkImageSubresourceRange *pRanges)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, image, image_h);
+
+   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (uint32_t r = 0; r < rangeCount; r++) {
+      clear_image(cmd, image, (const VkClearValue*) pColor, &pRanges[r]);
+   }
+}
+
+/* vkCmdClearDepthStencilImage entry point: adds the image BO (write) to the
+ * BO list and clears each subresource range with the given depth/stencil
+ * value.
+ */
+void
+tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+                             VkImage image_h,
+                             VkImageLayout imageLayout,
+                             const VkClearDepthStencilValue *pDepthStencil,
+                             uint32_t rangeCount,
+                             const VkImageSubresourceRange *pRanges)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_image, image, image_h);
+
+   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (uint32_t r = 0; r < rangeCount; r++) {
+      clear_image(cmd, image, (const VkClearValue*) pDepthStencil, &pRanges[r]);
+   }
+}
+
+/* Clear subpass attachments in sysmem mode using the 2D engine.
+ *
+ * For every requested attachment the clear is set up once, then one blit is
+ * run per rect and per layer. Commands go to cmd->draw_cs. Depth/stencil
+ * clears are bracketed by CCU flush/invalidate events; color clears only
+ * flush afterwards, and only for color attachment indices above 0.
+ */
+static void
+tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
+ uint32_t attachment_count,
+ const VkClearAttachment *attachments,
+ uint32_t rect_count,
+ const VkClearRect *rects)
+{
+ const struct tu_subpass *subpass = cmd->state.subpass;
+ /* note: cannot use shader path here.. there is a special shader path
+ * in tu_clear_sysmem_attachments()
+ */
+ const struct blit_ops *ops = &r2d_ops;
+ struct tu_cs *cs = &cmd->draw_cs;
+
+ for (uint32_t j = 0; j < attachment_count; j++) {
+ uint32_t a;
+ uint8_t mask = 0xf;
+
+ if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
+ } else {
+ a = subpass->depth_stencil_attachment.attachment;
+
+ /* NOTE(review): these events are emitted before the VK_ATTACHMENT_UNUSED
+ * check below, so an unused depth/stencil attachment still gets the
+ * leading flushes but not the trailing sync - confirm this is intended
+ */
+ /* sync depth into color */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ /* also flush color to avoid losing contents from invalidate */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR, false);
+
+
+ /* mask bits 0x7 cover depth and 0x8 stencil: drop the aspects not requested */
+ if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT))
+ mask &= ~0x7;
+ if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT))
+ mask &= ~0x8;
+ }
+
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ const struct tu_image_view *iview =
+ cmd->state.framebuffer->attachments[a].attachment;
+
+ ops->setup(cmd, cs, iview->vk_format, ROTATE_0, true, mask);
+ ops->clear_value(cs, iview->vk_format, &attachments[j].clearValue);
+
+ for (uint32_t i = 0; i < rect_count; i++) {
+ ops->coords(cs, &rects[i].rect.offset, NULL, &rects[i].rect.extent);
+ for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
+ ops->dst(cs, iview->image, iview->vk_format, iview->base_mip,
+ iview->base_layer + rects[i].baseArrayLayer + layer);
+ ops->run(cmd, cs);
+ }
+ }
+
+ if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* does not use CCU - flush
+ * note: cache invalidate might be needed too, and just not covered by test cases
+ */
+ if (attachments[j].colorAttachment > 0)
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ } else {
+ /* sync color into depth */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH, false);
+ }
+ }
+}
+
+/* Clear subpass attachments in sysmem mode.
+ *
+ * First gathers, from the requested attachments, which color render targets
+ * are cleared and with what value, whether depth and/or stencil are cleared,
+ * and the largest sample count involved. If nothing is multisampled and a
+ * framebuffer is known, the work is handed to the 2D path. Otherwise the
+ * clear is drawn with the 3D pipeline: one rect draw per clear rect, with
+ * the clear colors uploaded as fragment shader constants. Commands go to
+ * cmd->draw_cs.
+ */
+static void
+tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
+ uint32_t attachment_count,
+ const VkClearAttachment *attachments,
+ uint32_t rect_count,
+ const VkClearRect *rects)
+{
+ /* the shader path here is special, it avoids changing MRT/etc state */
+ const struct tu_render_pass *pass = cmd->state.pass;
+ const struct tu_subpass *subpass = cmd->state.subpass;
+ const uint32_t mrt_count = subpass->color_count;
+ struct tu_cs *cs = &cmd->draw_cs;
+ /* only the rows whose bit is set in clear_rts are ever written or read */
+ uint32_t clear_value[MAX_RTS][4];
+ float z_clear_val = 0.0f;
+ uint8_t s_clear_val = 0;
+ uint32_t clear_rts = 0, num_rts = 0, b;
+ bool z_clear = false;
+ bool s_clear = false;
+ uint32_t max_samples = 1;
+
+ /* gather what has to be cleared; unused attachments are skipped */
+ for (uint32_t i = 0; i < attachment_count; i++) {
+ uint32_t a;
+ if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ uint32_t c = attachments[i].colorAttachment;
+ a = subpass->color_attachments[c].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ clear_rts |= 1 << c;
+ memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t));
+ } else {
+ a = subpass->depth_stencil_attachment.attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ z_clear = true;
+ z_clear_val = attachments[i].clearValue.depthStencil.depth;
+ }
+
+ if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ s_clear = true;
+ s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
+ }
+ }
+
+ max_samples = MAX2(max_samples, pass->attachments[a].samples);
+ }
+
+ /* prefer to use 2D path for clears
+ * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
+ */
+ if (max_samples == 1 && cmd->state.framebuffer) {
+ tu_clear_sysmem_attachments_2d(cmd, attachment_count, attachments, rect_count, rects);
+ return;
+ }
+
+ /* TODO: this path doesn't take into account multilayer rendering */
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
+ tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
+ A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
+ 0xfc000000);
+ tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));
+
+ /* each cleared RT gets the next group of four output registers; after
+ * this loop num_rts is the number of cleared RTs
+ */
+ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count);
+ for (uint32_t i = 0; i < mrt_count; i++) {
+ if (clear_rts & (1 << i))
+ tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts++ * 4));
+ else
+ tu_cs_emit(cs, 0);
+ }
+
+ r3d_pipeline(cmd, cs, false, num_rts);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_FS_OUTPUT_CNTL0(),
+ A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count));
+
+ tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
+ tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff));
+ tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());
+ /* enable all components on cleared RTs, none on the others */
+ for (uint32_t i = 0; i < mrt_count; i++) {
+ tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i,
+ .component_enable = COND(clear_rts & (1 << i), 0xf)));
+ }
+
+ tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
+ tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL(
+ .z_enable = z_clear,
+ .z_write_enable = z_clear,
+ .zfunc = FUNC_ALWAYS));
+ tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
+ tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL(
+ .stencil_enable = s_clear,
+ .func = FUNC_ALWAYS,
+ .zpass = VK_STENCIL_OP_REPLACE));
+ tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff));
+ tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff));
+ tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val));
+
+ /* upload four dwords of clear color per cleared RT, in ascending RT
+ * order, i.e. the same order in which output registers were assigned
+ */
+ tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts);
+ tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(num_rts));
+ tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+ tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+ for_each_bit(b, clear_rts)
+ tu_cs_emit_array(cs, clear_value[b], 4);
+
+ /* one draw per rect: two corners, each carrying the depth clear value */
+ for (uint32_t i = 0; i < rect_count; i++) {
+ r3d_coords_raw(cs, (float[]) {
+ rects[i].rect.offset.x, rects[i].rect.offset.y,
+ z_clear_val, 1.0f,
+ rects[i].rect.offset.x + rects[i].rect.extent.width,
+ rects[i].rect.offset.y + rects[i].rect.extent.height,
+ z_clear_val, 1.0f
+ });
+ r3d_run(cmd, cs);
+ }
+
+ /* the registers written above overlap pipeline and dynamic state;
+ * presumably these flags make that state get re-emitted before the
+ * next draw - confirm against the draw path
+ */
+ cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE |
+ TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
+ TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
+ TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
+ TU_CMD_DIRTY_DYNAMIC_VIEWPORT |
+ TU_CMD_DIRTY_DYNAMIC_SCISSOR;
+}
+
+/**
+ * Pack a VkClearValue into a 128-bit buffer. format is respected except
+ * for the component order. The components are always packed in WZYX order,
+ * because gmem is tiled and tiled formats always have WZYX swap
+ *
+ * val: clear value to pack
+ * format: format describing channel sizes and types
+ * buf: output, four dwords; only the dwords that receive at least one
+ * component are written, the rest keep whatever the caller put there
+ */
+static void
+pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4])
+{
+ const struct util_format_description *desc = vk_format_description(format);
+
+ /* packed float formats have dedicated packers and fill only buf[0] */
+ switch (format) {
+ case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ buf[0] = float3_to_r11g11b10f(val->color.float32);
+ return;
+ case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ buf[0] = float3_to_rgb9e5(val->color.float32);
+ return;
+ default:
+ break;
+ }
+
+ assert(desc && desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
+
+ /* S8_UINT is special and has no depth */
+ const int max_components =
+ format == VK_FORMAT_S8_UINT ? 2 : desc->nr_channels;
+
+ /* buf_offset is the dword being filled, bit_shift the next free bit in it */
+ int buf_offset = 0;
+ int bit_shift = 0;
+ for (int comp = 0; comp < max_components; comp++) {
+ const struct util_format_channel_description *ch =
+ tu_get_format_channel_description(desc, comp);
+ /* a missing channel is only expected for the two cases asserted below */
+ if (!ch) {
+ assert((format == VK_FORMAT_S8_UINT && comp == 0) ||
+ (format == VK_FORMAT_X8_D24_UNORM_PACK32 && comp == 1));
+ continue;
+ }
+
+ union tu_clear_component_value v = tu_get_clear_component_value(
+ val, comp, desc->colorspace);
+
+ /* move to the next uint32_t when there is not enough space */
+ assert(ch->size <= 32);
+ if (bit_shift + ch->size > 32) {
+ buf_offset++;
+ bit_shift = 0;
+ }
+
+ /* zero a dword before its first component is OR-ed in */
+ if (bit_shift == 0)
+ buf[buf_offset] = 0;
+
+ buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift;
+ bit_shift += ch->size;
+ }
+}
+
+/* Emit a BLIT event that clears one render pass attachment in GMEM.
+ *
+ * attachment: index into the current render pass's attachment array
+ * component_mask: components to clear, written to the blit CLEAR_MASK field
+ * value: clear value; it is packed according to the attachment format
+ */
+static void
+tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t attachment,
+ uint8_t component_mask,
+ const VkClearValue *value)
+{
+ VkFormat vk_format = cmd->state.pass->attachments[attachment].format;
+ /* note: component_mask is 0x7 for depth and 0x8 for stencil
+ * because D24S8 is cleared with AS_R8G8B8A8 format
+ */
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
+ tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
+ tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask));
+
+ /* the blit targets the attachment's location in GMEM */
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+ tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
+ tu_cs_emit(cs, 0);
+
+ /* zero-initialized because the packer may leave trailing dwords unwritten */
+ uint32_t clear_vals[4] = {};
+ pack_gmem_clear_value(value, vk_format, clear_vals);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
+ tu_cs_emit_array(cs, clear_vals, 4);
+
+ tu6_emit_event_write(cmd, cs, BLIT, false);
+}
+
+/* Clear subpass attachments in GMEM mode.
+ *
+ * For each rect the blit scissor is programmed (inclusive bottom-right
+ * corner) and then one GMEM clear is emitted per requested attachment that
+ * is actually used by the subpass. Commands go to cmd->draw_cs.
+ */
+static void
+tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
+                          uint32_t attachment_count,
+                          const VkClearAttachment *attachments,
+                          uint32_t rect_count,
+                          const VkClearRect *rects)
+{
+   const struct tu_subpass *subpass = cmd->state.subpass;
+   struct tu_cs *cs = &cmd->draw_cs;
+
+   /* TODO: swap the loops for smaller cmdstream */
+   for (unsigned r = 0; r < rect_count; r++) {
+      const VkRect2D *area = &rects[r].rect;
+      unsigned x1 = area->offset.x;
+      unsigned y1 = area->offset.y;
+      unsigned x2 = x1 + area->extent.width - 1;
+      unsigned y2 = y1 + area->extent.height - 1;
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
+      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
+      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
+
+      for (unsigned n = 0; n < attachment_count; n++) {
+         const VkClearAttachment *att = &attachments[n];
+         unsigned clear_mask = 0;
+         uint32_t a;
+
+         if (att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+            clear_mask = 0xf;
+            a = subpass->color_attachments[att->colorAttachment].attachment;
+         } else {
+            a = subpass->depth_stencil_attachment.attachment;
+            /* 0x7 selects depth, 0x8 selects stencil */
+            if (att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+               clear_mask |= 0x7;
+            if (att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+               clear_mask |= 0x8;
+         }
+
+         if (a != VK_ATTACHMENT_UNUSED) {
+            tu_emit_clear_gmem_attachment(cmd, cs, a, clear_mask,
+                                          &att->clearValue);
+         }
+      }
+   }
+}
+
+/* vkCmdClearAttachments entry point.
+ *
+ * Emits both a GMEM and a sysmem variant of the clear into cmd->draw_cs,
+ * each wrapped in a conditional-exec section keyed on the render mode.
+ * Presumably only the section matching the mode chosen at submit time is
+ * executed - confirm against tu_cond_exec_start().
+ */
+void
+tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments,
+ uint32_t rectCount,
+ const VkClearRect *pRects)
+{
+ TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+ struct tu_cs *cs = &cmd->draw_cs;
+
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
+ tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
+ tu_cond_exec_end(cs);
+
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
+ tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
+ tu_cond_exec_end(cs);
+}
+
+/* Clear attachment `a` at render pass begin when rendering in sysmem mode.
+ *
+ * The component mask is derived from the attachment's load ops: 0xf when
+ * load_op is CLEAR; for D24S8 the stencil bit (0x8) follows stencil_load_op
+ * instead. Nothing is emitted when the mask is empty or the attachment has
+ * a negative gmem_offset. Multisampled attachments use the 3D ops, others
+ * the 2D ops. The clear covers info->renderArea on every framebuffer layer.
+ */
+void
+tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
+                           struct tu_cs *cs,
+                           uint32_t a,
+                           const VkRenderPassBeginInfo *info)
+{
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const struct tu_image_view *iview = fb->attachments[a].attachment;
+   const struct tu_render_pass_attachment *attachment =
+      &cmd->state.pass->attachments[a];
+
+   uint8_t mask = attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR ? 0xf : 0;
+
+   if (iview->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+      mask &= 0x7;
+      if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+         mask |= 0x8;
+   }
+
+   /* gmem_offset<0 means it isn't used by any subpass and shouldn't be cleared */
+   if (attachment->gmem_offset < 0)
+      return;
+   if (mask == 0)
+      return;
+
+   const struct blit_ops *ops = attachment->samples > 1 ? &r3d_ops : &r2d_ops;
+
+   ops->setup(cmd, cs, iview->vk_format, ROTATE_0, true, mask);
+   ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
+   ops->clear_value(cs, iview->vk_format, &info->pClearValues[a]);
+
+   for (uint32_t layer = 0; layer < fb->layers; layer++) {
+      ops->dst(cs, iview->image, iview->vk_format, iview->base_mip,
+               iview->base_layer + layer);
+      ops->run(cmd, cs);
+   }
+}
+
+/* Clear attachment `a` at render pass begin when rendering in GMEM mode.
+ *
+ * The clear mask is 0xf when load_op is CLEAR; for formats with a stencil
+ * aspect the stencil bit (0x8) follows stencil_load_op instead. Nothing is
+ * emitted for attachments with a negative gmem_offset or an empty mask.
+ * Otherwise the MSAA sample count is programmed and the GMEM clear emitted.
+ */
+void
+tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+                         struct tu_cs *cs,
+                         uint32_t a,
+                         const VkRenderPassBeginInfo *info)
+{
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const struct tu_image_view *iview = fb->attachments[a].attachment;
+   const struct tu_render_pass_attachment *attachment =
+      &cmd->state.pass->attachments[a];
+
+   /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
+   if (attachment->gmem_offset < 0)
+      return;
+
+   unsigned clear_mask =
+      attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR ? 0xf : 0;
+
+   if (vk_format_has_stencil(iview->vk_format)) {
+      clear_mask &= 0x7;
+      if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+         clear_mask |= 0x8;
+   }
+
+   if (clear_mask == 0)
+      return;
+
+   tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
+
+   tu_emit_clear_gmem_attachment(cmd, cs, a, clear_mask,
+                                 &info->pClearValues[a]);
+}
+
+/* Emit a BLIT event between an attachment's GMEM storage and its image.
+ *
+ * iview: image view giving the memory-side surface (format, tiling, address,
+ * pitches, optional UBWC flag buffer)
+ * attachment: render pass attachment giving the sample count and GMEM offset
+ * resolve: false is passed by the load path, true by the store path in this
+ * file; it is written inverted into the BLIT_INFO unk0/gmem bits
+ */
+static void
+tu_emit_blit(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ const struct tu_image_view *iview,
+ struct tu_render_pass_attachment *attachment,
+ bool resolve)
+{
+ const struct tu_native_format format =
+ tu6_format_image(iview->image, iview->vk_format, iview->base_mip);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
+
+ tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
+ .unk0 = !resolve,
+ .gmem = !resolve,
+ /* "integer" bit disables msaa resolve averaging */
+ .integer = vk_format_is_int(iview->vk_format)));
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_DST_INFO(
+ .tile_mode = format.tile_mode,
+ .samples = tu_msaa_samples(iview->image->samples),
+ .color_format = format.fmt,
+ .color_swap = format.swap,
+ .flags = iview->image->layout.ubwc_layer_size != 0),
+ A6XX_RB_BLIT_DST(tu_image_view_base_ref(iview)),
+ A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)),
+ A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
+
+ /* the flag buffer registers are only programmed for UBWC images */
+ if (iview->image->layout.ubwc_layer_size) {
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_FLAG_DST(tu_image_view_ubwc_base_ref(iview)),
+ A6XX_RB_BLIT_FLAG_DST_PITCH(tu_image_view_ubwc_pitches(iview)));
+ }
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+
+ tu6_emit_event_write(cmd, cs, BLIT, false);
+}
+
+/* Report whether the blit event can perform an MSAA resolve for `format`.
+ *
+ * Returns false for snorm and srgb formats, for formats whose first channel
+ * is wider than 10 bits, and for a short list of formats known not to work.
+ */
+static bool
+blit_can_resolve(VkFormat format)
+{
+   const struct util_format_description *desc = vk_format_description(format);
+
+   /* blit event can only do resolve for simple cases:
+    * averaging samples as unsigned integers or choosing only one sample
+    */
+   if (vk_format_is_snorm(format))
+      return false;
+   if (vk_format_is_srgb(format))
+      return false;
+
+   /* can't do formats with larger channel sizes
+    * note: this includes all float formats
+    * note2: single channel integer formats seem OK
+    */
+   if (desc->channel[0].size > 10)
+      return false;
+
+   /* for unknown reasons blit event can't msaa resolve the R8G8 formats when
+    * tiled, likely related to these formats having different layout from
+    * other cpp=2 formats
+    * TODO: D24_UNORM_S8_UINT should be able to work?
+    */
+   const bool known_broken =
+      format == VK_FORMAT_R8G8_UNORM ||
+      format == VK_FORMAT_R8G8_UINT ||
+      format == VK_FORMAT_R8G8_SINT ||
+      format == VK_FORMAT_D24_UNORM_S8_UINT;
+
+   return !known_broken;
+}
+
+/* Unconditionally emit the blit that loads attachment `a` from its image
+ * into GMEM (a non-resolve blit).
+ */
+void
+tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a)
+{
+   const struct tu_image_view *iview =
+      cmd->state.framebuffer->attachments[a].attachment;
+   struct tu_render_pass_attachment *attachment =
+      &cmd->state.pass->attachments[a];
+
+   tu_emit_blit(cmd, cs, iview, attachment, false);
+}
+
+/* Load attachment `a` into GMEM if the render pass asks for it.
+ *
+ * Attachments with a negative gmem_offset are skipped. A load is emitted
+ * when load_op is LOAD, or when the format has a stencil aspect and
+ * stencil_load_op is LOAD.
+ */
+void
+tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a)
+{
+   const struct tu_render_pass_attachment *attachment =
+      &cmd->state.pass->attachments[a];
+
+   if (attachment->gmem_offset < 0)
+      return;
+
+   const bool needs_load =
+      attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD ||
+      (vk_format_has_stencil(attachment->format) &&
+       attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
+
+   if (needs_load)
+      tu_emit_load_gmem_attachment(cmd, cs, a);
+}
+
+/* Store (or resolve) GMEM contents to the image of attachment `a`.
+ *
+ * a: destination attachment index (its image view is written)
+ * gmem_a: attachment index whose GMEM contents are the source; differs from
+ * `a` when this is a resolve
+ *
+ * Nothing is emitted when the destination's store_op is DONT_CARE. When the
+ * render area is suitably aligned and the blit event supports the case, the
+ * BLIT event fast path is used. Otherwise the 2D engine copies from GMEM,
+ * except for multisampled destinations, which are not implemented.
+ */
+void
+tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t a,
+ uint32_t gmem_a)
+{
+ const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+ const VkRect2D *render_area = &tiling->render_area;
+ struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
+ struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
+ struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
+
+ if (dst->store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
+ return;
+
+ uint32_t x1 = render_area->offset.x;
+ uint32_t y1 = render_area->offset.y;
+ uint32_t x2 = x1 + render_area->extent.width;
+ uint32_t y2 = y1 + render_area->extent.height;
+ /* x2/y2 can be unaligned if equal to the size of the image,
+ * since it will write into padding space
+ * the one exception is linear levels which don't have the
+ * required y padding in the layout (except for the last level)
+ */
+ bool need_y2_align =
+ y2 != iview->extent.height ||
+ (tu6_get_image_tile_mode(iview->image, iview->base_mip) == TILE6_LINEAR &&
+ iview->base_mip != iview->image->level_count - 1);
+
+ bool unaligned =
+ x1 % GMEM_ALIGN_W || (x2 % GMEM_ALIGN_W && x2 != iview->extent.width) ||
+ y1 % GMEM_ALIGN_H || (y2 % GMEM_ALIGN_H && need_y2_align);
+
+ /* use fast path when render area is aligned, except for unsupported resolve cases */
+ if (!unaligned && (a == gmem_a || blit_can_resolve(iview->vk_format))) {
+ tu_emit_blit(cmd, cs, iview, src, true);
+ return;
+ }
+
+ if (dst->samples > 1) {
+ /* I guess we need to use shader path in this case?
+ * need a testcase which fails because of this
+ */
+ tu_finishme("unaligned store of msaa attachment\n");
+ return;
+ }
+
+ /* slow path: 2D engine blit with the destination set up as usual and the
+ * source pointed at the attachment's GMEM location
+ */
+ r2d_setup_common(cmd, cs, iview->vk_format, ROTATE_0, false, 0xf, true);
+ r2d_dst(cs, iview->image, iview->vk_format, iview->base_mip, iview->base_layer);
+ r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
+
+ tu_cs_emit_regs(cs,
+ A6XX_SP_PS_2D_SRC_INFO(
+ .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
+ .tile_mode = TILE6_2,
+ .srgb = vk_format_is_srgb(src->format),
+ .samples = tu_msaa_samples(src->samples),
+ .samples_average = !vk_format_is_int(src->format),
+ .unk20 = 1,
+ .unk22 = 1),
+ /* note: src size does not matter when not scaling */
+ A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
+ A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
+ A6XX_SP_PS_2D_SRC_HI(),
+ A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));
+
+ /* sync GMEM writes with CACHE */
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
+ tu_cs_emit_pkt7(cs, CP_BLIT, 1);
+ tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ /* TODO: flushing with barriers instead of blindly always flushing */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+}
#include "vk_format.h"
#include "tu_cs.h"
-#include "tu_blit.h"
#define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
return VK_SUCCESS;
}
-static bool
-is_linear_mipmapped(const struct tu_image_view *iview)
-{
- return iview->image->layout.tile_mode == TILE6_LINEAR &&
- iview->base_mip != iview->image->level_count - 1;
-}
-
-static bool
-force_sysmem(const struct tu_cmd_buffer *cmd,
- const struct VkRect2D *render_area)
-{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- bool has_linear_mipmapped_store = false;
- const struct tu_render_pass *pass = cmd->state.pass;
-
- /* Layered rendering requires sysmem. */
- if (fb->layers > 1)
- return true;
-
- /* Iterate over all the places we call tu6_emit_store_attachment() */
- for (unsigned i = 0; i < pass->subpass_count; i++) {
- const struct tu_subpass *subpass = &pass->subpasses[i];
- if (subpass->resolve_attachments) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- uint32_t a = subpass->resolve_attachments[i].attachment;
- if (a != VK_ATTACHMENT_UNUSED &&
- cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
- const struct tu_image_view *iview = fb->attachments[a].attachment;
- if (is_linear_mipmapped(iview)) {
- has_linear_mipmapped_store = true;
- break;
- }
- }
- }
- }
- }
-
- for (unsigned i = 0; i < pass->attachment_count; i++) {
- if (pass->attachments[i].gmem_offset >= 0 &&
- cmd->state.pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
- const struct tu_image_view *iview = fb->attachments[i].attachment;
- if (is_linear_mipmapped(iview)) {
- has_linear_mipmapped_store = true;
- break;
- }
- }
- }
-
- /* Linear textures cannot have any padding between mipmap levels and their
- * height isn't padded, while at the same time the GMEM->MEM resolve does
- * not have per-pixel granularity, so if the image height isn't aligned to
- * the resolve granularity and the render area is tall enough, we may wind
- * up writing past the bottom of the image into the next miplevel or even
- * past the end of the image. For the last miplevel, the layout code should
- * insert enough padding so that the overdraw writes to the padding. To
- * work around this, we force-enable sysmem rendering.
- */
- const uint32_t y2 = render_area->offset.y + render_area->extent.height;
- const uint32_t aligned_y2 = ALIGN_POT(y2, GMEM_ALIGN_H);
-
- return has_linear_mipmapped_store && aligned_y2 > fb->height;
-}
-
static void
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
const struct tu_device *dev,
}
}
-#define tu_image_view_ubwc_pitches(iview) \
- .pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip), \
- .array_pitch = tu_image_ubwc_size(iview->image, iview->base_mip) >> 2
-
static void
tu6_emit_zs(struct tu_cmd_buffer *cmd,
const struct tu_subpass *subpass,
continue;
const struct tu_image_view *iview = fb->attachments[a].attachment;
- const enum a6xx_tile_mode tile_mode =
- tu6_get_image_tile_mode(iview->image, iview->base_mip);
mrt_comp[i] = 0xf;
if (vk_format_is_srgb(iview->vk_format))
srgb_cntl |= (1 << i);
- const struct tu_native_format format =
- tu6_format_color(iview->vk_format, iview->image->layout.tile_mode);
+ struct tu_native_format format =
+ tu6_format_image(iview->image, iview->vk_format, iview->base_mip);
tu_cs_emit_regs(cs,
A6XX_RB_MRT_BUF_INFO(i,
- .color_tile_mode = tile_mode,
+ .color_tile_mode = format.tile_mode,
.color_format = format.fmt,
.color_swap = format.swap),
A6XX_RB_MRT_PITCH(i, tu_image_stride(iview->image, iview->base_mip)),
.type = LAYER_2D_ARRAY));
}
-static void
-tu6_emit_msaa(struct tu_cmd_buffer *cmd,
- const struct tu_subpass *subpass,
- struct tu_cs *cs)
+void
+tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits vk_samples)
{
- const enum a3xx_msaa_samples samples = tu_msaa_samples(subpass->samples);
+ const enum a3xx_msaa_samples samples = tu_msaa_samples(vk_samples);
bool msaa_disable = samples == MSAA_ONE;
tu_cs_emit_regs(cs,
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
}
-static void
-tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- const struct tu_image_view *iview,
- uint32_t gmem_offset,
- bool resolve)
-{
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_INFO(.unk0 = !resolve, .gmem = !resolve));
-
- const struct tu_native_format format =
- tu6_format_color(iview->vk_format, iview->image->layout.tile_mode);
-
- enum a6xx_tile_mode tile_mode =
- tu6_get_image_tile_mode(iview->image, iview->base_mip);
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_DST_INFO(
- .tile_mode = tile_mode,
- .samples = tu_msaa_samples(iview->image->samples),
- .color_format = format.fmt,
- .color_swap = format.swap,
- .flags = iview->image->layout.ubwc_layer_size != 0),
- A6XX_RB_BLIT_DST(tu_image_view_base_ref(iview)),
- A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)),
- A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
-
- if (iview->image->layout.ubwc_layer_size) {
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_FLAG_DST(tu_image_view_ubwc_base_ref(iview)),
- A6XX_RB_BLIT_FLAG_DST_PITCH(tu_image_view_ubwc_pitches(iview)));
- }
-
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_BASE_GMEM(gmem_offset));
-}
-
-static void
-tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
-{
- tu6_emit_event_write(cmd, cs, BLIT, false);
-}
-
-static void
-tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
+void
+tu6_emit_window_scissor(struct tu_cs *cs,
uint32_t x1,
uint32_t y1,
uint32_t x2,
A6XX_GRAS_RESOLVE_CNTL_2(.x = x2, .y = y2));
}
-static void
-tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t x1,
- uint32_t y1)
+void
+tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1)
{
tu_cs_emit_regs(cs,
A6XX_RB_WINDOW_OFFSET(.x = x1, .y = y1));
if (!cmd->state.pass->gmem_pixels)
return true;
+ if (cmd->state.framebuffer->layers > 1)
+ return true;
+
return cmd->state.tiling_config.force_sysmem;
}
const uint32_t y1 = tile->begin.y;
const uint32_t x2 = tile->end.x - 1;
const uint32_t y2 = tile->end.y - 1;
- tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
- tu6_emit_window_offset(cmd, cs, x1, y1);
+ tu6_emit_window_scissor(cs, x1, y1, x2, y2);
+ tu6_emit_window_offset(cs, x1, y1);
tu_cs_emit_regs(cs,
A6XX_VPC_SO_OVERRIDE(.so_disable = false));
}
}
-static void
-tu6_emit_load_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a)
-{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *iview = fb->attachments[a].attachment;
- const struct tu_render_pass_attachment *attachment =
- &cmd->state.pass->attachments[a];
-
- if (attachment->gmem_offset < 0)
- return;
-
- const uint32_t x1 = tiling->render_area.offset.x;
- const uint32_t y1 = tiling->render_area.offset.y;
- const uint32_t x2 = x1 + tiling->render_area.extent.width;
- const uint32_t y2 = y1 + tiling->render_area.extent.height;
- const uint32_t tile_x2 =
- tiling->tile0.offset.x + tiling->tile0.extent.width * tiling->tile_count.width;
- const uint32_t tile_y2 =
- tiling->tile0.offset.y + tiling->tile0.extent.height * tiling->tile_count.height;
- bool need_load =
- x1 != tiling->tile0.offset.x || x2 != MIN2(fb->width, tile_x2) ||
- y1 != tiling->tile0.offset.y || y2 != MIN2(fb->height, tile_y2);
-
- if (need_load)
- tu_finishme("improve handling of unaligned render area");
-
- if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
- need_load = true;
-
- if (vk_format_has_stencil(iview->vk_format) &&
- attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
- need_load = true;
-
- if (need_load) {
- tu6_emit_blit_info(cmd, cs, iview, attachment->gmem_offset, false);
- tu6_emit_blit(cmd, cs);
- }
-}
-
-static void
-tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
- uint32_t a,
- const VkRenderPassBeginInfo *info)
-{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *iview = fb->attachments[a].attachment;
- const struct tu_render_pass_attachment *attachment =
- &cmd->state.pass->attachments[a];
- unsigned clear_mask = 0;
-
- /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
- if (attachment->gmem_offset < 0)
- return;
-
- if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
- clear_mask = 0xf;
-
- if (vk_format_has_stencil(iview->vk_format)) {
- clear_mask &= 0x1;
- if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
- clear_mask |= 0x2;
- }
- if (!clear_mask)
- return;
-
- tu_clear_gmem_attachment(cmd, cs, a, clear_mask,
- &info->pClearValues[a]);
-}
-
-static void
-tu6_emit_predicated_blit(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- uint32_t gmem_a,
- bool resolve)
-{
- tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
-
- tu6_emit_blit_info(cmd, cs,
- cmd->state.framebuffer->attachments[a].attachment,
- cmd->state.pass->attachments[gmem_a].gmem_offset, resolve);
- tu6_emit_blit(cmd, cs);
-
- tu_cond_exec_end(cs);
-}
-
static void
tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t gmem_a)
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *dst = fb->attachments[a].attachment;
- const struct tu_image_view *src = fb->attachments[gmem_a].attachment;
-
- tu_blit(cmd, cs, &(struct tu_blit) {
- .dst = sysmem_attachment_surf(dst, dst->base_layer,
- &cmd->state.tiling_config.render_area),
- .src = sysmem_attachment_surf(src, src->base_layer,
- &cmd->state.tiling_config.render_area),
- .layers = fb->layers,
- });
-}
-
+ struct tu_image_view *dst = fb->attachments[a].attachment;
+ struct tu_image_view *src = fb->attachments[gmem_a].attachment;
-/* Emit a MSAA resolve operation, with both gmem and sysmem paths. */
-static void tu6_emit_resolve(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- uint32_t gmem_a)
-{
- if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
- return;
-
- tu6_emit_predicated_blit(cmd, cs, a, gmem_a, true);
-
- tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
- tu6_emit_sysmem_resolve(cmd, cs, a, gmem_a);
- tu_cond_exec_end(cs);
-}
-
-static void
-tu6_emit_store_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- uint32_t gmem_a)
-{
- if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
- return;
-
- tu6_emit_blit_info(cmd, cs,
- cmd->state.framebuffer->attachments[a].attachment,
- cmd->state.pass->attachments[gmem_a].gmem_offset, true);
- tu6_emit_blit(cmd, cs);
+ tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
}
static void
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
- tu6_emit_blit_scissor(cmd, cs, true);
+ /* blit scissor may have been changed by CmdClearAttachments */
+ tu6_emit_blit_scissor(cmd, cs, false);
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem_offset >= 0)
- tu6_emit_store_attachment(cmd, cs, a, a);
+ tu_store_gmem_attachment(cmd, cs, a, a);
}
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a != VK_ATTACHMENT_UNUSED)
- tu6_emit_store_attachment(cmd, cs, a,
- subpass->color_attachments[i].attachment);
+ tu_store_gmem_attachment(cmd, cs, a,
+ subpass->color_attachments[i].attachment);
}
}
}
uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
- tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
+ tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
cmd->wait_for_idle = false;
}
-static void
-tu_emit_sysmem_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
- uint32_t a,
- const VkRenderPassBeginInfo *info)
-{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *iview = fb->attachments[a].attachment;
- const struct tu_render_pass_attachment *attachment =
- &cmd->state.pass->attachments[a];
- unsigned clear_mask = 0;
-
- /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
- if (attachment->gmem_offset < 0)
- return;
-
- if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_mask = 0xf;
- }
-
- if (vk_format_has_stencil(iview->vk_format)) {
- clear_mask &= 0x1;
- if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
- clear_mask |= 0x2;
- if (clear_mask != 0x3)
- tu_finishme("depth/stencil only load op");
- }
-
- if (!clear_mask)
- return;
-
- tu_clear_sysmem_attachment(cmd, cs, a,
- &info->pClearValues[a], &(struct VkClearRect) {
- .rect = info->renderArea,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- });
-}
-
static void
tu_emit_load_clear(struct tu_cmd_buffer *cmd,
const VkRenderPassBeginInfo *info)
tu6_emit_blit_scissor(cmd, cs, true);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
- tu6_emit_load_attachment(cmd, cs, i);
+ tu_load_gmem_attachment(cmd, cs, i);
tu6_emit_blit_scissor(cmd, cs, false);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
- tu6_emit_clear_attachment(cmd, cs, i, info);
+ tu_clear_gmem_attachment(cmd, cs, i, info);
tu_cond_exec_end(cs);
- /* invalidate because reading input attachments will cache GMEM and
- * the cache isn''t updated when GMEM is written
- * TODO: is there a no-cache bit for textures?
- */
- if (cmd->state.subpass->input_count)
- tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
-
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
- tu_emit_sysmem_clear_attachment(cmd, cs, i, info);
+ tu_clear_sysmem_attachment(cmd, cs, i, info);
tu_cond_exec_end(cs);
}
const struct tu_framebuffer *fb = cmd->state.framebuffer;
assert(fb->width > 0 && fb->height > 0);
- tu6_emit_window_scissor(cmd, cs, 0, 0, fb->width - 1, fb->height - 1);
- tu6_emit_window_offset(cmd, cs, 0, 0);
+ tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
+ tu6_emit_window_offset(cs, 0, 0);
tu6_emit_bin_size(cs, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */
/* Do any resolves of the last subpass. These are handled in the
* tile_store_ib in the gmem path.
*/
-
const struct tu_subpass *subpass = cmd->state.subpass;
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->color_count; i++) {
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
tu_cs_emit(cs, 0x0);
- tu6_emit_wfi(cmd, cs);
+ /* TODO: flush via barriers instead of always flushing unconditionally */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR, false);
+ tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH, false);
+
+ tu_cs_emit_wfi(cs);
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_gmem, .gmem = 1));
static void
tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
{
- const uint32_t tile_store_space = 32 + 23 * cmd->state.pass->attachment_count;
+ const uint32_t tile_store_space = 11 + (35 * 2) * cmd->state.pass->attachment_count;
struct tu_cs sub_cs;
VkResult result =
struct tu_tiling_config *tiling = &cmd->state.tiling_config;
tiling->render_area = *render_area;
- tiling->force_sysmem = force_sysmem(cmd, render_area);
+ tiling->force_sysmem = false;
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels);
tu_tiling_config_update_pipe_layout(tiling, dev);
tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
- tu6_emit_msaa(cmd, cmd->state.subpass, &cmd->draw_cs);
+ tu6_emit_msaa(&cmd->draw_cs, cmd->state.subpass->samples);
tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false);
/* note: use_hw_binning only checks tiling config */
struct tu_cs *cs = &cmd->draw_cs;
const struct tu_subpass *subpass = cmd->state.subpass++;
- /* TODO:
- * if msaa samples change between subpasses,
- * attachment store is broken for some attachments
- */
+
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
+
if (subpass->resolve_attachments) {
- tu6_emit_blit_scissor(cmd, cs, true);
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
- if (a != VK_ATTACHMENT_UNUSED) {
- tu6_emit_resolve(cmd, cs, a,
- subpass->color_attachments[i].attachment);
- }
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ tu_store_gmem_attachment(cmd, cs, a,
+ subpass->color_attachments[i].attachment);
+
+ if (pass->attachments[a].gmem_offset < 0)
+ continue;
+
+ /* TODO:
+ * Check whether the resolved attachment is needed by later subpasses;
+ * if it is, do a GMEM->GMEM resolve instead of GMEM->MEM->GMEM.
+ */
+ tu_finishme("missing GMEM->GMEM resolve path\n");
+ tu_emit_load_gmem_attachment(cmd, cs, a);
}
}
- /* invalidate because reading input attachments will cache GMEM and
- * the cache isn''t updated when GMEM is written
- * TODO: is there a no-cache bit for textures?
- */
- if (cmd->state.subpass->input_count)
- tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+ tu_cond_exec_end(cs);
- /* emit mrt/zs/msaa/ubwc state for the subpass that is starting */
- tu6_emit_zs(cmd, cmd->state.subpass, cs);
- tu6_emit_mrt(cmd, cmd->state.subpass, cs);
- tu6_emit_msaa(cmd, cmd->state.subpass, cs);
- tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, false);
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
- /* Emit flushes so that input attachments will read the correct value. This
- * is for sysmem only, although it shouldn't do much harm on gmem.
+ /* Emit flushes so that input attachments will read the correct value.
+ * TODO: use subpass dependencies to decide whether a flush is needed
*/
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
- /* TODO:
- * since we don't know how to do GMEM->GMEM resolve,
- * resolve attachments are resolved to memory then loaded to GMEM again if needed
- */
if (subpass->resolve_attachments) {
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
- if (a != VK_ATTACHMENT_UNUSED && pass->attachments[a].gmem_offset >= 0) {
- tu_finishme("missing GMEM->GMEM resolve, performance will suffer\n");
- tu6_emit_predicated_blit(cmd, cs, a, a, false);
- }
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ tu6_emit_sysmem_resolve(cmd, cs, a,
+ subpass->color_attachments[i].attachment);
}
+
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
}
+
+ tu_cond_exec_end(cs);
+
+ /* if the subpass being started has input attachments, a texture cache invalidate is likely needed */
+ if (cmd->state.subpass->input_count)
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
+ /* emit mrt/zs/msaa/ubwc state for the subpass that is starting */
+ tu6_emit_zs(cmd, cmd->state.subpass, cs);
+ tu6_emit_mrt(cmd, cmd->state.subpass, cs);
+ tu6_emit_msaa(cs, cmd->state.subpass->samples);
+ tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, false);
}
void
};
static void
-tu_barrier(struct tu_cmd_buffer *cmd_buffer,
+tu_barrier(struct tu_cmd_buffer *cmd,
uint32_t memoryBarrierCount,
const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkImageMemoryBarrier *pImageMemoryBarriers,
const struct tu_barrier_info *info)
{
+ /* The render pass case only occurs for subpass self-dependencies,
+ * which means syncing the render output with the texture cache.
+ * Note: only the CACHE_INVALIDATE is needed in GMEM mode, and in
+ * sysmem mode the color and/or depth flush might not be needed.
+ */
+ if (cmd->state.pass) {
+ tu6_emit_event_write(cmd, &cmd->draw_cs, PC_CCU_FLUSH_COLOR_TS, true);
+ tu6_emit_event_write(cmd, &cmd->draw_cs, PC_CCU_FLUSH_DEPTH_TS, true);
+ tu6_emit_event_write(cmd, &cmd->draw_cs, CACHE_INVALIDATE, false);
+ return;
+ }
}
void
tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
+ VkPipelineStageFlags dstStageMask,
+ VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount,
const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
#include "registers/adreno_common.xml.h"
#include "registers/a6xx.xml.h"
-#include "util/format_r11g11b10f.h"
-#include "util/format_rgb9e5.h"
-#include "util/format_srgb.h"
-#include "util/u_half.h"
#include "vk_format.h"
#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
TU6_xTx(E5B9G9R9_UFLOAT_PACK32, 9_9_9_E5_FLOAT, WZYX), /* 123 */
/* depth/stencil */
- TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */
- TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT, WZYX), /* 125 */
- TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */
- TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
- TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
- TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT, WZYX), /* 129 */
- TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
+ TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */
+ TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 125 */
+ TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */
+ TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
+ TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
+ TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 129 */
+ TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
/* compressed */
TU6_xTx(BC1_RGB_UNORM_BLOCK, DXT1, WZYX), /* 131 */
return fmt;
}
-enum a6xx_2d_ifmt
-tu6_fmt_to_ifmt(enum a6xx_format fmt)
-{
- switch (fmt) {
- case FMT6_A8_UNORM:
- case FMT6_8_UNORM:
- case FMT6_8_SNORM:
- case FMT6_8_8_UNORM:
- case FMT6_8_8_SNORM:
- case FMT6_8_8_8_8_UNORM:
- case FMT6_8_8_8_X8_UNORM:
- case FMT6_8_8_8_8_SNORM:
- case FMT6_4_4_4_4_UNORM:
- case FMT6_5_5_5_1_UNORM:
- case FMT6_5_6_5_UNORM:
- case FMT6_Z24_UNORM_S8_UINT:
- case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
- return R2D_UNORM8;
-
- case FMT6_32_UINT:
- case FMT6_32_SINT:
- case FMT6_32_32_UINT:
- case FMT6_32_32_SINT:
- case FMT6_32_32_32_32_UINT:
- case FMT6_32_32_32_32_SINT:
- return R2D_INT32;
-
- case FMT6_16_UINT:
- case FMT6_16_SINT:
- case FMT6_16_16_UINT:
- case FMT6_16_16_SINT:
- case FMT6_16_16_16_16_UINT:
- case FMT6_16_16_16_16_SINT:
- case FMT6_10_10_10_2_UINT:
- return R2D_INT16;
-
- case FMT6_8_UINT:
- case FMT6_8_SINT:
- case FMT6_8_8_UINT:
- case FMT6_8_8_SINT:
- case FMT6_8_8_8_8_UINT:
- case FMT6_8_8_8_8_SINT:
- return R2D_INT8;
-
- case FMT6_16_UNORM:
- case FMT6_16_SNORM:
- case FMT6_16_16_UNORM:
- case FMT6_16_16_SNORM:
- case FMT6_16_16_16_16_UNORM:
- case FMT6_16_16_16_16_SNORM:
- case FMT6_32_FLOAT:
- case FMT6_32_32_FLOAT:
- case FMT6_32_32_32_32_FLOAT:
- return R2D_FLOAT32;
-
- case FMT6_16_FLOAT:
- case FMT6_16_16_FLOAT:
- case FMT6_16_16_16_16_FLOAT:
- case FMT6_11_11_10_FLOAT:
- case FMT6_10_10_10_2_UNORM:
- case FMT6_10_10_10_2_UNORM_DEST:
- return R2D_FLOAT16;
-
- default:
- unreachable("bad format");
- return 0;
- }
-}
-
enum a6xx_depth_format
tu6_pipe2depth(VkFormat format)
{
}
}
-static uint32_t
-tu_pack_mask(int bits)
-{
- assert(bits <= 32);
- return (1ull << bits) - 1;
-}
-
-static uint32_t
-tu_pack_float32_for_unorm(float val, int bits)
-{
- const uint32_t max = tu_pack_mask(bits);
- if (val < 0.0f)
- return 0;
- else if (val > 1.0f)
- return max;
- else
- return _mesa_lroundevenf(val * (float) max);
-}
-
-static uint32_t
-tu_pack_float32_for_snorm(float val, int bits)
-{
- const int32_t max = tu_pack_mask(bits - 1);
- int32_t tmp;
- if (val < -1.0f)
- tmp = -max;
- else if (val > 1.0f)
- tmp = max;
- else
- tmp = _mesa_lroundevenf(val * (float) max);
-
- return tmp & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_float32_for_uscaled(float val, int bits)
-{
- const uint32_t max = tu_pack_mask(bits);
- if (val < 0.0f)
- return 0;
- else if (val > (float) max)
- return max;
- else
- return (uint32_t) val;
-}
-
-static uint32_t
-tu_pack_float32_for_sscaled(float val, int bits)
-{
- const int32_t max = tu_pack_mask(bits - 1);
- const int32_t min = -max - 1;
- int32_t tmp;
- if (val < (float) min)
- tmp = min;
- else if (val > (float) max)
- tmp = max;
- else
- tmp = (int32_t) val;
-
- return tmp & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_uint32_for_uint(uint32_t val, int bits)
-{
- return val & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_int32_for_sint(int32_t val, int bits)
-{
- return val & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_float32_for_sfloat(float val, int bits)
-{
- assert(bits == 16 || bits == 32);
- return bits == 16 ? util_float_to_half(val) : fui(val);
-}
-
-union tu_clear_component_value {
- float float32;
- int32_t int32;
- uint32_t uint32;
-};
-
-static uint32_t
-tu_pack_clear_component_value(union tu_clear_component_value val,
- const struct util_format_channel_description *ch)
-{
- uint32_t packed;
-
- switch (ch->type) {
- case UTIL_FORMAT_TYPE_UNSIGNED:
- /* normalized, scaled, or pure integer */
- if (ch->normalized)
- packed = tu_pack_float32_for_unorm(val.float32, ch->size);
- else if (ch->pure_integer)
- packed = tu_pack_uint32_for_uint(val.uint32, ch->size);
- else
- packed = tu_pack_float32_for_uscaled(val.float32, ch->size);
- break;
- case UTIL_FORMAT_TYPE_SIGNED:
- /* normalized, scaled, or pure integer */
- if (ch->normalized)
- packed = tu_pack_float32_for_snorm(val.float32, ch->size);
- else if (ch->pure_integer)
- packed = tu_pack_int32_for_sint(val.int32, ch->size);
- else
- packed = tu_pack_float32_for_sscaled(val.float32, ch->size);
- break;
- case UTIL_FORMAT_TYPE_FLOAT:
- packed = tu_pack_float32_for_sfloat(val.float32, ch->size);
- break;
- default:
- unreachable("unexpected channel type");
- packed = 0;
- break;
- }
-
- assert((packed & tu_pack_mask(ch->size)) == packed);
- return packed;
-}
-
-static const struct util_format_channel_description *
-tu_get_format_channel_description(const struct util_format_description *desc,
- int comp)
-{
- switch (desc->swizzle[comp]) {
- case PIPE_SWIZZLE_X:
- return &desc->channel[0];
- case PIPE_SWIZZLE_Y:
- return &desc->channel[1];
- case PIPE_SWIZZLE_Z:
- return &desc->channel[2];
- case PIPE_SWIZZLE_W:
- return &desc->channel[3];
- default:
- return NULL;
- }
-}
-
-static union tu_clear_component_value
-tu_get_clear_component_value(const VkClearValue *val, int comp,
- enum util_format_colorspace colorspace)
-{
- assert(comp < 4);
-
- union tu_clear_component_value tmp;
- switch (colorspace) {
- case UTIL_FORMAT_COLORSPACE_ZS:
- assert(comp < 2);
- if (comp == 0)
- tmp.float32 = val->depthStencil.depth;
- else
- tmp.uint32 = val->depthStencil.stencil;
- break;
- case UTIL_FORMAT_COLORSPACE_SRGB:
- if (comp < 3) {
- tmp.float32 = util_format_linear_to_srgb_float(val->color.float32[comp]);
- break;
- }
- default:
- assert(comp < 4);
- tmp.uint32 = val->color.uint32[comp];
- break;
- }
-
- return tmp;
-}
-
-/**
- * Pack a VkClearValue into a 128-bit buffer. \a format is respected except
- * for the component order. The components are always packed in WZYX order
- * (i.e., msb is white and lsb is red).
- *
- * Return the number of uint32_t's used.
- */
-void
-tu_pack_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4])
-{
- const struct util_format_description *desc = vk_format_description(format);
-
- switch (format) {
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- buf[0] = float3_to_r11g11b10f(val->color.float32);
- return;
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
- buf[0] = float3_to_rgb9e5(val->color.float32);
- return;
- default:
- break;
- }
-
- assert(desc && desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
-
- /* S8_UINT is special and has no depth */
- const int max_components =
- format == VK_FORMAT_S8_UINT ? 2 : desc->nr_channels;
-
- int buf_offset = 0;
- int bit_shift = 0;
- for (int comp = 0; comp < max_components; comp++) {
- const struct util_format_channel_description *ch =
- tu_get_format_channel_description(desc, comp);
- if (!ch) {
- assert((format == VK_FORMAT_S8_UINT && comp == 0) ||
- (format == VK_FORMAT_X8_D24_UNORM_PACK32 && comp == 1));
- continue;
- }
-
- union tu_clear_component_value v = tu_get_clear_component_value(
- val, comp, desc->colorspace);
-
- /* move to the next uint32_t when there is not enough space */
- assert(ch->size <= 32);
- if (bit_shift + ch->size > 32) {
- buf_offset++;
- bit_shift = 0;
- }
-
- if (bit_shift == 0)
- buf[buf_offset] = 0;
-
- buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift;
- bit_shift += ch->size;
- }
-}
-
-void
-tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4])
-{
- const struct util_format_description *desc = vk_format_description(format);
-
- /* not supported by 2D engine, cleared as U32 */
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- buf[0] = float3_to_rgb9e5(val->float32);
- return;
- }
-
- enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(tu6_get_native_format(format).fmt);
-
- assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
- format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));
-
- for (unsigned i = 0; i < desc->nr_channels; i++) {
- const struct util_format_channel_description *ch = &desc->channel[i];
-
- switch (ifmt) {
- case R2D_INT32:
- case R2D_INT16:
- case R2D_INT8:
- case R2D_FLOAT32:
- buf[i] = val->uint32[i];
- break;
- case R2D_FLOAT16:
- buf[i] = util_float_to_half(val->float32[i]);
- break;
- case R2D_UNORM8: {
- float linear = val->float32[i];
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
- linear = util_format_linear_to_srgb_float(val->float32[i]);
-
- if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
- buf[i] = tu_pack_float32_for_snorm(linear, 8);
- else
- buf[i] = tu_pack_float32_for_unorm(linear, 8);
- } break;
- default:
- unreachable("unexpected ifmt");
- break;
- }
- }
-}
-
-void
-tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4])
-{
- switch (format) {
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- buf[0] = tu_pack_float32_for_unorm(val->depth, 24);
- buf[1] = buf[0] >> 8;
- buf[2] = buf[0] >> 16;
- buf[3] = val->stencil;
- return;
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D32_SFLOAT:
- buf[0] = fui(val->depth);
- return;
- case VK_FORMAT_S8_UINT:
- buf[0] = val->stencil;
- return;
- default:
- unreachable("unexpected zs format");
- break;
- }
-}
-
static void
tu_physical_device_get_format_properties(
struct tu_physical_device *physical_device,
ubwc_enabled = false;
}
- /* using UBWC with D24S8 breaks the "stencil read" copy path (why?)
- * (causes any deqp tests that need to check stencil to fail)
- * disable UBWC for this format until we properly support copy aspect masks
- */
- if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
- ubwc_enabled = false;
-
/* UBWC can't be used with E5B9G9R9 */
if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
ubwc_enabled = false;
return VK_SUCCESS;
}
-static enum a6xx_tex_fetchsize
+enum a6xx_tex_fetchsize
tu6_fetchsize(VkFormat format)
{
if (vk_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
memset(iview->descriptor, 0, sizeof(iview->descriptor));
struct tu_native_format fmt =
- tu6_format_texture(iview->vk_format, image->layout.tile_mode);
+ tu6_format_image_src(image, iview->vk_format, iview->base_mip);
uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer);
uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer);
- uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format);
- enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip);
+ uint32_t pitch = tu_image_pitch(image, iview->base_mip);
uint32_t width = iview->extent.width;
uint32_t height = iview->extent.height;
uint32_t depth = pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D ?
iview->extent.depth : iview->layer_count;
unsigned fmt_tex = fmt.fmt;
- if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT &&
- iview->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
- fmt_tex = FMT6_S8Z24_UINT;
+ if (fmt_tex == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
+ if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ fmt_tex = FMT6_Z24_UNORM_S8_UINT;
+ if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ fmt_tex = FMT6_S8Z24_UINT;
+ /* TODO: also use this format with storage descriptor ? */
+ }
iview->descriptor[0] =
- A6XX_TEX_CONST_0_TILE_MODE(tile_mode) |
+ A6XX_TEX_CONST_0_TILE_MODE(fmt.tile_mode) |
COND(vk_format_is_srgb(iview->vk_format), A6XX_TEX_CONST_0_SRGB) |
A6XX_TEX_CONST_0_FMT(fmt_tex) |
A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) |
iview->storage_descriptor[0] =
A6XX_IBO_0_FMT(fmt.fmt) |
- A6XX_IBO_0_TILE_MODE(tile_mode);
+ A6XX_IBO_0_TILE_MODE(fmt.tile_mode);
iview->storage_descriptor[1] =
A6XX_IBO_1_WIDTH(width) |
A6XX_IBO_1_HEIGHT(height);
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "tu_private.h"
-
-#include "tu_blit.h"
-
-static void
-tu_blit_image(struct tu_cmd_buffer *cmdbuf,
- struct tu_image *src_image,
- struct tu_image *dst_image,
- const VkImageBlit *info,
- VkFilter filter)
-{
- static const enum a6xx_rotation rotate[2][2] = {
- {ROTATE_0, ROTATE_HFLIP},
- {ROTATE_VFLIP, ROTATE_180},
- };
- bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
- (info->dstOffsets[1].x < info->dstOffsets[0].x);
- bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
- (info->dstOffsets[1].y < info->dstOffsets[0].y);
- bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
- (info->dstOffsets[1].z < info->dstOffsets[0].z);
-
- if (mirror_z) {
- tu_finishme("blit z mirror\n");
- return;
- }
-
- if (info->srcOffsets[1].z - info->srcOffsets[0].z !=
- info->dstOffsets[1].z - info->dstOffsets[0].z) {
- tu_finishme("blit z filter\n");
- return;
- }
- assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);
-
- struct tu_blit blt = {
- .dst = tu_blit_surf(dst_image, info->dstSubresource, info->dstOffsets),
- .src = tu_blit_surf(src_image, info->srcSubresource, info->srcOffsets),
- .layers = MAX2(info->srcOffsets[1].z - info->srcOffsets[0].z,
- info->dstSubresource.layerCount),
- .filter = filter == VK_FILTER_LINEAR,
- .rotation = rotate[mirror_y][mirror_x],
- };
-
- tu_blit(cmdbuf, &cmdbuf->cs, &blt);
-}
-
-void
-tu_CmdBlitImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage destImage,
- VkImageLayout destImageLayout,
- uint32_t regionCount,
- const VkImageBlit *pRegions,
- VkFilter filter)
-
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_image, dst_image, destImage);
-
- tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
- tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
-
- for (uint32_t i = 0; i < regionCount; ++i) {
- tu_blit_image(cmdbuf, src_image, dst_image, pRegions + i, filter);
- }
-}
+++ /dev/null
-#include "tu_private.h"
-#include "tu_blit.h"
-#include "tu_cs.h"
-
-void
-tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize fillSize,
- uint32_t data)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
-
- if (fillSize == VK_WHOLE_SIZE)
- fillSize = buffer->size - dstOffset;
-
- tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
-
- tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
- .dst = {
- .fmt = VK_FORMAT_R32_UINT,
- .va = tu_buffer_iova(buffer) + dstOffset,
- .width = fillSize / 4,
- .height = 1,
- .samples = 1,
- },
- .layers = 1,
- .clear_value[0] = data,
- .type = TU_BLIT_CLEAR,
- .buffer = true,
- });
-}
-
-void
-tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize dataSize,
- const void *pData)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
-
- tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
-
- struct ts_cs_memory tmp;
- VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp);
- if (result != VK_SUCCESS) {
- cmd->record_result = result;
- return;
- }
-
- memcpy(tmp.map, pData, dataSize);
-
- tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
- .dst = {
- .fmt = VK_FORMAT_R32_UINT,
- .va = tu_buffer_iova(buffer) + dstOffset,
- .width = dataSize / 4,
- .height = 1,
- .samples = 1,
- },
- .src = {
- .fmt = VK_FORMAT_R32_UINT,
- .va = tmp.iova,
- .width = dataSize / 4,
- .height = 1,
- .samples = 1,
- },
- .layers = 1,
- .type = TU_BLIT_COPY,
- .buffer = true,
- });
-}
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "tu_private.h"
-#include "tu_blit.h"
-#include "tu_cs.h"
-
-static void
-clear_image(struct tu_cmd_buffer *cmdbuf,
- struct tu_image *image,
- uint32_t clear_value[4],
- const VkImageSubresourceRange *range)
-{
- uint32_t level_count = tu_get_levelCount(image, range);
- uint32_t layer_count = tu_get_layerCount(image, range);
-
- if (image->type == VK_IMAGE_TYPE_3D) {
- assert(layer_count == 1);
- assert(range->baseArrayLayer == 0);
- }
-
- for (unsigned j = 0; j < level_count; j++) {
- if (image->type == VK_IMAGE_TYPE_3D)
- layer_count = u_minify(image->extent.depth, range->baseMipLevel + j);
-
- tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
- .dst = tu_blit_surf_whole(image, range->baseMipLevel + j, range->baseArrayLayer),
- .layers = layer_count,
- .clear_value = {clear_value[0], clear_value[1], clear_value[2], clear_value[3]},
- .type = TU_BLIT_CLEAR,
- });
- }
-}
-
-void
-tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearColorValue *pColor,
- uint32_t rangeCount,
- const VkImageSubresourceRange *pRanges)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_image, image, image_h);
- uint32_t clear_value[4] = {};
-
- tu_2d_clear_color(pColor, image->vk_format, clear_value);
-
- tu_bo_list_add(&cmdbuf->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
-
- for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmdbuf, image, clear_value, pRanges + i);
-}
-
-void
-tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearDepthStencilValue *pDepthStencil,
- uint32_t rangeCount,
- const VkImageSubresourceRange *pRanges)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_image, image, image_h);
- uint32_t clear_value[4] = {};
-
- tu_2d_clear_zs(pDepthStencil, image->vk_format, clear_value);
-
- tu_bo_list_add(&cmdbuf->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
-
- for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmdbuf, image, clear_value, pRanges + i);
-}
-
-void
-tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t attachment,
- const VkClearValue *value,
- const VkClearRect *rect)
-{
- if (!cmd->state.framebuffer) {
- tu_finishme("sysmem CmdClearAttachments in secondary command buffer");
- return;
- }
-
- const struct tu_image_view *iview =
- cmd->state.framebuffer->attachments[attachment].attachment;
-
- uint32_t clear_vals[4] = { 0 };
- if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- tu_2d_clear_zs(&value->depthStencil, iview->vk_format,
- clear_vals);
- } else {
- tu_2d_clear_color(&value->color, iview->vk_format,
- clear_vals);
- }
-
- tu_blit(cmd, cs, &(struct tu_blit) {
- .dst = sysmem_attachment_surf(iview, rect->baseArrayLayer, &rect->rect),
- .layers = rect->layerCount,
- .clear_value = { clear_vals[0], clear_vals[1], clear_vals[2], clear_vals[3] },
- .type = TU_BLIT_CLEAR,
- });
-}
-
-void
-tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t attachment,
- uint8_t component_mask,
- const VkClearValue *value)
-{
- VkFormat fmt = cmd->state.pass->attachments[attachment].format;
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
- tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(fmt)));
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
- tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask));
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
- tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
- tu_cs_emit(cs, 0);
-
- uint32_t clear_vals[4] = { 0 };
- tu_pack_clear_value(value, fmt, clear_vals);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
- tu_cs_emit(cs, clear_vals[0]);
- tu_cs_emit(cs, clear_vals[1]);
- tu_cs_emit(cs, clear_vals[2]);
- tu_cs_emit(cs, clear_vals[3]);
-
- tu6_emit_event_write(cmd, cs, BLIT, false);
-}
-
-void
-tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
- uint32_t attachmentCount,
- const VkClearAttachment *pAttachments,
- uint32_t rectCount,
- const VkClearRect *pRects)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
- const struct tu_subpass *subpass = cmd->state.subpass;
- struct tu_cs *cs = &cmd->draw_cs;
-
- tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
-
- for (unsigned i = 0; i < rectCount; i++) {
- unsigned x1 = pRects[i].rect.offset.x;
- unsigned y1 = pRects[i].rect.offset.y;
- unsigned x2 = x1 + pRects[i].rect.extent.width - 1;
- unsigned y2 = y1 + pRects[i].rect.extent.height - 1;
-
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
- tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
- tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
-
- for (unsigned j = 0; j < attachmentCount; j++) {
- uint32_t a;
- unsigned clear_mask = 0;
- if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- clear_mask = 0xf;
- a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment;
- } else {
- a = subpass->depth_stencil_attachment.attachment;
- if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
- clear_mask |= 1;
- if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
- clear_mask |= 2;
- }
-
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
- tu_clear_gmem_attachment(cmd, cs, a, clear_mask,
- &pAttachments[j].clearValue);
-
- }
- }
-
- tu_cond_exec_end(cs);
-
- tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
-
- for (unsigned i = 0; i < rectCount; i++) {
- for (unsigned j = 0; j < attachmentCount; j++) {
- uint32_t a;
- unsigned clear_mask = 0;
- if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- clear_mask = 0xf;
- a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment;
- } else {
- a = subpass->depth_stencil_attachment.attachment;
- if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
- clear_mask |= 1;
- if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
- clear_mask |= 2;
- if (clear_mask != 3)
- tu_finishme("sysmem depth/stencil only clears");
- }
-
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
- tu_clear_sysmem_attachment(cmd, cs, a,
- &pAttachments[j].clearValue,
- &pRects[i]);
- }
- }
-
- tu_cond_exec_end(cs);
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "tu_private.h"
-
-#include "a6xx.xml.h"
-#include "adreno_common.xml.h"
-#include "adreno_pm4.xml.h"
-
-#include "vk_format.h"
-
-#include "tu_cs.h"
-#include "tu_blit.h"
-
-static void
-tu_copy_buffer(struct tu_cmd_buffer *cmd,
- struct tu_buffer *src,
- struct tu_buffer *dst,
- const VkBufferCopy *region)
-{
- tu_bo_list_add(&cmd->bo_list, src->bo, MSM_SUBMIT_BO_READ);
- tu_bo_list_add(&cmd->bo_list, dst->bo, MSM_SUBMIT_BO_WRITE);
-
- tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
- .dst = {
- .fmt = VK_FORMAT_R8_UNORM,
- .va = tu_buffer_iova(dst) + region->dstOffset,
- .width = region->size,
- .height = 1,
- .samples = 1,
- },
- .src = {
- .fmt = VK_FORMAT_R8_UNORM,
- .va = tu_buffer_iova(src) + region->srcOffset,
- .width = region->size,
- .height = 1,
- .samples = 1,
- },
- .layers = 1,
- .type = TU_BLIT_COPY,
- .buffer = true,
- });
-}
-
-static struct tu_blit_surf
-tu_blit_buffer(struct tu_buffer *buffer,
- VkFormat format,
- const VkBufferImageCopy *info)
-{
- if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
- format = VK_FORMAT_R8_UNORM;
-
- unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
- vk_format_get_blocksize(format);
-
- return (struct tu_blit_surf) {
- .fmt = format,
- .tile_mode = TILE6_LINEAR,
- .va = tu_buffer_iova(buffer) + info->bufferOffset,
- .pitch = pitch,
- .layer_size = (info->bufferImageHeight ?: info->imageExtent.height) * pitch / vk_format_get_blockwidth(format) / vk_format_get_blockheight(format),
- .width = info->imageExtent.width,
- .height = info->imageExtent.height,
- .samples = 1,
- };
-}
-
-static void
-tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
- struct tu_buffer *src_buffer,
- struct tu_image *dst_image,
- const VkBufferImageCopy *info)
-{
- if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
- vk_format_get_blocksize(dst_image->vk_format) == 4) {
- tu_finishme("aspect mask\n");
- return;
- }
-
- tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
- .dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent),
- .src = tu_blit_buffer(src_buffer, dst_image->vk_format, info),
- .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
- .type = TU_BLIT_COPY,
- });
-}
-
-static void
-tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
- struct tu_image *src_image,
- struct tu_buffer *dst_buffer,
- const VkBufferImageCopy *info)
-{
- tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
- .dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info),
- .src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent),
- .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
- .type = TU_BLIT_COPY,
- });
-}
-
-static void
-tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
- struct tu_image *src_image,
- struct tu_image *dst_image,
- const VkImageCopy *info)
-{
- if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
- vk_format_get_blocksize(dst_image->vk_format) == 4) ||
- (info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
- vk_format_get_blocksize(src_image->vk_format) == 4)) {
- tu_finishme("aspect mask\n");
- return;
- }
-
- tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
- .dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
- .src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
- .layers = info->extent.depth,
- .type = TU_BLIT_COPY,
- });
-}
-
-void
-tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
- VkBuffer srcBuffer,
- VkBuffer destBuffer,
- uint32_t regionCount,
- const VkBufferCopy *pRegions)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
- TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
-
- for (unsigned i = 0; i < regionCount; ++i)
- tu_copy_buffer(cmdbuf, src_buffer, dst_buffer, &pRegions[i]);
-}
-
-void
-tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
- VkBuffer srcBuffer,
- VkImage destImage,
- VkImageLayout destImageLayout,
- uint32_t regionCount,
- const VkBufferImageCopy *pRegions)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_image, dst_image, destImage);
- TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
-
- tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
- tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
-
- for (unsigned i = 0; i < regionCount; ++i)
- tu_copy_buffer_to_image(cmdbuf, src_buffer, dst_image, pRegions + i);
-}
-
-void
-tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkBuffer destBuffer,
- uint32_t regionCount,
- const VkBufferImageCopy *pRegions)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
-
- tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
- tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
-
- for (unsigned i = 0; i < regionCount; ++i)
- tu_copy_image_to_buffer(cmdbuf, src_image, dst_buffer, pRegions + i);
-}
-
-void
-tu_CmdCopyImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage destImage,
- VkImageLayout destImageLayout,
- uint32_t regionCount,
- const VkImageCopy *pRegions)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
- TU_FROM_HANDLE(tu_image, src_image, srcImage);
- TU_FROM_HANDLE(tu_image, dst_image, destImage);
-
- tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
- tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
-
- for (uint32_t i = 0; i < regionCount; ++i)
- tu_copy_image_to_image(cmdbuf, src_image, dst_image, pRegions + i);
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "tu_private.h"
-
-#include <assert.h>
-#include <stdbool.h>
-
-#include "nir/nir_builder.h"
-#include "vk_format.h"
-
-#include "tu_blit.h"
-
-static void
-tu_resolve_image(struct tu_cmd_buffer *cmdbuf,
- struct tu_image *src_image,
- struct tu_image *dst_image,
- const VkImageResolve *info)
-{
- assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);
-
- tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
- .dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
- .src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
- .layers = MAX2(info->extent.depth, info->dstSubresource.layerCount)
- });
-}
-
-void
-tu_CmdResolveImage(VkCommandBuffer cmd_buffer_h,
- VkImage src_image_h,
- VkImageLayout src_image_layout,
- VkImage dest_image_h,
- VkImageLayout dest_image_layout,
- uint32_t region_count,
- const VkImageResolve *regions)
-{
- TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, cmd_buffer_h);
- TU_FROM_HANDLE(tu_image, src_image, src_image_h);
- TU_FROM_HANDLE(tu_image, dst_image, dest_image_h);
-
- tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
- tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
-
- for (uint32_t i = 0; i < region_count; ++i)
- tu_resolve_image(cmdbuf, src_image, dst_image, regions + i);
-}
#define GMEM_ALIGN 0x4000
static void
-compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
+compute_gmem_offsets(struct tu_render_pass *pass,
+ const struct tu_physical_device *phys_dev)
{
/* calculate total bytes per pixel */
uint32_t cpp_total = 0;
return;
}
- /* TODO: this algorithm isn't optimal
+ /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
+ * doesn't break things. maybe there is a better solution?
+ * TODO: this algorithm isn't optimal
* for example, two attachments with cpp = {1, 4}
* result: nblocks = {12, 52}, pixels = 196608
* optimal: nblocks = {13, 51}, pixels = 208896
*/
- uint32_t gmem_blocks = gmem_size / GMEM_ALIGN;
+ uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
uint32_t offset = 0, pixels = ~0u;
for (uint32_t i = 0; i < pass->attachment_count; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[i];
*pRenderPass = tu_render_pass_to_handle(pass);
- compute_gmem_offsets(pass, device->physical_device->gmem_size);
+ compute_gmem_offsets(pass, device->physical_device);
return VK_SUCCESS;
}
*pRenderPass = tu_render_pass_to_handle(pass);
- compute_gmem_offsets(pass, device->physical_device->gmem_size);
+ compute_gmem_offsets(pass, device->physical_device);
return VK_SUCCESS;
}
#include "tu_entrypoints.h"
+#include "vk_format.h"
+
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
void
tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]);
+void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
+
+void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);
+
+void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
+
+struct tu_image_view;
+
+void
+tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ struct tu_image_view *src,
+ struct tu_image_view *dst,
+ uint32_t layers,
+ const VkRect2D *rect);
+
+void
+tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t a,
+ const VkRenderPassBeginInfo *info);
+
+void
+tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t a,
+ const VkRenderPassBeginInfo *info);
+
+void
+tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
+
+/* expose this function to be able to emit load without checking LOAD_OP */
+void
+tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
+
+/* note: gmem store can also resolve */
+void
+tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t a,
+ uint32_t gmem_a);
+
struct tu_userdata_info *
tu_lookup_user_sgpr(struct tu_pipeline *pipeline,
gl_shader_stage stage,
return tu6_format_color(format, TILE6_LINEAR).fmt;
}
-void
-tu_pack_clear_value(const VkClearValue *val,
- VkFormat format,
- uint32_t buf[4]);
-
-void
-tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4]);
-
-void
-tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4]);
-
-enum a6xx_2d_ifmt tu6_fmt_to_ifmt(enum a6xx_format fmt);
enum a6xx_depth_format tu6_pipe2depth(VkFormat format);
struct tu_image
return image->layout.slices[level].pitch * image->layout.cpp;
}
+/* Pitch of one mip level, adjusted so compressed formats get the right value.
+ *
+ * Takes the result of tu_image_stride(image, level) and divides it by the
+ * block width reported by vk_format_get_blockwidth() for the image's own
+ * vk_format (note: the image format, not the format of any view created
+ * on top of it).
+ *
+ * NOTE(review): for non-block formats the block width is presumably 1, which
+ * would make this equal to tu_image_stride() - confirm against vk_format.h.
+ */
+static inline uint32_t
+tu_image_pitch(struct tu_image *image, int level)
+{
+ uint32_t stride = tu_image_stride(image, level);
+ return stride / vk_format_get_blockwidth(image->vk_format);
+}
+
static inline uint64_t
tu_image_base(struct tu_image *image, int level, int layer)
{
#define tu_image_view_ubwc_base_ref(iview) \
tu_image_ubwc_base_ref(iview->image, iview->base_mip, iview->base_layer)
+/* Expands to the ".pitch" and ".array_pitch" designated initializers for an
+ * image view's UBWC data: .pitch is tu_image_ubwc_pitch() and .array_pitch is
+ * tu_image_ubwc_size() shifted right by 2, both taken at the view's base mip.
+ * Meant to be used inside a brace-enclosed initializer / argument list.
+ *
+ * The argument is parenthesized on every use so that any pointer-valued
+ * expression (not only a plain identifier) expands with the intended
+ * precedence relative to "->".
+ */
+#define tu_image_view_ubwc_pitches(iview) \
+ .pitch = tu_image_ubwc_pitch((iview)->image, (iview)->base_mip), \
+ .array_pitch = tu_image_ubwc_size((iview)->image, (iview)->base_mip) >> 2
+
enum a6xx_tile_mode
tu6_get_image_tile_mode(struct tu_image *image, int level);
enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples);
+enum a6xx_tex_fetchsize
+tu6_fetchsize(VkFormat format);
static inline struct tu_native_format
tu6_format_image(struct tu_image *image, VkFormat format, uint32_t level)
uint64_t
tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle);
-
-void
-tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t attachment,
- const VkClearValue *value,
- const VkClearRect *rect);
-
-void
-tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t attachment,
- uint8_t component_mask,
- const VkClearValue *value);
-
#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
\
static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \