2 * Copyright © 2019 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Jonathan Marek <jonathan@marek.ca>
31 #include "adreno_common.xml.h"
32 #include "adreno_pm4.xml.h"
34 #include "vk_format.h"
39 * - Avoid disabling tiling for swapped formats
40 * (image_to_image copy doesn't deal with it)
41 * - Fix d24_unorm_s8_uint support & aspects
46 blit_copy_format(VkFormat format
)
48 switch (vk_format_get_blocksizebits(format
)) {
49 case 8: return VK_FORMAT_R8_UINT
;
50 case 16: return VK_FORMAT_R16_UINT
;
51 case 32: return VK_FORMAT_R32_UINT
;
52 case 64: return VK_FORMAT_R32G32_UINT
;
53 case 96: return VK_FORMAT_R32G32B32_UINT
;
54 case 128:return VK_FORMAT_R32G32B32A32_UINT
;
56 unreachable("unhandled format size");
/* Pack the surface-info dword (SP_PS_2D_SRC_INFO / RB_2D_DST_INFO layout)
 * for one blit surface: hardware color format, tile mode, component swap,
 * sRGB bit, and the UBWC "FLAGS" bit when the surface has flag data.
 * src=true marks the source side (10_10_10_2 dest format is demoted to the
 * samplable variant there); Z24S8 is always accessed as raw RGBA8.
 * NOTE(review): the stencil_read parameter is not used in the lines visible
 * in this listing — its handling presumably sits in the elided lines
 * (orig. 70-73); confirm against the full file.
 */
61 blit_image_info(const struct tu_blit_surf
*img
, bool src
, bool stencil_read
)
/* Look up the native (hardware) format and default component swap. */
63 const struct tu_native_format
*fmt
= tu6_get_native_format(img
->fmt
);
64 enum a6xx_format rb
= fmt
->rb
;
/* Tiled surfaces always use the identity swap (WZYX). */
65 enum a3xx_color_swap swap
= img
->tiled
? WZYX
: fmt
->swap
;
/* The "DEST" 10_10_10_2 variant cannot be read; use the samplable one. */
66 if (rb
== FMT6_10_10_10_2_UNORM_DEST
&& src
)
67 rb
= FMT6_10_10_10_2_UNORM
;
/* The 2D engine accesses Z24S8 as raw RGBA8. */
68 if (rb
== FMT6_Z24_UNORM_S8_UINT
)
69 rb
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
/* Assemble the info dword; FLAGS is set only when UBWC data exists. */
74 return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb
) |
75 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img
->tile_mode
) |
76 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap
) |
77 COND(vk_format_is_srgb(img
->fmt
), A6XX_SP_PS_2D_SRC_INFO_SRGB
) |
78 COND(img
->ubwc_size
, A6XX_SP_PS_2D_SRC_INFO_FLAGS
);
/* Emit one 2D-engine blit (CP_BLIT with BLIT_OP_SCALE) into the command
 * stream: blit control, source state (or the solid clear color for
 * TU_BLIT_CLEAR), destination state, source/destination rectangles, and
 * the CP_BLIT packet itself.  Reserves 66 dwords up front.
 * NOTE(review): this listing elides some physical lines (braces, blank
 * lines and a few emitted values, e.g. orig. 129, 189, 192, 210-213);
 * comments below describe only what is visible here.
 */
82 emit_blit_step(struct tu_cmd_buffer
*cmdbuf
, struct tu_cs
*cs
,
83 const struct tu_blit
*blt
)
85 struct tu_physical_device
*phys_dev
= cmdbuf
->device
->physical_device
;
87 tu_cs_reserve_space(cmdbuf
->device
, cs
, 66);
/* Destination hardware format; Z24S8 is written as raw RGBA8. */
89 enum a6xx_format fmt
= tu6_get_native_format(blt
->dst
.fmt
)->rb
;
90 if (fmt
== FMT6_Z24_UNORM_S8_UINT
)
91 fmt
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
/* 2D-engine internal format derived from the hw format; sRGB targets
 * must come out as UNORM8 and are retagged as the sRGB variant. */
93 enum a6xx_2d_ifmt ifmt
= tu6_fmt_to_ifmt(fmt
);
95 if (vk_format_is_srgb(blt
->dst
.fmt
)) {
96 assert(ifmt
== R2D_UNORM8
);
97 ifmt
= R2D_UNORM8_SRGB
;
/* Shared blit control written to both RB and GRAS copies: rotation,
 * solid-color flag for clears, D24S8 flag, full 0xf channel mask. */
100 uint32_t blit_cntl
= A6XX_RB_2D_BLIT_CNTL_ROTATE(blt
->rotation
) |
101 COND(blt
->type
== TU_BLIT_CLEAR
, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR
) |
102 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt
) | /* not required? */
103 COND(fmt
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
, A6XX_RB_2D_BLIT_CNTL_D24S8
) |
104 A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
105 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt
);
107 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_BLIT_CNTL
, 1);
108 tu_cs_emit(cs
, blit_cntl
);
110 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_BLIT_CNTL
, 1);
111 tu_cs_emit(cs
, blit_cntl
);
/* Source: for TU_BLIT_CLEAR the "source" is the 4-dword solid clear
 * color; otherwise emit full source surface state, optionally with MSAA
 * averaging, filtering and UBWC flag buffers. */
116 if (blt
->type
== TU_BLIT_CLEAR
) {
117 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_SRC_SOLID_C0
, 4);
118 tu_cs_emit(cs
, blt
->clear_value
[0]);
119 tu_cs_emit(cs
, blt
->clear_value
[1]);
120 tu_cs_emit(cs
, blt
->clear_value
[2]);
121 tu_cs_emit(cs
, blt
->clear_value
[3]);
123 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_INFO
, 10);
124 tu_cs_emit(cs
, blit_image_info(&blt
->src
, true, blt
->stencil_read
) |
125 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt
->src
.samples
)) |
126 /* TODO: should disable this bit for integer formats ? */
127 COND(blt
->src
.samples
> 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE
) |
128 COND(blt
->filter
, A6XX_SP_PS_2D_SRC_INFO_FILTER
) |
/* NOTE(review): the tail of this expression (orig. 129) is elided. */
/* Source extent is emitted as x+width / y+height (far edge). */
130 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt
->src
.x
+ blt
->src
.width
) |
131 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt
->src
.y
+ blt
->src
.height
));
132 tu_cs_emit_qw(cs
, blt
->src
.va
);
133 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt
->src
.pitch
));
/* Remaining dwords of the 10-dword SRC_INFO group are zero. */
135 tu_cs_emit(cs
, 0x00000000);
136 tu_cs_emit(cs
, 0x00000000);
137 tu_cs_emit(cs
, 0x00000000);
138 tu_cs_emit(cs
, 0x00000000);
139 tu_cs_emit(cs
, 0x00000000);
/* UBWC flag buffer for the source, when present. */
141 if (blt
->src
.ubwc_size
) {
142 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO
, 6);
143 tu_cs_emit_qw(cs
, blt
->src
.ubwc_va
);
144 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt
->src
.ubwc_pitch
) |
145 A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt
->src
.ubwc_size
>> 2));
146 tu_cs_emit(cs
, 0x00000000);
147 tu_cs_emit(cs
, 0x00000000);
148 tu_cs_emit(cs
, 0x00000000);
/* Destination surface state (RB_2D_DST_INFO group, 9 dwords). */
155 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 9);
156 tu_cs_emit(cs
, blit_image_info(&blt
->dst
, false, false));
157 tu_cs_emit_qw(cs
, blt
->dst
.va
);
158 tu_cs_emit(cs
, A6XX_RB_2D_DST_SIZE_PITCH(blt
->dst
.pitch
));
159 tu_cs_emit(cs
, 0x00000000);
160 tu_cs_emit(cs
, 0x00000000);
161 tu_cs_emit(cs
, 0x00000000);
162 tu_cs_emit(cs
, 0x00000000);
163 tu_cs_emit(cs
, 0x00000000);
/* UBWC flag buffer for the destination, when present. */
165 if (blt
->dst
.ubwc_size
) {
166 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_FLAGS_LO
, 6);
167 tu_cs_emit_qw(cs
, blt
->dst
.ubwc_va
);
168 tu_cs_emit(cs
, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt
->dst
.ubwc_pitch
) |
169 A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt
->dst
.ubwc_size
>> 2));
170 tu_cs_emit(cs
, 0x00000000);
171 tu_cs_emit(cs
, 0x00000000);
172 tu_cs_emit(cs
, 0x00000000);
/* Source and destination rectangles; BR coordinates are inclusive. */
175 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_SRC_TL_X
, 4);
176 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_TL_X_X(blt
->src
.x
));
177 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_BR_X_X(blt
->src
.x
+ blt
->src
.width
- 1));
178 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_TL_Y_Y(blt
->src
.y
));
179 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_BR_Y_Y(blt
->src
.y
+ blt
->src
.height
- 1));
181 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_DST_TL
, 2);
182 tu_cs_emit(cs
, A6XX_GRAS_2D_DST_TL_X(blt
->dst
.x
) |
183 A6XX_GRAS_2D_DST_TL_Y(blt
->dst
.y
));
184 tu_cs_emit(cs
, A6XX_GRAS_2D_DST_BR_X(blt
->dst
.x
+ blt
->dst
.width
- 1) |
185 A6XX_GRAS_2D_DST_BR_Y(blt
->dst
.y
+ blt
->dst
.height
- 1));
/* NOTE(review): raw CP_EVENT_WRITE payload 0x3f — meaning not evident
 * from this listing; confirm against the a6xx event definitions. */
187 tu_cs_emit_pkt7(cs
, CP_EVENT_WRITE
, 1);
188 tu_cs_emit(cs
, 0x3f);
/* NOTE(review): the value written to RB_UNKNOWN_8C01 (orig. 192) is
 * elided from this listing. */
191 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8C01
, 1);
/* 10_10_10_2 destinations are sourced by the 2D engine as 16F/channel. */
194 if (fmt
== FMT6_10_10_10_2_UNORM_DEST
)
195 fmt
= FMT6_16_16_16_16_FLOAT
;
196 
197 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_2D_SRC_FORMAT
, 1);
198 tu_cs_emit(cs
, COND(vk_format_is_sint(blt
->src
.fmt
), A6XX_SP_2D_SRC_FORMAT_SINT
) |
199 COND(vk_format_is_uint(blt
->src
.fmt
), A6XX_SP_2D_SRC_FORMAT_UINT
) |
200 A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt
) |
201 COND(ifmt
== R2D_UNORM8_SRGB
, A6XX_SP_2D_SRC_FORMAT_SRGB
) |
202 A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
/* Per-GPU tuning value for RB_UNKNOWN_8E04 while blitting. */
204 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8E04
, 1);
205 tu_cs_emit(cs
, phys_dev
->magic
.RB_UNKNOWN_8E04_blit
);
/* Fire the blit. */
207 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
208 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
/* Restore RB_UNKNOWN_8E04 afterwards.
 * NOTE(review): the restored value (orig. 213) is elided here. */
212 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8E04
, 1);
/* Driver entry point for 2D blits, copies and clears.  Normalizes the
 * request in place (compressed formats become equal-block-size R*_UINT
 * copy formats, E5B9G9R9 is handled via a raw format, multisampled
 * surfaces are flattened into wider single-sample ones), emits pre-blit
 * cache events, then walks the layers choosing a per-span or per-line
 * path whenever a buffer address or pitch is not 64-byte aligned, and
 * finishes with flush/invalidate events.
 * NOTE(review): several physical lines are elided from this listing —
 * the tail of the parameter list (orig. 217), switch labels, braces and
 * blank lines; comments below describe only what is visible here.
 */
216 void tu_blit(struct tu_cmd_buffer
*cmdbuf
, struct tu_cs
*cs
,
/* Copy path setup: this comparison appears to feed the stencil_read flag
 * (reading D24S8 stencil out as R8) — the assignment line (orig. 220-221)
 * is elided; confirm against the full file. */
222 blt
->dst
.fmt
== VK_FORMAT_R8_UNORM
&&
223 blt
->src
.fmt
== VK_FORMAT_D24_UNORM_S8_UINT
;
/* Copies require matching block sizes (except the stencil-read case)
 * and matching sample counts. */
225 assert(vk_format_get_blocksize(blt
->dst
.fmt
) ==
226 vk_format_get_blocksize(blt
->src
.fmt
) || blt
->stencil_read
);
227 assert(blt
->src
.samples
== blt
->dst
.samples
);
/* Compressed source: convert coordinates/pitch to block units and switch
 * to a raw copy format of the same block size. */
229 if (vk_format_is_compressed(blt
->src
.fmt
)) {
230 unsigned block_width
= vk_format_get_blockwidth(blt
->src
.fmt
);
231 unsigned block_height
= vk_format_get_blockheight(blt
->src
.fmt
);
233 blt
->src
.pitch
/= block_width
;
234 blt
->src
.x
/= block_width
;
235 blt
->src
.y
/= block_height
;
236 blt
->src
.fmt
= blit_copy_format(blt
->src
.fmt
);
238 /* for image_to_image copy, width/height is on the src format */
239 blt
->dst
.width
= blt
->src
.width
= DIV_ROUND_UP(blt
->src
.width
, block_width
);
240 blt
->dst
.height
= blt
->src
.height
= DIV_ROUND_UP(blt
->src
.height
, block_height
);
/* Compressed destination: same block-unit normalization. */
243 if (vk_format_is_compressed(blt
->dst
.fmt
)) {
244 unsigned block_width
= vk_format_get_blockwidth(blt
->dst
.fmt
);
245 unsigned block_height
= vk_format_get_blockheight(blt
->dst
.fmt
);
247 blt
->dst
.pitch
/= block_width
;
248 blt
->dst
.x
/= block_width
;
249 blt
->dst
.y
/= block_height
;
250 blt
->dst
.fmt
= blit_copy_format(blt
->dst
.fmt
);
/* E5B9G9R9 is copied through its same-size raw format on either side. */
253 if (blt
->dst
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
254 blt
->dst
.fmt
= blit_copy_format(blt
->dst
.fmt
);
256 if (blt
->src
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
257 blt
->src
.fmt
= blit_copy_format(blt
->src
.fmt
);
/* Flatten MSAA copies into single-sample copies of samples-times the
 * width (x and width scaled on both sides). */
259 /* TODO: multisample image copy does not work correctly with tiling/UBWC */
260 blt
->src
.x
*= blt
->src
.samples
;
261 blt
->dst
.x
*= blt
->dst
.samples
;
262 blt
->src
.width
*= blt
->src
.samples
;
263 blt
->dst
.width
*= blt
->dst
.samples
;
264 blt
->src
.samples
= 1;
265 blt
->dst
.samples
= 1;
/* Clear path: fall back to a raw R32_UINT clear for E5B9G9R9 and flatten
 * MSAA destinations the same way as above. */
268 /* unsupported format cleared as UINT32 */
269 if (blt
->dst
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
270 blt
->dst
.fmt
= VK_FORMAT_R32_UINT
;
271 /* TODO: multisample image clearing also seems not to work with certain
272 * formats. The blob uses a shader-based clear in these cases.
274 blt
->dst
.x
*= blt
->dst
.samples
;
275 blt
->dst
.width
*= blt
->dst
.samples
;
276 blt
->dst
.samples
= 1;
/* Remaining blit types must already be single-sample. */
280 assert(blt
->dst
.samples
== 1);
/* Pre-blit events: LRZ flush, faceness flush, CCU invalidates. */
283 tu_cs_reserve_space(cmdbuf
->device
, cs
, 18);
285 tu6_emit_event_write(cmdbuf
, cs
, LRZ_FLUSH
, false);
286 tu6_emit_event_write(cmdbuf
, cs
, 0x1d, true);
287 tu6_emit_event_write(cmdbuf
, cs
, FACENESS_FLUSH
, true);
288 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_INVALIDATE_COLOR
, false);
289 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_INVALIDATE_DEPTH
, false);
291 /* buffer copy setup */
292 tu_cs_emit_pkt7(cs
, CP_SET_MARKER
, 1);
293 tu_cs_emit(cs
, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE
));
/* Per-layer emission; blt is advanced to the next layer at the bottom. */
295 for (unsigned layer
= 0; layer
< blt
->layers
; layer
++) {
/* Unaligned-buffer span path: split the copy into 64-byte-aligned
 * spans of at most 0x4000 texels on either side.
 * NOTE(review): the enclosing condition (orig. 296) and loop header
 * (orig. 301-302) are elided from this listing. */
297 struct tu_blit line_blt
= *blt
;
298 uint64_t dst_va
= line_blt
.dst
.va
, src_va
= line_blt
.src
.va
;
299 unsigned blocksize
= vk_format_get_blocksize(blt
->src
.fmt
);
300 uint32_t size
= line_blt
.src
.width
, tmp
;
/* Align the source VA down to 64 bytes, folding the slack into x. */
303 line_blt
.src
.x
= (src_va
& 63) / blocksize
;
304 line_blt
.src
.va
= src_va
& ~63;
305 tmp
= MIN2(size
, 0x4000 - line_blt
.src
.x
);
/* Same alignment treatment for the destination VA. */
307 line_blt
.dst
.x
= (dst_va
& 63) / blocksize
;
308 line_blt
.dst
.va
= dst_va
& ~63;
309 tmp
= MIN2(tmp
, 0x4000 - line_blt
.dst
.x
);
311 line_blt
.src
.width
= line_blt
.dst
.width
= tmp
;
313 emit_blit_step(cmdbuf
, cs
, &line_blt
);
/* Advance both VAs by the span just copied. */
315 src_va
+= tmp
* blocksize
;
316 dst_va
+= tmp
* blocksize
;
/* Unaligned linear source: copy one row per blit step. */
319 } else if ((blt
->src
.va
& 63) || (blt
->src
.pitch
& 63)) {
320 /* per line copy path (buffer_to_image) */
321 assert(blt
->type
== TU_BLIT_COPY
&& !blt
->src
.tiled
);
322 struct tu_blit line_blt
= *blt
;
323 uint64_t src_va
= line_blt
.src
.va
+ blt
->src
.pitch
* blt
->src
.y
;
/* Each step copies exactly one row; pitch is unused within a row. */
326 line_blt
.src
.pitch
= 0;
327 line_blt
.src
.height
= 1;
328 line_blt
.dst
.height
= 1;
330 for (unsigned y
= 0; y
< blt
->src
.height
; y
++) {
/* Align the row VA to 64 bytes, folding the slack into x. */
331 line_blt
.src
.x
= blt
->src
.x
+ (src_va
& 63) / vk_format_get_blocksize(blt
->src
.fmt
);
332 line_blt
.src
.va
= src_va
& ~63;
334 emit_blit_step(cmdbuf
, cs
, &line_blt
);
337 src_va
+= blt
->src
.pitch
;
/* Unaligned linear destination: symmetric per-row path. */
339 } else if ((blt
->dst
.va
& 63) || (blt
->dst
.pitch
& 63)) {
340 /* per line copy path (image_to_buffer) */
341 assert(blt
->type
== TU_BLIT_COPY
&& !blt
->dst
.tiled
);
342 struct tu_blit line_blt
= *blt
;
343 uint64_t dst_va
= line_blt
.dst
.va
+ blt
->dst
.pitch
* blt
->dst
.y
;
346 line_blt
.dst
.pitch
= 0;
347 line_blt
.src
.height
= 1;
348 line_blt
.dst
.height
= 1;
350 for (unsigned y
= 0; y
< blt
->src
.height
; y
++) {
351 line_blt
.dst
.x
= blt
->dst
.x
+ (dst_va
& 63) / vk_format_get_blocksize(blt
->dst
.fmt
);
352 line_blt
.dst
.va
= dst_va
& ~63;
354 emit_blit_step(cmdbuf
, cs
, &line_blt
);
357 dst_va
+= blt
->dst
.pitch
;
/* Aligned fast path: a single blit step handles the whole layer.
 * NOTE(review): the enclosing "} else {" (orig. 359) is elided. */
360 emit_blit_step(cmdbuf
, cs
, blt
);
/* Advance to the next array layer (data and UBWC flag addresses). */
362 blt
->dst
.va
+= blt
->dst
.layer_size
;
363 blt
->src
.va
+= blt
->src
.layer_size
;
364 blt
->dst
.ubwc_va
+= blt
->dst
.ubwc_size
;
365 blt
->src
.ubwc_va
+= blt
->src
.ubwc_size
;
/* Post-blit flush/invalidate events. */
368 tu_cs_reserve_space(cmdbuf
->device
, cs
, 17);
370 tu6_emit_event_write(cmdbuf
, cs
, 0x1d, true);
371 tu6_emit_event_write(cmdbuf
, cs
, FACENESS_FLUSH
, true);
372 tu6_emit_event_write(cmdbuf
, cs
, CACHE_FLUSH_TS
, true);
373 tu6_emit_event_write(cmdbuf
, cs
, CACHE_INVALIDATE
, false);