/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 *
 */

/* NOTE(review): include lines at original positions 28, 30 and 36 were lost
 * in extraction and are not restored here -- confirm against the real file.
 */
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"

#include "vk_format.h"

/* TODO:
 *   - Avoid disabling tiling for swapped formats
 *     (image_to_image copy doesn't deal with it)
 *   - Fix d24_unorm_s8_uint support & aspects
 */

46 blit_copy_format(VkFormat format
)
48 switch (vk_format_get_blocksizebits(format
)) {
49 case 8: return VK_FORMAT_R8_UINT
;
50 case 16: return VK_FORMAT_R16_UINT
;
51 case 32: return VK_FORMAT_R32_UINT
;
52 case 64: return VK_FORMAT_R32G32_UINT
;
53 case 96: return VK_FORMAT_R32G32B32_UINT
;
54 case 128:return VK_FORMAT_R32G32B32A32_UINT
;
56 unreachable("unhandled format size");
61 blit_image_info(const struct tu_blit_surf
*img
, bool src
, bool stencil_read
)
63 const struct tu_native_format
*fmt
= tu6_get_native_format(img
->fmt
);
64 enum a6xx_format rb
= fmt
->rb
;
65 enum a3xx_color_swap swap
= img
->tiled
? WZYX
: fmt
->swap
;
66 if (rb
== FMT6_10_10_10_2_UNORM_DEST
&& src
)
67 rb
= FMT6_10_10_10_2_UNORM
;
68 if (rb
== FMT6_Z24_UNORM_S8_UINT
)
69 rb
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
74 return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb
) |
75 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img
->tile_mode
) |
76 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap
) |
77 COND(vk_format_is_srgb(img
->fmt
), A6XX_SP_PS_2D_SRC_INFO_SRGB
) |
78 COND(img
->ubwc_size
, A6XX_SP_PS_2D_SRC_INFO_FLAGS
);
82 emit_blit_step(struct tu_cmd_buffer
*cmdbuf
, const struct tu_blit
*blt
)
84 struct tu_physical_device
*phys_dev
= cmdbuf
->device
->physical_device
;
85 struct tu_cs
*cs
= &cmdbuf
->cs
;
87 tu_cs_reserve_space(cmdbuf
->device
, cs
, 66);
89 enum a6xx_format fmt
= tu6_get_native_format(blt
->dst
.fmt
)->rb
;
90 if (fmt
== FMT6_Z24_UNORM_S8_UINT
)
91 fmt
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
93 enum a6xx_2d_ifmt ifmt
= tu6_fmt_to_ifmt(fmt
);
95 if (vk_format_is_srgb(blt
->dst
.fmt
)) {
96 assert(ifmt
== R2D_UNORM8
);
97 ifmt
= R2D_UNORM8_SRGB
;
100 uint32_t blit_cntl
= A6XX_RB_2D_BLIT_CNTL_ROTATE(blt
->rotation
) |
101 COND(blt
->type
== TU_BLIT_CLEAR
, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR
) |
102 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt
) | /* not required? */
103 COND(fmt
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
, A6XX_RB_2D_BLIT_CNTL_D24S8
) |
104 A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
105 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt
);
107 tu_cs_emit_pkt4(&cmdbuf
->cs
, REG_A6XX_RB_2D_BLIT_CNTL
, 1);
108 tu_cs_emit(&cmdbuf
->cs
, blit_cntl
);
110 tu_cs_emit_pkt4(&cmdbuf
->cs
, REG_A6XX_GRAS_2D_BLIT_CNTL
, 1);
111 tu_cs_emit(&cmdbuf
->cs
, blit_cntl
);
116 if (blt
->type
== TU_BLIT_CLEAR
) {
117 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_SRC_SOLID_C0
, 4);
118 tu_cs_emit(cs
, blt
->clear_value
[0]);
119 tu_cs_emit(cs
, blt
->clear_value
[1]);
120 tu_cs_emit(cs
, blt
->clear_value
[2]);
121 tu_cs_emit(cs
, blt
->clear_value
[3]);
123 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_INFO
, 10);
124 tu_cs_emit(cs
, blit_image_info(&blt
->src
, true, blt
->stencil_read
) |
125 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt
->src
.samples
)) |
126 /* TODO: should disable this bit for integer formats ? */
127 COND(blt
->src
.samples
> 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE
) |
128 COND(blt
->filter
, A6XX_SP_PS_2D_SRC_INFO_FILTER
) |
130 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt
->src
.x
+ blt
->src
.width
) |
131 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt
->src
.y
+ blt
->src
.height
));
132 tu_cs_emit_qw(cs
, blt
->src
.va
);
133 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt
->src
.pitch
));
135 tu_cs_emit(cs
, 0x00000000);
136 tu_cs_emit(cs
, 0x00000000);
137 tu_cs_emit(cs
, 0x00000000);
138 tu_cs_emit(cs
, 0x00000000);
139 tu_cs_emit(cs
, 0x00000000);
141 if (blt
->src
.ubwc_size
) {
142 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO
, 6);
143 tu_cs_emit_qw(cs
, blt
->src
.ubwc_va
);
144 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt
->src
.ubwc_pitch
) |
145 A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt
->src
.ubwc_size
>> 2));
146 tu_cs_emit(cs
, 0x00000000);
147 tu_cs_emit(cs
, 0x00000000);
148 tu_cs_emit(cs
, 0x00000000);
155 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 9);
156 tu_cs_emit(cs
, blit_image_info(&blt
->dst
, false, false));
157 tu_cs_emit_qw(cs
, blt
->dst
.va
);
158 tu_cs_emit(cs
, A6XX_RB_2D_DST_SIZE_PITCH(blt
->dst
.pitch
));
159 tu_cs_emit(cs
, 0x00000000);
160 tu_cs_emit(cs
, 0x00000000);
161 tu_cs_emit(cs
, 0x00000000);
162 tu_cs_emit(cs
, 0x00000000);
163 tu_cs_emit(cs
, 0x00000000);
165 if (blt
->dst
.ubwc_size
) {
166 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_FLAGS_LO
, 6);
167 tu_cs_emit_qw(cs
, blt
->dst
.ubwc_va
);
168 tu_cs_emit(cs
, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt
->dst
.ubwc_pitch
) |
169 A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt
->dst
.ubwc_size
>> 2));
170 tu_cs_emit(cs
, 0x00000000);
171 tu_cs_emit(cs
, 0x00000000);
172 tu_cs_emit(cs
, 0x00000000);
175 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_SRC_TL_X
, 4);
176 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_TL_X_X(blt
->src
.x
));
177 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_BR_X_X(blt
->src
.x
+ blt
->src
.width
- 1));
178 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_TL_Y_Y(blt
->src
.y
));
179 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_BR_Y_Y(blt
->src
.y
+ blt
->src
.height
- 1));
181 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_DST_TL
, 2);
182 tu_cs_emit(cs
, A6XX_GRAS_2D_DST_TL_X(blt
->dst
.x
) |
183 A6XX_GRAS_2D_DST_TL_Y(blt
->dst
.y
));
184 tu_cs_emit(cs
, A6XX_GRAS_2D_DST_BR_X(blt
->dst
.x
+ blt
->dst
.width
- 1) |
185 A6XX_GRAS_2D_DST_BR_Y(blt
->dst
.y
+ blt
->dst
.height
- 1));
187 tu_cs_emit_pkt7(cs
, CP_EVENT_WRITE
, 1);
188 tu_cs_emit(cs
, 0x3f);
191 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8C01
, 1);
194 if (fmt
== FMT6_10_10_10_2_UNORM_DEST
)
195 fmt
= FMT6_16_16_16_16_FLOAT
;
197 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_2D_SRC_FORMAT
, 1);
198 tu_cs_emit(cs
, COND(vk_format_is_sint(blt
->src
.fmt
), A6XX_SP_2D_SRC_FORMAT_SINT
) |
199 COND(vk_format_is_uint(blt
->src
.fmt
), A6XX_SP_2D_SRC_FORMAT_UINT
) |
200 A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt
) |
201 COND(ifmt
== R2D_UNORM8_SRGB
, A6XX_SP_2D_SRC_FORMAT_SRGB
) |
202 A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
204 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8E04
, 1);
205 tu_cs_emit(cs
, phys_dev
->magic
.RB_UNKNOWN_8E04_blit
);
207 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
208 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
212 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8E04
, 1);
216 void tu_blit(struct tu_cmd_buffer
*cmdbuf
, struct tu_blit
*blt
)
221 blt
->dst
.fmt
== VK_FORMAT_R8_UNORM
&&
222 blt
->src
.fmt
== VK_FORMAT_D24_UNORM_S8_UINT
;
224 assert(vk_format_get_blocksize(blt
->dst
.fmt
) ==
225 vk_format_get_blocksize(blt
->src
.fmt
) || blt
->stencil_read
);
226 assert(blt
->src
.samples
== blt
->dst
.samples
);
228 if (vk_format_is_compressed(blt
->src
.fmt
)) {
229 unsigned block_width
= vk_format_get_blockwidth(blt
->src
.fmt
);
230 unsigned block_height
= vk_format_get_blockheight(blt
->src
.fmt
);
232 blt
->src
.pitch
/= block_width
;
233 blt
->src
.x
/= block_width
;
234 blt
->src
.y
/= block_height
;
235 blt
->src
.fmt
= blit_copy_format(blt
->src
.fmt
);
237 /* for image_to_image copy, width/height is on the src format */
238 blt
->dst
.width
= blt
->src
.width
= DIV_ROUND_UP(blt
->src
.width
, block_width
);
239 blt
->dst
.height
= blt
->src
.height
= DIV_ROUND_UP(blt
->src
.height
, block_height
);
242 if (vk_format_is_compressed(blt
->dst
.fmt
)) {
243 unsigned block_width
= vk_format_get_blockwidth(blt
->dst
.fmt
);
244 unsigned block_height
= vk_format_get_blockheight(blt
->dst
.fmt
);
246 blt
->dst
.pitch
/= block_width
;
247 blt
->dst
.x
/= block_width
;
248 blt
->dst
.y
/= block_height
;
249 blt
->dst
.fmt
= blit_copy_format(blt
->dst
.fmt
);
252 if (blt
->dst
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
253 blt
->dst
.fmt
= blit_copy_format(blt
->dst
.fmt
);
255 if (blt
->src
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
256 blt
->src
.fmt
= blit_copy_format(blt
->src
.fmt
);
258 /* TODO: multisample image copy does not work correctly with tiling/UBWC */
259 blt
->src
.x
*= blt
->src
.samples
;
260 blt
->dst
.x
*= blt
->dst
.samples
;
261 blt
->src
.width
*= blt
->src
.samples
;
262 blt
->dst
.width
*= blt
->dst
.samples
;
263 blt
->src
.samples
= 1;
264 blt
->dst
.samples
= 1;
267 /* unsupported format cleared as UINT32 */
268 if (blt
->dst
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
269 blt
->dst
.fmt
= VK_FORMAT_R32_UINT
;
270 assert(blt
->dst
.samples
== 1); /* TODO */
274 assert(blt
->dst
.samples
== 1);
277 tu_cs_reserve_space(cmdbuf
->device
, &cmdbuf
->cs
, 18);
279 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, LRZ_FLUSH
, false);
280 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, 0x1d, true);
281 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, FACENESS_FLUSH
, true);
282 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, PC_CCU_INVALIDATE_COLOR
, false);
283 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, PC_CCU_INVALIDATE_DEPTH
, false);
285 /* buffer copy setup */
286 tu_cs_emit_pkt7(&cmdbuf
->cs
, CP_SET_MARKER
, 1);
287 tu_cs_emit(&cmdbuf
->cs
, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE
));
289 for (unsigned layer
= 0; layer
< blt
->layers
; layer
++) {
291 struct tu_blit line_blt
= *blt
;
292 uint64_t dst_va
= line_blt
.dst
.va
, src_va
= line_blt
.src
.va
;
293 unsigned blocksize
= vk_format_get_blocksize(blt
->src
.fmt
);
294 uint32_t size
= line_blt
.src
.width
, tmp
;
297 line_blt
.src
.x
= (src_va
& 63) / blocksize
;
298 line_blt
.src
.va
= src_va
& ~63;
299 tmp
= MIN2(size
, 0x4000 - line_blt
.src
.x
);
301 line_blt
.dst
.x
= (dst_va
& 63) / blocksize
;
302 line_blt
.dst
.va
= dst_va
& ~63;
303 tmp
= MIN2(tmp
, 0x4000 - line_blt
.dst
.x
);
305 line_blt
.src
.width
= line_blt
.dst
.width
= tmp
;
307 emit_blit_step(cmdbuf
, &line_blt
);
309 src_va
+= tmp
* blocksize
;
310 dst_va
+= tmp
* blocksize
;
313 } else if ((blt
->src
.va
& 63) || (blt
->src
.pitch
& 63)) {
314 /* per line copy path (buffer_to_image) */
315 assert(blt
->type
== TU_BLIT_COPY
&& !blt
->src
.tiled
);
316 struct tu_blit line_blt
= *blt
;
317 uint64_t src_va
= line_blt
.src
.va
+ blt
->src
.pitch
* blt
->src
.y
;
320 line_blt
.src
.pitch
= 0;
321 line_blt
.src
.height
= 1;
322 line_blt
.dst
.height
= 1;
324 for (unsigned y
= 0; y
< blt
->src
.height
; y
++) {
325 line_blt
.src
.x
= blt
->src
.x
+ (src_va
& 63) / vk_format_get_blocksize(blt
->src
.fmt
);
326 line_blt
.src
.va
= src_va
& ~63;
328 emit_blit_step(cmdbuf
, &line_blt
);
331 src_va
+= blt
->src
.pitch
;
333 } else if ((blt
->dst
.va
& 63) || (blt
->dst
.pitch
& 63)) {
334 /* per line copy path (image_to_buffer) */
335 assert(blt
->type
== TU_BLIT_COPY
&& !blt
->dst
.tiled
);
336 struct tu_blit line_blt
= *blt
;
337 uint64_t dst_va
= line_blt
.dst
.va
+ blt
->dst
.pitch
* blt
->dst
.y
;
340 line_blt
.dst
.pitch
= 0;
341 line_blt
.src
.height
= 1;
342 line_blt
.dst
.height
= 1;
344 for (unsigned y
= 0; y
< blt
->src
.height
; y
++) {
345 line_blt
.dst
.x
= blt
->dst
.x
+ (dst_va
& 63) / vk_format_get_blocksize(blt
->dst
.fmt
);
346 line_blt
.dst
.va
= dst_va
& ~63;
348 emit_blit_step(cmdbuf
, &line_blt
);
351 dst_va
+= blt
->dst
.pitch
;
354 emit_blit_step(cmdbuf
, blt
);
356 blt
->dst
.va
+= blt
->dst
.layer_size
;
357 blt
->src
.va
+= blt
->src
.layer_size
;
358 blt
->dst
.ubwc_va
+= blt
->dst
.ubwc_size
;
359 blt
->src
.ubwc_va
+= blt
->src
.ubwc_size
;
362 tu_cs_reserve_space(cmdbuf
->device
, &cmdbuf
->cs
, 17);
364 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, 0x1d, true);
365 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, FACENESS_FLUSH
, true);
366 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, CACHE_FLUSH_TS
, true);
367 tu6_emit_event_write(cmdbuf
, &cmdbuf
->cs
, CACHE_INVALIDATE
, false);