/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
31 #include "adreno_common.xml.h"
32 #include "adreno_pm4.xml.h"
34 #include "vk_format.h"
/* TODO:
 *   - Avoid disabling tiling for swapped formats
 *     (image_to_image copy doesn't deal with it)
 *   - Fix d24_unorm_s8_uint support & aspects
 */
46 blit_copy_format(VkFormat format
)
48 switch (vk_format_get_blocksizebits(format
)) {
49 case 8: return VK_FORMAT_R8_UINT
;
50 case 16: return VK_FORMAT_R16_UINT
;
51 case 32: return VK_FORMAT_R32_UINT
;
52 case 64: return VK_FORMAT_R32G32_UINT
;
53 case 96: return VK_FORMAT_R32G32B32_UINT
;
54 case 128:return VK_FORMAT_R32G32B32A32_UINT
;
56 unreachable("unhandled format size");
/*
 * blit_image_info: build the packed "info" word describing one blit surface.
 *
 * img          - surface whose tile_mode, fmt and ubwc_size are read.
 * fmt          - native format, passed by value; fmt.fmt and fmt.swap are
 *                encoded into the result.
 * stencil_read - not referenced by any line visible in this extract.
 *                NOTE(review): original lines 65-68 are missing here and
 *                presumably consume this flag; confirm against the full file.
 *
 * The result ORs together the colour format, tile mode and colour swap
 * fields, plus the SRGB bit when img->fmt is an sRGB format and the FLAGS bit
 * when img->ubwc_size is non-zero.  FMT6_Z24_UNORM_S8_UINT is first replaced
 * by FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8.
 *
 * The word is assembled with the A6XX_SP_PS_2D_SRC_INFO_* macros, but
 * emit_blit_step also emits the returned value as the first dword written at
 * REG_A6XX_RB_2D_DST_INFO, so the same encoding is used for the destination.
 */
61 blit_image_info(const struct tu_blit_surf
*img
, struct tu_native_format fmt
, bool stencil_read
)
63 if (fmt
.fmt
== FMT6_Z24_UNORM_S8_UINT
)
64 fmt
.fmt
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
69 return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt
.fmt
) |
70 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img
->tile_mode
) |
71 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(fmt
.swap
) |
72 COND(vk_format_is_srgb(img
->fmt
), A6XX_SP_PS_2D_SRC_INFO_SRGB
) |
73 COND(img
->ubwc_size
, A6XX_SP_PS_2D_SRC_INFO_FLAGS
);
/*
 * emit_blit_step: write the command-stream packets for a single 2D blit.
 *
 * cmdbuf - command buffer; only cmdbuf->device->physical_device is read here
 *          (for the magic RB_UNKNOWN_8E04_blit value).
 * cs     - command stream that receives every packet.
 * blt    - read-only description of the blit (type, rotation, filter,
 *          clear_value, stencil_read, and the src/dst surfaces).
 *
 * Order of emission as visible in this block:
 *   1. RB_2D_BLIT_CNTL and GRAS_2D_BLIT_CNTL, both with the same control word.
 *   2. For TU_BLIT_CLEAR, four clear_value dwords at RB_2D_SRC_SOLID_C0.
 *   3. Source description (10 dwords at SP_PS_2D_SRC_INFO), plus 6 dwords at
 *      SP_PS_2D_SRC_FLAGS_LO when the source has a non-zero ubwc_size.
 *   4. Destination description (9 dwords at RB_2D_DST_INFO), plus 6 dwords at
 *      RB_2D_DST_FLAGS_LO when the destination has a non-zero ubwc_size.
 *   5. Source and destination rectangles; bottom-right corners are inclusive
 *      (origin + extent - 1).
 *   6. A CP_EVENT_WRITE, RB_UNKNOWN_8C01, SP_2D_SRC_FORMAT, RB_UNKNOWN_8E04,
 *      then the CP_BLIT packet with BLIT_OP_SCALE, then RB_UNKNOWN_8E04 again.
 *
 * NOTE(review): this extract has lost several short original lines, e.g. the
 * end of the TU_BLIT_CLEAR branch, the tail of the SRC_INFO expression
 * (original line 125), and the payload dwords following RB_UNKNOWN_8C01 and
 * the second RB_UNKNOWN_8E04.  Consult the full file before changing any
 * packet length.
 */
77 emit_blit_step(struct tu_cmd_buffer
*cmdbuf
, struct tu_cs
*cs
,
78 const struct tu_blit
*blt
)
80 struct tu_physical_device
*phys_dev
= cmdbuf
->device
->physical_device
;
82 struct tu_native_format dfmt
= tu6_format_color(blt
->dst
.fmt
, blt
->dst
.tiled
);
83 struct tu_native_format sfmt
= tu6_format_texture(blt
->src
.fmt
, blt
->src
.tiled
);
85 if (dfmt
.fmt
== FMT6_Z24_UNORM_S8_UINT
)
86 dfmt
.fmt
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
88 enum a6xx_2d_ifmt ifmt
= tu6_fmt_to_ifmt(dfmt
.fmt
);
90 if (vk_format_is_srgb(blt
->dst
.fmt
)) {
91 assert(ifmt
== R2D_UNORM8
);
92 ifmt
= R2D_UNORM8_SRGB
;
/* Control word shared by the RB_2D_BLIT_CNTL and GRAS_2D_BLIT_CNTL writes below. */
95 uint32_t blit_cntl
= A6XX_RB_2D_BLIT_CNTL_ROTATE(blt
->rotation
) |
96 COND(blt
->type
== TU_BLIT_CLEAR
, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR
) |
97 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt
.fmt
) | /* not required? */
98 COND(dfmt
.fmt
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
,
99 A6XX_RB_2D_BLIT_CNTL_D24S8
) |
100 A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
101 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt
);
103 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_BLIT_CNTL
, 1);
104 tu_cs_emit(cs
, blit_cntl
);
106 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_BLIT_CNTL
, 1);
107 tu_cs_emit(cs
, blit_cntl
);
/* Clear: the four clear_value dwords become the solid source colour. */
112 if (blt
->type
== TU_BLIT_CLEAR
) {
113 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_SRC_SOLID_C0
, 4);
114 tu_cs_emit(cs
, blt
->clear_value
[0]);
115 tu_cs_emit(cs
, blt
->clear_value
[1]);
116 tu_cs_emit(cs
, blt
->clear_value
[2]);
117 tu_cs_emit(cs
, blt
->clear_value
[3]);
/* Source surface: info word, size, address, pitch, then zero padding (10 dwords). */
119 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_INFO
, 10);
120 tu_cs_emit(cs
, blit_image_info(&blt
->src
, sfmt
, blt
->stencil_read
) |
121 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt
->src
.samples
)) |
122 /* TODO: should disable this bit for integer formats ? */
123 COND(blt
->src
.samples
> 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE
) |
124 COND(blt
->filter
, A6XX_SP_PS_2D_SRC_INFO_FILTER
) |
126 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt
->src
.x
+ blt
->src
.width
) |
127 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt
->src
.y
+ blt
->src
.height
));
128 tu_cs_emit_qw(cs
, blt
->src
.va
);
129 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt
->src
.pitch
));
131 tu_cs_emit(cs
, 0x00000000);
132 tu_cs_emit(cs
, 0x00000000);
133 tu_cs_emit(cs
, 0x00000000);
134 tu_cs_emit(cs
, 0x00000000);
135 tu_cs_emit(cs
, 0x00000000);
/* Source UBWC flag buffer: address, pitch/array-pitch word, zero padding (6 dwords). */
137 if (blt
->src
.ubwc_size
) {
138 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO
, 6);
139 tu_cs_emit_qw(cs
, blt
->src
.ubwc_va
);
140 tu_cs_emit(cs
, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt
->src
.ubwc_pitch
) |
141 A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt
->src
.ubwc_size
>> 2));
142 tu_cs_emit(cs
, 0x00000000);
143 tu_cs_emit(cs
, 0x00000000);
144 tu_cs_emit(cs
, 0x00000000);
/* Destination surface: info word, address, pitch, then zero padding (9 dwords). */
151 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 9);
152 tu_cs_emit(cs
, blit_image_info(&blt
->dst
, dfmt
, false));
153 tu_cs_emit_qw(cs
, blt
->dst
.va
);
154 tu_cs_emit(cs
, A6XX_RB_2D_DST_SIZE_PITCH(blt
->dst
.pitch
));
155 tu_cs_emit(cs
, 0x00000000);
156 tu_cs_emit(cs
, 0x00000000);
157 tu_cs_emit(cs
, 0x00000000);
158 tu_cs_emit(cs
, 0x00000000);
159 tu_cs_emit(cs
, 0x00000000);
/* Destination UBWC flag buffer, same layout as the source one. */
161 if (blt
->dst
.ubwc_size
) {
162 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_FLAGS_LO
, 6);
163 tu_cs_emit_qw(cs
, blt
->dst
.ubwc_va
);
164 tu_cs_emit(cs
, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt
->dst
.ubwc_pitch
) |
165 A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt
->dst
.ubwc_size
>> 2));
166 tu_cs_emit(cs
, 0x00000000);
167 tu_cs_emit(cs
, 0x00000000);
168 tu_cs_emit(cs
, 0x00000000);
/* Source rectangle; bottom-right is inclusive, hence the "- 1". */
171 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_SRC_TL_X
, 4);
172 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_TL_X_X(blt
->src
.x
));
173 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_BR_X_X(blt
->src
.x
+ blt
->src
.width
- 1));
174 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_TL_Y_Y(blt
->src
.y
));
175 tu_cs_emit(cs
, A6XX_GRAS_2D_SRC_BR_Y_Y(blt
->src
.y
+ blt
->src
.height
- 1));
/* Destination rectangle, X and Y packed per corner; bottom-right inclusive. */
177 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_DST_TL
, 2);
178 tu_cs_emit(cs
, A6XX_GRAS_2D_DST_TL_X(blt
->dst
.x
) |
179 A6XX_GRAS_2D_DST_TL_Y(blt
->dst
.y
));
180 tu_cs_emit(cs
, A6XX_GRAS_2D_DST_BR_X(blt
->dst
.x
+ blt
->dst
.width
- 1) |
181 A6XX_GRAS_2D_DST_BR_Y(blt
->dst
.y
+ blt
->dst
.height
- 1));
183 tu_cs_emit_pkt7(cs
, CP_EVENT_WRITE
, 1);
184 tu_cs_emit(cs
, 0x3f);
/* NOTE(review): the payload dword for this 1-dword packet is not visible in this extract. */
187 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8C01
, 1);
/*
 * SP_2D_SRC_FORMAT is programmed from the destination native format, with
 * FMT6_10_10_10_2_UNORM_DEST swapped for FMT6_16_16_16_16_FLOAT, while the
 * SINT/UINT bits are taken from the source Vulkan format.
 */
190 if (dfmt
.fmt
== FMT6_10_10_10_2_UNORM_DEST
)
191 dfmt
.fmt
= FMT6_16_16_16_16_FLOAT
;
193 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_2D_SRC_FORMAT
, 1);
194 tu_cs_emit(cs
, COND(vk_format_is_sint(blt
->src
.fmt
), A6XX_SP_2D_SRC_FORMAT_SINT
) |
195 COND(vk_format_is_uint(blt
->src
.fmt
), A6XX_SP_2D_SRC_FORMAT_UINT
) |
196 A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(dfmt
.fmt
) |
197 COND(ifmt
== R2D_UNORM8_SRGB
, A6XX_SP_2D_SRC_FORMAT_SRGB
) |
198 A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
200 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8E04
, 1);
201 tu_cs_emit(cs
, phys_dev
->magic
.RB_UNKNOWN_8E04_blit
);
/* Kick off the blit itself. */
203 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
204 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
/* NOTE(review): the payload dword for this second RB_UNKNOWN_8E04 write is not visible in this extract. */
208 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8E04
, 1);
/*
 * tu_blit: entry point that normalises a blit description and emits it, one
 * emit_blit_step() call (or several) per layer.
 *
 * cmdbuf - command buffer, forwarded to tu6_emit_event_write() and
 *          emit_blit_step().
 * cs     - command stream receiving the packets.
 * blt    - blit description; it is modified in place by this function (formats,
 *          coordinates, sample counts, and the per-layer addresses).
 *          NOTE(review): the parameter declaration itself is not visible in
 *          this extract; the body only shows it used as a writable pointer.
 *
 * What the visible lines do, in order:
 *   - Evaluate "dst is R8_UNORM and src is D24_UNORM_S8_UINT"; presumably this
 *     is stored in blt->stencil_read (the assignment target is not visible).
 *   - Assert that src and dst block sizes match (unless stencil_read) and that
 *     their sample counts match.
 *   - For a block-compressed src or dst, divide pitch/x by the block width and
 *     y by the block height, and switch the format to the same-sized UINT
 *     format from blit_copy_format().  For a compressed src, width/height are
 *     also converted to block counts (rounded up) and copied to dst.
 *   - Replace VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 with a 32-bit UINT format.
 *   - Fold multisampling into x/width by multiplying by the sample count and
 *     then setting samples to 1.
 *   - Emit LRZ/CCU flush and invalidate events and a CP_SET_MARKER with
 *     RM6_BLIT2DSCALE.
 *   - Loop over blt->layers, picking one of four paths per layer: a chunked
 *     linear path, a per-line path for a source whose va or pitch is not
 *     64-byte aligned, a per-line path for such a destination, or a direct
 *     emit_blit_step(); then advance va/ubwc_va to the next layer.
 *   - Emit the trailing CCU flush, CACHE_FLUSH_TS and CACHE_INVALIDATE events.
 *
 * NOTE(review): this extract has lost many short original lines, including the
 * switch/case labels on blt->type, the condition selecting the chunked path,
 * its inner loop head, the final else, and all closing braces.  In addition,
 * the TODO comment about multisample image clearing has lost its closing
 * terminator, so the lines after it are swallowed until the end of the
 * "buffer copy setup" comment.  Restore from the full file before editing.
 */
212 void tu_blit(struct tu_cmd_buffer
*cmdbuf
, struct tu_cs
*cs
,
218 blt
->dst
.fmt
== VK_FORMAT_R8_UNORM
&&
219 blt
->src
.fmt
== VK_FORMAT_D24_UNORM_S8_UINT
;
/* A copy moves raw bits, so block sizes must agree unless only stencil is read. */
221 assert(vk_format_get_blocksize(blt
->dst
.fmt
) ==
222 vk_format_get_blocksize(blt
->src
.fmt
) || blt
->stencil_read
);
223 assert(blt
->src
.samples
== blt
->dst
.samples
);
/* Compressed source: convert texel units to block units and copy as same-sized UINT. */
225 if (vk_format_is_compressed(blt
->src
.fmt
)) {
226 unsigned block_width
= vk_format_get_blockwidth(blt
->src
.fmt
);
227 unsigned block_height
= vk_format_get_blockheight(blt
->src
.fmt
);
229 blt
->src
.pitch
/= block_width
;
230 blt
->src
.x
/= block_width
;
231 blt
->src
.y
/= block_height
;
232 blt
->src
.fmt
= blit_copy_format(blt
->src
.fmt
);
234 /* for image_to_image copy, width/height is on the src format */
235 blt
->dst
.width
= blt
->src
.width
= DIV_ROUND_UP(blt
->src
.width
, block_width
);
236 blt
->dst
.height
= blt
->src
.height
= DIV_ROUND_UP(blt
->src
.height
, block_height
);
/* Compressed destination: same unit conversion, but width/height are left alone here. */
239 if (vk_format_is_compressed(blt
->dst
.fmt
)) {
240 unsigned block_width
= vk_format_get_blockwidth(blt
->dst
.fmt
);
241 unsigned block_height
= vk_format_get_blockheight(blt
->dst
.fmt
);
243 blt
->dst
.pitch
/= block_width
;
244 blt
->dst
.x
/= block_width
;
245 blt
->dst
.y
/= block_height
;
246 blt
->dst
.fmt
= blit_copy_format(blt
->dst
.fmt
);
/* E5B9G9R9 is copied through the same-sized UINT format on both sides. */
249 if (blt
->dst
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
250 blt
->dst
.fmt
= blit_copy_format(blt
->dst
.fmt
);
252 if (blt
->src
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
253 blt
->src
.fmt
= blit_copy_format(blt
->src
.fmt
);
255 /* TODO: multisample image copy does not work correctly with tiling/UBWC */
256 blt
->src
.x
*= blt
->src
.samples
;
257 blt
->dst
.x
*= blt
->dst
.samples
;
258 blt
->src
.width
*= blt
->src
.samples
;
259 blt
->dst
.width
*= blt
->dst
.samples
;
260 blt
->src
.samples
= 1;
261 blt
->dst
.samples
= 1;
264 /* unsupported format cleared as UINT32 */
265 if (blt
->dst
.fmt
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
266 blt
->dst
.fmt
= VK_FORMAT_R32_UINT
;
267 /* TODO: multisample image clearing also seems not to work with certain
268 * formats. The blob uses a shader-based clear in these cases.
270 blt
->dst
.x
*= blt
->dst
.samples
;
271 blt
->dst
.width
*= blt
->dst
.samples
;
272 blt
->dst
.samples
= 1;
276 assert(blt
->dst
.samples
== 1);
279 tu6_emit_event_write(cmdbuf
, cs
, LRZ_FLUSH
, false);
280 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
281 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
282 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_INVALIDATE_COLOR
, false);
283 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_INVALIDATE_DEPTH
, false);
285 /* buffer copy setup */
286 tu_cs_emit_pkt7(cs
, CP_SET_MARKER
, 1);
287 tu_cs_emit(cs
, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE
));
289 for (unsigned layer
= 0; layer
< blt
->layers
; layer
++) {
/*
 * Chunked linear path: each step aligns src/dst addresses down to 64 bytes,
 * expresses the remainder as an x offset in blocks, and limits the width so
 * that x + width stays within 0x4000.
 * NOTE(review): the condition selecting this path and the loop around the
 * step are not visible in this extract.
 */
291 struct tu_blit line_blt
= *blt
;
292 uint64_t dst_va
= line_blt
.dst
.va
, src_va
= line_blt
.src
.va
;
293 unsigned blocksize
= vk_format_get_blocksize(blt
->src
.fmt
);
294 uint32_t size
= line_blt
.src
.width
, tmp
;
297 line_blt
.src
.x
= (src_va
& 63) / blocksize
;
298 line_blt
.src
.va
= src_va
& ~63;
299 tmp
= MIN2(size
, 0x4000 - line_blt
.src
.x
);
301 line_blt
.dst
.x
= (dst_va
& 63) / blocksize
;
302 line_blt
.dst
.va
= dst_va
& ~63;
303 tmp
= MIN2(tmp
, 0x4000 - line_blt
.dst
.x
);
305 line_blt
.src
.width
= line_blt
.dst
.width
= tmp
;
307 emit_blit_step(cmdbuf
, cs
, &line_blt
);
309 src_va
+= tmp
* blocksize
;
310 dst_va
+= tmp
* blocksize
;
313 } else if ((blt
->src
.va
& 63) || (blt
->src
.pitch
& 63)) {
314 /* per line copy path (buffer_to_image) */
315 assert(blt
->type
== TU_BLIT_COPY
&& !blt
->src
.tiled
);
316 struct tu_blit line_blt
= *blt
;
317 uint64_t src_va
= line_blt
.src
.va
+ blt
->src
.pitch
* blt
->src
.y
;
320 line_blt
.src
.pitch
= 0;
321 line_blt
.src
.height
= 1;
322 line_blt
.dst
.height
= 1;
324 for (unsigned y
= 0; y
< blt
->src
.height
; y
++) {
325 line_blt
.src
.x
= blt
->src
.x
+ (src_va
& 63) / vk_format_get_blocksize(blt
->src
.fmt
);
326 line_blt
.src
.va
= src_va
& ~63;
328 emit_blit_step(cmdbuf
, cs
, &line_blt
);
331 src_va
+= blt
->src
.pitch
;
333 } else if ((blt
->dst
.va
& 63) || (blt
->dst
.pitch
& 63)) {
334 /* per line copy path (image_to_buffer) */
335 assert(blt
->type
== TU_BLIT_COPY
&& !blt
->dst
.tiled
);
336 struct tu_blit line_blt
= *blt
;
337 uint64_t dst_va
= line_blt
.dst
.va
+ blt
->dst
.pitch
* blt
->dst
.y
;
340 line_blt
.dst
.pitch
= 0;
341 line_blt
.src
.height
= 1;
342 line_blt
.dst
.height
= 1;
344 for (unsigned y
= 0; y
< blt
->src
.height
; y
++) {
345 line_blt
.dst
.x
= blt
->dst
.x
+ (dst_va
& 63) / vk_format_get_blocksize(blt
->dst
.fmt
);
346 line_blt
.dst
.va
= dst_va
& ~63;
348 emit_blit_step(cmdbuf
, cs
, &line_blt
);
351 dst_va
+= blt
->dst
.pitch
;
/* Aligned case: the whole layer goes out as a single step. */
354 emit_blit_step(cmdbuf
, cs
, blt
);
/* Advance the surface and UBWC flag addresses to the next layer. */
356 blt
->dst
.va
+= blt
->dst
.layer_size
;
357 blt
->src
.va
+= blt
->src
.layer_size
;
358 blt
->dst
.ubwc_va
+= blt
->dst
.ubwc_size
;
359 blt
->src
.ubwc_va
+= blt
->src
.ubwc_size
;
/* Trailing flush and invalidate events emitted once all layers are done. */
362 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
363 tu6_emit_event_write(cmdbuf
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
364 tu6_emit_event_write(cmdbuf
, cs
, CACHE_FLUSH_TS
, true);
365 tu6_emit_event_write(cmdbuf
, cs
, CACHE_INVALIDATE
, false);