turnip: rework format helpers
[mesa.git] / src / freedreno / vulkan / tu_blit.c
1 /*
2 * Copyright © 2019 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 *
26 */
27
28 #include "tu_blit.h"
29
30 #include "a6xx.xml.h"
31 #include "adreno_common.xml.h"
32 #include "adreno_pm4.xml.h"
33
34 #include "vk_format.h"
35
36 #include "tu_cs.h"
37
38 /* TODO:
39 * - Avoid disabling tiling for swapped formats
40 * (image_to_image copy doesn't deal with it)
41 * - Fix d24_unorm_s8_uint support & aspects
42 * - UBWC
43 */
44
45 static VkFormat
46 blit_copy_format(VkFormat format)
47 {
48 switch (vk_format_get_blocksizebits(format)) {
49 case 8: return VK_FORMAT_R8_UINT;
50 case 16: return VK_FORMAT_R16_UINT;
51 case 32: return VK_FORMAT_R32_UINT;
52 case 64: return VK_FORMAT_R32G32_UINT;
53 case 96: return VK_FORMAT_R32G32B32_UINT;
54 case 128:return VK_FORMAT_R32G32B32A32_UINT;
55 default:
56 unreachable("unhandled format size");
57 }
58 }
59
60 static uint32_t
61 blit_image_info(const struct tu_blit_surf *img, struct tu_native_format fmt, bool stencil_read)
62 {
63 if (fmt.fmt == FMT6_Z24_UNORM_S8_UINT)
64 fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
65
66 if (stencil_read)
67 fmt.swap = XYZW;
68
69 return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt.fmt) |
70 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
71 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(fmt.swap) |
72 COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
73 COND(img->ubwc_size, A6XX_SP_PS_2D_SRC_INFO_FLAGS);
74 }
75
/* Emit one 2D-engine (BLIT_OP_SCALE) operation into the command stream:
 * blit control, source (or solid clear color), destination, coordinates,
 * and the CP_BLIT packet itself, bracketed by the required event/WFI
 * sequencing.  Handles a single layer; the caller loops over layers and
 * splits unaligned buffer copies into per-line steps.
 */
static void
emit_blit_step(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
               const struct tu_blit *blt)
{
   struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;

   /* resolve VkFormats to native hw formats for the chosen tile modes */
   struct tu_native_format dfmt = tu6_format_color(blt->dst.fmt, blt->dst.image_tile_mode);
   struct tu_native_format sfmt = tu6_format_texture(blt->src.fmt, blt->src.image_tile_mode);

   /* the 2D engine writes D24S8 as a raw RGBA8 surface */
   if (dfmt.fmt == FMT6_Z24_UNORM_S8_UINT)
      dfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;

   enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(dfmt.fmt);

   /* sRGB destinations need the dedicated intermediate format; only
    * UNORM8 formats can be sRGB in Vulkan, hence the assert */
   if (vk_format_is_srgb(blt->dst.fmt)) {
      assert(ifmt == R2D_UNORM8);
      ifmt = R2D_UNORM8_SRGB;
   }

   uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
                        COND(blt->type == TU_BLIT_CLEAR, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
                        A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt.fmt) | /* not required? */
                        COND(dfmt.fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8,
                             A6XX_RB_2D_BLIT_CNTL_D24S8) |
                        A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
                        A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);

   /* the same control word is programmed into both the RB and GRAS copies */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   /*
    * Emit source:
    */
   if (blt->type == TU_BLIT_CLEAR) {
      /* solid-color clear: the four clear dwords replace the source surface */
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
      tu_cs_emit(cs, blt->clear_value[0]);
      tu_cs_emit(cs, blt->clear_value[1]);
      tu_cs_emit(cs, blt->clear_value[2]);
      tu_cs_emit(cs, blt->clear_value[3]);
   } else {
      /* full source surface description: info, size, iova, pitch,
       * plus five zero-filled trailing dwords of the packet */
      tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
      tu_cs_emit(cs, blit_image_info(&blt->src, sfmt, blt->stencil_read) |
                     A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
                     /* TODO: should disable this bit for integer formats ? */
                     COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
                     COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
                     0x500000); /* NOTE(review): unknown magic bits — meaning not documented here */
      tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
                     A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
      tu_cs_emit_qw(cs, blt->src.va);
      tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));

      tu_cs_emit(cs, 0x00000000);
      tu_cs_emit(cs, 0x00000000);
      tu_cs_emit(cs, 0x00000000);
      tu_cs_emit(cs, 0x00000000);
      tu_cs_emit(cs, 0x00000000);

      /* UBWC (bandwidth compression) flags buffer for the source, if any */
      if (blt->src.ubwc_size) {
         tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
         tu_cs_emit_qw(cs, blt->src.ubwc_va);
         tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) |
                        A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2));
         tu_cs_emit(cs, 0x00000000);
         tu_cs_emit(cs, 0x00000000);
         tu_cs_emit(cs, 0x00000000);
      }
   }

   /*
    * Emit destination:
    */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
   tu_cs_emit(cs, blit_image_info(&blt->dst, dfmt, false));
   tu_cs_emit_qw(cs, blt->dst.va);
   tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);
   tu_cs_emit(cs, 0x00000000);

   /* UBWC flags buffer for the destination, if any */
   if (blt->dst.ubwc_size) {
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
      tu_cs_emit_qw(cs, blt->dst.ubwc_va);
      tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) |
                     A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2));
      tu_cs_emit(cs, 0x00000000);
      tu_cs_emit(cs, 0x00000000);
      tu_cs_emit(cs, 0x00000000);
   }

   /* source and destination rectangles, inclusive bottom-right corners */
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
   tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
                  A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
   tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
                  A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));

   /* NOTE(review): event 0x3f + WFI before the blit — presumably flushes
    * prior rendering so the 2D engine sees consistent state; confirm
    * against the a6xx event encoding */
   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
   tu_cs_emit(cs, 0x3f);
   tu_cs_emit_wfi(cs);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
   tu_cs_emit(cs, 0);

   /* SP_2D_SRC_FORMAT cannot encode 10_10_10_2_UNORM_DEST; substitute a
    * format the unit accepts (only affects this register, not RB/GRAS) */
   if (dfmt.fmt == FMT6_10_10_10_2_UNORM_DEST)
      dfmt.fmt = FMT6_16_16_16_16_FLOAT;

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
   tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
                  COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
                  A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(dfmt.fmt) |
                  COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
                  A6XX_SP_2D_SRC_FORMAT_MASK(0xf));

   /* per-GPU magic value captured from the blob driver */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
   tu_cs_emit(cs, phys_dev->magic.RB_UNKNOWN_8E04_blit);

   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   tu_cs_emit_wfi(cs);

   /* restore RB_UNKNOWN_8E04 to 0 after the blit */
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
   tu_cs_emit(cs, 0);
}
211
/* Top-level blit entry point: normalizes the request in-place (raw-copy
 * format substitution for compressed/E5B9G9R9 formats, MSAA flattening),
 * emits cache maintenance and 2D-engine setup, then emits one or more
 * blit steps per layer — splitting into per-line steps when a linear
 * buffer address or pitch is not 64-byte aligned.
 * NOTE: mutates *blt (formats, coordinates, samples, per-layer VAs).
 */
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
             struct tu_blit *blt)
{
   struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;

   switch (blt->type) {
   case TU_BLIT_COPY:
      /* reading stencil out of D24S8 into an R8 destination is a special
       * swizzled read (see blit_image_info) */
      blt->stencil_read =
         blt->dst.fmt == VK_FORMAT_R8_UNORM &&
         blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;

      assert(vk_format_get_blocksize(blt->dst.fmt) ==
             vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
      assert(blt->src.samples == blt->dst.samples);

      /* copy compressed data raw: one "pixel" per block, so scale the
       * pitch/coords down by the block dimensions and switch to a UINT
       * format of the same block size */
      if (vk_format_is_compressed(blt->src.fmt)) {
         unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
         unsigned block_height = vk_format_get_blockheight(blt->src.fmt);

         blt->src.pitch /= block_width;
         blt->src.x /= block_width;
         blt->src.y /= block_height;
         blt->src.fmt = blit_copy_format(blt->src.fmt);

         /* for image_to_image copy, width/height is on the src format */
         blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
         blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
      }

      if (vk_format_is_compressed(blt->dst.fmt)) {
         unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
         unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);

         blt->dst.pitch /= block_width;
         blt->dst.x /= block_width;
         blt->dst.y /= block_height;
         blt->dst.fmt = blit_copy_format(blt->dst.fmt);
      }

      /* E5B9G9R9 has no native 2D-engine format; copy it raw as R32_UINT */
      if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
         blt->dst.fmt = blit_copy_format(blt->dst.fmt);

      if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
         blt->src.fmt = blit_copy_format(blt->src.fmt);

      /* TODO: multisample image copy does not work correctly with tiling/UBWC */
      /* flatten MSAA: treat each sample as an extra pixel in x */
      blt->src.x *= blt->src.samples;
      blt->dst.x *= blt->dst.samples;
      blt->src.width *= blt->src.samples;
      blt->dst.width *= blt->dst.samples;
      blt->src.samples = 1;
      blt->dst.samples = 1;
      break;
   case TU_BLIT_CLEAR:
      /* unsupported format cleared as UINT32 */
      if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
         blt->dst.fmt = VK_FORMAT_R32_UINT;
      /* TODO: multisample image clearing also seems not to work with certain
       * formats. The blob uses a shader-based clear in these cases.
       */
      blt->dst.x *= blt->dst.samples;
      blt->dst.width *= blt->dst.samples;
      blt->dst.samples = 1;
      /* clears use the destination as a dummy source description */
      blt->src = blt->dst;
      break;
   default:
      assert(blt->dst.samples == 1);
   }

   /* flush and invalidate caches around the switch to 2D-engine work */
   tu6_emit_event_write(cmdbuf, cs, LRZ_FLUSH, false);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_COLOR, false);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_DEPTH, false);

   tu_cs_emit_wfi(cs);
   tu_cs_emit_regs(cs,
                   A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));

   /* buffer copy setup */
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));

   for (unsigned layer = 0; layer < blt->layers; layer++) {
      if (blt->buffer) {
         /* 1D buffer-to-buffer copy: chop into chunks so that each step's
          * base address is 64-byte aligned (the sub-64B remainder becomes
          * the x offset) and the width stays under the 0x4000 hw limit */
         struct tu_blit line_blt = *blt;
         uint64_t dst_va = line_blt.dst.va, src_va = line_blt.src.va;
         unsigned blocksize = vk_format_get_blocksize(blt->src.fmt);
         uint32_t size = line_blt.src.width, tmp;

         while (size) {
            line_blt.src.x = (src_va & 63) / blocksize;
            line_blt.src.va = src_va & ~63;
            tmp = MIN2(size, 0x4000 - line_blt.src.x);

            line_blt.dst.x = (dst_va & 63) / blocksize;
            line_blt.dst.va = dst_va & ~63;
            tmp = MIN2(tmp, 0x4000 - line_blt.dst.x);

            line_blt.src.width = line_blt.dst.width = tmp;

            emit_blit_step(cmdbuf, cs, &line_blt);

            src_va += tmp * blocksize;
            dst_va += tmp * blocksize;
            size -= tmp;
         }
      } else if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
         /* per line copy path (buffer_to_image) */
         assert(blt->type == TU_BLIT_COPY && !blt->src.image_tile_mode);
         struct tu_blit line_blt = *blt;
         uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y;

         line_blt.src.y = 0;
         line_blt.src.pitch = 0;
         line_blt.src.height = 1;
         line_blt.dst.height = 1;

         for (unsigned y = 0; y < blt->src.height; y++) {
            /* realign the source base each row; the remainder goes into x */
            line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
            line_blt.src.va = src_va & ~63;

            emit_blit_step(cmdbuf, cs, &line_blt);

            line_blt.dst.y++;
            src_va += blt->src.pitch;
         }
      } else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
         /* per line copy path (image_to_buffer) */
         assert(blt->type == TU_BLIT_COPY && !blt->dst.image_tile_mode);
         struct tu_blit line_blt = *blt;
         uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y;

         line_blt.dst.y = 0;
         line_blt.dst.pitch = 0;
         line_blt.src.height = 1;
         line_blt.dst.height = 1;

         for (unsigned y = 0; y < blt->src.height; y++) {
            /* realign the destination base each row; remainder goes into x */
            line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
            line_blt.dst.va = dst_va & ~63;

            emit_blit_step(cmdbuf, cs, &line_blt);

            line_blt.src.y++;
            dst_va += blt->dst.pitch;
         }
      } else {
         /* everything aligned: one blit step covers the whole layer */
         emit_blit_step(cmdbuf, cs, blt);
      }
      /* advance to the next array layer */
      blt->dst.va += blt->dst.layer_size;
      blt->src.va += blt->src.layer_size;
      blt->dst.ubwc_va += blt->dst.ubwc_size;
      blt->src.ubwc_va += blt->src.ubwc_size;
   }

   /* flush 2D-engine writes and invalidate so later draws see the results */
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
   tu6_emit_event_write(cmdbuf, cs, CACHE_FLUSH_TS, true);
   tu6_emit_event_write(cmdbuf, cs, CACHE_INVALIDATE, false);
}