turnip: Add todo for d24_s8 copies
[mesa.git] / src / freedreno / vulkan / tu_meta_copy.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "tu_private.h"
25
26 #include "a6xx.xml.h"
27 #include "adreno_common.xml.h"
28 #include "adreno_pm4.xml.h"
29
30 #include "vk_format.h"
31
32 #include "tu_cs.h"
33
34 /*
35 * TODO:
36 * - 3D textures
37 * - compressed image formats (need to divide offset/extent)
38 * - Fix d24_unorm_s8_uint support & aspects
39 */
40
41 static uint32_t
42 blit_control(enum a6xx_color_fmt fmt)
43 {
44 unsigned blit_cntl = 0xf00000;
45 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
46 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt));
47 return blit_cntl;
48 }
49
50 static uint32_t tu6_sp_2d_src_format(VkFormat format)
51 {
52 const struct vk_format_description *desc = vk_format_description(format);
53 uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb);
54
55 int channel = vk_format_get_first_non_void_channel(format);
56 if (channel < 0) {
57 /* TODO special format. */
58 return reg;
59 }
60 if (desc->channel[channel].normalized) {
61 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
62 reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
63 reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
64 } else if (desc->channel[channel].pure_integer) {
65 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
66 reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
67 else
68 reg |= A6XX_SP_2D_SRC_FORMAT_UINT;
69 }
70 return reg;
71 }
72
73 static void
74 tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
75 {
76 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10);
77
78 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
79 tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR);
80
81 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
82 tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH);
83
84 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
85 tu_cs_emit(&cmdbuf->cs, 0x0);
86
87 tu_cs_emit_wfi(&cmdbuf->cs);
88
89 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1);
90 tu_cs_emit(&cmdbuf->cs, 0x10000000);
91 }
92
93 /* Always use UINT formats to avoid precision issues.
94 *
95 * Example failure it avoids:
96 * - dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.r16_unorm.r16_unorm.general_general
97 */
98 static VkFormat
99 tu_canonical_copy_format(VkFormat format)
100 {
101 switch (vk_format_get_blocksizebits(format)) {
102 case 8:
103 return VK_FORMAT_R8_UINT;
104 case 16:
105 return VK_FORMAT_R16_UINT;
106 case 32:
107 return VK_FORMAT_R32_UINT;
108 case 64:
109 return VK_FORMAT_R32G32_UINT;
110 case 96:
111 return VK_FORMAT_R32G32B32_UINT;
112 case 128:
113 return VK_FORMAT_R32G32B32A32_UINT;
114 default:
115 unreachable("unhandled format size");
116 }
117 }
118
119 static void
120 tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
121 struct tu_bo *src_bo,
122 uint64_t src_offset,
123 struct tu_bo *dst_bo,
124 uint64_t dst_offset,
125 uint64_t size)
126 {
127 const unsigned max_size_per_iter = 0x4000 - 0x40;
128 const unsigned max_iterations =
129 (size + max_size_per_iter) / max_size_per_iter;
130
131 tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ);
132 tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE);
133
134 tu_dma_prepare(cmdbuf);
135
136 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations);
137
138 /* buffer copy setup */
139 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
140 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
141
142 const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
143
144 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
145 tu_cs_emit(&cmdbuf->cs, blit_cntl);
146
147 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
148 tu_cs_emit(&cmdbuf->cs, blit_cntl);
149
150 for (; size;) {
151 uint64_t src_va = src_bo->iova + src_offset;
152 uint64_t dst_va = dst_bo->iova + dst_offset;
153
154 unsigned src_shift = src_va & 0x3f;
155 unsigned dst_shift = dst_va & 0x3f;
156 unsigned max_shift = MAX2(src_shift, dst_shift);
157
158 src_va -= src_shift;
159 dst_va -= dst_shift;
160
161 uint32_t size_todo = MIN2(0x4000 - max_shift, size);
162 unsigned pitch = (size_todo + max_shift + 63) & ~63;
163
164 /*
165 * Emit source:
166 */
167 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
168 tu_cs_emit(&cmdbuf->cs,
169 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
170 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
171 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
172 tu_cs_emit(&cmdbuf->cs,
173 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) |
174 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
175 tu_cs_emit_qw(&cmdbuf->cs, src_va);
176 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
177
178 tu_cs_emit(&cmdbuf->cs, 0x00000000);
179 tu_cs_emit(&cmdbuf->cs, 0x00000000);
180 tu_cs_emit(&cmdbuf->cs, 0x00000000);
181 tu_cs_emit(&cmdbuf->cs, 0x00000000);
182 tu_cs_emit(&cmdbuf->cs, 0x00000000);
183
184 tu_cs_emit(&cmdbuf->cs, 0x00000000);
185 tu_cs_emit(&cmdbuf->cs, 0x00000000);
186 tu_cs_emit(&cmdbuf->cs, 0x00000000);
187
188 /*
189 * Emit destination:
190 */
191 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
192 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
193 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
194 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
195 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
196
197 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
198 tu_cs_emit(&cmdbuf->cs, 0x00000000);
199 tu_cs_emit(&cmdbuf->cs, 0x00000000);
200 tu_cs_emit(&cmdbuf->cs, 0x00000000);
201 tu_cs_emit(&cmdbuf->cs, 0x00000000);
202 tu_cs_emit(&cmdbuf->cs, 0x00000000);
203
204 /*
205 * Blit command:
206 */
207 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
208 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift));
209 tu_cs_emit(&cmdbuf->cs,
210 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1));
211 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
212 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
213
214 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
215 tu_cs_emit(&cmdbuf->cs,
216 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0));
217 tu_cs_emit(&cmdbuf->cs,
218 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) |
219 A6XX_GRAS_2D_DST_BR_Y(0));
220
221 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
222 tu_cs_emit(&cmdbuf->cs, 0x3f);
223 tu_cs_emit_wfi(&cmdbuf->cs);
224
225 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
226 tu_cs_emit(&cmdbuf->cs, 0);
227
228 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
229 tu_cs_emit(&cmdbuf->cs, 0xf180);
230
231 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
232 tu_cs_emit(&cmdbuf->cs, 0x01000000);
233
234 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
235 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
236
237 tu_cs_emit_wfi(&cmdbuf->cs);
238
239 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
240 tu_cs_emit(&cmdbuf->cs, 0);
241
242 src_offset += size_todo;
243 dst_offset += size_todo;
244 size -= size_todo;
245 }
246
247 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
248 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
249 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
250 }
251
252 static void
253 tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf,
254 struct tu_buffer *src_buffer,
255 struct tu_image *dst_image,
256 const VkBufferImageCopy *copy_info,
257 VkFormat format,
258 uint32_t layer,
259 uint64_t src_va)
260 {
261 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
262
263 uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset;
264 unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch *
265 vk_format_get_blocksize(format);
266
267 unsigned src_pitch;
268 unsigned src_offset = 0;
269 if (copy_info->imageExtent.height == 1) {
270 /* Can't find this in the spec, but not having it is sort of insane? */
271 assert(src_va % vk_format_get_blocksize(format) == 0);
272
273 src_offset = (src_va & 63) / vk_format_get_blocksize(format);
274 src_va &= ~63;
275
276 src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
277 } else {
278 unsigned src_pixel_stride = copy_info->bufferRowLength
279 ? copy_info->bufferRowLength
280 : copy_info->imageExtent.width;
281 src_pitch = src_pixel_stride * vk_format_get_blocksize(format);
282 assert(!(src_pitch & 63));
283 assert(!(src_va & 63));
284 }
285
286 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
287
288 /*
289 * Emit source:
290 */
291 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
292 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
293 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
294 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
295 0x500000);
296 tu_cs_emit(&cmdbuf->cs,
297 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) |
298 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
299 copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */
300 tu_cs_emit_qw(&cmdbuf->cs, src_va);
301 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
302
303 tu_cs_emit(&cmdbuf->cs, 0x00000000);
304 tu_cs_emit(&cmdbuf->cs, 0x00000000);
305 tu_cs_emit(&cmdbuf->cs, 0x00000000);
306 tu_cs_emit(&cmdbuf->cs, 0x00000000);
307 tu_cs_emit(&cmdbuf->cs, 0x00000000);
308
309 tu_cs_emit(&cmdbuf->cs, 0x00000000);
310 tu_cs_emit(&cmdbuf->cs, 0x00000000);
311 tu_cs_emit(&cmdbuf->cs, 0x00000000);
312
313 /*
314 * Emit destination:
315 */
316 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
317 tu_cs_emit(&cmdbuf->cs,
318 A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
319 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
320 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
321 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
322 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
323 tu_cs_emit(&cmdbuf->cs, 0x00000000);
324 tu_cs_emit(&cmdbuf->cs, 0x00000000);
325 tu_cs_emit(&cmdbuf->cs, 0x00000000);
326 tu_cs_emit(&cmdbuf->cs, 0x00000000);
327 tu_cs_emit(&cmdbuf->cs, 0x00000000);
328
329 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
330 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset));
331 tu_cs_emit(&cmdbuf->cs,
332 A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1));
333 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
334 tu_cs_emit(&cmdbuf->cs,
335 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1));
336
337 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
338 tu_cs_emit(&cmdbuf->cs,
339 A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) |
340 A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y));
341 tu_cs_emit(&cmdbuf->cs,
342 A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x +
343 copy_info->imageExtent.width - 1) |
344 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y +
345 copy_info->imageExtent.height - 1));
346
347 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
348 tu_cs_emit(&cmdbuf->cs, 0x3f);
349 tu_cs_emit_wfi(&cmdbuf->cs);
350
351 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
352 tu_cs_emit(&cmdbuf->cs, 0);
353
354 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
355 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
356
357 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
358 tu_cs_emit(&cmdbuf->cs, 0x01000000);
359
360 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
361 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
362
363 tu_cs_emit_wfi(&cmdbuf->cs);
364
365 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
366 tu_cs_emit(&cmdbuf->cs, 0);
367 }
368
369 static void
370 tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
371 struct tu_buffer *src_buffer,
372 struct tu_image *dst_image,
373 const VkBufferImageCopy *copy_info)
374 {
375 tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
376 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
377
378 /* general setup */
379 tu_dma_prepare(cmdbuf);
380
381 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
382
383 /* buffer copy setup */
384 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
385 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
386
387 VkFormat format = tu_canonical_copy_format(dst_image->vk_format);
388 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
389
390 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
391
392 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
393 tu_cs_emit(&cmdbuf->cs, blit_cntl);
394
395 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
396 tu_cs_emit(&cmdbuf->cs, blit_cntl);
397
398 unsigned src_pixel_stride = copy_info->bufferRowLength
399 ? copy_info->bufferRowLength
400 : copy_info->imageExtent.width;
401 unsigned cpp = vk_format_get_blocksize(format);
402 unsigned src_pitch = src_pixel_stride * cpp;
403
404 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
405 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
406 uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
407
408 if ((src_pitch & 63) || (src_va & 63)) {
409 /* Do a per line copy */
410 VkBufferImageCopy line_copy_info = *copy_info;
411 line_copy_info.imageExtent.height = 1;
412 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
413 /*
414 * if src_va is not aligned the line copy will need to adjust. Give it
415 * room to do so.
416 */
417 unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0;
418 line_copy_info.imageOffset.x = copy_info->imageOffset.x;
419 line_copy_info.imageExtent.width = copy_info->imageExtent.width;
420
421 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
422 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
423
424 line_copy_info.imageOffset.x += max_width;
425 line_copy_info.imageExtent.width -= max_width;
426 }
427
428 line_copy_info.imageOffset.y++;
429 src_va += src_pitch;
430 }
431 } else {
432 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
433 }
434 }
435
436 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
437
438 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
439 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
440 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
441 }
442
443 static void
444 tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
445 struct tu_image *src_image,
446 struct tu_buffer *dst_buffer,
447 const VkBufferImageCopy *copy_info,
448 VkFormat format,
449 uint32_t layer,
450 uint64_t dst_va)
451 {
452 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
453
454 uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
455 unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
456 vk_format_get_blocksize(format);
457
458 unsigned dst_pitch;
459 unsigned dst_offset = 0;
460 if (copy_info->imageExtent.height == 1) {
461 /* Can't find this in the spec, but not having it is sort of insane? */
462 assert(dst_va % vk_format_get_blocksize(format) == 0);
463
464 dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
465 dst_va &= ~63;
466
467 dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
468 } else {
469 unsigned dst_pixel_stride = copy_info->bufferRowLength
470 ? copy_info->bufferRowLength
471 : copy_info->imageExtent.width;
472 dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
473 assert(!(dst_pitch & 63));
474 assert(!(dst_va & 63));
475 }
476
477
478 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
479
480 /*
481 * Emit source:
482 */
483 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
484 tu_cs_emit(&cmdbuf->cs,
485 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
486 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
487 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
488 tu_cs_emit(&cmdbuf->cs,
489 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
490 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
491 src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
492 tu_cs_emit_qw(&cmdbuf->cs, src_va);
493 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
494
495 tu_cs_emit(&cmdbuf->cs, 0x00000000);
496 tu_cs_emit(&cmdbuf->cs, 0x00000000);
497 tu_cs_emit(&cmdbuf->cs, 0x00000000);
498 tu_cs_emit(&cmdbuf->cs, 0x00000000);
499 tu_cs_emit(&cmdbuf->cs, 0x00000000);
500
501 tu_cs_emit(&cmdbuf->cs, 0x00000000);
502 tu_cs_emit(&cmdbuf->cs, 0x00000000);
503 tu_cs_emit(&cmdbuf->cs, 0x00000000);
504
505 /*
506 * Emit destination:
507 */
508 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
509 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
510 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
511 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
512 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
513 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
514 tu_cs_emit(&cmdbuf->cs, 0x00000000);
515 tu_cs_emit(&cmdbuf->cs, 0x00000000);
516 tu_cs_emit(&cmdbuf->cs, 0x00000000);
517 tu_cs_emit(&cmdbuf->cs, 0x00000000);
518 tu_cs_emit(&cmdbuf->cs, 0x00000000);
519
520 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
521 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
522 tu_cs_emit(&cmdbuf->cs,
523 A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
524 copy_info->imageExtent.width - 1));
525 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
526 tu_cs_emit(&cmdbuf->cs,
527 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
528 copy_info->imageExtent.height - 1));
529
530 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
531 tu_cs_emit(&cmdbuf->cs,
532 A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
533 tu_cs_emit(&cmdbuf->cs,
534 A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
535 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
536
537 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
538 tu_cs_emit(&cmdbuf->cs, 0x3f);
539 tu_cs_emit_wfi(&cmdbuf->cs);
540
541 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
542 tu_cs_emit(&cmdbuf->cs, 0);
543
544 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
545 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
546
547 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
548 tu_cs_emit(&cmdbuf->cs, 0x01000000);
549
550 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
551 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
552
553 tu_cs_emit_wfi(&cmdbuf->cs);
554
555 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
556 tu_cs_emit(&cmdbuf->cs, 0);
557 }
558
559 static void
560 tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
561 struct tu_image *src_image,
562 struct tu_buffer *dst_buffer,
563 const VkBufferImageCopy *copy_info)
564 {
565 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
566 tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
567
568 /* general setup */
569 tu_dma_prepare(cmdbuf);
570
571 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
572
573 /* buffer copy setup */
574 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
575 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
576
577 VkFormat format = tu_canonical_copy_format(src_image->vk_format);
578 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
579
580 unsigned dst_pixel_stride = copy_info->bufferRowLength
581 ? copy_info->bufferRowLength
582 : copy_info->imageExtent.width;
583 unsigned cpp = vk_format_get_blocksize(format);
584 unsigned dst_pitch = dst_pixel_stride * cpp;
585
586
587 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
588
589 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
590 tu_cs_emit(&cmdbuf->cs, blit_cntl);
591
592 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
593 tu_cs_emit(&cmdbuf->cs, blit_cntl);
594
595 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
596 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
597 uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
598
599 if ((dst_pitch & 63) || (dst_va & 63)) {
600 /* Do a per line copy */
601 VkBufferImageCopy line_copy_info = *copy_info;
602 line_copy_info.imageExtent.height = 1;
603 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
604 /*
605 * if dst_va is not aligned the line copy will need to adjust. Give it
606 * room to do so.
607 */
608 unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
609 line_copy_info.imageOffset.x = copy_info->imageOffset.x;
610 line_copy_info.imageExtent.width = copy_info->imageExtent.width;
611
612 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
613 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
614
615 line_copy_info.imageOffset.x += max_width;
616 line_copy_info.imageExtent.width -= max_width;
617 }
618
619 line_copy_info.imageOffset.y++;
620 dst_va += dst_pitch;
621 }
622 } else {
623 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
624 }
625 }
626
627 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
628
629 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
630 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
631 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
632 }
633
634 static void
635 tu_copy_image_to_image_step(struct tu_cmd_buffer *cmdbuf,
636 struct tu_image *src_image,
637 struct tu_image *dst_image,
638 const VkImageCopy *copy_info,
639 VkFormat format,
640 uint32_t layer_offset)
641 {
642 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
643
644 unsigned src_layer =
645 copy_info->srcSubresource.baseArrayLayer + layer_offset;
646 uint64_t src_va =
647 src_image->bo->iova + src_image->bo_offset +
648 src_image->layer_size * src_layer +
649 src_image->levels[copy_info->srcSubresource.mipLevel].offset;
650 unsigned src_pitch =
651 src_image->levels[copy_info->srcSubresource.mipLevel].pitch *
652 vk_format_get_blocksize(format);
653
654 unsigned dst_layer =
655 copy_info->dstSubresource.baseArrayLayer + layer_offset;
656 uint64_t dst_va =
657 dst_image->bo->iova + dst_image->bo_offset +
658 dst_image->layer_size * dst_layer +
659 dst_image->levels[copy_info->dstSubresource.mipLevel].offset;
660 unsigned dst_pitch =
661 src_image->levels[copy_info->dstSubresource.mipLevel].pitch *
662 vk_format_get_blocksize(format);
663
664 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
665
666 /*
667 * Emit source:
668 */
669 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
670 tu_cs_emit(&cmdbuf->cs,
671 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
672 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
673 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
674 tu_cs_emit(&cmdbuf->cs,
675 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
676 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
677 src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
678 tu_cs_emit_qw(&cmdbuf->cs, src_va);
679 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
680
681 tu_cs_emit(&cmdbuf->cs, 0x00000000);
682 tu_cs_emit(&cmdbuf->cs, 0x00000000);
683 tu_cs_emit(&cmdbuf->cs, 0x00000000);
684 tu_cs_emit(&cmdbuf->cs, 0x00000000);
685 tu_cs_emit(&cmdbuf->cs, 0x00000000);
686
687 tu_cs_emit(&cmdbuf->cs, 0x00000000);
688 tu_cs_emit(&cmdbuf->cs, 0x00000000);
689 tu_cs_emit(&cmdbuf->cs, 0x00000000);
690
691 /*
692 * Emit destination:
693 */
694 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
695 tu_cs_emit(&cmdbuf->cs,
696 A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
697 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
698 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
699 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
700 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
701 tu_cs_emit(&cmdbuf->cs, 0x00000000);
702 tu_cs_emit(&cmdbuf->cs, 0x00000000);
703 tu_cs_emit(&cmdbuf->cs, 0x00000000);
704 tu_cs_emit(&cmdbuf->cs, 0x00000000);
705 tu_cs_emit(&cmdbuf->cs, 0x00000000);
706
707 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
708 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->srcOffset.x));
709 tu_cs_emit(&cmdbuf->cs,
710 A6XX_GRAS_2D_SRC_BR_X_X(copy_info->srcOffset.x +
711 copy_info->extent.width - 1));
712 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->srcOffset.y));
713 tu_cs_emit(&cmdbuf->cs,
714 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->srcOffset.y +
715 copy_info->extent.height - 1));
716
717 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
718 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_TL_X(copy_info->dstOffset.x) |
719 A6XX_GRAS_2D_DST_TL_Y(copy_info->dstOffset.y));
720 tu_cs_emit(&cmdbuf->cs,
721 A6XX_GRAS_2D_DST_BR_X(copy_info->dstOffset.x +
722 copy_info->extent.width - 1) |
723 A6XX_GRAS_2D_DST_BR_Y(copy_info->dstOffset.y +
724 copy_info->extent.height - 1));
725
726 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
727 tu_cs_emit(&cmdbuf->cs, 0x3f);
728 tu_cs_emit_wfi(&cmdbuf->cs);
729
730 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
731 tu_cs_emit(&cmdbuf->cs, 0);
732
733 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
734 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
735
736 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
737 tu_cs_emit(&cmdbuf->cs, 0x01000000);
738
739 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
740 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
741
742 tu_cs_emit_wfi(&cmdbuf->cs);
743
744 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
745 tu_cs_emit(&cmdbuf->cs, 0);
746 }
747
748 static void
749 tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
750 struct tu_image *src_image,
751 struct tu_image *dst_image,
752 const VkImageCopy *copy_info)
753 {
754 /* TODO:
755 * - Handle 3D images.
756 * - In some cases where src and dst format are different this may
757 * have tiling implications. Not sure if things happen correctly
758 * in that case.
759 */
760
761 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
762 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
763
764 /* general setup */
765 tu_dma_prepare(cmdbuf);
766
767 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
768
769 /* buffer copy setup */
770 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
771 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
772
773 VkFormat format = tu_canonical_copy_format(src_image->vk_format);
774 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
775 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
776
777 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
778 tu_cs_emit(&cmdbuf->cs, blit_cntl);
779
780 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
781 tu_cs_emit(&cmdbuf->cs, blit_cntl);
782
783 for (unsigned layer_offset = 0;
784 layer_offset < copy_info->srcSubresource.layerCount; ++layer_offset) {
785 tu_copy_image_to_image_step(cmdbuf, src_image, dst_image, copy_info,
786 format, layer_offset);
787 }
788
789 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
790
791 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
792 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
793 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
794 }
795
796 void
797 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
798 VkBuffer srcBuffer,
799 VkBuffer destBuffer,
800 uint32_t regionCount,
801 const VkBufferCopy *pRegions)
802 {
803 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
804 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
805 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
806
807 for (unsigned i = 0; i < regionCount; ++i) {
808 uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset;
809 uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset;
810
811 tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo,
812 dst_offset, pRegions[i].size);
813 }
814 }
815
816 void
817 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
818 VkBuffer srcBuffer,
819 VkImage destImage,
820 VkImageLayout destImageLayout,
821 uint32_t regionCount,
822 const VkBufferImageCopy *pRegions)
823 {
824 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
825 TU_FROM_HANDLE(tu_image, dest_image, destImage);
826 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
827
828 for (unsigned i = 0; i < regionCount; ++i) {
829 tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
830 pRegions + i);
831 }
832 }
833
834 void
835 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
836 VkImage srcImage,
837 VkImageLayout srcImageLayout,
838 VkBuffer destBuffer,
839 uint32_t regionCount,
840 const VkBufferImageCopy *pRegions)
841 {
842 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
843 TU_FROM_HANDLE(tu_image, src_image, srcImage);
844 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
845
846 for (unsigned i = 0; i < regionCount; ++i) {
847 tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
848 pRegions + i);
849 }
850 }
851
852 void
853 tu_CmdCopyImage(VkCommandBuffer commandBuffer,
854 VkImage srcImage,
855 VkImageLayout srcImageLayout,
856 VkImage destImage,
857 VkImageLayout destImageLayout,
858 uint32_t regionCount,
859 const VkImageCopy *pRegions)
860 {
861 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
862 TU_FROM_HANDLE(tu_image, src_image, srcImage);
863 TU_FROM_HANDLE(tu_image, dest_image, destImage);
864
865 for (uint32_t i = 0; i < regionCount; ++i) {
866 tu_copy_image_to_image(cmd_buffer, src_image, dest_image, pRegions + i);
867 }
868 }