turnip: Add buffer->image DMA copies.
[mesa.git] / src / freedreno / vulkan / tu_meta_copy.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "tu_private.h"
25
26 #include "a6xx.xml.h"
27 #include "adreno_common.xml.h"
28 #include "adreno_pm4.xml.h"
29
30 #include "vk_format.h"
31
32 #include "tu_cs.h"
33
34 static uint32_t
35 blit_control(enum a6xx_color_fmt fmt)
36 {
37 unsigned blit_cntl = 0xf00000;
38 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
39 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt));
40 return blit_cntl;
41 }
42
43 static uint32_t tu6_sp_2d_src_format(VkFormat format)
44 {
45 const struct vk_format_description *desc = vk_format_description(format);
46 uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb);
47
48 int channel = vk_format_get_first_non_void_channel(format);
49 if (channel < 0) {
50 /* TODO special format. */
51 return reg;
52 }
53 if (desc->channel[channel].normalized) {
54 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
55 reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
56 reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
57 } else if (desc->channel[channel].pure_integer) {
58 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
59 reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
60 else
61 reg |= A6XX_SP_2D_SRC_FORMAT_UINT;
62 }
63 return reg;
64 }
65
66 static void
67 tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
68 {
69 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10);
70
71 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
72 tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR);
73
74 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
75 tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH);
76
77 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
78 tu_cs_emit(&cmdbuf->cs, 0x0);
79
80 tu_cs_emit_wfi(&cmdbuf->cs);
81
82 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1);
83 tu_cs_emit(&cmdbuf->cs, 0x10000000);
84 }
85
86 static void
87 tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
88 struct tu_bo *src_bo,
89 uint64_t src_offset,
90 struct tu_bo *dst_bo,
91 uint64_t dst_offset,
92 uint64_t size)
93 {
94 const unsigned max_size_per_iter = 0x4000 - 0x40;
95 const unsigned max_iterations =
96 (size + max_size_per_iter) / max_size_per_iter;
97
98 tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ);
99 tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE);
100
101 tu_dma_prepare(cmdbuf);
102
103 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations);
104
105 /* buffer copy setup */
106 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
107 tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
108
109 const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
110
111 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
112 tu_cs_emit(&cmdbuf->cs, blit_cntl);
113
114 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
115 tu_cs_emit(&cmdbuf->cs, blit_cntl);
116
117 for (; size;) {
118 uint64_t src_va = src_bo->iova + src_offset;
119 uint64_t dst_va = dst_bo->iova + dst_offset;
120
121 unsigned src_shift = src_va & 0x3f;
122 unsigned dst_shift = dst_va & 0x3f;
123 unsigned max_shift = MAX2(src_shift, dst_shift);
124
125 src_va -= src_shift;
126 dst_va -= dst_shift;
127
128 uint32_t size_todo = MIN2(0x4000 - max_shift, size);
129 unsigned pitch = (size_todo + max_shift + 63) & ~63;
130
131 /*
132 * Emit source:
133 */
134 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
135 tu_cs_emit(&cmdbuf->cs,
136 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
137 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
138 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
139 tu_cs_emit(&cmdbuf->cs,
140 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) |
141 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
142 tu_cs_emit_qw(&cmdbuf->cs, src_va);
143 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
144
145 tu_cs_emit(&cmdbuf->cs, 0x00000000);
146 tu_cs_emit(&cmdbuf->cs, 0x00000000);
147 tu_cs_emit(&cmdbuf->cs, 0x00000000);
148 tu_cs_emit(&cmdbuf->cs, 0x00000000);
149 tu_cs_emit(&cmdbuf->cs, 0x00000000);
150
151 tu_cs_emit(&cmdbuf->cs, 0x00000000);
152 tu_cs_emit(&cmdbuf->cs, 0x00000000);
153 tu_cs_emit(&cmdbuf->cs, 0x00000000);
154
155 /*
156 * Emit destination:
157 */
158 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
159 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
160 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
161 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
162 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
163
164 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
165 tu_cs_emit(&cmdbuf->cs, 0x00000000);
166 tu_cs_emit(&cmdbuf->cs, 0x00000000);
167 tu_cs_emit(&cmdbuf->cs, 0x00000000);
168 tu_cs_emit(&cmdbuf->cs, 0x00000000);
169 tu_cs_emit(&cmdbuf->cs, 0x00000000);
170
171 /*
172 * Blit command:
173 */
174 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
175 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift));
176 tu_cs_emit(&cmdbuf->cs,
177 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1));
178 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
179 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
180
181 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
182 tu_cs_emit(&cmdbuf->cs,
183 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0));
184 tu_cs_emit(&cmdbuf->cs,
185 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) |
186 A6XX_GRAS_2D_DST_BR_Y(0));
187
188 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
189 tu_cs_emit(&cmdbuf->cs, 0x3f);
190 tu_cs_emit_wfi(&cmdbuf->cs);
191
192 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
193 tu_cs_emit(&cmdbuf->cs, 0);
194
195 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
196 tu_cs_emit(&cmdbuf->cs, 0xf180);
197
198 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
199 tu_cs_emit(&cmdbuf->cs, 0x01000000);
200
201 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
202 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
203
204 tu_cs_emit_wfi(&cmdbuf->cs);
205
206 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
207 tu_cs_emit(&cmdbuf->cs, 0);
208
209 src_offset += size_todo;
210 dst_offset += size_todo;
211 size -= size_todo;
212 }
213
214 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
215 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
216 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
217 }
218
219 static void
220 tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf,
221 struct tu_buffer *src_buffer,
222 struct tu_image *dst_image,
223 const VkBufferImageCopy *copy_info,
224 VkFormat format,
225 uint32_t layer,
226 uint64_t src_va)
227 {
228 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
229
230 uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset;
231 unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch *
232 vk_format_get_blocksize(format);
233
234 unsigned src_pitch;
235 unsigned src_offset = 0;
236 if (copy_info->imageExtent.height == 1) {
237 /* Can't find this in the spec, but not having it is sort of insane? */
238 assert(src_va % vk_format_get_blocksize(format) == 0);
239
240 src_offset = (src_va & 63) / vk_format_get_blocksize(format);
241 src_va &= ~63;
242
243 src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
244 } else {
245 unsigned src_pixel_stride = copy_info->bufferRowLength
246 ? copy_info->bufferRowLength
247 : copy_info->imageExtent.width;
248 src_pitch = src_pixel_stride * vk_format_get_blocksize(format);
249 assert(!(src_pitch & 63));
250 assert(!(src_va & 63));
251 }
252
253 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
254
255 /*
256 * Emit source:
257 */
258 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
259 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
260 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
261 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
262 0x500000);
263 tu_cs_emit(&cmdbuf->cs,
264 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) |
265 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
266 copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */
267 tu_cs_emit_qw(&cmdbuf->cs, src_va);
268 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
269
270 tu_cs_emit(&cmdbuf->cs, 0x00000000);
271 tu_cs_emit(&cmdbuf->cs, 0x00000000);
272 tu_cs_emit(&cmdbuf->cs, 0x00000000);
273 tu_cs_emit(&cmdbuf->cs, 0x00000000);
274 tu_cs_emit(&cmdbuf->cs, 0x00000000);
275
276 tu_cs_emit(&cmdbuf->cs, 0x00000000);
277 tu_cs_emit(&cmdbuf->cs, 0x00000000);
278 tu_cs_emit(&cmdbuf->cs, 0x00000000);
279
280 /*
281 * Emit destination:
282 */
283 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
284 tu_cs_emit(&cmdbuf->cs,
285 A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
286 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
287 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
288 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
289 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
290 tu_cs_emit(&cmdbuf->cs, 0x00000000);
291 tu_cs_emit(&cmdbuf->cs, 0x00000000);
292 tu_cs_emit(&cmdbuf->cs, 0x00000000);
293 tu_cs_emit(&cmdbuf->cs, 0x00000000);
294 tu_cs_emit(&cmdbuf->cs, 0x00000000);
295
296 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
297 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset));
298 tu_cs_emit(&cmdbuf->cs,
299 A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1));
300 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
301 tu_cs_emit(&cmdbuf->cs,
302 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1));
303
304 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
305 tu_cs_emit(&cmdbuf->cs,
306 A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) |
307 A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y));
308 tu_cs_emit(&cmdbuf->cs,
309 A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x +
310 copy_info->imageExtent.width - 1) |
311 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y +
312 copy_info->imageExtent.height - 1));
313
314 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
315 tu_cs_emit(&cmdbuf->cs, 0x3f);
316 tu_cs_emit_wfi(&cmdbuf->cs);
317
318 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
319 tu_cs_emit(&cmdbuf->cs, 0);
320
321 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
322 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
323
324 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
325 tu_cs_emit(&cmdbuf->cs, 0x01000000);
326
327 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
328 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
329
330 tu_cs_emit_wfi(&cmdbuf->cs);
331
332 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
333 tu_cs_emit(&cmdbuf->cs, 0);
334 }
335
336 static void
337 tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
338 struct tu_buffer *src_buffer,
339 struct tu_image *dst_image,
340 const VkBufferImageCopy *copy_info)
341 {
342 tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
343 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
344
345 /* general setup */
346 tu_dma_prepare(cmdbuf);
347
348 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
349
350 /* buffer copy setup */
351 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
352 tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
353
354 VkFormat format = dst_image->vk_format;
355 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
356
357 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
358
359 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
360 tu_cs_emit(&cmdbuf->cs, blit_cntl);
361
362 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
363 tu_cs_emit(&cmdbuf->cs, blit_cntl);
364
365 unsigned src_pixel_stride = copy_info->bufferRowLength
366 ? copy_info->bufferRowLength
367 : copy_info->imageExtent.width;
368 unsigned cpp = vk_format_get_blocksize(format);
369 unsigned src_pitch = src_pixel_stride * cpp;
370
371 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
372 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
373 uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
374
375 if ((src_pitch & 63) || (src_va & 63)) {
376 /* Do a per line copy */
377 VkBufferImageCopy line_copy_info = *copy_info;
378 line_copy_info.imageExtent.height = 1;
379 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
380 /*
381 * if src_va is not aligned the line copy will need to adjust. Give it
382 * room to do so.
383 */
384 unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0;
385 line_copy_info.imageOffset.x = copy_info->imageOffset.x;
386 line_copy_info.imageExtent.width = copy_info->imageExtent.width;
387
388 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
389 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
390
391 line_copy_info.imageOffset.x += max_width;
392 line_copy_info.imageExtent.width -= max_width;
393 }
394
395 line_copy_info.imageOffset.y++;
396 src_va += src_pitch;
397 }
398 } else {
399 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
400 }
401 }
402
403 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
404
405 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
406 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
407 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
408 }
409
410 static void
411 tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
412 struct tu_image *src_image,
413 struct tu_buffer *dst_buffer,
414 const VkBufferImageCopy *copy_info,
415 VkFormat format,
416 uint32_t layer,
417 uint64_t dst_va)
418 {
419 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
420
421 uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
422 unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
423 vk_format_get_blocksize(format);
424
425 unsigned dst_pitch;
426 unsigned dst_offset = 0;
427 if (copy_info->imageExtent.height == 1) {
428 /* Can't find this in the spec, but not having it is sort of insane? */
429 assert(dst_va % vk_format_get_blocksize(format) == 0);
430
431 dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
432 dst_va &= ~63;
433
434 dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
435 } else {
436 unsigned dst_pixel_stride = copy_info->bufferRowLength
437 ? copy_info->bufferRowLength
438 : copy_info->imageExtent.width;
439 dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
440 assert(!(dst_pitch & 63));
441 assert(!(dst_va & 63));
442 }
443
444
445 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
446
447 /*
448 * Emit source:
449 */
450 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
451 tu_cs_emit(&cmdbuf->cs,
452 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
453 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
454 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
455 tu_cs_emit(&cmdbuf->cs,
456 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
457 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
458 src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
459 tu_cs_emit_qw(&cmdbuf->cs, src_va);
460 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
461
462 tu_cs_emit(&cmdbuf->cs, 0x00000000);
463 tu_cs_emit(&cmdbuf->cs, 0x00000000);
464 tu_cs_emit(&cmdbuf->cs, 0x00000000);
465 tu_cs_emit(&cmdbuf->cs, 0x00000000);
466 tu_cs_emit(&cmdbuf->cs, 0x00000000);
467
468 tu_cs_emit(&cmdbuf->cs, 0x00000000);
469 tu_cs_emit(&cmdbuf->cs, 0x00000000);
470 tu_cs_emit(&cmdbuf->cs, 0x00000000);
471
472 /*
473 * Emit destination:
474 */
475 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
476 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
477 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
478 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
479 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
480 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
481 tu_cs_emit(&cmdbuf->cs, 0x00000000);
482 tu_cs_emit(&cmdbuf->cs, 0x00000000);
483 tu_cs_emit(&cmdbuf->cs, 0x00000000);
484 tu_cs_emit(&cmdbuf->cs, 0x00000000);
485 tu_cs_emit(&cmdbuf->cs, 0x00000000);
486
487 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
488 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
489 tu_cs_emit(&cmdbuf->cs,
490 A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
491 copy_info->imageExtent.width - 1));
492 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
493 tu_cs_emit(&cmdbuf->cs,
494 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
495 copy_info->imageExtent.height - 1));
496
497 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
498 tu_cs_emit(&cmdbuf->cs,
499 A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
500 tu_cs_emit(&cmdbuf->cs,
501 A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
502 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
503
504 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
505 tu_cs_emit(&cmdbuf->cs, 0x3f);
506 tu_cs_emit_wfi(&cmdbuf->cs);
507
508 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
509 tu_cs_emit(&cmdbuf->cs, 0);
510
511 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
512 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
513
514 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
515 tu_cs_emit(&cmdbuf->cs, 0x01000000);
516
517 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
518 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
519
520 tu_cs_emit_wfi(&cmdbuf->cs);
521
522 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
523 tu_cs_emit(&cmdbuf->cs, 0);
524 }
525
526 static void
527 tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
528 struct tu_image *src_image,
529 struct tu_buffer *dst_buffer,
530 const VkBufferImageCopy *copy_info)
531 {
532 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
533 tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
534
535 /* general setup */
536 tu_dma_prepare(cmdbuf);
537
538 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
539
540 /* buffer copy setup */
541 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
542 tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
543
544 VkFormat format = src_image->vk_format;
545 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
546
547 unsigned dst_pixel_stride = copy_info->bufferRowLength
548 ? copy_info->bufferRowLength
549 : copy_info->imageExtent.width;
550 unsigned cpp = vk_format_get_blocksize(format);
551 unsigned dst_pitch = dst_pixel_stride * cpp;
552
553
554 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
555
556 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
557 tu_cs_emit(&cmdbuf->cs, blit_cntl);
558
559 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
560 tu_cs_emit(&cmdbuf->cs, blit_cntl);
561
562 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
563 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
564 uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
565
566 if ((dst_pitch & 63) || (dst_va & 63)) {
567 /* Do a per line copy */
568 VkBufferImageCopy line_copy_info = *copy_info;
569 line_copy_info.imageExtent.height = 1;
570 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
571 /*
572 * if dst_va is not aligned the line copy will need to adjust. Give it
573 * room to do so.
574 */
575 unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
576 line_copy_info.imageOffset.x = copy_info->imageOffset.x;
577 line_copy_info.imageExtent.width = copy_info->imageExtent.width;
578
579 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
580 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
581
582 line_copy_info.imageOffset.x += max_width;
583 line_copy_info.imageExtent.width -= max_width;
584 }
585
586 line_copy_info.imageOffset.y++;
587 dst_va += dst_pitch;
588 }
589 } else {
590 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
591 }
592 }
593
594 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
595
596 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
597 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
598 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
599 }
600
601 void
602 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
603 VkBuffer srcBuffer,
604 VkBuffer destBuffer,
605 uint32_t regionCount,
606 const VkBufferCopy *pRegions)
607 {
608 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
609 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
610 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
611
612 for (unsigned i = 0; i < regionCount; ++i) {
613 uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset;
614 uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset;
615
616 tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo,
617 dst_offset, pRegions[i].size);
618 }
619 }
620
621 void
622 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
623 VkBuffer srcBuffer,
624 VkImage destImage,
625 VkImageLayout destImageLayout,
626 uint32_t regionCount,
627 const VkBufferImageCopy *pRegions)
628 {
629 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
630 TU_FROM_HANDLE(tu_image, dest_image, destImage);
631 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
632
633 for (unsigned i = 0; i < regionCount; ++i) {
634 tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
635 pRegions + i);
636 }
637 }
638
639 void
640 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
641 VkImage srcImage,
642 VkImageLayout srcImageLayout,
643 VkBuffer destBuffer,
644 uint32_t regionCount,
645 const VkBufferImageCopy *pRegions)
646 {
647 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
648 TU_FROM_HANDLE(tu_image, src_image, srcImage);
649 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
650
651 for (unsigned i = 0; i < regionCount; ++i) {
652 tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
653 pRegions + i);
654 }
655 }
656
657 static void
658 meta_copy_image(struct tu_cmd_buffer *cmd_buffer,
659 struct tu_image *src_image,
660 VkImageLayout src_image_layout,
661 struct tu_image *dest_image,
662 VkImageLayout dest_image_layout,
663 uint32_t regionCount,
664 const VkImageCopy *pRegions)
665 {
666 }
667
668 void
669 tu_CmdCopyImage(VkCommandBuffer commandBuffer,
670 VkImage srcImage,
671 VkImageLayout srcImageLayout,
672 VkImage destImage,
673 VkImageLayout destImageLayout,
674 uint32_t regionCount,
675 const VkImageCopy *pRegions)
676 {
677 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
678 TU_FROM_HANDLE(tu_image, src_image, srcImage);
679 TU_FROM_HANDLE(tu_image, dest_image, destImage);
680
681 meta_copy_image(cmd_buffer, src_image, srcImageLayout, dest_image,
682 destImageLayout, regionCount, pRegions);
683 }