freedreno: Generate headers from xml files
[mesa.git] / src / freedreno / vulkan / tu_meta_copy.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "tu_private.h"
25
26 #include "a6xx.xml.h"
27 #include "adreno_common.xml.h"
28 #include "adreno_pm4.xml.h"
29
30 #include "vk_format.h"
31
32 #include "tu_cs.h"
33
34 /*
35 * TODO:
36 * - image -> image copies
37 * - 3D textures
38 * - compressed image formats (need to divide offset/extent)
39 */
40
41 static uint32_t
42 blit_control(enum a6xx_color_fmt fmt)
43 {
44 unsigned blit_cntl = 0xf00000;
45 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
46 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt));
47 return blit_cntl;
48 }
49
50 static uint32_t tu6_sp_2d_src_format(VkFormat format)
51 {
52 const struct vk_format_description *desc = vk_format_description(format);
53 uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb);
54
55 int channel = vk_format_get_first_non_void_channel(format);
56 if (channel < 0) {
57 /* TODO special format. */
58 return reg;
59 }
60 if (desc->channel[channel].normalized) {
61 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
62 reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
63 reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
64 } else if (desc->channel[channel].pure_integer) {
65 if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
66 reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
67 else
68 reg |= A6XX_SP_2D_SRC_FORMAT_UINT;
69 }
70 return reg;
71 }
72
73 static void
74 tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
75 {
76 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10);
77
78 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
79 tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR);
80
81 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
82 tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH);
83
84 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
85 tu_cs_emit(&cmdbuf->cs, 0x0);
86
87 tu_cs_emit_wfi(&cmdbuf->cs);
88
89 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1);
90 tu_cs_emit(&cmdbuf->cs, 0x10000000);
91 }
92
93 static void
94 tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
95 struct tu_bo *src_bo,
96 uint64_t src_offset,
97 struct tu_bo *dst_bo,
98 uint64_t dst_offset,
99 uint64_t size)
100 {
101 const unsigned max_size_per_iter = 0x4000 - 0x40;
102 const unsigned max_iterations =
103 (size + max_size_per_iter) / max_size_per_iter;
104
105 tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ);
106 tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE);
107
108 tu_dma_prepare(cmdbuf);
109
110 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations);
111
112 /* buffer copy setup */
113 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
114 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
115
116 const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
117
118 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
119 tu_cs_emit(&cmdbuf->cs, blit_cntl);
120
121 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
122 tu_cs_emit(&cmdbuf->cs, blit_cntl);
123
124 for (; size;) {
125 uint64_t src_va = src_bo->iova + src_offset;
126 uint64_t dst_va = dst_bo->iova + dst_offset;
127
128 unsigned src_shift = src_va & 0x3f;
129 unsigned dst_shift = dst_va & 0x3f;
130 unsigned max_shift = MAX2(src_shift, dst_shift);
131
132 src_va -= src_shift;
133 dst_va -= dst_shift;
134
135 uint32_t size_todo = MIN2(0x4000 - max_shift, size);
136 unsigned pitch = (size_todo + max_shift + 63) & ~63;
137
138 /*
139 * Emit source:
140 */
141 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
142 tu_cs_emit(&cmdbuf->cs,
143 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
144 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
145 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
146 tu_cs_emit(&cmdbuf->cs,
147 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) |
148 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
149 tu_cs_emit_qw(&cmdbuf->cs, src_va);
150 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
151
152 tu_cs_emit(&cmdbuf->cs, 0x00000000);
153 tu_cs_emit(&cmdbuf->cs, 0x00000000);
154 tu_cs_emit(&cmdbuf->cs, 0x00000000);
155 tu_cs_emit(&cmdbuf->cs, 0x00000000);
156 tu_cs_emit(&cmdbuf->cs, 0x00000000);
157
158 tu_cs_emit(&cmdbuf->cs, 0x00000000);
159 tu_cs_emit(&cmdbuf->cs, 0x00000000);
160 tu_cs_emit(&cmdbuf->cs, 0x00000000);
161
162 /*
163 * Emit destination:
164 */
165 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
166 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
167 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
168 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
169 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
170
171 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
172 tu_cs_emit(&cmdbuf->cs, 0x00000000);
173 tu_cs_emit(&cmdbuf->cs, 0x00000000);
174 tu_cs_emit(&cmdbuf->cs, 0x00000000);
175 tu_cs_emit(&cmdbuf->cs, 0x00000000);
176 tu_cs_emit(&cmdbuf->cs, 0x00000000);
177
178 /*
179 * Blit command:
180 */
181 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
182 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift));
183 tu_cs_emit(&cmdbuf->cs,
184 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1));
185 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
186 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
187
188 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
189 tu_cs_emit(&cmdbuf->cs,
190 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0));
191 tu_cs_emit(&cmdbuf->cs,
192 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) |
193 A6XX_GRAS_2D_DST_BR_Y(0));
194
195 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
196 tu_cs_emit(&cmdbuf->cs, 0x3f);
197 tu_cs_emit_wfi(&cmdbuf->cs);
198
199 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
200 tu_cs_emit(&cmdbuf->cs, 0);
201
202 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
203 tu_cs_emit(&cmdbuf->cs, 0xf180);
204
205 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
206 tu_cs_emit(&cmdbuf->cs, 0x01000000);
207
208 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
209 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
210
211 tu_cs_emit_wfi(&cmdbuf->cs);
212
213 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
214 tu_cs_emit(&cmdbuf->cs, 0);
215
216 src_offset += size_todo;
217 dst_offset += size_todo;
218 size -= size_todo;
219 }
220
221 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
222 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
223 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
224 }
225
226 static void
227 tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf,
228 struct tu_buffer *src_buffer,
229 struct tu_image *dst_image,
230 const VkBufferImageCopy *copy_info,
231 VkFormat format,
232 uint32_t layer,
233 uint64_t src_va)
234 {
235 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
236
237 uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset;
238 unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch *
239 vk_format_get_blocksize(format);
240
241 unsigned src_pitch;
242 unsigned src_offset = 0;
243 if (copy_info->imageExtent.height == 1) {
244 /* Can't find this in the spec, but not having it is sort of insane? */
245 assert(src_va % vk_format_get_blocksize(format) == 0);
246
247 src_offset = (src_va & 63) / vk_format_get_blocksize(format);
248 src_va &= ~63;
249
250 src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
251 } else {
252 unsigned src_pixel_stride = copy_info->bufferRowLength
253 ? copy_info->bufferRowLength
254 : copy_info->imageExtent.width;
255 src_pitch = src_pixel_stride * vk_format_get_blocksize(format);
256 assert(!(src_pitch & 63));
257 assert(!(src_va & 63));
258 }
259
260 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
261
262 /*
263 * Emit source:
264 */
265 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
266 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
267 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
268 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
269 0x500000);
270 tu_cs_emit(&cmdbuf->cs,
271 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) |
272 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
273 copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */
274 tu_cs_emit_qw(&cmdbuf->cs, src_va);
275 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
276
277 tu_cs_emit(&cmdbuf->cs, 0x00000000);
278 tu_cs_emit(&cmdbuf->cs, 0x00000000);
279 tu_cs_emit(&cmdbuf->cs, 0x00000000);
280 tu_cs_emit(&cmdbuf->cs, 0x00000000);
281 tu_cs_emit(&cmdbuf->cs, 0x00000000);
282
283 tu_cs_emit(&cmdbuf->cs, 0x00000000);
284 tu_cs_emit(&cmdbuf->cs, 0x00000000);
285 tu_cs_emit(&cmdbuf->cs, 0x00000000);
286
287 /*
288 * Emit destination:
289 */
290 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
291 tu_cs_emit(&cmdbuf->cs,
292 A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
293 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
294 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
295 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
296 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
297 tu_cs_emit(&cmdbuf->cs, 0x00000000);
298 tu_cs_emit(&cmdbuf->cs, 0x00000000);
299 tu_cs_emit(&cmdbuf->cs, 0x00000000);
300 tu_cs_emit(&cmdbuf->cs, 0x00000000);
301 tu_cs_emit(&cmdbuf->cs, 0x00000000);
302
303 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
304 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset));
305 tu_cs_emit(&cmdbuf->cs,
306 A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1));
307 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
308 tu_cs_emit(&cmdbuf->cs,
309 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1));
310
311 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
312 tu_cs_emit(&cmdbuf->cs,
313 A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) |
314 A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y));
315 tu_cs_emit(&cmdbuf->cs,
316 A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x +
317 copy_info->imageExtent.width - 1) |
318 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y +
319 copy_info->imageExtent.height - 1));
320
321 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
322 tu_cs_emit(&cmdbuf->cs, 0x3f);
323 tu_cs_emit_wfi(&cmdbuf->cs);
324
325 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
326 tu_cs_emit(&cmdbuf->cs, 0);
327
328 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
329 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
330
331 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
332 tu_cs_emit(&cmdbuf->cs, 0x01000000);
333
334 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
335 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
336
337 tu_cs_emit_wfi(&cmdbuf->cs);
338
339 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
340 tu_cs_emit(&cmdbuf->cs, 0);
341 }
342
343 static void
344 tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
345 struct tu_buffer *src_buffer,
346 struct tu_image *dst_image,
347 const VkBufferImageCopy *copy_info)
348 {
349 tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
350 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
351
352 /* general setup */
353 tu_dma_prepare(cmdbuf);
354
355 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
356
357 /* buffer copy setup */
358 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
359 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
360
361 VkFormat format = dst_image->vk_format;
362 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
363
364 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
365
366 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
367 tu_cs_emit(&cmdbuf->cs, blit_cntl);
368
369 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
370 tu_cs_emit(&cmdbuf->cs, blit_cntl);
371
372 unsigned src_pixel_stride = copy_info->bufferRowLength
373 ? copy_info->bufferRowLength
374 : copy_info->imageExtent.width;
375 unsigned cpp = vk_format_get_blocksize(format);
376 unsigned src_pitch = src_pixel_stride * cpp;
377
378 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
379 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
380 uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
381
382 if ((src_pitch & 63) || (src_va & 63)) {
383 /* Do a per line copy */
384 VkBufferImageCopy line_copy_info = *copy_info;
385 line_copy_info.imageExtent.height = 1;
386 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
387 /*
388 * if src_va is not aligned the line copy will need to adjust. Give it
389 * room to do so.
390 */
391 unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0;
392 line_copy_info.imageOffset.x = copy_info->imageOffset.x;
393 line_copy_info.imageExtent.width = copy_info->imageExtent.width;
394
395 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
396 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
397
398 line_copy_info.imageOffset.x += max_width;
399 line_copy_info.imageExtent.width -= max_width;
400 }
401
402 line_copy_info.imageOffset.y++;
403 src_va += src_pitch;
404 }
405 } else {
406 tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
407 }
408 }
409
410 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
411
412 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
413 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
414 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
415 }
416
417 static void
418 tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
419 struct tu_image *src_image,
420 struct tu_buffer *dst_buffer,
421 const VkBufferImageCopy *copy_info,
422 VkFormat format,
423 uint32_t layer,
424 uint64_t dst_va)
425 {
426 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
427
428 uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
429 unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
430 vk_format_get_blocksize(format);
431
432 unsigned dst_pitch;
433 unsigned dst_offset = 0;
434 if (copy_info->imageExtent.height == 1) {
435 /* Can't find this in the spec, but not having it is sort of insane? */
436 assert(dst_va % vk_format_get_blocksize(format) == 0);
437
438 dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
439 dst_va &= ~63;
440
441 dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
442 } else {
443 unsigned dst_pixel_stride = copy_info->bufferRowLength
444 ? copy_info->bufferRowLength
445 : copy_info->imageExtent.width;
446 dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
447 assert(!(dst_pitch & 63));
448 assert(!(dst_va & 63));
449 }
450
451
452 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
453
454 /*
455 * Emit source:
456 */
457 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
458 tu_cs_emit(&cmdbuf->cs,
459 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
460 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
461 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
462 tu_cs_emit(&cmdbuf->cs,
463 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
464 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
465 src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
466 tu_cs_emit_qw(&cmdbuf->cs, src_va);
467 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
468
469 tu_cs_emit(&cmdbuf->cs, 0x00000000);
470 tu_cs_emit(&cmdbuf->cs, 0x00000000);
471 tu_cs_emit(&cmdbuf->cs, 0x00000000);
472 tu_cs_emit(&cmdbuf->cs, 0x00000000);
473 tu_cs_emit(&cmdbuf->cs, 0x00000000);
474
475 tu_cs_emit(&cmdbuf->cs, 0x00000000);
476 tu_cs_emit(&cmdbuf->cs, 0x00000000);
477 tu_cs_emit(&cmdbuf->cs, 0x00000000);
478
479 /*
480 * Emit destination:
481 */
482 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
483 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
484 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
485 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
486 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
487 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
488 tu_cs_emit(&cmdbuf->cs, 0x00000000);
489 tu_cs_emit(&cmdbuf->cs, 0x00000000);
490 tu_cs_emit(&cmdbuf->cs, 0x00000000);
491 tu_cs_emit(&cmdbuf->cs, 0x00000000);
492 tu_cs_emit(&cmdbuf->cs, 0x00000000);
493
494 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
495 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
496 tu_cs_emit(&cmdbuf->cs,
497 A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
498 copy_info->imageExtent.width - 1));
499 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
500 tu_cs_emit(&cmdbuf->cs,
501 A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
502 copy_info->imageExtent.height - 1));
503
504 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
505 tu_cs_emit(&cmdbuf->cs,
506 A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
507 tu_cs_emit(&cmdbuf->cs,
508 A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
509 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
510
511 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
512 tu_cs_emit(&cmdbuf->cs, 0x3f);
513 tu_cs_emit_wfi(&cmdbuf->cs);
514
515 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
516 tu_cs_emit(&cmdbuf->cs, 0);
517
518 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
519 tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
520
521 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
522 tu_cs_emit(&cmdbuf->cs, 0x01000000);
523
524 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
525 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
526
527 tu_cs_emit_wfi(&cmdbuf->cs);
528
529 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
530 tu_cs_emit(&cmdbuf->cs, 0);
531 }
532
533 static void
534 tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
535 struct tu_image *src_image,
536 struct tu_buffer *dst_buffer,
537 const VkBufferImageCopy *copy_info)
538 {
539 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
540 tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
541
542 /* general setup */
543 tu_dma_prepare(cmdbuf);
544
545 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
546
547 /* buffer copy setup */
548 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
549 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
550
551 VkFormat format = src_image->vk_format;
552 const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
553
554 unsigned dst_pixel_stride = copy_info->bufferRowLength
555 ? copy_info->bufferRowLength
556 : copy_info->imageExtent.width;
557 unsigned cpp = vk_format_get_blocksize(format);
558 unsigned dst_pitch = dst_pixel_stride * cpp;
559
560
561 const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
562
563 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
564 tu_cs_emit(&cmdbuf->cs, blit_cntl);
565
566 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
567 tu_cs_emit(&cmdbuf->cs, blit_cntl);
568
569 for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
570 unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
571 uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
572
573 if ((dst_pitch & 63) || (dst_va & 63)) {
574 /* Do a per line copy */
575 VkBufferImageCopy line_copy_info = *copy_info;
576 line_copy_info.imageExtent.height = 1;
577 for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
578 /*
579 * if dst_va is not aligned the line copy will need to adjust. Give it
580 * room to do so.
581 */
582 unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
583 line_copy_info.imageOffset.x = copy_info->imageOffset.x;
584 line_copy_info.imageExtent.width = copy_info->imageExtent.width;
585
586 for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
587 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
588
589 line_copy_info.imageOffset.x += max_width;
590 line_copy_info.imageExtent.width -= max_width;
591 }
592
593 line_copy_info.imageOffset.y++;
594 dst_va += dst_pitch;
595 }
596 } else {
597 tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
598 }
599 }
600
601 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
602
603 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
604 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
605 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
606 }
607
608 void
609 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
610 VkBuffer srcBuffer,
611 VkBuffer destBuffer,
612 uint32_t regionCount,
613 const VkBufferCopy *pRegions)
614 {
615 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
616 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
617 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
618
619 for (unsigned i = 0; i < regionCount; ++i) {
620 uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset;
621 uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset;
622
623 tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo,
624 dst_offset, pRegions[i].size);
625 }
626 }
627
628 void
629 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
630 VkBuffer srcBuffer,
631 VkImage destImage,
632 VkImageLayout destImageLayout,
633 uint32_t regionCount,
634 const VkBufferImageCopy *pRegions)
635 {
636 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
637 TU_FROM_HANDLE(tu_image, dest_image, destImage);
638 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
639
640 for (unsigned i = 0; i < regionCount; ++i) {
641 tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
642 pRegions + i);
643 }
644 }
645
646 void
647 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
648 VkImage srcImage,
649 VkImageLayout srcImageLayout,
650 VkBuffer destBuffer,
651 uint32_t regionCount,
652 const VkBufferImageCopy *pRegions)
653 {
654 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
655 TU_FROM_HANDLE(tu_image, src_image, srcImage);
656 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
657
658 for (unsigned i = 0; i < regionCount; ++i) {
659 tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
660 pRegions + i);
661 }
662 }
663
664 static void
665 meta_copy_image(struct tu_cmd_buffer *cmd_buffer,
666 struct tu_image *src_image,
667 VkImageLayout src_image_layout,
668 struct tu_image *dest_image,
669 VkImageLayout dest_image_layout,
670 uint32_t regionCount,
671 const VkImageCopy *pRegions)
672 {
673 }
674
675 void
676 tu_CmdCopyImage(VkCommandBuffer commandBuffer,
677 VkImage srcImage,
678 VkImageLayout srcImageLayout,
679 VkImage destImage,
680 VkImageLayout destImageLayout,
681 uint32_t regionCount,
682 const VkImageCopy *pRegions)
683 {
684 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
685 TU_FROM_HANDLE(tu_image, src_image, srcImage);
686 TU_FROM_HANDLE(tu_image, dest_image, destImage);
687
688 meta_copy_image(cmd_buffer, src_image, srcImageLayout, dest_image,
689 destImageLayout, regionCount, pRegions);
690 }