turnip: implement UBWC
[mesa.git] / src / freedreno / vulkan / tu_meta_copy.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "tu_private.h"
25
26 #include "a6xx.xml.h"
27 #include "adreno_common.xml.h"
28 #include "adreno_pm4.xml.h"
29
30 #include "vk_format.h"
31
32 #include "tu_cs.h"
33 #include "tu_blit.h"
34
35 static uint32_t
36 blit_control(enum a6xx_color_fmt fmt)
37 {
38 unsigned blit_cntl = 0xf00000;
39 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
40 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt));
41 return blit_cntl;
42 }
43
44 static void
45 tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
46 {
47 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10);
48
49 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
50 tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR);
51
52 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
53 tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH);
54
55 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
56 tu_cs_emit(&cmdbuf->cs, 0x0);
57
58 tu_cs_emit_wfi(&cmdbuf->cs);
59
60 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1);
61 tu_cs_emit(&cmdbuf->cs, 0x10000000);
62 }
63
64 static void
65 tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
66 struct tu_bo *src_bo,
67 uint64_t src_offset,
68 struct tu_bo *dst_bo,
69 uint64_t dst_offset,
70 uint64_t size)
71 {
72 const unsigned max_size_per_iter = 0x4000 - 0x40;
73 const unsigned max_iterations =
74 (size + max_size_per_iter) / max_size_per_iter;
75
76 tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ);
77 tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE);
78
79 tu_dma_prepare(cmdbuf);
80
81 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations);
82
83 /* buffer copy setup */
84 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
85 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
86
87 const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
88
89 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
90 tu_cs_emit(&cmdbuf->cs, blit_cntl);
91
92 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
93 tu_cs_emit(&cmdbuf->cs, blit_cntl);
94
95 for (; size;) {
96 uint64_t src_va = src_bo->iova + src_offset;
97 uint64_t dst_va = dst_bo->iova + dst_offset;
98
99 unsigned src_shift = src_va & 0x3f;
100 unsigned dst_shift = dst_va & 0x3f;
101 unsigned max_shift = MAX2(src_shift, dst_shift);
102
103 src_va -= src_shift;
104 dst_va -= dst_shift;
105
106 uint32_t size_todo = MIN2(0x4000 - max_shift, size);
107 unsigned pitch = (size_todo + max_shift + 63) & ~63;
108
109 /*
110 * Emit source:
111 */
112 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
113 tu_cs_emit(&cmdbuf->cs,
114 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
115 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
116 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
117 tu_cs_emit(&cmdbuf->cs,
118 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) |
119 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
120 tu_cs_emit_qw(&cmdbuf->cs, src_va);
121 tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
122
123 tu_cs_emit(&cmdbuf->cs, 0x00000000);
124 tu_cs_emit(&cmdbuf->cs, 0x00000000);
125 tu_cs_emit(&cmdbuf->cs, 0x00000000);
126 tu_cs_emit(&cmdbuf->cs, 0x00000000);
127 tu_cs_emit(&cmdbuf->cs, 0x00000000);
128
129 tu_cs_emit(&cmdbuf->cs, 0x00000000);
130 tu_cs_emit(&cmdbuf->cs, 0x00000000);
131 tu_cs_emit(&cmdbuf->cs, 0x00000000);
132
133 /*
134 * Emit destination:
135 */
136 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
137 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
138 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
139 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
140 tu_cs_emit_qw(&cmdbuf->cs, dst_va);
141
142 tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
143 tu_cs_emit(&cmdbuf->cs, 0x00000000);
144 tu_cs_emit(&cmdbuf->cs, 0x00000000);
145 tu_cs_emit(&cmdbuf->cs, 0x00000000);
146 tu_cs_emit(&cmdbuf->cs, 0x00000000);
147 tu_cs_emit(&cmdbuf->cs, 0x00000000);
148
149 /*
150 * Blit command:
151 */
152 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
153 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift));
154 tu_cs_emit(&cmdbuf->cs,
155 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1));
156 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
157 tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
158
159 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
160 tu_cs_emit(&cmdbuf->cs,
161 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0));
162 tu_cs_emit(&cmdbuf->cs,
163 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) |
164 A6XX_GRAS_2D_DST_BR_Y(0));
165
166 tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
167 tu_cs_emit(&cmdbuf->cs, 0x3f);
168 tu_cs_emit_wfi(&cmdbuf->cs);
169
170 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
171 tu_cs_emit(&cmdbuf->cs, 0);
172
173 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
174 tu_cs_emit(&cmdbuf->cs, 0xf180);
175
176 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
177 tu_cs_emit(&cmdbuf->cs, 0x01000000);
178
179 tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
180 tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
181
182 tu_cs_emit_wfi(&cmdbuf->cs);
183
184 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
185 tu_cs_emit(&cmdbuf->cs, 0);
186
187 src_offset += size_todo;
188 dst_offset += size_todo;
189 size -= size_todo;
190 }
191
192 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
193 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
194 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
195 }
196
197 static struct tu_blit_surf
198 tu_blit_buffer(struct tu_buffer *buffer,
199 VkFormat format,
200 const VkBufferImageCopy *info)
201 {
202 if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
203 format = VK_FORMAT_R8_UNORM;
204
205 unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
206 vk_format_get_blocksize(format);
207
208 return (struct tu_blit_surf) {
209 .fmt = format,
210 .tile_mode = TILE6_LINEAR,
211 .va = buffer->bo->iova + buffer->bo_offset + info->bufferOffset,
212 .pitch = pitch,
213 .layer_size = (info->bufferImageHeight ?: info->imageExtent.height) * pitch / vk_format_get_blockwidth(format) / vk_format_get_blockheight(format),
214 .width = info->imageExtent.width,
215 .height = info->imageExtent.height,
216 .samples = 1,
217 };
218 }
219
220 static void
221 tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
222 struct tu_buffer *src_buffer,
223 struct tu_image *dst_image,
224 const VkBufferImageCopy *info)
225 {
226 if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
227 vk_format_get_blocksize(dst_image->vk_format) == 4) {
228 tu_finishme("aspect mask\n");
229 return;
230 }
231
232 tu_blit(cmdbuf, &(struct tu_blit) {
233 .dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent),
234 .src = tu_blit_buffer(src_buffer, dst_image->vk_format, info),
235 .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
236 .type = TU_BLIT_COPY,
237 });
238 }
239
240 static void
241 tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
242 struct tu_image *src_image,
243 struct tu_buffer *dst_buffer,
244 const VkBufferImageCopy *info)
245 {
246 tu_blit(cmdbuf, &(struct tu_blit) {
247 .dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info),
248 .src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent),
249 .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
250 .type = TU_BLIT_COPY,
251 });
252 }
253
254 static void
255 tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
256 struct tu_image *src_image,
257 struct tu_image *dst_image,
258 const VkImageCopy *info)
259 {
260 if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
261 vk_format_get_blocksize(dst_image->vk_format) == 4) ||
262 (info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
263 vk_format_get_blocksize(src_image->vk_format) == 4)) {
264 tu_finishme("aspect mask\n");
265 return;
266 }
267
268 tu_blit(cmdbuf, &(struct tu_blit) {
269 .dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
270 .src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
271 .layers = info->extent.depth,
272 .type = TU_BLIT_COPY,
273 });
274 }
275
276 void
277 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
278 VkBuffer srcBuffer,
279 VkBuffer destBuffer,
280 uint32_t regionCount,
281 const VkBufferCopy *pRegions)
282 {
283 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
284 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
285 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
286
287 for (unsigned i = 0; i < regionCount; ++i) {
288 uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset;
289 uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset;
290
291 tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo,
292 dst_offset, pRegions[i].size);
293 }
294 }
295
296 void
297 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
298 VkBuffer srcBuffer,
299 VkImage destImage,
300 VkImageLayout destImageLayout,
301 uint32_t regionCount,
302 const VkBufferImageCopy *pRegions)
303 {
304 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
305 TU_FROM_HANDLE(tu_image, dst_image, destImage);
306 TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
307
308 tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
309 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
310
311 for (unsigned i = 0; i < regionCount; ++i)
312 tu_copy_buffer_to_image(cmdbuf, src_buffer, dst_image, pRegions + i);
313 }
314
315 void
316 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
317 VkImage srcImage,
318 VkImageLayout srcImageLayout,
319 VkBuffer destBuffer,
320 uint32_t regionCount,
321 const VkBufferImageCopy *pRegions)
322 {
323 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
324 TU_FROM_HANDLE(tu_image, src_image, srcImage);
325 TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
326
327 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
328 tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
329
330 for (unsigned i = 0; i < regionCount; ++i)
331 tu_copy_image_to_buffer(cmdbuf, src_image, dst_buffer, pRegions + i);
332 }
333
334 void
335 tu_CmdCopyImage(VkCommandBuffer commandBuffer,
336 VkImage srcImage,
337 VkImageLayout srcImageLayout,
338 VkImage destImage,
339 VkImageLayout destImageLayout,
340 uint32_t regionCount,
341 const VkImageCopy *pRegions)
342 {
343 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
344 TU_FROM_HANDLE(tu_image, src_image, srcImage);
345 TU_FROM_HANDLE(tu_image, dst_image, destImage);
346
347 tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
348 tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
349
350 for (uint32_t i = 0; i < regionCount; ++i)
351 tu_copy_image_to_image(cmdbuf, src_image, dst_image, pRegions + i);
352 }