tu: fix empty-body instruction
[mesa.git] / src / freedreno / vulkan / tu_blit.c
1 /*
2 * Copyright © 2019 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 *
26 */
27
28 #include "tu_blit.h"
29
30 #include "a6xx.xml.h"
31 #include "adreno_common.xml.h"
32 #include "adreno_pm4.xml.h"
33
34 #include "vk_format.h"
35
36 #include "tu_cs.h"
37
38 /* TODO:
39 * - Avoid disabling tiling for swapped formats
40 * (image_to_image copy doesn't deal with it)
41 * - Fix d24_unorm_s8_uint support & aspects
42 * - UBWC
43 */
44
45 static VkFormat
46 blit_copy_format(VkFormat format)
47 {
48 switch (vk_format_get_blocksizebits(format)) {
49 case 8: return VK_FORMAT_R8_UINT;
50 case 16: return VK_FORMAT_R16_UINT;
51 case 32: return VK_FORMAT_R8G8B8A8_UINT;
52 case 64: return VK_FORMAT_R32G32_UINT;
53 case 96: return VK_FORMAT_R32G32B32_UINT;
54 case 128:return VK_FORMAT_R32G32B32A32_UINT;
55 default:
56 unreachable("unhandled format size");
57 }
58 }
59
60 static uint32_t
61 blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read)
62 {
63 const struct tu_native_format *fmt = tu6_get_native_format(img->fmt);
64 enum a6xx_color_fmt rb = fmt->rb;
65 enum a3xx_color_swap swap = img->tiled ? WZYX : fmt->swap;
66 if (rb == RB6_R10G10B10A2_UNORM && src)
67 rb = RB6_R10G10B10A2_FLOAT16;
68 if (rb == RB6_X8Z24_UNORM)
69 rb = RB6_Z24_UNORM_S8_UINT;
70
71 if (stencil_read)
72 swap = XYZW;
73
74 return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) |
75 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
76 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) |
77 COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB);
78 }
79
80 static void
81 emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
82 {
83 struct tu_cs *cs = &cmdbuf->cs;
84
85 tu_cs_reserve_space(cmdbuf->device, cs, 52);
86
87 enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb;
88 if (fmt == RB6_X8Z24_UNORM)
89 fmt = RB6_Z24_UNORM_S8_UINT;
90
91 enum a6xx_2d_ifmt ifmt = tu6_rb_fmt_to_ifmt(fmt);
92
93 if (vk_format_is_srgb(blt->dst.fmt)) {
94 assert(ifmt == R2D_UNORM8);
95 ifmt = R2D_UNORM8_SRGB;
96 }
97
98 uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
99 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) | /* not required? */
100 COND(fmt == RB6_Z24_UNORM_S8_UINT, A6XX_RB_2D_BLIT_CNTL_D24S8) |
101 A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
102 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);
103
104 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
105 tu_cs_emit(&cmdbuf->cs, blit_cntl);
106
107 tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
108 tu_cs_emit(&cmdbuf->cs, blit_cntl);
109
110 /*
111 * Emit source:
112 */
113 tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
114 tu_cs_emit(cs, blit_image_info(&blt->src, true, blt->stencil_read) |
115 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
116 /* TODO: should disable this bit for integer formats ? */
117 COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
118 COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
119 0x500000);
120 tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
121 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
122 tu_cs_emit_qw(cs, blt->src.va);
123 tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));
124
125 tu_cs_emit(cs, 0x00000000);
126 tu_cs_emit(cs, 0x00000000);
127 tu_cs_emit(cs, 0x00000000);
128 tu_cs_emit(cs, 0x00000000);
129 tu_cs_emit(cs, 0x00000000);
130
131 /*
132 * Emit destination:
133 */
134 tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
135 tu_cs_emit(cs, blit_image_info(&blt->dst, false, false));
136 tu_cs_emit_qw(cs, blt->dst.va);
137 tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
138 tu_cs_emit(cs, 0x00000000);
139 tu_cs_emit(cs, 0x00000000);
140 tu_cs_emit(cs, 0x00000000);
141 tu_cs_emit(cs, 0x00000000);
142 tu_cs_emit(cs, 0x00000000);
143
144 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
145 tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
146 tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
147 tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
148 tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));
149
150 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
151 tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
152 A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
153 tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
154 A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));
155
156 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
157 tu_cs_emit(cs, 0x3f);
158 tu_cs_emit_wfi(cs);
159
160 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
161 tu_cs_emit(cs, 0);
162
163 if (fmt == RB6_R10G10B10A2_UNORM)
164 fmt = RB6_R16G16B16A16_FLOAT;
165
166 tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
167 tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
168 COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
169 A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt) |
170 COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
171 A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
172
173 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
174 tu_cs_emit(cs, 0x01000000);
175
176 tu_cs_emit_pkt7(cs, CP_BLIT, 1);
177 tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
178
179 tu_cs_emit_wfi(cs);
180
181 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
182 tu_cs_emit(cs, 0);
183 }
184
185 void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt, bool copy)
186 {
187 if (copy) {
188 blt->stencil_read =
189 blt->dst.fmt == VK_FORMAT_R8_UINT &&
190 blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
191
192 assert(vk_format_get_blocksize(blt->dst.fmt) ==
193 vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
194 assert(blt->src.samples == blt->dst.samples);
195
196 if (vk_format_is_compressed(blt->src.fmt)) {
197 unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
198 unsigned block_height = vk_format_get_blockheight(blt->src.fmt);
199
200 blt->src.pitch /= block_width;
201 blt->src.x /= block_width;
202 blt->src.y /= block_height;
203
204 /* for image_to_image copy, width/height is on the src format */
205 blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
206 blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
207 }
208
209 if (vk_format_is_compressed(blt->dst.fmt)) {
210 unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
211 unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);
212
213 blt->dst.pitch /= block_width;
214 blt->dst.x /= block_width;
215 blt->dst.y /= block_height;
216 }
217
218 blt->src.fmt = blit_copy_format(blt->src.fmt);
219 blt->dst.fmt = blit_copy_format(blt->dst.fmt);
220
221 /* TODO: does this work correctly with tiling/etc ? */
222 blt->src.x *= blt->src.samples;
223 blt->dst.x *= blt->dst.samples;
224 blt->src.width *= blt->src.samples;
225 blt->dst.width *= blt->dst.samples;
226 blt->src.samples = 1;
227 blt->dst.samples = 1;
228 } else {
229 assert(blt->dst.samples == 1);
230 }
231
232 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 18);
233
234 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, LRZ_FLUSH, false);
235 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
236 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
237 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_COLOR, false);
238 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_DEPTH, false);
239
240 /* buffer copy setup */
241 tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
242 tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
243
244 for (unsigned layer = 0; layer < blt->layers; layer++) {
245 if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
246 /* per line copy path (buffer_to_image) */
247 assert(copy && !blt->src.tiled);
248 struct tu_blit line_blt = *blt;
249 uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y;
250
251 line_blt.src.y = 0;
252 line_blt.src.pitch = 0;
253 line_blt.src.height = 1;
254 line_blt.dst.height = 1;
255
256 for (unsigned y = 0; y < blt->src.height; y++) {
257 line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
258 line_blt.src.va = src_va & ~63;
259
260 emit_blit_step(cmdbuf, &line_blt);
261
262 line_blt.dst.y++;
263 src_va += blt->src.pitch;
264 }
265 } else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
266 /* per line copy path (image_to_buffer) */
267 assert(copy && !blt->dst.tiled);
268 struct tu_blit line_blt = *blt;
269 uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y;
270
271 line_blt.dst.y = 0;
272 line_blt.dst.pitch = 0;
273 line_blt.src.height = 1;
274 line_blt.dst.height = 1;
275
276 for (unsigned y = 0; y < blt->src.height; y++) {
277 line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
278 line_blt.dst.va = dst_va & ~63;
279
280 emit_blit_step(cmdbuf, &line_blt);
281
282 line_blt.src.y++;
283 dst_va += blt->dst.pitch;
284 }
285 } else {
286 emit_blit_step(cmdbuf, blt);
287 }
288 blt->dst.va += blt->dst.layer_size;
289 blt->src.va += blt->src.layer_size;
290 }
291
292 tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17);
293
294 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
295 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
296 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
297 tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_INVALIDATE, false);
298 }