6dd482ca2f3f694eea784a9f268373dfba216221
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_blitter.c
1 /*
2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include "util/u_dump.h"
29 #include "util/half_float.h"
30
31 #include "freedreno_blitter.h"
32 #include "freedreno_fence.h"
33 #include "freedreno_resource.h"
34
35 #include "fd6_blitter.h"
36 #include "fd6_format.h"
37 #include "fd6_emit.h"
38
39 /* Make sure none of the requested dimensions extend beyond the size of the
40 * resource. Not entirely sure why this happens, but sometimes it does, and
41 * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
42 * back to u_blitter
43 */
44 static bool
45 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
46 {
47 int last_layer =
48 r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl)
49 : r->array_size;
50
51 return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
52 (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
53 (b->z >= 0) && (b->z + b->depth <= last_layer);
54 }
55
56 static bool
57 ok_format(enum pipe_format pfmt)
58 {
59 enum a6xx_color_fmt fmt = fd6_pipe2color(pfmt);
60
61 switch (pfmt) {
62 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
63 case PIPE_FORMAT_Z24X8_UNORM:
64 case PIPE_FORMAT_Z16_UNORM:
65 case PIPE_FORMAT_Z32_UNORM:
66 case PIPE_FORMAT_Z32_FLOAT:
67 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
68 case PIPE_FORMAT_S8_UINT:
69 return true;
70 default:
71 break;
72 }
73
74 if (fmt == ~0)
75 return false;
76
77 if (fd6_ifmt(fmt) == 0)
78 return false;
79
80 return true;
81 }
82
/* Set to 1 to get a dump on stderr of the blits handled/rejected here. */
#define DEBUG_BLIT_FALLBACK 0

/* Print why a blit was rejected, followed by the blit and both resources.
 * Expects a 'const struct pipe_blit_info *info' in the expanding scope.
 */
#define dump_blit_fallback(reason)                                      \
	do {                                                            \
		fprintf(stderr, "falling back: %s for blit:\n", reason); \
		util_dump_blit_info(stderr, info);                      \
		fprintf(stderr, "\nsrc: ");                             \
		util_dump_resource(stderr, info->src.resource);         \
		fprintf(stderr, "\ndst: ");                             \
		util_dump_resource(stderr, info->dst.resource);         \
		fprintf(stderr, "\n");                                  \
	} while (0)

/* Return false from the enclosing function when 'cond' holds, optionally
 * dumping the offending blit first (see DEBUG_BLIT_FALLBACK).
 */
#define fail_if(cond)                                                   \
	do {                                                            \
		if (!(cond))                                            \
			break;                                          \
		if (DEBUG_BLIT_FALLBACK)                                \
			dump_blit_fallback(#cond);                      \
		return false;                                           \
	} while (0)
99
100 static bool
101 can_do_blit(const struct pipe_blit_info *info)
102 {
103 /* I think we can do scaling, but not in z dimension since that would
104 * require blending..
105 */
106 fail_if(info->dst.box.depth != info->src.box.depth);
107
108 /* Fail if unsupported format: */
109 fail_if(!ok_format(info->src.format));
110 fail_if(!ok_format(info->dst.format));
111
112 /* We can blit if both or neither formats are compressed formats... */
113 fail_if(util_format_is_compressed(info->src.format) !=
114 util_format_is_compressed(info->src.format));
115
116 /* ... but only if they're the same compression format. */
117 fail_if(util_format_is_compressed(info->src.format) &&
118 info->src.format != info->dst.format);
119
120 fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
121
122 fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
123
124 debug_assert(info->dst.box.width >= 0);
125 debug_assert(info->dst.box.height >= 0);
126 debug_assert(info->dst.box.depth >= 0);
127
128 /* We could probably blit between resources with equal sample count.. */
129 fail_if(info->dst.resource->nr_samples > 1);
130
131 /* CP_BLIT supports resolving, but seems to pick one only of the samples
132 * (no blending). This doesn't work for RGBA resolves, so we fall back in
133 * that case. However, GL/GLES spec says:
134 *
135 * "If the source formats are integer types or stencil values, a single
136 * sample’s value is selected for each pixel. If the source formats are
137 * floating-point or normalized types, the sample values for each pixel
138 * are resolved in an implementationdependent manner. If the source
139 * formats are depth values, sample values are resolved in an
140 * implementation-dependent manner where the result will be between the
141 * minimum and maximum depth values in the pixel."
142 *
143 * so do those with CP_BLIT.
144 *
145 * TODO since we re-write z/s blits to RGBA, we'll fail this check in some
146 * cases where we don't need to.
147 */
148 fail_if((info->mask & PIPE_MASK_RGBA) &&
149 info->src.resource->nr_samples > 1);
150
151 fail_if(info->window_rectangle_include);
152
153 fail_if(util_format_is_srgb(info->src.format));
154 fail_if(util_format_is_srgb(info->dst.format));
155
156 const struct util_format_description *src_desc =
157 util_format_description(info->src.format);
158 const struct util_format_description *dst_desc =
159 util_format_description(info->dst.format);
160 const int common_channels = MIN2(src_desc->nr_channels, dst_desc->nr_channels);
161
162 if (info->mask & PIPE_MASK_RGBA) {
163 for (int i = 0; i < common_channels; i++) {
164 fail_if(memcmp(&src_desc->channel[i],
165 &dst_desc->channel[i],
166 sizeof(src_desc->channel[0])));
167 }
168 }
169
170 fail_if(info->alpha_blend);
171
172 return true;
173 }
174
175 static void
176 emit_setup(struct fd_batch *batch)
177 {
178 struct fd_ringbuffer *ring = batch->draw;
179
180 fd6_event_write(batch, ring, 0x1d, true);
181 fd6_event_write(batch, ring, FACENESS_FLUSH, true);
182 fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
183 fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
184 }
185
186 static uint32_t
187 blit_control(enum a6xx_color_fmt fmt)
188 {
189 unsigned blit_cntl = 0xf00000;
190 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
191 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(fd6_ifmt(fmt));
192 return blit_cntl;
193 }
194
195 /* buffers need to be handled specially since x/width can exceed the bounds
196 * supported by hw.. if necessary decompose into (potentially) two 2D blits
197 */
198 static void
199 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
200 const struct pipe_blit_info *info)
201 {
202 const struct pipe_box *sbox = &info->src.box;
203 const struct pipe_box *dbox = &info->dst.box;
204 struct fd_resource *src, *dst;
205 unsigned sshift, dshift;
206
207 if (DEBUG_BLIT_FALLBACK) {
208 fprintf(stderr, "buffer blit: ");
209 util_dump_blit_info(stderr, info);
210 fprintf(stderr, "\ndst resource: ");
211 util_dump_resource(stderr, info->dst.resource);
212 fprintf(stderr, "\nsrc resource: ");
213 util_dump_resource(stderr, info->src.resource);
214 fprintf(stderr, "\n");
215 }
216
217 src = fd_resource(info->src.resource);
218 dst = fd_resource(info->dst.resource);
219
220 debug_assert(src->layout.cpp == 1);
221 debug_assert(dst->layout.cpp == 1);
222 debug_assert(info->src.resource->format == info->dst.resource->format);
223 debug_assert((sbox->y == 0) && (sbox->height == 1));
224 debug_assert((dbox->y == 0) && (dbox->height == 1));
225 debug_assert((sbox->z == 0) && (sbox->depth == 1));
226 debug_assert((dbox->z == 0) && (dbox->depth == 1));
227 debug_assert(sbox->width == dbox->width);
228 debug_assert(info->src.level == 0);
229 debug_assert(info->dst.level == 0);
230
231 /*
232 * Buffers can have dimensions bigger than max width, remap into
233 * multiple 1d blits to fit within max dimension
234 *
235 * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
236 * seems to prevent overfetch related faults. Not quite sure what
237 * the deal is there.
238 *
239 * Low 6 bits of SRC/DST addresses need to be zero (ie. address
240 * aligned to 64) so we need to shift src/dst x1/x2 to make up the
241 * difference. On top of already splitting up the blit so width
242 * isn't > 16k.
243 *
244 * We perhaps could do a bit better, if src and dst are aligned but
245 * in the worst case this means we have to split the copy up into
246 * 16k (0x4000) minus 64 (0x40).
247 */
248
249 sshift = sbox->x & 0x3f;
250 dshift = dbox->x & 0x3f;
251
252 OUT_PKT7(ring, CP_SET_MARKER, 1);
253 OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
254
255 uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
256 OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
257 OUT_RING(ring, blit_cntl);
258
259 OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
260 OUT_RING(ring, blit_cntl);
261
262 for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
263 unsigned soff, doff, w, p;
264
265 soff = (sbox->x + off) & ~0x3f;
266 doff = (dbox->x + off) & ~0x3f;
267
268 w = MIN2(sbox->width - off, (0x4000 - 0x40));
269 p = align(w, 64);
270
271 debug_assert((soff + w) <= fd_bo_size(src->bo));
272 debug_assert((doff + w) <= fd_bo_size(dst->bo));
273
274 /*
275 * Emit source:
276 */
277 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
278 OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
279 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
280 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
281 0x500000);
282 OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(sshift + w) |
283 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
284 OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
285 OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(p));
286
287 OUT_RING(ring, 0x00000000);
288 OUT_RING(ring, 0x00000000);
289 OUT_RING(ring, 0x00000000);
290 OUT_RING(ring, 0x00000000);
291 OUT_RING(ring, 0x00000000);
292
293 /*
294 * Emit destination:
295 */
296 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
297 OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
298 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
299 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
300 OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
301 OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(p));
302 OUT_RING(ring, 0x00000000);
303 OUT_RING(ring, 0x00000000);
304 OUT_RING(ring, 0x00000000);
305 OUT_RING(ring, 0x00000000);
306 OUT_RING(ring, 0x00000000);
307
308 /*
309 * Blit command:
310 */
311 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
312 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(sshift));
313 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(sshift + w - 1));
314 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
315 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
316
317 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
318 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
319 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) | A6XX_GRAS_2D_DST_BR_Y(0));
320
321 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
322 OUT_RING(ring, 0x3f);
323 OUT_WFI5(ring);
324
325 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1);
326 OUT_RING(ring, 0);
327
328 OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1);
329 OUT_RING(ring, 0xf180);
330
331 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
332 OUT_RING(ring, fd6_context(ctx)->magic.RB_UNKNOWN_8E04_blit);
333
334 OUT_PKT7(ring, CP_BLIT, 1);
335 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
336
337 OUT_WFI5(ring);
338
339 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
340 OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
341 }
342 }
343
344 static void
345 emit_blit_or_clear_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
346 const struct pipe_blit_info *info, union pipe_color_union *color)
347 {
348 const struct pipe_box *sbox = &info->src.box;
349 const struct pipe_box *dbox = &info->dst.box;
350 struct fd_resource *src, *dst;
351 struct fdl_slice *sslice, *dslice;
352 enum a6xx_color_fmt sfmt, dfmt;
353 enum a6xx_tile_mode stile, dtile;
354 enum a3xx_color_swap sswap, dswap;
355 unsigned spitch, dpitch;
356 int sx1, sy1, sx2, sy2;
357 int dx1, dy1, dx2, dy2;
358
359 if (DEBUG_BLIT_FALLBACK) {
360 fprintf(stderr, "texture blit: ");
361 util_dump_blit_info(stderr, info);
362 fprintf(stderr, "\ndst resource: ");
363 util_dump_resource(stderr, info->dst.resource);
364 fprintf(stderr, "\nsrc resource: ");
365 util_dump_resource(stderr, info->src.resource);
366 fprintf(stderr, "\n");
367 }
368
369 src = fd_resource(info->src.resource);
370 dst = fd_resource(info->dst.resource);
371
372 sslice = fd_resource_slice(src, info->src.level);
373 dslice = fd_resource_slice(dst, info->dst.level);
374
375 sfmt = fd6_pipe2color(info->src.format);
376 dfmt = fd6_pipe2color(info->dst.format);
377
378 int blocksize = util_format_get_blocksize(info->src.format);
379 int blockwidth = util_format_get_blockwidth(info->src.format);
380 int blockheight = util_format_get_blockheight(info->src.format);
381 int nelements;
382
383 stile = fd_resource_tile_mode(info->src.resource, info->src.level);
384 dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);
385
386 sswap = stile ? WZYX : fd6_pipe2swap(info->src.format);
387 dswap = dtile ? WZYX : fd6_pipe2swap(info->dst.format);
388
389 if (util_format_is_compressed(info->src.format)) {
390 debug_assert(info->src.format == info->dst.format);
391 sfmt = dfmt = RB6_R8_UNORM;
392 nelements = blocksize;
393 } else {
394 debug_assert(!util_format_is_compressed(info->dst.format));
395 nelements = (dst->base.nr_samples ? dst->base.nr_samples : 1);
396 }
397
398 spitch = DIV_ROUND_UP(sslice->pitch, blockwidth) * src->layout.cpp;
399 dpitch = DIV_ROUND_UP(dslice->pitch, blockwidth) * dst->layout.cpp;
400
401 sx1 = sbox->x / blockwidth * nelements;
402 sy1 = sbox->y / blockheight;
403 sx2 = DIV_ROUND_UP(sbox->x + sbox->width, blockwidth) * nelements - 1;
404 sy2 = DIV_ROUND_UP(sbox->y + sbox->height, blockheight) - 1;
405
406 dx1 = dbox->x / blockwidth * nelements;
407 dy1 = dbox->y / blockheight;
408 dx2 = DIV_ROUND_UP(dbox->x + dbox->width, blockwidth) * nelements - 1;
409 dy2 = DIV_ROUND_UP(dbox->y + dbox->height, blockheight) - 1;
410
411 uint32_t width = DIV_ROUND_UP(u_minify(src->base.width0, info->src.level), blockwidth) * nelements;
412 uint32_t height = DIV_ROUND_UP(u_minify(src->base.height0, info->src.level), blockheight);
413
414 OUT_PKT7(ring, CP_SET_MARKER, 1);
415 OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
416
417 uint32_t blit_cntl = blit_control(dfmt);
418
419 if (color) {
420 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR;
421
422 switch (info->dst.format) {
423 case PIPE_FORMAT_Z24X8_UNORM:
424 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
425 case PIPE_FORMAT_X24S8_UINT: {
426 uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
427 uint8_t stencil = color->ui[1];
428 color->ui[0] = depth_unorm24 & 0xff;
429 color->ui[1] = (depth_unorm24 >> 8) & 0xff;
430 color->ui[2] = (depth_unorm24 >> 16) & 0xff;
431 color->ui[3] = stencil;
432
433 dfmt = RB6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
434 break;
435 }
436 case PIPE_FORMAT_B5G6R5_UNORM:
437 case PIPE_FORMAT_B5G5R5A1_UNORM:
438 case PIPE_FORMAT_B5G5R5X1_UNORM:
439 case PIPE_FORMAT_B4G4R4A4_UNORM:
440 color->ui[0] = float_to_ubyte(color->f[0]);
441 color->ui[1] = float_to_ubyte(color->f[1]);
442 color->ui[2] = float_to_ubyte(color->f[2]);
443 color->ui[3] = float_to_ubyte(color->f[3]);
444 break;
445 default:
446 break;
447 }
448
449 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
450
451 switch (fd6_ifmt(dfmt)) {
452 case R2D_UNORM8:
453 case R2D_UNORM8_SRGB:
454 OUT_RING(ring, float_to_ubyte(color->f[0]));
455 OUT_RING(ring, float_to_ubyte(color->f[1]));
456 OUT_RING(ring, float_to_ubyte(color->f[2]));
457 OUT_RING(ring, float_to_ubyte(color->f[3]));
458 break;
459 case R2D_FLOAT16:
460 OUT_RING(ring, _mesa_float_to_half(color->f[0]));
461 OUT_RING(ring, _mesa_float_to_half(color->f[1]));
462 OUT_RING(ring, _mesa_float_to_half(color->f[2]));
463 OUT_RING(ring, _mesa_float_to_half(color->f[3]));
464 sfmt = RB6_R16G16B16A16_FLOAT;
465 break;
466
467 case R2D_FLOAT32:
468 case R2D_INT32:
469 case R2D_INT16:
470 case R2D_INT8:
471 case R2D_RAW:
472 default:
473 OUT_RING(ring, color->ui[0]);
474 OUT_RING(ring, color->ui[1]);
475 OUT_RING(ring, color->ui[2]);
476 OUT_RING(ring, color->ui[3]);
477 break;
478 }
479 }
480
481 if (dtile != stile)
482 blit_cntl |= 0x20000000;
483
484 if (info->scissor_enable) {
485 OUT_PKT4(ring, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
486 OUT_RING(ring, A6XX_GRAS_RESOLVE_CNTL_1_X(info->scissor.minx) |
487 A6XX_GRAS_RESOLVE_CNTL_1_Y(info->scissor.miny));
488 OUT_RING(ring, A6XX_GRAS_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
489 A6XX_GRAS_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
490 blit_cntl |= A6XX_RB_2D_BLIT_CNTL_SCISSOR;
491 }
492
493 OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
494 OUT_RING(ring, blit_cntl);
495
496 OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
497 OUT_RING(ring, blit_cntl);
498
499 for (unsigned i = 0; i < info->dst.box.depth; i++) {
500 unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);
501 unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);
502 unsigned subwcoff = fd_resource_ubwc_offset(src, info->src.level, sbox->z + i);
503 unsigned dubwcoff = fd_resource_ubwc_offset(dst, info->dst.level, dbox->z + i);
504 bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
505 bool dubwc_enabled = fd_resource_ubwc_enabled(dst, info->dst.level);
506
507 /*
508 * Emit source:
509 */
510 uint32_t filter = 0;
511 if (info->filter == PIPE_TEX_FILTER_LINEAR)
512 filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
513
514 enum a3xx_msaa_samples samples = fd_msaa_samples(src->base.nr_samples);
515
516 if (sfmt == RB6_R10G10B10A2_UNORM)
517 sfmt = RB6_R10G10B10A2_FLOAT16;
518
519 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
520 OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
521 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
522 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
523 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
524 COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
525 0x500000 | filter);
526 OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
527 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
528 OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
529 OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(spitch));
530
531 OUT_RING(ring, 0x00000000);
532 OUT_RING(ring, 0x00000000);
533 OUT_RING(ring, 0x00000000);
534 OUT_RING(ring, 0x00000000);
535 OUT_RING(ring, 0x00000000);
536
537 if (subwc_enabled) {
538 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
539 OUT_RELOC(ring, src->bo, subwcoff, 0, 0);
540 OUT_RING(ring, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(src->layout.ubwc_pitch) |
541 A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(src->layout.ubwc_size));
542 OUT_RING(ring, 0x00000000);
543 OUT_RING(ring, 0x00000000);
544 OUT_RING(ring, 0x00000000);
545 }
546
547 /*
548 * Emit destination:
549 */
550 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
551 OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
552 A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
553 A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap) |
554 COND(dubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
555 OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
556 OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch));
557 OUT_RING(ring, 0x00000000);
558 OUT_RING(ring, 0x00000000);
559 OUT_RING(ring, 0x00000000);
560 OUT_RING(ring, 0x00000000);
561 OUT_RING(ring, 0x00000000);
562
563 if (dubwc_enabled) {
564 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
565 OUT_RELOCW(ring, dst->bo, dubwcoff, 0, 0);
566 OUT_RING(ring, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(dst->layout.ubwc_pitch) |
567 A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(dst->layout.ubwc_size));
568 OUT_RING(ring, 0x00000000);
569 OUT_RING(ring, 0x00000000);
570 OUT_RING(ring, 0x00000000);
571 }
572
573 /*
574 * Blit command:
575 */
576 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
577 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(sx1));
578 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(sx2));
579 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(sy1));
580 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(sy2));
581
582 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
583 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
584 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
585
586 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
587 OUT_RING(ring, 0x3f);
588 OUT_WFI5(ring);
589
590 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1);
591 OUT_RING(ring, 0);
592
593 if (dfmt == RB6_R10G10B10A2_UNORM)
594 sfmt = RB6_R16G16B16A16_FLOAT;
595
596 OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1);
597 OUT_RING(ring, A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(sfmt) |
598 COND(util_format_is_pure_sint(info->src.format),
599 A6XX_SP_2D_SRC_FORMAT_SINT) |
600 COND(util_format_is_pure_uint(info->src.format),
601 A6XX_SP_2D_SRC_FORMAT_UINT) |
602 COND(util_format_is_snorm(info->src.format),
603 A6XX_SP_2D_SRC_FORMAT_SINT |
604 A6XX_SP_2D_SRC_FORMAT_NORM) |
605 COND(util_format_is_unorm(info->src.format),
606 // TODO sometimes blob uses UINT+NORM but dEQP seems unhappy about that
607 // A6XX_SP_2D_SRC_FORMAT_UINT |
608 A6XX_SP_2D_SRC_FORMAT_NORM) |
609 0xf000);
610
611 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
612 OUT_RING(ring, fd6_context(ctx)->magic.RB_UNKNOWN_8E04_blit);
613
614 OUT_PKT7(ring, CP_BLIT, 1);
615 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
616
617 OUT_WFI5(ring);
618
619 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
620 OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
621 }
622 }
623
624 void
625 fd6_clear_surface(struct fd_context *ctx,
626 struct fd_ringbuffer *ring, struct pipe_surface *psurf,
627 uint32_t width, uint32_t height, union pipe_color_union *color)
628 {
629 struct pipe_blit_info info = {};
630
631 info.dst.resource = psurf->texture;
632 info.dst.level = psurf->u.tex.level;
633 info.dst.box.x = 0;
634 info.dst.box.y = 0;
635 info.dst.box.z = psurf->u.tex.first_layer;
636 info.dst.box.width = width;
637 info.dst.box.height = height;
638 info.dst.box.depth = psurf->u.tex.last_layer + 1 - psurf->u.tex.first_layer;
639 info.dst.format = psurf->format;
640 info.src = info.dst;
641 info.mask = util_format_get_mask(psurf->format);
642 info.filter = PIPE_TEX_FILTER_NEAREST;
643 info.scissor_enable = 0;
644
645 emit_blit_or_clear_texture(ctx, ring, &info, color);
646 }
647
static bool handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info);

/**
 * A z/s blit that has been re-written as a color blit can still be rejected
 * by the blitter (MSAA being one example).  The fallback then has to be done
 * with the re-written pipe_blit_info, in particular since u_blitter cannot
 * blit stencil.  So the fallback is taken care of right here, and from the
 * caller's point of view this never "fails".
 */
static bool
do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
{
	/* try the hw blit first, fd_blitter_blit() only if that is refused: */
	bool handled = handle_rgba_blit(ctx, info) || fd_blitter_blit(ctx, info);

	debug_assert(handled);  /* fallback should never fail! */

	return handled;
}
665
666 /**
667 * Handle depth/stencil blits either via u_blitter and/or re-writing the
668 * blit into an equivilant format that we can handle
669 */
670 static bool
671 handle_zs_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
672 {
673 struct pipe_blit_info blit = *info;
674
675 if (DEBUG_BLIT_FALLBACK) {
676 fprintf(stderr, "---- handle_zs_blit: ");
677 util_dump_blit_info(stderr, info);
678 fprintf(stderr, "\ndst resource: ");
679 util_dump_resource(stderr, info->dst.resource);
680 fprintf(stderr, "\nsrc resource: ");
681 util_dump_resource(stderr, info->src.resource);
682 fprintf(stderr, "\n");
683 }
684
685 switch (info->dst.format) {
686 case PIPE_FORMAT_S8_UINT:
687 debug_assert(info->mask == PIPE_MASK_S);
688 blit.mask = PIPE_MASK_R;
689 blit.src.format = PIPE_FORMAT_R8_UINT;
690 blit.dst.format = PIPE_FORMAT_R8_UINT;
691 return do_rewritten_blit(ctx, &blit);
692
693 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
694 if (info->mask & PIPE_MASK_Z) {
695 blit.mask = PIPE_MASK_R;
696 blit.src.format = PIPE_FORMAT_R32_FLOAT;
697 blit.dst.format = PIPE_FORMAT_R32_FLOAT;
698 do_rewritten_blit(ctx, &blit);
699 }
700
701 if (info->mask & PIPE_MASK_S) {
702 blit.mask = PIPE_MASK_R;
703 blit.src.format = PIPE_FORMAT_R8_UINT;
704 blit.dst.format = PIPE_FORMAT_R8_UINT;
705 blit.src.resource = &fd_resource(info->src.resource)->stencil->base;
706 blit.dst.resource = &fd_resource(info->dst.resource)->stencil->base;
707 do_rewritten_blit(ctx, &blit);
708 }
709
710 return true;
711
712 case PIPE_FORMAT_Z16_UNORM:
713 blit.mask = PIPE_MASK_R;
714 blit.src.format = PIPE_FORMAT_R16_UNORM;
715 blit.dst.format = PIPE_FORMAT_R16_UNORM;
716 return do_rewritten_blit(ctx, &blit);
717
718 case PIPE_FORMAT_Z32_UNORM:
719 case PIPE_FORMAT_Z32_FLOAT:
720 debug_assert(info->mask == PIPE_MASK_Z);
721 blit.mask = PIPE_MASK_R;
722 blit.src.format = PIPE_FORMAT_R32_UINT;
723 blit.dst.format = PIPE_FORMAT_R32_UINT;
724 return do_rewritten_blit(ctx, &blit);
725
726 case PIPE_FORMAT_Z24X8_UNORM:
727 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
728 blit.mask = 0;
729 if (info->mask & PIPE_MASK_Z)
730 blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
731 if (info->mask & PIPE_MASK_S)
732 blit.mask |= PIPE_MASK_A;
733 blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
734 blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
735 return fd_blitter_blit(ctx, &blit);
736
737 default:
738 return false;
739 }
740 }
741
742 static bool
743 handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
744 {
745 struct fd_batch *batch;
746
747 debug_assert(!(info->mask & PIPE_MASK_ZS));
748
749 if (!can_do_blit(info))
750 return false;
751
752 fd_fence_ref(&ctx->last_fence, NULL);
753
754 batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
755
756 fd6_emit_restore(batch, batch->draw);
757 fd6_emit_lrz_flush(batch->draw);
758
759 mtx_lock(&ctx->screen->lock);
760
761 fd_batch_resource_used(batch, fd_resource(info->src.resource), false);
762 fd_batch_resource_used(batch, fd_resource(info->dst.resource), true);
763
764 mtx_unlock(&ctx->screen->lock);
765
766 emit_setup(batch);
767
768 if ((info->src.resource->target == PIPE_BUFFER) &&
769 (info->dst.resource->target == PIPE_BUFFER)) {
770 assert(fd_resource(info->src.resource)->layout.tile_mode == TILE6_LINEAR);
771 assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE6_LINEAR);
772 emit_blit_buffer(ctx, batch->draw, info);
773 } else {
774 /* I don't *think* we need to handle blits between buffer <-> !buffer */
775 debug_assert(info->src.resource->target != PIPE_BUFFER);
776 debug_assert(info->dst.resource->target != PIPE_BUFFER);
777 emit_blit_or_clear_texture(ctx, batch->draw, info, NULL);
778 }
779
780 fd6_event_write(batch, batch->draw, 0x1d, true);
781 fd6_event_write(batch, batch->draw, FACENESS_FLUSH, true);
782 fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
783 fd6_cache_inv(batch, batch->draw);
784
785 fd_resource(info->dst.resource)->valid = true;
786 batch->needs_flush = true;
787
788 fd_batch_flush(batch, false);
789 fd_batch_reference(&batch, NULL);
790
791 return true;
792 }
793
794 static bool
795 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
796 {
797 if (info->mask & PIPE_MASK_ZS)
798 return handle_zs_blit(ctx, info);
799 return handle_rgba_blit(ctx, info);
800 }
801
802 void
803 fd6_blitter_init(struct pipe_context *pctx)
804 {
805 if (fd_mesa_debug & FD_DBG_NOBLIT)
806 return;
807
808 fd_context(pctx)->blit = fd6_blit;
809 }
810
811 unsigned
812 fd6_tile_mode(const struct pipe_resource *tmpl)
813 {
814 /* if the mipmap level 0 is still too small to be tiled, then don't
815 * bother pretending:
816 */
817 if (fd_resource_level_linear(tmpl, 0))
818 return TILE6_LINEAR;
819
820 /* basically just has to be a format we can blit, so uploads/downloads
821 * via linear staging buffer works:
822 */
823 if (ok_format(tmpl->format))
824 return TILE6_3;
825
826 return TILE6_LINEAR;
827 }