freedreno/a5xx: LRZ support
mesa.git: src/gallium/drivers/freedreno/freedreno_resource.c
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	/* Go through the entire state and see if the resource is bound
	 * anywhere. If it is, mark the relevant state as dirty. This is called on
	 * realloc_bo.
	 */

	/* VBOs */
	for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* per-shader-stage resources: */
	for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
		/* Constbufs.. note that constbuf[0] is normal uniforms emitted in
		 * cmdstream rather than by pointer..
		 */
		const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
		for (unsigned i = 1; i < num_ubos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
				break;
			if (ctx->constbuf[stage].cb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
		}

		/* Textures */
		for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
				break;
			if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
		}

		/* SSBOs */
		const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
		for (unsigned i = 0; i < num_ssbos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
				break;
			if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
		}
	}
}

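/* (Re)allocate the backing bo for the resource, dropping any previous
 * bo (and therefore its contents); callers are expected to mark
 * dependent state dirty (see fd_invalidate_resource()) where needed:
 */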
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

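/* Blit via util_blitter on the GPU when possible; buffers (and the
 * explicit fallback case) take the CPU copy path instead:
 */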
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

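/**
 * Shadowing: to avoid stalling on a bo that the GPU is still using, swap
 * in a freshly allocated bo for the resource and blit the still-valid
 * contents over from the old (shadow) bo:
 */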
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	if (prsc->next)
		return false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard
	 * current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	mtx_lock(&ctx->screen->lock);

	/* Swap the backing bo's, so that the shadow now holds the old
	 * buffer contents, and we blit from the shadow to the new buffer.
	 * From here on out, we cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if we go
	 * down the cpu blit path, the recursive transfer_map() sees the
	 * wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo,          shadow->bo);
	swap(rsc->write_batch, shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	mtx_unlock(&ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

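/* Offset of the given layer within a slice (miplevel), which depends on
 * whether layers or levels are the outer dimension of the layout:
 */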
static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
		struct fd_resource_slice *slice,
		unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

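/* Flush staging Z32F_S8 data back into the separate depth and stencil
 * backing resources (see fd_resource_create() for why these are split):
 */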
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}

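/* Flush staging RGTC/LATC data: decompress into the RGBA8888 internal
 * format that the hw actually samples from on gens without native RGTC
 * support:
 */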
static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}

static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
					   ptrans->box.x + box->x,
					   ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
				   ptrans->box.x,
				   ptrans->box.x + ptrans->box.width);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);

	free(trans->staging);
}

static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

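	/* discarding the whole resource: just swap in a fresh bo rather
	 * than stalling on the old one:
	 */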
	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			   prsc->target == PIPE_BUFFER &&
			   !util_ranges_intersect(&rsc->valid_buffer_range,
					   box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could go down this path !reorder && !busy_for_read
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
			prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
					fd_resource_layer_offset(rsc, slice, box->z) +
					box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
					fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
					box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			   util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
						trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
					fd_resource_layer_offset(rsc, slice, box->z) +
					box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}


static const struct u_resource_vtbl fd_resource_vtbl = {
	.resource_get_handle   = fd_resource_get_handle,
	.resource_destroy      = fd_resource_destroy,
	.transfer_map          = fd_resource_transfer_map,
	.transfer_flush_region = fd_resource_transfer_flush_region,
	.transfer_unmap        = fd_resource_transfer_unmap,
};

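/* Lay out the miplevel slices for the resource, returning the total size
 * in bytes needed for the backing bo:
 */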
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t pitchalign = fd_screen(prsc->screen)->gmem_alignw;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, pitchalign);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size shrinks
		 * below the threshold we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
				level == 1 ||
				(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

// TODO common helper?
static bool
has_depth(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
	case PIPE_FORMAT_Z32_UNORM:
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
		return true;
	default:
		return false;
	}
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_screen *screen = fd_screen(pscreen);
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (screen->gpu_id < 400 &&
			 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	// XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
	if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
		const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
				DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
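		/* LRZ buffer appears to be z16 at 1/8 resolution in each dimension
		 * (ie. one value per 8x8 pixel block), hence DIV_ROUND_UP(.., 8)
		 * and the *2 bytes per value:
		 */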
		unsigned lrz_pitch  = align(DIV_ROUND_UP(tmpl->width0, 8), 32);
		unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
		unsigned size = lrz_pitch * lrz_height * 2;

		size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */

		rsc->lrz_height = lrz_height;
		rsc->lrz_width = lrz_pitch;
		rsc->lrz_pitch = lrz_pitch;
		rsc->lrz = fd_bo_new(screen->dev, size, flags);
	}

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(screen) || is_a5xx(screen)) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note, semi-intentional == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures. The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;
	uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;
	slice->size0 = handle->stride * prsc->height0;

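	/* pitch of an imported buffer must cover the (aligned) width and
	 * respect the gmem alignment requirement:
	 */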
	if ((slice->pitch < align(prsc->width0, pitchalign)) ||
			(slice->pitch & (pitchalign - 1)))
		goto fail;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * The resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

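/* Evaluate the current render condition; returns true if rendering should
 * proceed (no condition set, the query result doesn't match the condition,
 * or the result isn't yet available in the no-wait modes):
 */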
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
		enum fd_render_stage stage)
{
	util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
			ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
			(void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
			ctx->tex[PIPE_SHADER_FRAGMENT].textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
				ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_batch_set_stage(ctx->batch, stage);

	ctx->in_blit = discard;
}

void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_batch_set_stage(ctx->batch, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}