/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_blitter.h"
#include "freedreno_fence.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <drm_fourcc.h>
#include <errno.h>
#include <inttypes.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"

/**
 * Go through the entire state and see if the resource is bound
 * anywhere.  If it is, mark the relevant state as dirty.  This is
 * called on realloc_bo to ensure the necessary state is re-
 * emitted so the GPU looks at the new backing bo.
 */
static void
rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	/* VBOs */
	for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* per-shader-stage resources: */
	for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
		/* Constbufs.. note that constbuf[0] is normal uniforms emitted in
		 * cmdstream rather than by pointer..
		 */
		const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
		for (unsigned i = 1; i < num_ubos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
				break;
			if (ctx->constbuf[stage].cb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
		}

		/* Textures */
		for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
				break;
			if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
		}

		/* SSBOs */
		const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
		for (unsigned i = 0; i < num_ssbos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
				break;
			if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
		}
	}
}

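/* Discard the old backing bo (if any) and allocate a fresh one.  Bumping
 * rsc->seqno invalidates any cached hw state (ie. texture state) keyed off
 * the resource, and the batch-cache entries for the old bo are dropped, so
 * nothing stale keeps referencing the previous storage.
 */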
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct pipe_resource *prsc = &rsc->base;
	struct fd_screen *screen = fd_screen(rsc->base.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM |
			COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT);
	/* TODO other flags? */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x",
			prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, prsc->bind);
	rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

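/* Copy between two resources, either on the GPU via pctx->blit(), or,
 * when the caller requests the fallback (buffers, or formats the hw
 * can't render to), via a CPU copy with util_resource_copy_region().
 */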
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	struct pipe_context *pctx = &ctx->base;

	/* TODO size threshold too?? */
	if (!fallback) {
		/* do blit on gpu: */
		pctx->blit(pctx, blit);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(pctx,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

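/* Shadowing: if the GPU is still using the resource's current backing bo,
 * swap in a freshly allocated bo so the CPU can proceed without stalling,
 * and blit the still-valid contents back from the old (shadow) bo.  Only
 * the regions outside the box about to be written need to be copied.
 */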
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base;
	bool fallback = false;

	if (prsc->next)
		return false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			prsc->nr_storage_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* do shadowing back-blits on the cpu for buffers: */
	if (prsc->target == PIPE_BUFFER)
		fallback = true;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	mtx_lock(&ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer.  From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.reference.count,
			shadow, shadow->base.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo,          shadow->bo);
	swap(rsc->write_batch, shadow->write_batch);
	rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	mtx_unlock(&ctx->screen->lock);

	struct pipe_blit_info blit = {};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

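/* Allocate a linear staging resource matching the size of the transfer
 * box.  Used both for tiled resources (which need an (un)tiling blit to
 * be CPU-accessible) and to avoid stalling on a bo the GPU is still using.
 */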
static struct fd_resource *
fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource tmpl = rsc->base;

	tmpl.width0 = box->width;
	tmpl.height0 = box->height;
	/* for array textures, box->depth is the array_size; for 3d
	 * textures, it is the depth:
	 */
	if (tmpl.array_size > 1) {
		tmpl.array_size = box->depth;
		tmpl.depth0 = 1;
	} else {
		tmpl.array_size = 1;
		tmpl.depth0 = box->depth;
	}
	tmpl.last_level = 0;
	tmpl.bind |= PIPE_BIND_LINEAR;

	struct pipe_resource *pstaging =
		pctx->screen->resource_create(pctx->screen, &tmpl);
	if (!pstaging)
		return NULL;

	return fd_resource(pstaging);
}

static void
fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
{
	struct pipe_resource *dst = trans->base.resource;
	struct pipe_blit_info blit = {};

	blit.dst.resource = dst;
	blit.dst.format   = dst->format;
	blit.dst.level    = trans->base.level;
	blit.dst.box      = trans->base.box;
	blit.src.resource = trans->staging_prsc;
	blit.src.format   = trans->staging_prsc->format;
	blit.src.level    = 0;
	blit.src.box      = trans->staging_box;
	blit.mask = util_format_get_mask(trans->staging_prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

	do_blit(ctx, &blit, false);
}

static void
fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
{
	struct pipe_resource *src = trans->base.resource;
	struct pipe_blit_info blit = {};

	blit.src.resource = src;
	blit.src.format   = src->format;
	blit.src.level    = trans->base.level;
	blit.src.box      = trans->base.box;
	blit.dst.resource = trans->staging_prsc;
	blit.dst.format   = trans->staging_prsc->format;
	blit.dst.level    = 0;
	blit.dst.box      = trans->staging_box;
	blit.mask = util_format_get_mask(trans->staging_prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

	do_blit(ctx, &blit, false);
}

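/* For buffers, record the range that now contains valid contents, so
 * later writes to still-uninitialized ranges can skip synchronization.
 */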
static void
fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
				ptrans->box.x + box->x,
				ptrans->box.x + box->x + box->width);
}

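/* Flush rendering which accesses the resource: for CPU writes, every
 * batch that reads or writes the resource is flushed and waited on; for
 * CPU reads, just the most recent writer is flushed (with sync).
 */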
static void
flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage)
{
	struct fd_batch *write_batch = NULL;

	fd_batch_reference(&write_batch, rsc->write_batch);

	if (usage & PIPE_TRANSFER_WRITE) {
		struct fd_batch *batch, *batches[32] = {};
		uint32_t batch_mask;

		/* This is a bit awkward, probably a fd_batch_flush_locked()
		 * would make things simpler.. but we need to hold the lock
		 * to iterate the batches which reference this resource.  So
		 * we must first grab references under a lock, then flush.
		 */
		mtx_lock(&ctx->screen->lock);
		batch_mask = rsc->batch_mask;
		foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
			fd_batch_reference(&batches[batch->idx], batch);
		mtx_unlock(&ctx->screen->lock);

		foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
			fd_batch_flush(batch, false, false);

		foreach_batch(batch, &ctx->screen->batch_cache, batch_mask) {
			fd_batch_sync(batch);
			fd_batch_reference(&batches[batch->idx], NULL);
		}
		assert(rsc->batch_mask == 0);
	} else if (write_batch) {
		fd_batch_flush(write_batch, true, false);
	}

	fd_batch_reference(&write_batch, NULL);

	assert(!rsc->write_batch);
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_TRANSFER_READ);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging_prsc) {
		if (ptrans->usage & PIPE_TRANSFER_WRITE)
			fd_blit_from_staging(ctx, trans);
		pipe_resource_reference(&trans->staging_prsc, NULL);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
			ptrans->box.x,
			ptrans->box.x + ptrans->box.width);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);
}

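/* Map a resource for CPU access.  Roughly, the strategy is:
 *
 *  1) tiled resources always go through a linear staging buffer (plus
 *     an untiling blit first, for reads),
 *  2) DISCARD_WHOLE_RESOURCE reallocs the backing bo rather than stalling,
 *  3) writes to a not-yet-valid range of a buffer can proceed unsynchronized,
 *  4) otherwise, if the bo is busy, try to shadow the resource (for
 *     DISCARD_RANGE writes) or upload via a staging buffer, and only
 *     flush and stall as a last resort.
 */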
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	/* we always need a staging texture for tiled buffers:
	 *
	 * TODO we might sometimes want to *also* shadow the resource to avoid
	 * splitting a batch.. for ex, mid-frame texture uploads to a tiled
	 * texture.
	 */
	if (rsc->tile_mode) {
		struct fd_resource *staging_rsc;

		staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
		if (staging_rsc) {
			// TODO for PIPE_TRANSFER_READ, need to do untiling blit..
			trans->staging_prsc = &staging_rsc->base;
			trans->base.stride = util_format_get_nblocksx(format,
				staging_rsc->slices[0].pitch) * staging_rsc->cpp;
			trans->base.layer_stride = staging_rsc->layer_first ?
				staging_rsc->layer_size : staging_rsc->slices[0].size0;
			trans->staging_box = *box;
			trans->staging_box.x = 0;
			trans->staging_box.y = 0;
			trans->staging_box.z = 0;

			if (usage & PIPE_TRANSFER_READ) {
				fd_blit_to_staging(ctx, trans);

				struct fd_batch *batch = NULL;
				fd_batch_reference(&batch, staging_rsc->write_batch);

				/* we can't fd_bo_cpu_prep() until the blit to staging
				 * is submitted to kernel.. in that case write_batch
				 * wouldn't be NULL yet:
				 */
				if (batch) {
					fd_batch_sync(batch);
					fd_batch_reference(&batch, NULL);
				}

				fd_bo_cpu_prep(staging_rsc->bo, ctx->pipe,
						DRM_FREEDRENO_PREP_READ);
			}

			buf = fd_bo_map(staging_rsc->bo);
			offset = 0;

			*pptrans = ptrans;

			ctx->stats.staging_uploads++;

			return buf;
		}
	}

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		rebind_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			prsc->target == PIPE_BUFFER &&
			!util_ranges_intersect(&rsc->valid_buffer_range,
					box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range.  No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could also go down this path when !reorder && !busy_for_read,
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ) &&
				(usage & PIPE_TRANSFER_DISCARD_RANGE)) {
			/* try shadowing only if it avoids a flush, otherwise staging would
			 * be better:
			 */
			if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box)) {
				needs_flush = busy = false;
				rebind_resource(ctx, prsc);
				ctx->stats.shadow_uploads++;
			} else {
				struct fd_resource *staging_rsc;

				if (needs_flush) {
					flush_resource(ctx, rsc, usage);
					needs_flush = false;
				}

				/* in this case, we don't need to shadow the whole resource,
				 * since any draw that references the previous contents has
				 * already had rendering flushed for all tiles.  So we can
				 * use a staging buffer to do the upload.
				 */
				staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
				if (staging_rsc) {
					trans->staging_prsc = &staging_rsc->base;
					trans->base.stride = util_format_get_nblocksx(format,
						staging_rsc->slices[0].pitch) * staging_rsc->cpp;
					trans->base.layer_stride = staging_rsc->layer_first ?
						staging_rsc->layer_size : staging_rsc->slices[0].size0;
					trans->staging_box = *box;
					trans->staging_box.x = 0;
					trans->staging_box.y = 0;
					trans->staging_box.z = 0;
					buf = fd_bo_map(staging_rsc->bo);
					offset = 0;

					*pptrans = ptrans;

					fd_batch_reference(&write_batch, NULL);

					ctx->stats.staging_uploads++;

					return buf;
				}
			}
		}

		if (needs_flush) {
			flush_resource(ctx, rsc, usage);
			needs_flush = false;
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	offset =
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_offset(rsc, level, box->z);

	if (usage & PIPE_TRANSFER_WRITE)
		rsc->valid = true;

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	if (rsc->scanout)
		renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);

	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static uint64_t
fd_resource_modifier(struct fd_resource *rsc)
{
	if (!rsc->tile_mode)
		return DRM_FORMAT_MOD_LINEAR;

	/* TODO invent a modifier for tiled but not UBWC buffers: */
	return DRM_FORMAT_MOD_INVALID;
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_context *pctx,
		struct pipe_resource *prsc,
		struct winsys_handle *handle,
		unsigned usage)
{
	struct fd_resource *rsc = fd_resource(prsc);

	handle->modifier = fd_resource_modifier(rsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
			rsc->slices[0].pitch * rsc->cpp, handle);
}

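/* Compute the per-miplevel slice layout (pitch, offset, layer size) and
 * return the total bo size.  As a rough worked example (assuming, say,
 * pitchalign = 32): a 64x64 BGRA8 (cpp=4) 2D texture with three miplevels
 * gets slice pitches of 64/32/32 texels (the smaller levels rounded up to
 * pitchalign) and layer sizes 16384/4096/2048 bytes at offsets
 * 0/16384/20480, for a total size of 22528 bytes.
 */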
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base;
	struct fd_screen *screen = fd_screen(prsc->screen);
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t pitchalign = screen->gmem_alignw;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, pitchalign);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx.  3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size drops
		 * below the threshold we stop reducing it, and all remaining levels
		 * keep the same layer size.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
				level == 1 ||
				(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(enum pipe_texture_target target)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* cross generation texture layout to plug in to screen->setup_slices()..
 * replace with generation specific one as-needed.
 *
 * TODO for a4xx probably can extract out the a4xx specific logic into
 * a small fd4_setup_slices() wrapper that sets up layer_first, and then
 * calls this.
 */
uint32_t
fd_setup_slices(struct fd_resource *rsc)
{
	uint32_t alignment;

	alignment = slice_alignment(rsc->base.target);

	struct fd_screen *screen = fd_screen(rsc->base.screen);
	if (is_a4xx(screen)) {
		switch (rsc->base.target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	return setup_slices(rsc, alignment, rsc->base.format);
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
}

// TODO common helper?
static bool
has_depth(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
	case PIPE_FORMAT_Z32_UNORM:
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
		return true;
	default:
		return false;
	}
}

static bool
find_modifier(uint64_t needle, const uint64_t *haystack, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (haystack[i] == needle)
			return true;
	}

	return false;
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		const uint64_t *modifiers, int count)
{
	struct fd_screen *screen = fd_screen(pscreen);
	struct fd_resource *rsc;
	struct pipe_resource *prsc;
	enum pipe_format format = tmpl->format;
	uint32_t size;

	if (screen->ro && (tmpl->bind & PIPE_BIND_SCANOUT)) {
		struct pipe_resource scanout_templat = *tmpl;
		struct renderonly_scanout *scanout;
		struct winsys_handle handle;

		scanout = renderonly_scanout_for_resource(&scanout_templat,
				screen->ro, &handle);
		if (!scanout)
			return NULL;

		renderonly_scanout_destroy(scanout, screen->ro);

		assert(handle.type == WINSYS_HANDLE_TYPE_FD);
		rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl,
				&handle,
				PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
		close(handle.handle);
		if (!rsc)
			return NULL;

		return &rsc->base;
	}

	rsc = CALLOC_STRUCT(fd_resource);
	prsc = &rsc->base;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

#define LINEAR \
	(PIPE_BIND_SCANOUT | \
	 PIPE_BIND_LINEAR  | \
	 PIPE_BIND_DISPLAY_TARGET)

	bool linear = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
	if (tmpl->bind & LINEAR)
		linear = true;

	/* Normally, allow buffer compression if the buffer is not shared;
	 * otherwise only allow it if the QCOM_COMPRESSED modifier is
	 * requested:
	 *
	 * TODO we should probably also limit tiled in a similar way,
	 * except we don't have a format modifier for tiled.  (We probably
	 * should.)
	 */
	bool allow_ubwc = find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count);
	if (tmpl->bind & PIPE_BIND_SHARED)
		allow_ubwc = find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);

	if (screen->tile_mode &&
			(tmpl->target != PIPE_BUFFER) &&
			!linear) {
		rsc->tile_mode = screen->tile_mode(tmpl);
	}

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);
	rsc->cpp *= fd_resource_nr_samples(prsc);

	assert(rsc->cpp);

	// XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
	if ((is_a5xx(screen) || is_a6xx(screen)) &&
			(fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
		const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
				DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
		unsigned lrz_pitch  = align(DIV_ROUND_UP(tmpl->width0, 8), 64);
		unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);

		/* LRZ buffer is super-sampled: */
		switch (prsc->nr_samples) {
		case 4:
			lrz_pitch *= 2;
			/* fallthrough */
		case 2:
			lrz_height *= 2;
		}

		unsigned size = lrz_pitch * lrz_height * 2;

		size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */

		rsc->lrz_height = lrz_height;
		rsc->lrz_width = lrz_pitch;
		rsc->lrz_pitch = lrz_pitch;
		rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
	}

	size = screen->setup_slices(rsc);

	if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->tile_mode)
		size += screen->fill_ubwc_buffer_sizes(rsc);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note, semi-intentional use of == (rather than &) to check bind: */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	const uint64_t mod = DRM_FORMAT_MOD_INVALID;
	return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
}

static bool
is_supported_modifier(struct pipe_screen *pscreen, enum pipe_format pfmt,
		uint64_t mod)
{
	int count;

	/* Get the count of supported modifiers: */
	pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, NULL, NULL, &count);

	/* Get the supported modifiers: */
	uint64_t modifiers[count];
	pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, modifiers, NULL, &count);

	for (int i = 0; i < count; i++)
		if (modifiers[i] == mod)
			return true;

	return false;
}

/**
 * Create a texture from a winsys_handle.  The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_screen *screen = fd_screen(pscreen);
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base;
	uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->internal_format = tmpl->format;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	rsc->cpp *= fd_resource_nr_samples(prsc);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;
	slice->size0 = handle->stride * prsc->height0;

	if ((slice->pitch < align(prsc->width0, pitchalign)) ||
			(slice->pitch & (pitchalign - 1)))
		goto fail;

	if (handle->modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) {
		if (!is_supported_modifier(pscreen, tmpl->format,
				DRM_FORMAT_MOD_QCOM_COMPRESSED)) {
			DBG("bad modifier: %"PRIx64, handle->modifier);
			goto fail;
		}
		debug_assert(screen->fill_ubwc_buffer_sizes);
		screen->fill_ubwc_buffer_sizes(rsc);
	} else if (handle->modifier &&
			(handle->modifier != DRM_FORMAT_MOD_INVALID)) {
		goto fail;
	}

	assert(rsc->cpp);

	if (screen->ro) {
		rsc->scanout =
			renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL);
		/* failure is expected in some cases.. */
	}

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

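/* Check the current render condition (if any): returns true if rendering
 * should proceed, ie. the predicate query passed, or its result couldn't
 * be determined without waiting in the no-wait modes.
 */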
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

static void
fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);

	/*
	 * TODO I guess we could track that the resource is invalidated and
	 * use that as a hint to realloc rather than stall in _transfer_map(),
	 * even in the non-DISCARD_WHOLE_RESOURCE case?
	 *
	 * Note: we set dirty bits to trigger invalidate logic in fd_draw_vbo
	 */

	if (rsc->write_batch) {
		struct fd_batch *batch = rsc->write_batch;
		struct pipe_framebuffer_state *pfb = &batch->framebuffer;

		if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
			batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
			ctx->dirty |= FD_DIRTY_ZSA;
		}

		for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
			if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
				batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
				ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
			}
		}
	}

	rsc->valid = false;
}

static enum pipe_format
fd_resource_get_internal_format(struct pipe_resource *prsc)
{
	return fd_resource(prsc)->internal_format;
}

static void
fd_resource_set_stencil(struct pipe_resource *prsc,
		struct pipe_resource *stencil)
{
	fd_resource(prsc)->stencil = fd_resource(stencil);
}

static struct pipe_resource *
fd_resource_get_stencil(struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	if (rsc->stencil)
		return &rsc->stencil->base;
	return NULL;
}

static const struct u_transfer_vtbl transfer_vtbl = {
	.resource_create          = fd_resource_create,
	.resource_destroy         = fd_resource_destroy,
	.transfer_map             = fd_resource_transfer_map,
	.transfer_flush_region    = fd_resource_transfer_flush_region,
	.transfer_unmap           = fd_resource_transfer_unmap,
	.get_internal_format      = fd_resource_get_internal_format,
	.set_stencil              = fd_resource_set_stencil,
	.get_stencil              = fd_resource_get_stencil,
};

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	struct fd_screen *screen = fd_screen(pscreen);
	bool fake_rgtc = screen->gpu_id < 400;

	pscreen->resource_create = u_transfer_helper_resource_create;
	/* NOTE: u_transfer_helper does not yet support the _with_modifiers()
	 * variant:
	 */
	pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = fd_resource_get_handle;
	pscreen->resource_destroy = u_transfer_helper_resource_destroy;

	pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
			true, false, fake_rgtc, true);

	if (!screen->setup_slices)
		screen->setup_slices = fd_setup_slices;
}

static void
fd_get_sample_position(struct pipe_context *context,
		unsigned sample_count, unsigned sample_index,
		float *pos_out)
{
	/* The following is copied from nouveau/nv50 except for position
	 * values, which are taken from blob driver */
	static const uint8_t pos1[1][2] = { { 0x8, 0x8 } };
	static const uint8_t pos2[2][2] = {
		{ 0xc, 0xc }, { 0x4, 0x4 } };
	static const uint8_t pos4[4][2] = {
		{ 0x6, 0x2 }, { 0xe, 0x6 },
		{ 0x2, 0xa }, { 0xa, 0xe } };
	/* TODO needs to be verified on supported hw */
	static const uint8_t pos8[8][2] = {
		{ 0x9, 0x5 }, { 0x7, 0xb },
		{ 0xd, 0x9 }, { 0x5, 0x3 },
		{ 0x3, 0xd }, { 0x1, 0x7 },
		{ 0xb, 0xf }, { 0xf, 0x1 } };

	const uint8_t (*ptr)[2];

	switch (sample_count) {
	case 1:
		ptr = pos1;
		break;
	case 2:
		ptr = pos2;
		break;
	case 4:
		ptr = pos4;
		break;
	case 8:
		ptr = pos8;
		break;
	default:
		assert(0);
		return;
	}

	pos_out[0] = ptr[sample_index][0] / 16.0f;
	pos_out[1] = ptr[sample_index][1] / 16.0f;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_helper_transfer_map;
	pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
	pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
	pctx->invalidate_resource = fd_invalidate_resource;
	pctx->get_sample_position = fd_get_sample_position;
}