freedreno: use renderonly path for buffers allocated with modifiers
[mesa.git] / src / gallium / drivers / freedreno / freedreno_resource.c
1 /*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "util/u_format.h"
28 #include "util/u_format_rgtc.h"
29 #include "util/u_format_zs.h"
30 #include "util/u_inlines.h"
31 #include "util/u_transfer.h"
32 #include "util/u_string.h"
33 #include "util/u_surface.h"
34 #include "util/set.h"
35
36 #include "freedreno_resource.h"
37 #include "freedreno_batch_cache.h"
38 #include "freedreno_blitter.h"
39 #include "freedreno_fence.h"
40 #include "freedreno_screen.h"
41 #include "freedreno_surface.h"
42 #include "freedreno_context.h"
43 #include "freedreno_query_hw.h"
44 #include "freedreno_util.h"
45
46 #include "drm-uapi/drm_fourcc.h"
47 #include <errno.h>
48
49 /* XXX this should go away, needed for 'struct winsys_handle' */
50 #include "state_tracker/drm_driver.h"
51
52 /**
53 * Go through the entire state and see if the resource is bound
54 * anywhere. If it is, mark the relevant state as dirty. This is
55 * called on realloc_bo to ensure the neccessary state is re-
56 * emitted so the GPU looks at the new backing bo.
57 */
58 static void
59 rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc)
60 {
61 /* VBOs */
62 for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
63 if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
64 ctx->dirty |= FD_DIRTY_VTXBUF;
65 }
66
67 /* per-shader-stage resources: */
68 for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
69 /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
70 * cmdstream rather than by pointer..
71 */
72 const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
73 for (unsigned i = 1; i < num_ubos; i++) {
74 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
75 break;
76 if (ctx->constbuf[stage].cb[i].buffer == prsc)
77 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
78 }
79
80 /* Textures */
81 for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
82 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
83 break;
84 if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
85 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
86 }
87
88 /* SSBOs */
89 const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
90 for (unsigned i = 0; i < num_ssbos; i++) {
91 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
92 break;
93 if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
94 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
95 }
96 }
97 }
98
99 static void
100 realloc_bo(struct fd_resource *rsc, uint32_t size)
101 {
102 struct pipe_resource *prsc = &rsc->base;
103 struct fd_screen *screen = fd_screen(rsc->base.screen);
104 uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
105 DRM_FREEDRENO_GEM_TYPE_KMEM |
106 COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT);
107 /* TODO other flags? */
108
109 /* if we start using things other than write-combine,
110 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
111 */
112
113 if (rsc->bo)
114 fd_bo_del(rsc->bo);
115
116 rsc->bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x",
117 prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, prsc->bind);
118 rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
119 util_range_set_empty(&rsc->valid_buffer_range);
120 fd_bc_invalidate_resource(rsc, true);
121 }
122
123 static void
124 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
125 {
126 struct pipe_context *pctx = &ctx->base;
127
128 /* TODO size threshold too?? */
129 if (!fallback) {
130 /* do blit on gpu: */
131 pctx->blit(pctx, blit);
132 } else {
133 /* do blit on cpu: */
134 util_resource_copy_region(pctx,
135 blit->dst.resource, blit->dst.level, blit->dst.box.x,
136 blit->dst.box.y, blit->dst.box.z,
137 blit->src.resource, blit->src.level, &blit->src.box);
138 }
139 }
140
141 static bool
142 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
143 unsigned level, const struct pipe_box *box)
144 {
145 struct pipe_context *pctx = &ctx->base;
146 struct pipe_resource *prsc = &rsc->base;
147 bool fallback = false;
148
149 if (prsc->next)
150 return false;
151
152 /* TODO: somehow munge dimensions and format to copy unsupported
153 * render target format to something that is supported?
154 */
155 if (!pctx->screen->is_format_supported(pctx->screen,
156 prsc->format, prsc->target, prsc->nr_samples,
157 prsc->nr_storage_samples,
158 PIPE_BIND_RENDER_TARGET))
159 fallback = true;
160
161 /* do shadowing back-blits on the cpu for buffers: */
162 if (prsc->target == PIPE_BUFFER)
163 fallback = true;
164
165 bool whole_level = util_texrange_covers_whole_level(prsc, level,
166 box->x, box->y, box->z, box->width, box->height, box->depth);
167
168 /* TODO need to be more clever about current level */
169 if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
170 return false;
171
172 struct pipe_resource *pshadow =
173 pctx->screen->resource_create(pctx->screen, prsc);
174
175 if (!pshadow)
176 return false;
177
178 assert(!ctx->in_shadow);
179 ctx->in_shadow = true;
180
181 /* get rid of any references that batch-cache might have to us (which
182 * should empty/destroy rsc->batches hashset)
183 */
184 fd_bc_invalidate_resource(rsc, false);
185
186 mtx_lock(&ctx->screen->lock);
187
188 /* Swap the backing bo's, so shadow becomes the old buffer,
189 * blit from shadow to new buffer. From here on out, we
190 * cannot fail.
191 *
192 * Note that we need to do it in this order, otherwise if
193 * we go down cpu blit path, the recursive transfer_map()
194 * sees the wrong status..
195 */
196 struct fd_resource *shadow = fd_resource(pshadow);
197
198 DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.reference.count,
199 shadow, shadow->base.reference.count);
200
201 /* TODO valid_buffer_range?? */
202 swap(rsc->bo, shadow->bo);
203 swap(rsc->write_batch, shadow->write_batch);
204 rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
205
206 /* at this point, the newly created shadow buffer is not referenced
207 * by any batches, but the existing rsc (probably) is. We need to
208 * transfer those references over:
209 */
210 debug_assert(shadow->batch_mask == 0);
211 struct fd_batch *batch;
212 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
213 struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
214 _mesa_set_remove(batch->resources, entry);
215 _mesa_set_add(batch->resources, shadow);
216 }
217 swap(rsc->batch_mask, shadow->batch_mask);
218
219 mtx_unlock(&ctx->screen->lock);
220
221 struct pipe_blit_info blit = {};
222 blit.dst.resource = prsc;
223 blit.dst.format = prsc->format;
224 blit.src.resource = pshadow;
225 blit.src.format = pshadow->format;
226 blit.mask = util_format_get_mask(prsc->format);
227 blit.filter = PIPE_TEX_FILTER_NEAREST;
228
229 #define set_box(field, val) do { \
230 blit.dst.field = (val); \
231 blit.src.field = (val); \
232 } while (0)
233
234 /* blit the other levels in their entirety: */
235 for (unsigned l = 0; l <= prsc->last_level; l++) {
236 if (l == level)
237 continue;
238
239 /* just blit whole level: */
240 set_box(level, l);
241 set_box(box.width, u_minify(prsc->width0, l));
242 set_box(box.height, u_minify(prsc->height0, l));
243 set_box(box.depth, u_minify(prsc->depth0, l));
244
245 do_blit(ctx, &blit, fallback);
246 }
247
248 /* deal w/ current level specially, since we might need to split
249 * it up into a couple blits:
250 */
251 if (!whole_level) {
252 set_box(level, level);
253
254 switch (prsc->target) {
255 case PIPE_BUFFER:
256 case PIPE_TEXTURE_1D:
257 set_box(box.y, 0);
258 set_box(box.z, 0);
259 set_box(box.height, 1);
260 set_box(box.depth, 1);
261
262 if (box->x > 0) {
263 set_box(box.x, 0);
264 set_box(box.width, box->x);
265
266 do_blit(ctx, &blit, fallback);
267 }
268 if ((box->x + box->width) < u_minify(prsc->width0, level)) {
269 set_box(box.x, box->x + box->width);
270 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
271
272 do_blit(ctx, &blit, fallback);
273 }
274 break;
275 case PIPE_TEXTURE_2D:
276 /* TODO */
277 default:
278 unreachable("TODO");
279 }
280 }
281
282 ctx->in_shadow = false;
283
284 pipe_resource_reference(&pshadow, NULL);
285
286 return true;
287 }
288
289 static struct fd_resource *
290 fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
291 unsigned level, const struct pipe_box *box)
292 {
293 struct pipe_context *pctx = &ctx->base;
294 struct pipe_resource tmpl = rsc->base;
295
296 tmpl.width0 = box->width;
297 tmpl.height0 = box->height;
298 /* for array textures, box->depth is the array_size, otherwise
299 * for 3d textures, it is the depth:
300 */
301 if (tmpl.array_size > 1) {
302 tmpl.array_size = box->depth;
303 tmpl.depth0 = 1;
304 } else {
305 tmpl.array_size = 1;
306 tmpl.depth0 = box->depth;
307 }
308 tmpl.last_level = 0;
309 tmpl.bind |= PIPE_BIND_LINEAR;
310
311 struct pipe_resource *pstaging =
312 pctx->screen->resource_create(pctx->screen, &tmpl);
313 if (!pstaging)
314 return NULL;
315
316 return fd_resource(pstaging);
317 }
318
319 static void
320 fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
321 {
322 struct pipe_resource *dst = trans->base.resource;
323 struct pipe_blit_info blit = {};
324
325 blit.dst.resource = dst;
326 blit.dst.format = dst->format;
327 blit.dst.level = trans->base.level;
328 blit.dst.box = trans->base.box;
329 blit.src.resource = trans->staging_prsc;
330 blit.src.format = trans->staging_prsc->format;
331 blit.src.level = 0;
332 blit.src.box = trans->staging_box;
333 blit.mask = util_format_get_mask(trans->staging_prsc->format);
334 blit.filter = PIPE_TEX_FILTER_NEAREST;
335
336 do_blit(ctx, &blit, false);
337 }
338
339 static void
340 fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
341 {
342 struct pipe_resource *src = trans->base.resource;
343 struct pipe_blit_info blit = {};
344
345 blit.src.resource = src;
346 blit.src.format = src->format;
347 blit.src.level = trans->base.level;
348 blit.src.box = trans->base.box;
349 blit.dst.resource = trans->staging_prsc;
350 blit.dst.format = trans->staging_prsc->format;
351 blit.dst.level = 0;
352 blit.dst.box = trans->staging_box;
353 blit.mask = util_format_get_mask(trans->staging_prsc->format);
354 blit.filter = PIPE_TEX_FILTER_NEAREST;
355
356 do_blit(ctx, &blit, false);
357 }
358
359 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
360 struct pipe_transfer *ptrans,
361 const struct pipe_box *box)
362 {
363 struct fd_resource *rsc = fd_resource(ptrans->resource);
364
365 if (ptrans->resource->target == PIPE_BUFFER)
366 util_range_add(&rsc->valid_buffer_range,
367 ptrans->box.x + box->x,
368 ptrans->box.x + box->x + box->width);
369 }
370
371 static void
372 flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage)
373 {
374 struct fd_batch *write_batch = NULL;
375
376 mtx_lock(&ctx->screen->lock);
377 fd_batch_reference_locked(&write_batch, rsc->write_batch);
378 mtx_unlock(&ctx->screen->lock);
379
380 if (usage & PIPE_TRANSFER_WRITE) {
381 struct fd_batch *batch, *batches[32] = {};
382 uint32_t batch_mask;
383
384 /* This is a bit awkward, probably a fd_batch_flush_locked()
385 * would make things simpler.. but we need to hold the lock
386 * to iterate the batches which reference this resource. So
387 * we must first grab references under a lock, then flush.
388 */
389 mtx_lock(&ctx->screen->lock);
390 batch_mask = rsc->batch_mask;
391 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
392 fd_batch_reference_locked(&batches[batch->idx], batch);
393 mtx_unlock(&ctx->screen->lock);
394
395 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
396 fd_batch_flush(batch, false, false);
397
398 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask) {
399 fd_batch_sync(batch);
400 fd_batch_reference(&batches[batch->idx], NULL);
401 }
402 assert(rsc->batch_mask == 0);
403 } else if (write_batch) {
404 fd_batch_flush(write_batch, true, false);
405 }
406
407 fd_batch_reference(&write_batch, NULL);
408
409 assert(!rsc->write_batch);
410 }
411
412 static void
413 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
414 {
415 flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_TRANSFER_READ);
416 }
417
418 static void
419 fd_resource_transfer_unmap(struct pipe_context *pctx,
420 struct pipe_transfer *ptrans)
421 {
422 struct fd_context *ctx = fd_context(pctx);
423 struct fd_resource *rsc = fd_resource(ptrans->resource);
424 struct fd_transfer *trans = fd_transfer(ptrans);
425
426 if (trans->staging_prsc) {
427 if (ptrans->usage & PIPE_TRANSFER_WRITE)
428 fd_blit_from_staging(ctx, trans);
429 pipe_resource_reference(&trans->staging_prsc, NULL);
430 }
431
432 if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
433 fd_bo_cpu_fini(rsc->bo);
434 }
435
436 util_range_add(&rsc->valid_buffer_range,
437 ptrans->box.x,
438 ptrans->box.x + ptrans->box.width);
439
440 pipe_resource_reference(&ptrans->resource, NULL);
441 slab_free(&ctx->transfer_pool, ptrans);
442 }
443
444 static void *
445 fd_resource_transfer_map(struct pipe_context *pctx,
446 struct pipe_resource *prsc,
447 unsigned level, unsigned usage,
448 const struct pipe_box *box,
449 struct pipe_transfer **pptrans)
450 {
451 struct fd_context *ctx = fd_context(pctx);
452 struct fd_resource *rsc = fd_resource(prsc);
453 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
454 struct fd_transfer *trans;
455 struct pipe_transfer *ptrans;
456 enum pipe_format format = prsc->format;
457 uint32_t op = 0;
458 uint32_t offset;
459 char *buf;
460 int ret = 0;
461
462 DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
463 box->width, box->height, box->x, box->y);
464
465 ptrans = slab_alloc(&ctx->transfer_pool);
466 if (!ptrans)
467 return NULL;
468
469 /* slab_alloc_st() doesn't zero: */
470 trans = fd_transfer(ptrans);
471 memset(trans, 0, sizeof(*trans));
472
473 pipe_resource_reference(&ptrans->resource, prsc);
474 ptrans->level = level;
475 ptrans->usage = usage;
476 ptrans->box = *box;
477 ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
478 ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
479
480 /* we always need a staging texture for tiled buffers:
481 *
482 * TODO we might sometimes want to *also* shadow the resource to avoid
483 * splitting a batch.. for ex, mid-frame texture uploads to a tiled
484 * texture.
485 */
486 if (rsc->tile_mode) {
487 struct fd_resource *staging_rsc;
488
489 staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
490 if (staging_rsc) {
491 // TODO for PIPE_TRANSFER_READ, need to do untiling blit..
492 trans->staging_prsc = &staging_rsc->base;
493 trans->base.stride = util_format_get_nblocksx(format,
494 staging_rsc->slices[0].pitch) * staging_rsc->cpp;
495 trans->base.layer_stride = staging_rsc->layer_first ?
496 staging_rsc->layer_size : staging_rsc->slices[0].size0;
497 trans->staging_box = *box;
498 trans->staging_box.x = 0;
499 trans->staging_box.y = 0;
500 trans->staging_box.z = 0;
501
502 if (usage & PIPE_TRANSFER_READ) {
503 fd_blit_to_staging(ctx, trans);
504
505 struct fd_batch *batch = NULL;
506
507 fd_context_lock(ctx);
508 fd_batch_reference_locked(&batch, staging_rsc->write_batch);
509 fd_context_unlock(ctx);
510
511 /* we can't fd_bo_cpu_prep() until the blit to staging
512 * is submitted to kernel.. in that case write_batch
513 * wouldn't be NULL yet:
514 */
515 if (batch) {
516 fd_batch_sync(batch);
517 fd_batch_reference(&batch, NULL);
518 }
519
520 fd_bo_cpu_prep(staging_rsc->bo, ctx->pipe,
521 DRM_FREEDRENO_PREP_READ);
522 }
523
524 buf = fd_bo_map(staging_rsc->bo);
525 offset = 0;
526
527 *pptrans = ptrans;
528
529 ctx->stats.staging_uploads++;
530
531 return buf;
532 }
533 }
534
535 if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
536 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
537
538 if (usage & PIPE_TRANSFER_READ)
539 op |= DRM_FREEDRENO_PREP_READ;
540
541 if (usage & PIPE_TRANSFER_WRITE)
542 op |= DRM_FREEDRENO_PREP_WRITE;
543
544 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
545 realloc_bo(rsc, fd_bo_size(rsc->bo));
546 rebind_resource(ctx, prsc);
547 } else if ((usage & PIPE_TRANSFER_WRITE) &&
548 prsc->target == PIPE_BUFFER &&
549 !util_ranges_intersect(&rsc->valid_buffer_range,
550 box->x, box->x + box->width)) {
551 /* We are trying to write to a previously uninitialized range. No need
552 * to wait.
553 */
554 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
555 struct fd_batch *write_batch = NULL;
556
557 /* hold a reference, so it doesn't disappear under us: */
558 fd_context_lock(ctx);
559 fd_batch_reference_locked(&write_batch, rsc->write_batch);
560 fd_context_unlock(ctx);
561
562 if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
563 write_batch->back_blit) {
564 /* if only thing pending is a back-blit, we can discard it: */
565 fd_batch_reset(write_batch);
566 }
567
568 /* If the GPU is writing to the resource, or if it is reading from the
569 * resource and we're trying to write to it, flush the renders.
570 */
571 bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
572 bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
573 ctx->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
574
575 /* if we need to flush/stall, see if we can make a shadow buffer
576 * to avoid this:
577 *
578 * TODO we could go down this path !reorder && !busy_for_read
579 * ie. we only *don't* want to go down this path if the blit
580 * will trigger a flush!
581 */
582 if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ) &&
583 (usage & PIPE_TRANSFER_DISCARD_RANGE)) {
584 /* try shadowing only if it avoids a flush, otherwise staging would
585 * be better:
586 */
587 if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box)) {
588 needs_flush = busy = false;
589 rebind_resource(ctx, prsc);
590 ctx->stats.shadow_uploads++;
591 } else {
592 struct fd_resource *staging_rsc;
593
594 if (needs_flush) {
595 flush_resource(ctx, rsc, usage);
596 needs_flush = false;
597 }
598
599 /* in this case, we don't need to shadow the whole resource,
600 * since any draw that references the previous contents has
601 * already had rendering flushed for all tiles. So we can
602 * use a staging buffer to do the upload.
603 */
604 staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
605 if (staging_rsc) {
606 trans->staging_prsc = &staging_rsc->base;
607 trans->base.stride = util_format_get_nblocksx(format,
608 staging_rsc->slices[0].pitch) * staging_rsc->cpp;
609 trans->base.layer_stride = staging_rsc->layer_first ?
610 staging_rsc->layer_size : staging_rsc->slices[0].size0;
611 trans->staging_box = *box;
612 trans->staging_box.x = 0;
613 trans->staging_box.y = 0;
614 trans->staging_box.z = 0;
615 buf = fd_bo_map(staging_rsc->bo);
616 offset = 0;
617
618 *pptrans = ptrans;
619
620 fd_batch_reference(&write_batch, NULL);
621
622 ctx->stats.staging_uploads++;
623
624 return buf;
625 }
626 }
627 }
628
629 if (needs_flush) {
630 flush_resource(ctx, rsc, usage);
631 needs_flush = false;
632 }
633
634 fd_batch_reference(&write_batch, NULL);
635
636 /* The GPU keeps track of how the various bo's are being used, and
637 * will wait if necessary for the proper operation to have
638 * completed.
639 */
640 if (busy) {
641 ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
642 if (ret)
643 goto fail;
644 }
645 }
646
647 buf = fd_bo_map(rsc->bo);
648 offset =
649 box->y / util_format_get_blockheight(format) * ptrans->stride +
650 box->x / util_format_get_blockwidth(format) * rsc->cpp +
651 fd_resource_offset(rsc, level, box->z);
652
653 if (usage & PIPE_TRANSFER_WRITE)
654 rsc->valid = true;
655
656 *pptrans = ptrans;
657
658 return buf + offset;
659
660 fail:
661 fd_resource_transfer_unmap(pctx, ptrans);
662 return NULL;
663 }
664
665 static void
666 fd_resource_destroy(struct pipe_screen *pscreen,
667 struct pipe_resource *prsc)
668 {
669 struct fd_resource *rsc = fd_resource(prsc);
670 fd_bc_invalidate_resource(rsc, true);
671 if (rsc->bo)
672 fd_bo_del(rsc->bo);
673 if (rsc->scanout)
674 renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);
675
676 util_range_destroy(&rsc->valid_buffer_range);
677 FREE(rsc);
678 }
679
680 static uint64_t
681 fd_resource_modifier(struct fd_resource *rsc)
682 {
683 if (!rsc->tile_mode)
684 return DRM_FORMAT_MOD_LINEAR;
685
686 /* TODO invent a modifier for tiled but not UBWC buffers: */
687 return DRM_FORMAT_MOD_INVALID;
688 }
689
690 static boolean
691 fd_resource_get_handle(struct pipe_screen *pscreen,
692 struct pipe_context *pctx,
693 struct pipe_resource *prsc,
694 struct winsys_handle *handle,
695 unsigned usage)
696 {
697 struct fd_resource *rsc = fd_resource(prsc);
698
699 handle->modifier = fd_resource_modifier(rsc);
700
701 return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
702 rsc->slices[0].pitch * rsc->cpp, handle);
703 }
704
705 static uint32_t
706 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
707 {
708 struct pipe_resource *prsc = &rsc->base;
709 struct fd_screen *screen = fd_screen(prsc->screen);
710 enum util_format_layout layout = util_format_description(format)->layout;
711 uint32_t pitchalign = screen->gmem_alignw;
712 uint32_t level, size = 0;
713 uint32_t width = prsc->width0;
714 uint32_t height = prsc->height0;
715 uint32_t depth = prsc->depth0;
716 /* in layer_first layout, the level (slice) contains just one
717 * layer (since in fact the layer contains the slices)
718 */
719 uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
720
721 for (level = 0; level <= prsc->last_level; level++) {
722 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
723 uint32_t blocks;
724
725 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
726 slice->pitch = width =
727 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
728 else
729 slice->pitch = width = align(width, pitchalign);
730 slice->offset = size;
731 blocks = util_format_get_nblocks(format, width, height);
732 /* 1d array and 2d array textures must all have the same layer size
733 * for each miplevel on a3xx. 3d textures can have different layer
734 * sizes for high levels, but the hw auto-sizer is buggy (or at least
735 * different than what this code does), so as soon as the layer size
736 * range gets into range, we stop reducing it.
737 */
738 if (prsc->target == PIPE_TEXTURE_3D && (
739 level == 1 ||
740 (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
741 slice->size0 = align(blocks * rsc->cpp, alignment);
742 else if (level == 0 || rsc->layer_first || alignment == 1)
743 slice->size0 = align(blocks * rsc->cpp, alignment);
744 else
745 slice->size0 = rsc->slices[level - 1].size0;
746
747 size += slice->size0 * depth * layers_in_level;
748
749 width = u_minify(width, 1);
750 height = u_minify(height, 1);
751 depth = u_minify(depth, 1);
752 }
753
754 return size;
755 }
756
757 static uint32_t
758 slice_alignment(enum pipe_texture_target target)
759 {
760 /* on a3xx, 2d array and 3d textures seem to want their
761 * layers aligned to page boundaries:
762 */
763 switch (target) {
764 case PIPE_TEXTURE_3D:
765 case PIPE_TEXTURE_1D_ARRAY:
766 case PIPE_TEXTURE_2D_ARRAY:
767 return 4096;
768 default:
769 return 1;
770 }
771 }
772
773 /* cross generation texture layout to plug in to screen->setup_slices()..
774 * replace with generation specific one as-needed.
775 *
776 * TODO for a4xx probably can extract out the a4xx specific logic int
777 * a small fd4_setup_slices() wrapper that sets up layer_first, and then
778 * calls this.
779 */
780 uint32_t
781 fd_setup_slices(struct fd_resource *rsc)
782 {
783 uint32_t alignment;
784
785 alignment = slice_alignment(rsc->base.target);
786
787 struct fd_screen *screen = fd_screen(rsc->base.screen);
788 if (is_a4xx(screen)) {
789 switch (rsc->base.target) {
790 case PIPE_TEXTURE_3D:
791 rsc->layer_first = false;
792 break;
793 default:
794 rsc->layer_first = true;
795 alignment = 1;
796 break;
797 }
798 }
799
800 return setup_slices(rsc, alignment, rsc->base.format);
801 }
802
803 /* special case to resize query buf after allocated.. */
804 void
805 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
806 {
807 struct fd_resource *rsc = fd_resource(prsc);
808
809 debug_assert(prsc->width0 == 0);
810 debug_assert(prsc->target == PIPE_BUFFER);
811 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
812
813 prsc->width0 = sz;
814 realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
815 }
816
817 // TODO common helper?
818 static bool
819 has_depth(enum pipe_format format)
820 {
821 switch (format) {
822 case PIPE_FORMAT_Z16_UNORM:
823 case PIPE_FORMAT_Z32_UNORM:
824 case PIPE_FORMAT_Z32_FLOAT:
825 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
826 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
827 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
828 case PIPE_FORMAT_Z24X8_UNORM:
829 case PIPE_FORMAT_X8Z24_UNORM:
830 return true;
831 default:
832 return false;
833 }
834 }
835
/**
 * Linear search for a modifier in a list.
 *
 * @param needle    the modifier to look for
 * @param haystack  array of 'count' modifiers
 * @param count     number of entries in haystack; nothing is searched
 *                  when it is zero or negative
 *
 * @return true if needle is present in haystack
 */
static bool
find_modifier(uint64_t needle, const uint64_t *haystack, int count)
{
	const uint64_t *cur = haystack;
	int remaining = count;

	while (remaining-- > 0) {
		if (*cur++ == needle)
			return true;
	}

	return false;
}
848
849 /**
850 * Create a new texture object, using the given template info.
851 */
852 static struct pipe_resource *
853 fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
854 const struct pipe_resource *tmpl,
855 const uint64_t *modifiers, int count)
856 {
857 struct fd_screen *screen = fd_screen(pscreen);
858 struct fd_resource *rsc;
859 struct pipe_resource *prsc;
860 enum pipe_format format = tmpl->format;
861 uint32_t size;
862
863 /* when using kmsro, scanout buffers are allocated on the display device
864 * create_with_modifiers() doesn't give us usage flags, so we have to
865 * assume that all calls with modifiers are scanout-possible
866 */
867 if (screen->ro &&
868 ((tmpl->bind & PIPE_BIND_SCANOUT) ||
869 !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
870 struct pipe_resource scanout_templat = *tmpl;
871 struct renderonly_scanout *scanout;
872 struct winsys_handle handle;
873
874 scanout = renderonly_scanout_for_resource(&scanout_templat,
875 screen->ro, &handle);
876 if (!scanout)
877 return NULL;
878
879 renderonly_scanout_destroy(scanout, screen->ro);
880
881 assert(handle.type == WINSYS_HANDLE_TYPE_FD);
882 rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl,
883 &handle,
884 PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
885 close(handle.handle);
886 if (!rsc)
887 return NULL;
888
889 return &rsc->base;
890 }
891
892 rsc = CALLOC_STRUCT(fd_resource);
893 prsc = &rsc->base;
894
895 DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
896 "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
897 tmpl->target, util_format_name(format),
898 tmpl->width0, tmpl->height0, tmpl->depth0,
899 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
900 tmpl->usage, tmpl->bind, tmpl->flags);
901
902 if (!rsc)
903 return NULL;
904
905 *prsc = *tmpl;
906
907 #define LINEAR \
908 (PIPE_BIND_SCANOUT | \
909 PIPE_BIND_LINEAR | \
910 PIPE_BIND_DISPLAY_TARGET)
911
912 bool linear = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
913 if (tmpl->bind & LINEAR)
914 linear = true;
915
916 /* Normally, for non-shared buffers, allow buffer compression if
917 * not shared, otherwise only allow if QCOM_COMPRESSED modifier
918 * is requested:
919 *
920 * TODO we should probably also limit tiled in a similar way,
921 * except we don't have a format modifier for tiled. (We probably
922 * should.)
923 */
924 bool allow_ubwc = find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count);
925 if (tmpl->bind & PIPE_BIND_SHARED)
926 allow_ubwc = find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
927
928 if (screen->tile_mode &&
929 (tmpl->target != PIPE_BUFFER) &&
930 !linear) {
931 rsc->tile_mode = screen->tile_mode(tmpl);
932 }
933
934 pipe_reference_init(&prsc->reference, 1);
935
936 prsc->screen = pscreen;
937
938 util_range_init(&rsc->valid_buffer_range);
939
940 rsc->internal_format = format;
941 rsc->cpp = util_format_get_blocksize(format);
942 rsc->cpp *= fd_resource_nr_samples(prsc);
943
944 assert(rsc->cpp);
945
946 // XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
947 if ((is_a5xx(screen) || is_a6xx(screen)) &&
948 (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
949 const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
950 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
951 unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 64);
952 unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
953
954 /* LRZ buffer is super-sampled: */
955 switch (prsc->nr_samples) {
956 case 4:
957 lrz_pitch *= 2;
958 case 2:
959 lrz_height *= 2;
960 }
961
962 unsigned size = lrz_pitch * lrz_height * 2;
963
964 size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
965
966 rsc->lrz_height = lrz_height;
967 rsc->lrz_width = lrz_pitch;
968 rsc->lrz_pitch = lrz_pitch;
969 rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
970 }
971
972 size = screen->setup_slices(rsc);
973
974 if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->tile_mode)
975 size += screen->fill_ubwc_buffer_sizes(rsc);
976
977 /* special case for hw-query buffer, which we need to allocate before we
978 * know the size:
979 */
980 if (size == 0) {
981 /* note, semi-intention == instead of & */
982 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
983 return prsc;
984 }
985
986 if (rsc->layer_first) {
987 rsc->layer_size = align(size, 4096);
988 size = rsc->layer_size * prsc->array_size;
989 }
990
991 realloc_bo(rsc, size);
992 if (!rsc->bo)
993 goto fail;
994
995 return prsc;
996 fail:
997 fd_resource_destroy(pscreen, prsc);
998 return NULL;
999 }
1000
1001 static struct pipe_resource *
1002 fd_resource_create(struct pipe_screen *pscreen,
1003 const struct pipe_resource *tmpl)
1004 {
1005 const uint64_t mod = DRM_FORMAT_MOD_INVALID;
1006 return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
1007 }
1008
1009 static bool
1010 is_supported_modifier(struct pipe_screen *pscreen, enum pipe_format pfmt,
1011 uint64_t mod)
1012 {
1013 int count;
1014
1015 /* Get the count of supported modifiers: */
1016 pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, NULL, NULL, &count);
1017
1018 /* Get the supported modifiers: */
1019 uint64_t modifiers[count];
1020 pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, modifiers, NULL, &count);
1021
1022 for (int i = 0; i < count; i++)
1023 if (modifiers[i] == mod)
1024 return true;
1025
1026 return false;
1027 }
1028
1029 /**
1030 * Create a texture from a winsys_handle. The handle is often created in
1031 * another process by first creating a pipe texture and then calling
1032 * resource_get_handle.
1033 */
1034 static struct pipe_resource *
1035 fd_resource_from_handle(struct pipe_screen *pscreen,
1036 const struct pipe_resource *tmpl,
1037 struct winsys_handle *handle, unsigned usage)
1038 {
1039 struct fd_screen *screen = fd_screen(pscreen);
1040 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
1041 struct fd_resource_slice *slice = &rsc->slices[0];
1042 struct pipe_resource *prsc = &rsc->base;
1043 uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;
1044
1045 DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
1046 "nr_samples=%u, usage=%u, bind=%x, flags=%x",
1047 tmpl->target, util_format_name(tmpl->format),
1048 tmpl->width0, tmpl->height0, tmpl->depth0,
1049 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
1050 tmpl->usage, tmpl->bind, tmpl->flags);
1051
1052 if (!rsc)
1053 return NULL;
1054
1055 *prsc = *tmpl;
1056
1057 pipe_reference_init(&prsc->reference, 1);
1058
1059 prsc->screen = pscreen;
1060
1061 util_range_init(&rsc->valid_buffer_range);
1062
1063 rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
1064 if (!rsc->bo)
1065 goto fail;
1066
1067 rsc->internal_format = tmpl->format;
1068 rsc->cpp = util_format_get_blocksize(tmpl->format);
1069 rsc->cpp *= fd_resource_nr_samples(prsc);
1070 slice->pitch = handle->stride / rsc->cpp;
1071 slice->offset = handle->offset;
1072 slice->size0 = handle->stride * prsc->height0;
1073
1074 if ((slice->pitch < align(prsc->width0, pitchalign)) ||
1075 (slice->pitch & (pitchalign - 1)))
1076 goto fail;
1077
1078 if (handle->modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) {
1079 if (!is_supported_modifier(pscreen, tmpl->format,
1080 DRM_FORMAT_MOD_QCOM_COMPRESSED)) {
1081 DBG("bad modifier: %lx", handle->modifier);
1082 goto fail;
1083 }
1084 debug_assert(screen->fill_ubwc_buffer_sizes);
1085 screen->fill_ubwc_buffer_sizes(rsc);
1086 } else if (handle->modifier &&
1087 (handle->modifier != DRM_FORMAT_MOD_INVALID)) {
1088 goto fail;
1089 }
1090
1091 assert(rsc->cpp);
1092
1093 if (screen->ro) {
1094 rsc->scanout =
1095 renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL);
1096 /* failure is expected in some cases.. */
1097 }
1098
1099 return prsc;
1100
1101 fail:
1102 fd_resource_destroy(pscreen, prsc);
1103 return NULL;
1104 }
1105
1106 bool
1107 fd_render_condition_check(struct pipe_context *pctx)
1108 {
1109 struct fd_context *ctx = fd_context(pctx);
1110
1111 if (!ctx->cond_query)
1112 return true;
1113
1114 union pipe_query_result res = { 0 };
1115 bool wait =
1116 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
1117 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
1118
1119 if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
1120 return (bool)res.u64 != ctx->cond_cond;
1121
1122 return true;
1123 }
1124
1125 static void
1126 fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1127 {
1128 struct fd_context *ctx = fd_context(pctx);
1129 struct fd_resource *rsc = fd_resource(prsc);
1130
1131 /*
1132 * TODO I guess we could track that the resource is invalidated and
1133 * use that as a hint to realloc rather than stall in _transfer_map(),
1134 * even in the non-DISCARD_WHOLE_RESOURCE case?
1135 *
1136 * Note: we set dirty bits to trigger invalidate logic fd_draw_vbo
1137 */
1138
1139 if (rsc->write_batch) {
1140 struct fd_batch *batch = rsc->write_batch;
1141 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1142
1143 if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
1144 batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
1145 ctx->dirty |= FD_DIRTY_ZSA;
1146 }
1147
1148 for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
1149 if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
1150 batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1151 ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
1152 }
1153 }
1154 }
1155
1156 rsc->valid = false;
1157 }
1158
1159 static enum pipe_format
1160 fd_resource_get_internal_format(struct pipe_resource *prsc)
1161 {
1162 return fd_resource(prsc)->internal_format;
1163 }
1164
1165 static void
1166 fd_resource_set_stencil(struct pipe_resource *prsc,
1167 struct pipe_resource *stencil)
1168 {
1169 fd_resource(prsc)->stencil = fd_resource(stencil);
1170 }
1171
1172 static struct pipe_resource *
1173 fd_resource_get_stencil(struct pipe_resource *prsc)
1174 {
1175 struct fd_resource *rsc = fd_resource(prsc);
1176 if (rsc->stencil)
1177 return &rsc->stencil->base;
1178 return NULL;
1179 }
1180
1181 static const struct u_transfer_vtbl transfer_vtbl = {
1182 .resource_create = fd_resource_create,
1183 .resource_destroy = fd_resource_destroy,
1184 .transfer_map = fd_resource_transfer_map,
1185 .transfer_flush_region = fd_resource_transfer_flush_region,
1186 .transfer_unmap = fd_resource_transfer_unmap,
1187 .get_internal_format = fd_resource_get_internal_format,
1188 .set_stencil = fd_resource_set_stencil,
1189 .get_stencil = fd_resource_get_stencil,
1190 };
1191
1192 void
1193 fd_resource_screen_init(struct pipe_screen *pscreen)
1194 {
1195 struct fd_screen *screen = fd_screen(pscreen);
1196 bool fake_rgtc = screen->gpu_id < 400;
1197
1198 pscreen->resource_create = u_transfer_helper_resource_create;
1199 /* NOTE: u_transfer_helper does not yet support the _with_modifiers()
1200 * variant:
1201 */
1202 pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers;
1203 pscreen->resource_from_handle = fd_resource_from_handle;
1204 pscreen->resource_get_handle = fd_resource_get_handle;
1205 pscreen->resource_destroy = u_transfer_helper_resource_destroy;
1206
1207 pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
1208 true, false, fake_rgtc, true);
1209
1210 if (!screen->setup_slices)
1211 screen->setup_slices = fd_setup_slices;
1212 }
1213
/**
 * Report the position of one sample within a pixel.
 *
 * @context:       unused
 * @sample_count:  number of samples per pixel; 1, 2, 4 and 8 are handled
 * @sample_index:  which sample to report, must be below sample_count
 * @pos_out:       receives the x and y position, each as a table value
 *                 divided by 16
 *
 * For an unhandled sample_count (which also asserts) or an out-of-range
 * sample_index, pos_out is left untouched.
 */
static void
fd_get_sample_position(struct pipe_context *context,
		unsigned sample_count, unsigned sample_index,
		float *pos_out)
{
	/* The following is copied from nouveau/nv50 except for position
	 * values, which are taken from blob driver */
	static const uint8_t pos1[1][2] = { { 0x8, 0x8 } };
	static const uint8_t pos2[2][2] = {
		{ 0xc, 0xc }, { 0x4, 0x4 } };
	static const uint8_t pos4[4][2] = {
		{ 0x6, 0x2 }, { 0xe, 0x6 },
		{ 0x2, 0xa }, { 0xa, 0xe } };
	/* TODO needs to be verified on supported hw */
	static const uint8_t pos8[8][2] = {
		{ 0x9, 0x5 }, { 0x7, 0xb },
		{ 0xd, 0x9 }, { 0x5, 0x3 },
		{ 0x3, 0xd }, { 0x1, 0x7 },
		{ 0xb, 0xf }, { 0xf, 0x1 } };

	const uint8_t (*ptr)[2];

	switch (sample_count) {
	case 1:
		ptr = pos1;
		break;
	case 2:
		ptr = pos2;
		break;
	case 4:
		ptr = pos4;
		break;
	case 8:
		ptr = pos8;
		break;
	default:
		assert(0);
		return;
	}

	/* Each table holds exactly sample_count entries, so refuse an index
	 * past the end rather than reading beyond the table:
	 */
	if (sample_index >= sample_count)
		return;

	pos_out[0] = ptr[sample_index][0] / 16.0f;
	pos_out[1] = ptr[sample_index][1] / 16.0f;
}
1257
1258 void
1259 fd_resource_context_init(struct pipe_context *pctx)
1260 {
1261 pctx->transfer_map = u_transfer_helper_transfer_map;
1262 pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1263 pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
1264 pctx->buffer_subdata = u_default_buffer_subdata;
1265 pctx->texture_subdata = u_default_texture_subdata;
1266 pctx->create_surface = fd_create_surface;
1267 pctx->surface_destroy = fd_surface_destroy;
1268 pctx->resource_copy_region = fd_resource_copy_region;
1269 pctx->blit = fd_blit;
1270 pctx->flush_resource = fd_flush_resource;
1271 pctx->invalidate_resource = fd_invalidate_resource;
1272 pctx->get_sample_position = fd_get_sample_position;
1273 }