880666d3af5dd117e6b068e099f0b8bf666e021e
[mesa.git] / src / gallium / drivers / freedreno / freedreno_resource.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "util/u_format.h"
30 #include "util/u_format_rgtc.h"
31 #include "util/u_format_zs.h"
32 #include "util/u_inlines.h"
33 #include "util/u_transfer.h"
34 #include "util/u_string.h"
35 #include "util/u_surface.h"
36 #include "util/set.h"
37
38 #include "freedreno_resource.h"
39 #include "freedreno_batch_cache.h"
40 #include "freedreno_screen.h"
41 #include "freedreno_surface.h"
42 #include "freedreno_context.h"
43 #include "freedreno_query_hw.h"
44 #include "freedreno_util.h"
45
46 #include <errno.h>
47
48 /* XXX this should go away, needed for 'struct winsys_handle' */
49 #include "state_tracker/drm_driver.h"
50
51 /**
52 * Go through the entire state and see if the resource is bound
53 * anywhere. If it is, mark the relevant state as dirty. This is
54 * called on realloc_bo to ensure the neccessary state is re-
55 * emitted so the GPU looks at the new backing bo.
56 */
57 static void
58 rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc)
59 {
60 /* VBOs */
61 for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
62 if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
63 ctx->dirty |= FD_DIRTY_VTXBUF;
64 }
65
66 /* per-shader-stage resources: */
67 for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
68 /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
69 * cmdstream rather than by pointer..
70 */
71 const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
72 for (unsigned i = 1; i < num_ubos; i++) {
73 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
74 break;
75 if (ctx->constbuf[stage].cb[i].buffer == prsc)
76 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
77 }
78
79 /* Textures */
80 for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
81 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
82 break;
83 if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
84 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
85 }
86
87 /* SSBOs */
88 const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
89 for (unsigned i = 0; i < num_ssbos; i++) {
90 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
91 break;
92 if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
93 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
94 }
95 }
96 }
97
98 static void
99 realloc_bo(struct fd_resource *rsc, uint32_t size)
100 {
101 struct fd_screen *screen = fd_screen(rsc->base.b.screen);
102 uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
103 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
104
105 /* if we start using things other than write-combine,
106 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
107 */
108
109 if (rsc->bo)
110 fd_bo_del(rsc->bo);
111
112 rsc->bo = fd_bo_new(screen->dev, size, flags);
113 util_range_set_empty(&rsc->valid_buffer_range);
114 fd_bc_invalidate_resource(rsc, true);
115 }
116
117 static void
118 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
119 {
120 /* TODO size threshold too?? */
121 if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
122 /* do blit on gpu: */
123 fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
124 util_blitter_blit(ctx->blitter, blit);
125 fd_blitter_pipe_end(ctx);
126 } else {
127 /* do blit on cpu: */
128 util_resource_copy_region(&ctx->base,
129 blit->dst.resource, blit->dst.level, blit->dst.box.x,
130 blit->dst.box.y, blit->dst.box.z,
131 blit->src.resource, blit->src.level, &blit->src.box);
132 }
133 }
134
135 static bool
136 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
137 unsigned level, unsigned usage, const struct pipe_box *box)
138 {
139 struct pipe_context *pctx = &ctx->base;
140 struct pipe_resource *prsc = &rsc->base.b;
141 bool fallback = false;
142
143 if (prsc->next)
144 return false;
145
146 /* TODO: somehow munge dimensions and format to copy unsupported
147 * render target format to something that is supported?
148 */
149 if (!pctx->screen->is_format_supported(pctx->screen,
150 prsc->format, prsc->target, prsc->nr_samples,
151 PIPE_BIND_RENDER_TARGET))
152 fallback = true;
153
154 /* these cases should be handled elsewhere.. just for future
155 * reference in case this gets split into a more generic(ish)
156 * helper.
157 */
158 debug_assert(!(usage & PIPE_TRANSFER_READ));
159 debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
160
161 /* if we do a gpu blit to clone the whole resource, we'll just
162 * end up stalling on that.. so only allow if we can discard
163 * current range (and blit, possibly cpu or gpu, the rest)
164 */
165 if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
166 return false;
167
168 bool whole_level = util_texrange_covers_whole_level(prsc, level,
169 box->x, box->y, box->z, box->width, box->height, box->depth);
170
171 /* TODO need to be more clever about current level */
172 if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
173 return false;
174
175 struct pipe_resource *pshadow =
176 pctx->screen->resource_create(pctx->screen, prsc);
177
178 if (!pshadow)
179 return false;
180
181 assert(!ctx->in_shadow);
182 ctx->in_shadow = true;
183
184 /* get rid of any references that batch-cache might have to us (which
185 * should empty/destroy rsc->batches hashset)
186 */
187 fd_bc_invalidate_resource(rsc, false);
188
189 mtx_lock(&ctx->screen->lock);
190
191 /* Swap the backing bo's, so shadow becomes the old buffer,
192 * blit from shadow to new buffer. From here on out, we
193 * cannot fail.
194 *
195 * Note that we need to do it in this order, otherwise if
196 * we go down cpu blit path, the recursive transfer_map()
197 * sees the wrong status..
198 */
199 struct fd_resource *shadow = fd_resource(pshadow);
200
201 DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
202 shadow, shadow->base.b.reference.count);
203
204 /* TODO valid_buffer_range?? */
205 swap(rsc->bo, shadow->bo);
206 swap(rsc->write_batch, shadow->write_batch);
207
208 /* at this point, the newly created shadow buffer is not referenced
209 * by any batches, but the existing rsc (probably) is. We need to
210 * transfer those references over:
211 */
212 debug_assert(shadow->batch_mask == 0);
213 struct fd_batch *batch;
214 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
215 struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
216 _mesa_set_remove(batch->resources, entry);
217 _mesa_set_add(batch->resources, shadow);
218 }
219 swap(rsc->batch_mask, shadow->batch_mask);
220
221 mtx_unlock(&ctx->screen->lock);
222
223 struct pipe_blit_info blit = {0};
224 blit.dst.resource = prsc;
225 blit.dst.format = prsc->format;
226 blit.src.resource = pshadow;
227 blit.src.format = pshadow->format;
228 blit.mask = util_format_get_mask(prsc->format);
229 blit.filter = PIPE_TEX_FILTER_NEAREST;
230
231 #define set_box(field, val) do { \
232 blit.dst.field = (val); \
233 blit.src.field = (val); \
234 } while (0)
235
236 /* blit the other levels in their entirety: */
237 for (unsigned l = 0; l <= prsc->last_level; l++) {
238 if (l == level)
239 continue;
240
241 /* just blit whole level: */
242 set_box(level, l);
243 set_box(box.width, u_minify(prsc->width0, l));
244 set_box(box.height, u_minify(prsc->height0, l));
245 set_box(box.depth, u_minify(prsc->depth0, l));
246
247 do_blit(ctx, &blit, fallback);
248 }
249
250 /* deal w/ current level specially, since we might need to split
251 * it up into a couple blits:
252 */
253 if (!whole_level) {
254 set_box(level, level);
255
256 switch (prsc->target) {
257 case PIPE_BUFFER:
258 case PIPE_TEXTURE_1D:
259 set_box(box.y, 0);
260 set_box(box.z, 0);
261 set_box(box.height, 1);
262 set_box(box.depth, 1);
263
264 if (box->x > 0) {
265 set_box(box.x, 0);
266 set_box(box.width, box->x);
267
268 do_blit(ctx, &blit, fallback);
269 }
270 if ((box->x + box->width) < u_minify(prsc->width0, level)) {
271 set_box(box.x, box->x + box->width);
272 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
273
274 do_blit(ctx, &blit, fallback);
275 }
276 break;
277 case PIPE_TEXTURE_2D:
278 /* TODO */
279 default:
280 unreachable("TODO");
281 }
282 }
283
284 ctx->in_shadow = false;
285
286 pipe_resource_reference(&pshadow, NULL);
287
288 return true;
289 }
290
291 static unsigned
292 fd_resource_layer_offset(struct fd_resource *rsc,
293 struct fd_resource_slice *slice,
294 unsigned layer)
295 {
296 if (rsc->layer_first)
297 return layer * rsc->layer_size;
298 else
299 return layer * slice->size0;
300 }
301
302 static void
303 fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
304 {
305 struct fd_resource *rsc = fd_resource(trans->base.resource);
306 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
307 struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
308 enum pipe_format format = trans->base.resource->format;
309
310 float *depth = fd_bo_map(rsc->bo) + slice->offset +
311 fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
312 (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
313 uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
314 fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
315 (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
316
317 if (format != PIPE_FORMAT_X32_S8X24_UINT)
318 util_format_z32_float_s8x24_uint_unpack_z_float(
319 depth, slice->pitch * 4,
320 trans->staging, trans->base.stride,
321 box->width, box->height);
322
323 util_format_z32_float_s8x24_uint_unpack_s_8uint(
324 stencil, sslice->pitch,
325 trans->staging, trans->base.stride,
326 box->width, box->height);
327 }
328
329 static void
330 fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
331 {
332 struct fd_resource *rsc = fd_resource(trans->base.resource);
333 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
334 enum pipe_format format = trans->base.resource->format;
335
336 uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
337 fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
338 ((trans->base.box.y + box->y) * slice->pitch +
339 trans->base.box.x + box->x) * rsc->cpp;
340
341 uint8_t *source = trans->staging +
342 util_format_get_nblocksy(format, box->y) * trans->base.stride +
343 util_format_get_stride(format, box->x);
344
345 switch (format) {
346 case PIPE_FORMAT_RGTC1_UNORM:
347 case PIPE_FORMAT_RGTC1_SNORM:
348 case PIPE_FORMAT_LATC1_UNORM:
349 case PIPE_FORMAT_LATC1_SNORM:
350 util_format_rgtc1_unorm_unpack_rgba_8unorm(
351 data, slice->pitch * rsc->cpp,
352 source, trans->base.stride,
353 box->width, box->height);
354 break;
355 case PIPE_FORMAT_RGTC2_UNORM:
356 case PIPE_FORMAT_RGTC2_SNORM:
357 case PIPE_FORMAT_LATC2_UNORM:
358 case PIPE_FORMAT_LATC2_SNORM:
359 util_format_rgtc2_unorm_unpack_rgba_8unorm(
360 data, slice->pitch * rsc->cpp,
361 source, trans->base.stride,
362 box->width, box->height);
363 break;
364 default:
365 assert(!"Unexpected format\n");
366 break;
367 }
368 }
369
370 static void
371 fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
372 {
373 enum pipe_format format = trans->base.resource->format;
374
375 switch (format) {
376 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
377 case PIPE_FORMAT_X32_S8X24_UINT:
378 fd_resource_flush_z32s8(trans, box);
379 break;
380 case PIPE_FORMAT_RGTC1_UNORM:
381 case PIPE_FORMAT_RGTC1_SNORM:
382 case PIPE_FORMAT_RGTC2_UNORM:
383 case PIPE_FORMAT_RGTC2_SNORM:
384 case PIPE_FORMAT_LATC1_UNORM:
385 case PIPE_FORMAT_LATC1_SNORM:
386 case PIPE_FORMAT_LATC2_UNORM:
387 case PIPE_FORMAT_LATC2_SNORM:
388 fd_resource_flush_rgtc(trans, box);
389 break;
390 default:
391 assert(!"Unexpected staging transfer type");
392 break;
393 }
394 }
395
396 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
397 struct pipe_transfer *ptrans,
398 const struct pipe_box *box)
399 {
400 struct fd_resource *rsc = fd_resource(ptrans->resource);
401 struct fd_transfer *trans = fd_transfer(ptrans);
402
403 if (ptrans->resource->target == PIPE_BUFFER)
404 util_range_add(&rsc->valid_buffer_range,
405 ptrans->box.x + box->x,
406 ptrans->box.x + box->x + box->width);
407
408 if (trans->staging)
409 fd_resource_flush(trans, box);
410 }
411
412 static void
413 fd_resource_transfer_unmap(struct pipe_context *pctx,
414 struct pipe_transfer *ptrans)
415 {
416 struct fd_context *ctx = fd_context(pctx);
417 struct fd_resource *rsc = fd_resource(ptrans->resource);
418 struct fd_transfer *trans = fd_transfer(ptrans);
419
420 if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
421 struct pipe_box box;
422 u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
423 fd_resource_flush(trans, &box);
424 }
425
426 if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
427 fd_bo_cpu_fini(rsc->bo);
428 if (rsc->stencil)
429 fd_bo_cpu_fini(rsc->stencil->bo);
430 }
431
432 util_range_add(&rsc->valid_buffer_range,
433 ptrans->box.x,
434 ptrans->box.x + ptrans->box.width);
435
436 pipe_resource_reference(&ptrans->resource, NULL);
437 slab_free(&ctx->transfer_pool, ptrans);
438
439 free(trans->staging);
440 }
441
442 static void *
443 fd_resource_transfer_map(struct pipe_context *pctx,
444 struct pipe_resource *prsc,
445 unsigned level, unsigned usage,
446 const struct pipe_box *box,
447 struct pipe_transfer **pptrans)
448 {
449 struct fd_context *ctx = fd_context(pctx);
450 struct fd_resource *rsc = fd_resource(prsc);
451 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
452 struct fd_transfer *trans;
453 struct pipe_transfer *ptrans;
454 enum pipe_format format = prsc->format;
455 uint32_t op = 0;
456 uint32_t offset;
457 char *buf;
458 int ret = 0;
459
460 DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
461 box->width, box->height, box->x, box->y);
462
463 ptrans = slab_alloc(&ctx->transfer_pool);
464 if (!ptrans)
465 return NULL;
466
467 /* slab_alloc_st() doesn't zero: */
468 trans = fd_transfer(ptrans);
469 memset(trans, 0, sizeof(*trans));
470
471 pipe_resource_reference(&ptrans->resource, prsc);
472 ptrans->level = level;
473 ptrans->usage = usage;
474 ptrans->box = *box;
475 ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
476 ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
477
478 if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
479 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
480
481 if (usage & PIPE_TRANSFER_READ)
482 op |= DRM_FREEDRENO_PREP_READ;
483
484 if (usage & PIPE_TRANSFER_WRITE)
485 op |= DRM_FREEDRENO_PREP_WRITE;
486
487 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
488 realloc_bo(rsc, fd_bo_size(rsc->bo));
489 if (rsc->stencil)
490 realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
491 rebind_resource(ctx, prsc);
492 } else if ((usage & PIPE_TRANSFER_WRITE) &&
493 prsc->target == PIPE_BUFFER &&
494 !util_ranges_intersect(&rsc->valid_buffer_range,
495 box->x, box->x + box->width)) {
496 /* We are trying to write to a previously uninitialized range. No need
497 * to wait.
498 */
499 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
500 struct fd_batch *write_batch = NULL;
501
502 /* hold a reference, so it doesn't disappear under us: */
503 fd_batch_reference(&write_batch, rsc->write_batch);
504
505 if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
506 write_batch->back_blit) {
507 /* if only thing pending is a back-blit, we can discard it: */
508 fd_batch_reset(write_batch);
509 }
510
511 /* If the GPU is writing to the resource, or if it is reading from the
512 * resource and we're trying to write to it, flush the renders.
513 */
514 bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
515 bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
516 ctx->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
517
518 /* if we need to flush/stall, see if we can make a shadow buffer
519 * to avoid this:
520 *
521 * TODO we could go down this path !reorder && !busy_for_read
522 * ie. we only *don't* want to go down this path if the blit
523 * will trigger a flush!
524 */
525 if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
526 if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
527 needs_flush = busy = false;
528 rebind_resource(ctx, prsc);
529 }
530 }
531
532 if (needs_flush) {
533 if (usage & PIPE_TRANSFER_WRITE) {
534 struct fd_batch *batch, *last_batch = NULL;
535 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
536 fd_batch_reference(&last_batch, batch);
537 fd_batch_flush(batch, false);
538 }
539 if (last_batch) {
540 fd_batch_sync(last_batch);
541 fd_batch_reference(&last_batch, NULL);
542 }
543 assert(rsc->batch_mask == 0);
544 } else {
545 fd_batch_flush(write_batch, true);
546 }
547 assert(!rsc->write_batch);
548 }
549
550 fd_batch_reference(&write_batch, NULL);
551
552 /* The GPU keeps track of how the various bo's are being used, and
553 * will wait if necessary for the proper operation to have
554 * completed.
555 */
556 if (busy) {
557 ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
558 if (ret)
559 goto fail;
560 }
561 }
562
563 buf = fd_bo_map(rsc->bo);
564 if (!buf)
565 goto fail;
566
567 offset = slice->offset +
568 box->y / util_format_get_blockheight(format) * ptrans->stride +
569 box->x / util_format_get_blockwidth(format) * rsc->cpp +
570 fd_resource_layer_offset(rsc, slice, box->z);
571
572 if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
573 prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
574 assert(trans->base.box.depth == 1);
575
576 trans->base.stride = trans->base.box.width * rsc->cpp * 2;
577 trans->staging = malloc(trans->base.stride * trans->base.box.height);
578 if (!trans->staging)
579 goto fail;
580
581 /* if we're not discarding the whole range (or resource), we must copy
582 * the real data in.
583 */
584 if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
585 PIPE_TRANSFER_DISCARD_RANGE))) {
586 struct fd_resource_slice *sslice =
587 fd_resource_slice(rsc->stencil, level);
588 void *sbuf = fd_bo_map(rsc->stencil->bo);
589 if (!sbuf)
590 goto fail;
591
592 float *depth = (float *)(buf + slice->offset +
593 fd_resource_layer_offset(rsc, slice, box->z) +
594 box->y * slice->pitch * 4 + box->x * 4);
595 uint8_t *stencil = sbuf + sslice->offset +
596 fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
597 box->y * sslice->pitch + box->x;
598
599 if (format != PIPE_FORMAT_X32_S8X24_UINT)
600 util_format_z32_float_s8x24_uint_pack_z_float(
601 trans->staging, trans->base.stride,
602 depth, slice->pitch * 4,
603 box->width, box->height);
604
605 util_format_z32_float_s8x24_uint_pack_s_8uint(
606 trans->staging, trans->base.stride,
607 stencil, sslice->pitch,
608 box->width, box->height);
609 }
610
611 buf = trans->staging;
612 offset = 0;
613 } else if (rsc->internal_format != format &&
614 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
615 assert(trans->base.box.depth == 1);
616
617 trans->base.stride = util_format_get_stride(
618 format, trans->base.box.width);
619 trans->staging = malloc(
620 util_format_get_2d_size(format, trans->base.stride,
621 trans->base.box.height));
622 if (!trans->staging)
623 goto fail;
624
625 /* if we're not discarding the whole range (or resource), we must copy
626 * the real data in.
627 */
628 if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
629 PIPE_TRANSFER_DISCARD_RANGE))) {
630 uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
631 fd_resource_layer_offset(rsc, slice, box->z) +
632 box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
633
634 switch (format) {
635 case PIPE_FORMAT_RGTC1_UNORM:
636 case PIPE_FORMAT_RGTC1_SNORM:
637 case PIPE_FORMAT_LATC1_UNORM:
638 case PIPE_FORMAT_LATC1_SNORM:
639 util_format_rgtc1_unorm_pack_rgba_8unorm(
640 trans->staging, trans->base.stride,
641 rgba8, slice->pitch * rsc->cpp,
642 box->width, box->height);
643 break;
644 case PIPE_FORMAT_RGTC2_UNORM:
645 case PIPE_FORMAT_RGTC2_SNORM:
646 case PIPE_FORMAT_LATC2_UNORM:
647 case PIPE_FORMAT_LATC2_SNORM:
648 util_format_rgtc2_unorm_pack_rgba_8unorm(
649 trans->staging, trans->base.stride,
650 rgba8, slice->pitch * rsc->cpp,
651 box->width, box->height);
652 break;
653 default:
654 assert(!"Unexpected format");
655 break;
656 }
657 }
658
659 buf = trans->staging;
660 offset = 0;
661 }
662
663 *pptrans = ptrans;
664
665 return buf + offset;
666
667 fail:
668 fd_resource_transfer_unmap(pctx, ptrans);
669 return NULL;
670 }
671
672 static void
673 fd_resource_destroy(struct pipe_screen *pscreen,
674 struct pipe_resource *prsc)
675 {
676 struct fd_resource *rsc = fd_resource(prsc);
677 fd_bc_invalidate_resource(rsc, true);
678 if (rsc->bo)
679 fd_bo_del(rsc->bo);
680 util_range_destroy(&rsc->valid_buffer_range);
681 FREE(rsc);
682 }
683
684 static boolean
685 fd_resource_get_handle(struct pipe_screen *pscreen,
686 struct pipe_resource *prsc,
687 struct winsys_handle *handle)
688 {
689 struct fd_resource *rsc = fd_resource(prsc);
690
691 return fd_screen_bo_get_handle(pscreen, rsc->bo,
692 rsc->slices[0].pitch * rsc->cpp, handle);
693 }
694
695
696 static const struct u_resource_vtbl fd_resource_vtbl = {
697 .resource_get_handle = fd_resource_get_handle,
698 .resource_destroy = fd_resource_destroy,
699 .transfer_map = fd_resource_transfer_map,
700 .transfer_flush_region = fd_resource_transfer_flush_region,
701 .transfer_unmap = fd_resource_transfer_unmap,
702 };
703
704 static uint32_t
705 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
706 {
707 struct pipe_resource *prsc = &rsc->base.b;
708 struct fd_screen *screen = fd_screen(prsc->screen);
709 enum util_format_layout layout = util_format_description(format)->layout;
710 uint32_t pitchalign = screen->gmem_alignw;
711 uint32_t level, size = 0;
712 uint32_t width = prsc->width0;
713 uint32_t height = prsc->height0;
714 uint32_t depth = prsc->depth0;
715 /* in layer_first layout, the level (slice) contains just one
716 * layer (since in fact the layer contains the slices)
717 */
718 uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
719
720 if (is_a5xx(screen) && (rsc->base.b.target >= PIPE_TEXTURE_2D))
721 height = align(height, screen->gmem_alignh);
722
723 for (level = 0; level <= prsc->last_level; level++) {
724 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
725 uint32_t blocks;
726
727 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
728 slice->pitch = width =
729 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
730 else
731 slice->pitch = width = align(width, pitchalign);
732 slice->offset = size;
733 blocks = util_format_get_nblocks(format, width, height);
734 /* 1d array and 2d array textures must all have the same layer size
735 * for each miplevel on a3xx. 3d textures can have different layer
736 * sizes for high levels, but the hw auto-sizer is buggy (or at least
737 * different than what this code does), so as soon as the layer size
738 * range gets into range, we stop reducing it.
739 */
740 if (prsc->target == PIPE_TEXTURE_3D && (
741 level == 1 ||
742 (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
743 slice->size0 = align(blocks * rsc->cpp, alignment);
744 else if (level == 0 || rsc->layer_first || alignment == 1)
745 slice->size0 = align(blocks * rsc->cpp, alignment);
746 else
747 slice->size0 = rsc->slices[level - 1].size0;
748
749 size += slice->size0 * depth * layers_in_level;
750
751 width = u_minify(width, 1);
752 height = u_minify(height, 1);
753 depth = u_minify(depth, 1);
754 }
755
756 return size;
757 }
758
759 static uint32_t
760 slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
761 {
762 /* on a3xx, 2d array and 3d textures seem to want their
763 * layers aligned to page boundaries:
764 */
765 switch (tmpl->target) {
766 case PIPE_TEXTURE_3D:
767 case PIPE_TEXTURE_1D_ARRAY:
768 case PIPE_TEXTURE_2D_ARRAY:
769 return 4096;
770 default:
771 return 1;
772 }
773 }
774
775 /* special case to resize query buf after allocated.. */
776 void
777 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
778 {
779 struct fd_resource *rsc = fd_resource(prsc);
780
781 debug_assert(prsc->width0 == 0);
782 debug_assert(prsc->target == PIPE_BUFFER);
783 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
784
785 prsc->width0 = sz;
786 realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
787 }
788
789 // TODO common helper?
790 static bool
791 has_depth(enum pipe_format format)
792 {
793 switch (format) {
794 case PIPE_FORMAT_Z16_UNORM:
795 case PIPE_FORMAT_Z32_UNORM:
796 case PIPE_FORMAT_Z32_FLOAT:
797 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
798 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
799 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
800 case PIPE_FORMAT_Z24X8_UNORM:
801 case PIPE_FORMAT_X8Z24_UNORM:
802 return true;
803 default:
804 return false;
805 }
806 }
807
808 /**
809 * Create a new texture object, using the given template info.
810 */
811 static struct pipe_resource *
812 fd_resource_create(struct pipe_screen *pscreen,
813 const struct pipe_resource *tmpl)
814 {
815 struct fd_screen *screen = fd_screen(pscreen);
816 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
817 struct pipe_resource *prsc = &rsc->base.b;
818 enum pipe_format format = tmpl->format;
819 uint32_t size, alignment;
820
821 DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
822 "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
823 tmpl->target, util_format_name(format),
824 tmpl->width0, tmpl->height0, tmpl->depth0,
825 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
826 tmpl->usage, tmpl->bind, tmpl->flags);
827
828 if (!rsc)
829 return NULL;
830
831 *prsc = *tmpl;
832
833 pipe_reference_init(&prsc->reference, 1);
834
835 prsc->screen = pscreen;
836
837 util_range_init(&rsc->valid_buffer_range);
838
839 rsc->base.vtbl = &fd_resource_vtbl;
840
841 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
842 format = PIPE_FORMAT_Z32_FLOAT;
843 else if (screen->gpu_id < 400 &&
844 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
845 format = PIPE_FORMAT_R8G8B8A8_UNORM;
846 rsc->internal_format = format;
847 rsc->cpp = util_format_get_blocksize(format);
848
849 assert(rsc->cpp);
850
851 // XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
852 if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
853 const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
854 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
855 unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 32);
856 unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
857 unsigned size = lrz_pitch * lrz_height * 2;
858
859 size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
860
861 rsc->lrz_height = lrz_height;
862 rsc->lrz_width = lrz_pitch;
863 rsc->lrz_pitch = lrz_pitch;
864 rsc->lrz = fd_bo_new(screen->dev, size, flags);
865 }
866
867 alignment = slice_alignment(pscreen, tmpl);
868 if (is_a4xx(screen) || is_a5xx(screen)) {
869 switch (tmpl->target) {
870 case PIPE_TEXTURE_3D:
871 rsc->layer_first = false;
872 break;
873 default:
874 rsc->layer_first = true;
875 alignment = 1;
876 break;
877 }
878 }
879
880 size = setup_slices(rsc, alignment, format);
881
882 /* special case for hw-query buffer, which we need to allocate before we
883 * know the size:
884 */
885 if (size == 0) {
886 /* note, semi-intention == instead of & */
887 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
888 return prsc;
889 }
890
891 if (rsc->layer_first) {
892 rsc->layer_size = align(size, 4096);
893 size = rsc->layer_size * prsc->array_size;
894 }
895
896 realloc_bo(rsc, size);
897 if (!rsc->bo)
898 goto fail;
899
900 /* There is no native Z32F_S8 sampling or rendering format, so this must
901 * be emulated via two separate textures. The depth texture still keeps
902 * its Z32F_S8 format though, and we also keep a reference to a separate
903 * S8 texture.
904 */
905 if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
906 struct pipe_resource stencil = *tmpl;
907 stencil.format = PIPE_FORMAT_S8_UINT;
908 rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
909 if (!rsc->stencil)
910 goto fail;
911 }
912
913 return prsc;
914 fail:
915 fd_resource_destroy(pscreen, prsc);
916 return NULL;
917 }
918
919 /**
920 * Create a texture from a winsys_handle. The handle is often created in
921 * another process by first creating a pipe texture and then calling
922 * resource_get_handle.
923 */
924 static struct pipe_resource *
925 fd_resource_from_handle(struct pipe_screen *pscreen,
926 const struct pipe_resource *tmpl,
927 struct winsys_handle *handle, unsigned usage)
928 {
929 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
930 struct fd_resource_slice *slice = &rsc->slices[0];
931 struct pipe_resource *prsc = &rsc->base.b;
932 uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;
933
934 DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
935 "nr_samples=%u, usage=%u, bind=%x, flags=%x",
936 tmpl->target, util_format_name(tmpl->format),
937 tmpl->width0, tmpl->height0, tmpl->depth0,
938 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
939 tmpl->usage, tmpl->bind, tmpl->flags);
940
941 if (!rsc)
942 return NULL;
943
944 *prsc = *tmpl;
945
946 pipe_reference_init(&prsc->reference, 1);
947
948 prsc->screen = pscreen;
949
950 util_range_init(&rsc->valid_buffer_range);
951
952 rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
953 if (!rsc->bo)
954 goto fail;
955
956 rsc->base.vtbl = &fd_resource_vtbl;
957 rsc->cpp = util_format_get_blocksize(tmpl->format);
958 slice->pitch = handle->stride / rsc->cpp;
959 slice->offset = handle->offset;
960 slice->size0 = handle->stride * prsc->height0;
961
962 if ((slice->pitch < align(prsc->width0, pitchalign)) ||
963 (slice->pitch & (pitchalign - 1)))
964 goto fail;
965
966 assert(rsc->cpp);
967
968 return prsc;
969
970 fail:
971 fd_resource_destroy(pscreen, prsc);
972 return NULL;
973 }
974
975 /**
976 * _copy_region using pipe (3d engine)
977 */
978 static bool
979 fd_blitter_pipe_copy_region(struct fd_context *ctx,
980 struct pipe_resource *dst,
981 unsigned dst_level,
982 unsigned dstx, unsigned dsty, unsigned dstz,
983 struct pipe_resource *src,
984 unsigned src_level,
985 const struct pipe_box *src_box)
986 {
987 /* not until we allow rendertargets to be buffers */
988 if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
989 return false;
990
991 if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
992 return false;
993
994 /* TODO we could discard if dst box covers dst level fully.. */
995 fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
996 util_blitter_copy_texture(ctx->blitter,
997 dst, dst_level, dstx, dsty, dstz,
998 src, src_level, src_box);
999 fd_blitter_pipe_end(ctx);
1000
1001 return true;
1002 }
1003
/**
 * Copy a block of pixels from one resource to another.
 * The resource must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 *
 * The 3d pipe is tried first; if it declines the copy, the generic
 * software helper does the work instead.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);
	bool copied;

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* first choice, blit on the 3d pipe: */
	copied = fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);

	/* otherwise, pure sw fallback: */
	if (!copied) {
		util_resource_copy_region(pctx,
				dst, dst_level, dstx, dsty, dstz,
				src, src_level, src_box);
	}
}
1036
1037 bool
1038 fd_render_condition_check(struct pipe_context *pctx)
1039 {
1040 struct fd_context *ctx = fd_context(pctx);
1041
1042 if (!ctx->cond_query)
1043 return true;
1044
1045 union pipe_query_result res = { 0 };
1046 bool wait =
1047 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
1048 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
1049
1050 if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
1051 return (bool)res.u64 != ctx->cond_cond;
1052
1053 return true;
1054 }
1055
1056 /**
1057 * Optimal hardware path for blitting pixels.
1058 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1059 */
1060 static void
1061 fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1062 {
1063 struct fd_context *ctx = fd_context(pctx);
1064 struct pipe_blit_info info = *blit_info;
1065 bool discard = false;
1066
1067 if (info.src.resource->nr_samples > 1 &&
1068 info.dst.resource->nr_samples <= 1 &&
1069 !util_format_is_depth_or_stencil(info.src.resource->format) &&
1070 !util_format_is_pure_integer(info.src.resource->format)) {
1071 DBG("color resolve unimplemented");
1072 return;
1073 }
1074
1075 if (info.render_condition_enable && !fd_render_condition_check(pctx))
1076 return;
1077
1078 if (!info.scissor_enable && !info.alpha_blend) {
1079 discard = util_texrange_covers_whole_level(info.dst.resource,
1080 info.dst.level, info.dst.box.x, info.dst.box.y,
1081 info.dst.box.z, info.dst.box.width,
1082 info.dst.box.height, info.dst.box.depth);
1083 }
1084
1085 if (util_try_blit_via_copy_region(pctx, &info)) {
1086 return; /* done */
1087 }
1088
1089 if (info.mask & PIPE_MASK_S) {
1090 DBG("cannot blit stencil, skipping");
1091 info.mask &= ~PIPE_MASK_S;
1092 }
1093
1094 if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
1095 DBG("blit unsupported %s -> %s",
1096 util_format_short_name(info.src.resource->format),
1097 util_format_short_name(info.dst.resource->format));
1098 return;
1099 }
1100
1101 fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
1102 util_blitter_blit(ctx->blitter, &info);
1103 fd_blitter_pipe_end(ctx);
1104 }
1105
1106 void
1107 fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
1108 enum fd_render_stage stage)
1109 {
1110 util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
1111 ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
1112 util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
1113 util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
1114 util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
1115 util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
1116 ctx->streamout.targets);
1117 util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
1118 util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
1119 util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
1120 util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
1121 util_blitter_save_blend(ctx->blitter, ctx->blend);
1122 util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
1123 util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
1124 util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
1125 util_blitter_save_framebuffer(ctx->blitter,
1126 ctx->batch ? &ctx->batch->framebuffer : NULL);
1127 util_blitter_save_fragment_sampler_states(ctx->blitter,
1128 ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
1129 (void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
1130 util_blitter_save_fragment_sampler_views(ctx->blitter,
1131 ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
1132 ctx->tex[PIPE_SHADER_FRAGMENT].textures);
1133 if (!render_cond)
1134 util_blitter_save_render_condition(ctx->blitter,
1135 ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
1136
1137 if (ctx->batch)
1138 fd_batch_set_stage(ctx->batch, stage);
1139
1140 ctx->in_blit = discard;
1141 }
1142
1143 void
1144 fd_blitter_pipe_end(struct fd_context *ctx)
1145 {
1146 if (ctx->batch)
1147 fd_batch_set_stage(ctx->batch, FD_STAGE_NULL);
1148 ctx->in_blit = false;
1149 }
1150
1151 static void
1152 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1153 {
1154 struct fd_resource *rsc = fd_resource(prsc);
1155
1156 if (rsc->write_batch)
1157 fd_batch_flush(rsc->write_batch, true);
1158
1159 assert(!rsc->write_batch);
1160 }
1161
1162 static void
1163 fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1164 {
1165 struct fd_resource *rsc = fd_resource(prsc);
1166
1167 /*
1168 * TODO I guess we could track that the resource is invalidated and
1169 * use that as a hint to realloc rather than stall in _transfer_map(),
1170 * even in the non-DISCARD_WHOLE_RESOURCE case?
1171 */
1172
1173 if (rsc->write_batch) {
1174 struct fd_batch *batch = rsc->write_batch;
1175 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1176
1177 if (pfb->zsbuf && pfb->zsbuf->texture == prsc)
1178 batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
1179
1180 for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
1181 if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
1182 batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1183 }
1184 }
1185 }
1186 }
1187
1188 void
1189 fd_resource_screen_init(struct pipe_screen *pscreen)
1190 {
1191 pscreen->resource_create = fd_resource_create;
1192 pscreen->resource_from_handle = fd_resource_from_handle;
1193 pscreen->resource_get_handle = u_resource_get_handle_vtbl;
1194 pscreen->resource_destroy = u_resource_destroy_vtbl;
1195 }
1196
1197 void
1198 fd_resource_context_init(struct pipe_context *pctx)
1199 {
1200 pctx->transfer_map = u_transfer_map_vtbl;
1201 pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
1202 pctx->transfer_unmap = u_transfer_unmap_vtbl;
1203 pctx->buffer_subdata = u_default_buffer_subdata;
1204 pctx->texture_subdata = u_default_texture_subdata;
1205 pctx->create_surface = fd_create_surface;
1206 pctx->surface_destroy = fd_surface_destroy;
1207 pctx->resource_copy_region = fd_resource_copy_region;
1208 pctx->blit = fd_blit;
1209 pctx->flush_resource = fd_flush_resource;
1210 pctx->invalidate_resource = fd_invalidate_resource;
1211 }