freedreno: refactor dirty state handling
src/gallium/drivers/freedreno/freedreno_resource.c
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"

static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	/* Go through the entire state and see if the resource is bound
	 * anywhere. If it is, mark the relevant state as dirty. This is called on
	 * realloc_bo.
	 */

	/* VBOs */
	for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* Index buffer */
	if (ctx->indexbuf.buffer == prsc)
		ctx->dirty |= FD_DIRTY_INDEXBUF;

	/* per-shader-stage resources: */
	for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
		/* Constbufs.. note that constbuf[0] is normal uniforms emitted in
		 * cmdstream rather than by pointer..
		 */
		const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
		for (unsigned i = 1; i < num_ubos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
				break;
			if (ctx->constbuf[stage].cb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
		}

		/* Textures */
		for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
				break;
			if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
		}
	}
}

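/* Replace the resource's backing bo with a freshly allocated one. Note
 * that this discards the old contents: the timestamp is reset, the
 * valid range is emptied, and any batch-cache entries referring to the
 * old storage are invalidated.
 */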
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	rsc->timestamp = 0;
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

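/* Copy helper used while shadowing: blit on the GPU via util_blitter
 * when the source is not a buffer and no fallback was requested,
 * otherwise fall back to a CPU copy via util_resource_copy_region().
 */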
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

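/* Try to avoid a CPU stall on a busy resource by "shadowing" it:
 * allocate a new resource with the same template, swap the backing
 * bo's (so pending batches keep rendering to the old storage via the
 * shadow), transfer the batch references over, and then blit the still
 * valid contents from the shadow into the new buffer, skipping only
 * the range the caller is about to discard and overwrite.
 */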
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	if (prsc->next)
		return false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard
	 * current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	mtx_lock(&ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer. From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo, shadow->bo);
	swap(rsc->timestamp, shadow->timestamp);
	swap(rsc->write_batch, shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is. We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	mtx_unlock(&ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
		struct fd_resource_slice *slice,
		unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

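/* Flush staging data for Z32F_S8: there is no native Z32F_S8 format,
 * so the packed staging buffer written by the app gets split back out
 * into the separate depth (Z32F) and stencil (S8) backing textures
 * (see fd_resource_create()).
 */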
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}

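/* Flush staging data for RGTC/LATC: pre-a4xx parts have no native RGTC
 * sampling support (see the internal_format handling in
 * fd_resource_create()), so the compressed staging data is decompressed
 * into the RGBA8 texture that actually backs the resource.
 */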
static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}

static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
				ptrans->box.x + box->x,
				ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
			ptrans->box.x,
			ptrans->box.x + ptrans->box.width);

	/* free the staging buffer before returning the transfer to the slab,
	 * since trans aliases ptrans and is no longer valid afterwards:
	 */
	free(trans->staging);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);
}

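/* Map a resource for CPU access, synchronizing against the GPU as
 * needed: a DISCARD_WHOLE_RESOURCE write simply reallocs the backing
 * bo, writes to a not-yet-valid buffer range need no wait at all, and
 * otherwise a busy resource is either shadowed (see
 * fd_try_shadow_resource()) or pending batches are flushed before
 * stalling in fd_bo_cpu_prep().
 */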
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			prsc->target == PIPE_BUFFER &&
			!util_ranges_intersect(&rsc->valid_buffer_range,
					box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could also go down this path for !reorder && !busy_for_read,
		 * ie. the only case where we *don't* want to shadow is when the blit
		 * itself would trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
			prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
				box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
						trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}


static const struct u_resource_vtbl fd_resource_vtbl = {
	.resource_get_handle   = fd_resource_get_handle,
	.resource_destroy      = fd_resource_destroy,
	.transfer_map          = fd_resource_transfer_map,
	.transfer_flush_region = fd_resource_transfer_flush_region,
	.transfer_unmap        = fd_resource_transfer_unmap,
};

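/* Compute the per-miplevel slice layout and return the total bo size.
 * A rough worked example (assuming a pitch alignment of 32 and
 * alignment == 1): a 64x64 PIPE_FORMAT_R8G8B8A8_UNORM texture with
 * last_level == 1 gets pitch 64 and size0 64*64*4 = 16384 at level 0,
 * and pitch 32 and size0 32*32*4 = 4096 at level 1, for a total of
 * 20480 bytes. Note that slice->pitch is stored in pixels, not bytes.
 */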
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t pitchalign = fd_screen(prsc->screen)->gmem_alignw;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, pitchalign);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size gets
		 * small enough (<= 0xf000 bytes), we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
				level == 1 ||
				(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buf after it has been allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (fd_screen(pscreen)->gpu_id < 400 &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(fd_screen(pscreen)) || is_a5xx(fd_screen(pscreen))) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note: semi-intentionally == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures. The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;
	uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;
	slice->size0 = handle->stride * prsc->height0;

	if ((slice->pitch < align(prsc->width0, pitchalign)) ||
			(slice->pitch & (pitchalign - 1)))
		goto fail;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * The resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

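/* Evaluate the current conditional-rendering query, if any. Returns
 * true if rendering should proceed: either no query is bound, the
 * result is not yet available without waiting, or the result does not
 * match the skip condition.
 */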
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

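/* util_blitter binds its own shaders and state, so everything it can
 * clobber must be saved here first; util_blitter restores the saved
 * state when the blit completes.
 */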
void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
		enum fd_render_stage stage)
{
	util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
			ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
			(void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
			ctx->tex[PIPE_SHADER_FRAGMENT].textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
				ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, stage);

	ctx->in_blit = discard;
}

void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}