freedreno: some locking
[mesa.git] / src / gallium / drivers / freedreno / freedreno_resource.c
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"

static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	int i;

	/* Go through the entire state and see if the resource is bound
	 * anywhere. If it is, mark the relevant state as dirty. This is called on
	 * realloc_bo.
	 */

	/* Constbufs */
	for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
		if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
		if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
	}

	/* VBOs */
	for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* Index buffer */
	if (ctx->indexbuf.buffer == prsc)
		ctx->dirty |= FD_DIRTY_INDEXBUF;

	/* Textures */
	for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
		if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_VERTTEX;
	}
	for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
		if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_FRAGTEX;
	}
}

static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	rsc->timestamp = 0;
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard);
static void fd_blitter_pipe_end(struct fd_context *ctx);

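/* Note on the two paths in do_blit(): util_blitter draws with the 3d pipe
 * and (until we allow rendertargets to be buffers) can't target PIPE_BUFFER,
 * so buffer blits, and anything the caller flags as a fallback, go through
 * the CPU copy in util_resource_copy_region() instead.
 */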
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

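/* Resource shadowing: instead of stalling when the CPU wants to write a
 * resource that the GPU is still using, allocate a fresh bo, make it the
 * resource's backing storage, and blit the still-needed contents over from
 * the old bo (the "shadow").  Roughly:
 *
 *    swap(rsc->bo, shadow->bo);   // CPU can map rsc->bo right away
 *    blit(rsc <- shadow);         // re-copy everything not discarded
 *
 * Batches that were built against the old storage get retargeted at the
 * shadow, so they keep reading the data they were constructed against.
 * That fixup (and the bo/timestamp/write_batch swap) touches screen-level
 * batch-cache state, and therefore happens under screen->lock.
 */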
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard
	 * current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
			box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	pipe_mutex_lock(ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer.  From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo, shadow->bo);
	swap(rsc->timestamp, shadow->timestamp);
	swap(rsc->write_batch, shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	pipe_mutex_unlock(ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do { \
		blit.dst.field = (val);  \
		blit.src.field = (val);  \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x,     box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

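/* Layout of array/3d resources: in "layer_first" layout each layer is a
 * contiguous block holding all of its miplevels, otherwise each miplevel
 * (slice) holds all of the layers.  So the offset of (level, layer) is,
 * roughly:
 *
 *    layer_first:  slice[level].offset + layer * rsc->layer_size
 *    otherwise:    slice[level].offset + layer * slice[level].size0
 */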
static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
		struct fd_resource_slice *slice,
		unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

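/* Z32F_S8 is emulated with two separate bo's, depth and stencil (see
 * fd_resource_create()), while the staging buffer handed to the state
 * tracker uses the packed Z32_FLOAT_S8X24_UINT layout.  Flushing a mapped
 * range therefore means unpacking the staging data back out into the two
 * bo's:
 */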
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}

static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}

static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
				ptrans->box.x + box->x,
				ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
			ptrans->box.x,
			ptrans->box.x + ptrans->box.width);

	pipe_resource_reference(&ptrans->resource, NULL);
	util_slab_free(&ctx->transfer_pool, ptrans);

	free(trans->staging);
}

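/* Map a resource for CPU access.  Roughly, in order of preference:
 *
 *  1) DISCARD_WHOLE_RESOURCE: orphan the current bo and allocate a fresh
 *     one; nothing to synchronize against.
 *  2) Buffer writes to a range never written before (tracked in
 *     rsc->valid_buffer_range): proceed without waiting.
 *  3) If the resource is busy but the range can be discarded, try to
 *     shadow it (fd_try_shadow_resource()) instead of flushing/stalling.
 *  4) Otherwise flush pending batches touching the resource and wait on
 *     the bo with fd_bo_cpu_prep().
 *
 * Formats without native support (Z32F_S8, and RGTC before a4xx) also
 * bounce through a malloc'd staging buffer.
 */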
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
			box->width, box->height, box->x, box->y);

	ptrans = util_slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* util_slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			prsc->target == PIPE_BUFFER &&
			!util_ranges_intersect(&rsc->valid_buffer_range,
					box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could also go down this path when !reorder && !busy_for_read,
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
			prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
				box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
						trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}


static const struct u_resource_vtbl fd_resource_vtbl = {
	.resource_get_handle = fd_resource_get_handle,
	.resource_destroy = fd_resource_destroy,
	.transfer_map = fd_resource_transfer_map,
	.transfer_flush_region = fd_resource_transfer_flush_region,
	.transfer_unmap = fd_resource_transfer_unmap,
};

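/* Lay out the miplevel slices within the bo, returning the total size.
 * Pitch (in pixels) is aligned to 32, and note that the minification below
 * is applied to the *aligned* width, so for example width0 = 65 gives:
 *
 *    level 0: pitch = align(65, 32) = 96
 *    level 1: pitch = align(u_minify(96, 1), 32) = 64
 *    level 2: pitch = align(u_minify(64, 1), 32) = 32
 */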
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, 32 * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, 32);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size drops
		 * to 0xf000 bytes or below we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
				level == 1 ||
				(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (fd_screen(pscreen)->gpu_id < 400 &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(fd_screen(pscreen))) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note, semi-intentional == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures. The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &slice->pitch);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch /= rsc->cpp;
	slice->offset = handle->offset;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * The resource must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

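/* Evaluate the current render condition, if any: returns true if rendering
 * should proceed.  In the NO_WAIT modes we don't block on the query, so if
 * the result isn't available yet we err on the side of rendering.
 */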
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

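/* util_blitter draws using this context's own pipe state, so everything a
 * blit could clobber has to be saved up front (u_blitter restores it when
 * the blit finishes).  A state object missing from this list would
 * presumably leak blit state into the application's next draw.
 */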
static void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
{
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->fragtex.num_samplers,
			(void **)ctx->fragtex.samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->fragtex.num_textures, ctx->fragtex.textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_BLIT);

	ctx->in_blit = discard;
}

static void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}