gallium: add PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS
mesa.git: src/gallium/drivers/freedreno/freedreno_resource.c

/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"
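
/* Mark any state that currently binds 'prsc' as dirty, so it gets re-emitted
 * against the new backing bo.  The dirty-tracking convention used below:
 * context-global state uses ctx->dirty (FD_DIRTY_*), per-stage state uses
 * ctx->dirty_shader[stage] (FD_DIRTY_SHADER_*); once a bit is set we can
 * stop scanning that group of bindings.
 */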
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	/* Go through the entire state and see if the resource is bound
	 * anywhere.  If it is, mark the relevant state as dirty.  This is
	 * called on realloc_bo.
	 */

	/* VBOs */
	for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* per-shader-stage resources: */
	for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
		/* Constbufs.. note that constbuf[0] is normal uniforms emitted in
		 * cmdstream rather than by pointer..
		 */
		const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
		for (unsigned i = 1; i < num_ubos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
				break;
			if (ctx->constbuf[stage].cb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
		}

		/* Textures */
		for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
				break;
			if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
		}

		/* SSBOs */
		const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
		for (unsigned i = 0; i < num_ssbos; i++) {
			if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
				break;
			if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
				ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
		}
	}
}
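
/* Discard the current backing bo and allocate a fresh one.  Any previous
 * contents are lost, so callers that still need them must copy them out
 * first (see fd_try_shadow_resource()), and should follow up with
 * fd_invalidate_resource() so stale bindings get re-emitted.
 */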
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}
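
/**
 * Shadowing to avoid a flush/stall: allocate a new backing bo, swap it in,
 * and blit the still-valid contents over from the old (shadow) bo.  Only
 * legal for PIPE_TRANSFER_DISCARD_RANGE writes, since everything inside the
 * mapped box can be discarded and everything outside it gets copied.
 * Returns false if we cannot shadow, in which case the caller falls back
 * to flushing/stalling.
 */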
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	if (prsc->next)
		return false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard
	 * current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
			box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	mtx_lock(&ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer.  From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down the cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo,          shadow->bo);
	swap(rsc->write_batch, shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	mtx_unlock(&ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}
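
/* Byte offset of a layer within a slice (mip level): with layer_first
 * layout each layer's slices are stored contiguously (step by layer_size),
 * otherwise the layers are interleaved within each slice (step by size0):
 */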
static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
		struct fd_resource_slice *slice,
		unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}
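
/* Z32F_S8 is emulated with two separate bos (see fd_resource_create()): a
 * Z32F depth bo plus an S8 stencil bo.  The staging buffer holds the
 * interleaved Z32_FLOAT_S8X24 data, which gets unpacked into the two bos
 * here at flush time:
 */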
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}
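
/* RGTC/LATC is not natively supported before a4xx, so such resources are
 * backed by an RGBA8 internal_format (see fd_resource_create()).  The
 * staging buffer holds the compressed data, which gets decompressed into
 * the backing bo here at flush time:
 */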
static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}
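
/* Flush a sub-box of a mapping made with PIPE_TRANSFER_FLUSH_EXPLICIT,
 * updating the valid range for buffers and unpacking any staging data:
 */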
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
				ptrans->box.x + box->x,
				ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
			ptrans->box.x,
			ptrans->box.x + ptrans->box.width);

	/* free the staging buffer *before* releasing the transfer back to
	 * the slab, since 'trans' aliases 'ptrans':
	 */
	free(trans->staging);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);
}
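
/* Map a range of a resource for CPU access.  The synchronization decisions:
 *  - PIPE_TRANSFER_UNSYNCHRONIZED, or a write to a not-yet-valid buffer
 *    range, skips CPU prep entirely,
 *  - PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE swaps in a fresh bo via
 *    realloc_bo(),
 *  - otherwise pending batches get flushed (or the resource shadowed via
 *    fd_try_shadow_resource()) before stalling in fd_bo_cpu_prep().
 * Z32F_S8 and (pre-a4xx) RGTC formats additionally go through a malloc'd
 * staging buffer that is packed/unpacked around the CPU access.
 */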
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
			box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc_st() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			prsc->target == PIPE_BUFFER &&
			!util_ranges_intersect(&rsc->valid_buffer_range,
					box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range.
		 * No need to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could go down this path !reorder && !busy_for_read
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
			prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
					fd_resource_layer_offset(rsc, slice, box->z) +
					box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
					fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
					box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
						trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
					fd_resource_layer_offset(rsc, slice, box->z) +
					box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
						trans->staging, trans->base.stride,
						rgba8, slice->pitch * rsc->cpp,
						box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}

static const struct u_resource_vtbl fd_resource_vtbl = {
	.resource_get_handle = fd_resource_get_handle,
	.resource_destroy = fd_resource_destroy,
	.transfer_map = fd_resource_transfer_map,
	.transfer_flush_region = fd_resource_transfer_flush_region,
	.transfer_unmap = fd_resource_transfer_unmap,
};
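
/* Compute the per-miplevel slice layout and return the total bo size.
 * Pitches are aligned to the gmem tile alignment.  A sketch of the
 * arithmetic for a hypothetical 100x100 RGBA8 2D texture with three levels
 * (cpp = 4, alignment = 1), assuming pitchalign == 32:
 *
 *   level 0: pitch = align(100, 32) = 128, size0 = 128 * 100 * 4 = 51200
 *   level 1: pitch = align(64, 32)  =  64, size0 =  64 *  50 * 4 = 12800
 *   level 2: pitch = align(32, 32)  =  32, size0 =  32 *  25 * 4 =  3200
 *
 * (note that each level's width is minified from the previous *aligned*
 * width, since 'width' is overwritten with the aligned pitch)
 */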
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	struct fd_screen *screen = fd_screen(prsc->screen);
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t pitchalign = screen->gmem_alignw;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	if (is_a5xx(screen) && (rsc->base.b.target >= PIPE_TEXTURE_2D))
		height = align(height, screen->gmem_alignh);

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, pitchalign);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx.  3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size drops
		 * below the 0xf000 threshold, we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
				level == 1 ||
				(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

// TODO common helper?
static bool
has_depth(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
	case PIPE_FORMAT_Z32_UNORM:
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
		return true;
	default:
		return false;
	}
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_screen *screen = fd_screen(pscreen);
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (screen->gpu_id < 400 &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	// XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
	if (is_a5xx(screen) && (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
		const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
				DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
		unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 32);
		unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
		unsigned size = lrz_pitch * lrz_height * 2;

		size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */

		rsc->lrz_height = lrz_height;
		rsc->lrz_width = lrz_pitch;
		rsc->lrz_pitch = lrz_pitch;
		rsc->lrz = fd_bo_new(screen->dev, size, flags);
	}

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(screen) || is_a5xx(screen)) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note: semi-intentionally using == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures.  The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle.  The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;
	uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;
	slice->size0 = handle->stride * prsc->height0;

	if ((slice->pitch < align(prsc->width0, pitchalign)) ||
			(slice->pitch & (pitchalign - 1)))
		goto fail;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * Both resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
		enum fd_render_stage stage)
{
	util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
			ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
			(void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
			ctx->tex[PIPE_SHADER_FRAGMENT].textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
				ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_batch_set_stage(ctx->batch, stage);

	ctx->in_blit = discard;
}

void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_batch_set_stage(ctx->batch, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}
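
/* Usage sketch (assumed call sites, following the usual gallium driver
 * pattern): screen creation is expected to call
 * fd_resource_screen_init(pscreen) and context creation
 * fd_resource_context_init(pctx), which plugs the resource/transfer entry
 * points above into the screen and context vtables.
 */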