freedreno: pitch alignment should match gmem alignment
[mesa.git] / src / gallium / drivers / freedreno / freedreno_resource.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "util/u_format.h"
30 #include "util/u_format_rgtc.h"
31 #include "util/u_format_zs.h"
32 #include "util/u_inlines.h"
33 #include "util/u_transfer.h"
34 #include "util/u_string.h"
35 #include "util/u_surface.h"
36 #include "util/set.h"
37
38 #include "freedreno_resource.h"
39 #include "freedreno_batch_cache.h"
40 #include "freedreno_screen.h"
41 #include "freedreno_surface.h"
42 #include "freedreno_context.h"
43 #include "freedreno_query_hw.h"
44 #include "freedreno_util.h"
45
46 #include <errno.h>
47
48 /* XXX this should go away, needed for 'struct winsys_handle' */
49 #include "state_tracker/drm_driver.h"
50
51 static void
52 fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
53 {
54 int i;
55
56 /* Go through the entire state and see if the resource is bound
57 * anywhere. If it is, mark the relevant state as dirty. This is called on
58 * realloc_bo.
59 */
60
61 /* Constbufs */
62 for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
63 if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
64 ctx->dirty |= FD_DIRTY_CONSTBUF;
65 if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
66 ctx->dirty |= FD_DIRTY_CONSTBUF;
67 }
68
69 /* VBOs */
70 for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
71 if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
72 ctx->dirty |= FD_DIRTY_VTXBUF;
73 }
74
75 /* Index buffer */
76 if (ctx->indexbuf.buffer == prsc)
77 ctx->dirty |= FD_DIRTY_INDEXBUF;
78
79 /* Textures */
80 for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
81 if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
82 ctx->dirty |= FD_DIRTY_VERTTEX;
83 }
84 for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
85 if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
86 ctx->dirty |= FD_DIRTY_FRAGTEX;
87 }
88 }
89
90 static void
91 realloc_bo(struct fd_resource *rsc, uint32_t size)
92 {
93 struct fd_screen *screen = fd_screen(rsc->base.b.screen);
94 uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
95 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
96
97 /* if we start using things other than write-combine,
98 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
99 */
100
101 if (rsc->bo)
102 fd_bo_del(rsc->bo);
103
104 rsc->bo = fd_bo_new(screen->dev, size, flags);
105 rsc->timestamp = 0;
106 util_range_set_empty(&rsc->valid_buffer_range);
107 fd_bc_invalidate_resource(rsc, true);
108 }
109
110 static void
111 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
112 {
113 /* TODO size threshold too?? */
114 if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
115 /* do blit on gpu: */
116 fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
117 util_blitter_blit(ctx->blitter, blit);
118 fd_blitter_pipe_end(ctx);
119 } else {
120 /* do blit on cpu: */
121 util_resource_copy_region(&ctx->base,
122 blit->dst.resource, blit->dst.level, blit->dst.box.x,
123 blit->dst.box.y, blit->dst.box.z,
124 blit->src.resource, blit->src.level, &blit->src.box);
125 }
126 }
127
128 static bool
129 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
130 unsigned level, unsigned usage, const struct pipe_box *box)
131 {
132 struct pipe_context *pctx = &ctx->base;
133 struct pipe_resource *prsc = &rsc->base.b;
134 bool fallback = false;
135
136 if (prsc->next)
137 return false;
138
139 /* TODO: somehow munge dimensions and format to copy unsupported
140 * render target format to something that is supported?
141 */
142 if (!pctx->screen->is_format_supported(pctx->screen,
143 prsc->format, prsc->target, prsc->nr_samples,
144 PIPE_BIND_RENDER_TARGET))
145 fallback = true;
146
147 /* these cases should be handled elsewhere.. just for future
148 * reference in case this gets split into a more generic(ish)
149 * helper.
150 */
151 debug_assert(!(usage & PIPE_TRANSFER_READ));
152 debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
153
154 /* if we do a gpu blit to clone the whole resource, we'll just
155 * end up stalling on that.. so only allow if we can discard
156 * current range (and blit, possibly cpu or gpu, the rest)
157 */
158 if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
159 return false;
160
161 bool whole_level = util_texrange_covers_whole_level(prsc, level,
162 box->x, box->y, box->z, box->width, box->height, box->depth);
163
164 /* TODO need to be more clever about current level */
165 if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
166 return false;
167
168 struct pipe_resource *pshadow =
169 pctx->screen->resource_create(pctx->screen, prsc);
170
171 if (!pshadow)
172 return false;
173
174 assert(!ctx->in_shadow);
175 ctx->in_shadow = true;
176
177 /* get rid of any references that batch-cache might have to us (which
178 * should empty/destroy rsc->batches hashset)
179 */
180 fd_bc_invalidate_resource(rsc, false);
181
182 pipe_mutex_lock(ctx->screen->lock);
183
184 /* Swap the backing bo's, so shadow becomes the old buffer,
185 * blit from shadow to new buffer. From here on out, we
186 * cannot fail.
187 *
188 * Note that we need to do it in this order, otherwise if
189 * we go down cpu blit path, the recursive transfer_map()
190 * sees the wrong status..
191 */
192 struct fd_resource *shadow = fd_resource(pshadow);
193
194 DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
195 shadow, shadow->base.b.reference.count);
196
197 /* TODO valid_buffer_range?? */
198 swap(rsc->bo, shadow->bo);
199 swap(rsc->timestamp, shadow->timestamp);
200 swap(rsc->write_batch, shadow->write_batch);
201
202 /* at this point, the newly created shadow buffer is not referenced
203 * by any batches, but the existing rsc (probably) is. We need to
204 * transfer those references over:
205 */
206 debug_assert(shadow->batch_mask == 0);
207 struct fd_batch *batch;
208 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
209 struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
210 _mesa_set_remove(batch->resources, entry);
211 _mesa_set_add(batch->resources, shadow);
212 }
213 swap(rsc->batch_mask, shadow->batch_mask);
214
215 pipe_mutex_unlock(ctx->screen->lock);
216
217 struct pipe_blit_info blit = {0};
218 blit.dst.resource = prsc;
219 blit.dst.format = prsc->format;
220 blit.src.resource = pshadow;
221 blit.src.format = pshadow->format;
222 blit.mask = util_format_get_mask(prsc->format);
223 blit.filter = PIPE_TEX_FILTER_NEAREST;
224
225 #define set_box(field, val) do { \
226 blit.dst.field = (val); \
227 blit.src.field = (val); \
228 } while (0)
229
230 /* blit the other levels in their entirety: */
231 for (unsigned l = 0; l <= prsc->last_level; l++) {
232 if (l == level)
233 continue;
234
235 /* just blit whole level: */
236 set_box(level, l);
237 set_box(box.width, u_minify(prsc->width0, l));
238 set_box(box.height, u_minify(prsc->height0, l));
239 set_box(box.depth, u_minify(prsc->depth0, l));
240
241 do_blit(ctx, &blit, fallback);
242 }
243
244 /* deal w/ current level specially, since we might need to split
245 * it up into a couple blits:
246 */
247 if (!whole_level) {
248 set_box(level, level);
249
250 switch (prsc->target) {
251 case PIPE_BUFFER:
252 case PIPE_TEXTURE_1D:
253 set_box(box.y, 0);
254 set_box(box.z, 0);
255 set_box(box.height, 1);
256 set_box(box.depth, 1);
257
258 if (box->x > 0) {
259 set_box(box.x, 0);
260 set_box(box.width, box->x);
261
262 do_blit(ctx, &blit, fallback);
263 }
264 if ((box->x + box->width) < u_minify(prsc->width0, level)) {
265 set_box(box.x, box->x + box->width);
266 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
267
268 do_blit(ctx, &blit, fallback);
269 }
270 break;
271 case PIPE_TEXTURE_2D:
272 /* TODO */
273 default:
274 unreachable("TODO");
275 }
276 }
277
278 ctx->in_shadow = false;
279
280 pipe_resource_reference(&pshadow, NULL);
281
282 return true;
283 }
284
285 static unsigned
286 fd_resource_layer_offset(struct fd_resource *rsc,
287 struct fd_resource_slice *slice,
288 unsigned layer)
289 {
290 if (rsc->layer_first)
291 return layer * rsc->layer_size;
292 else
293 return layer * slice->size0;
294 }
295
296 static void
297 fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
298 {
299 struct fd_resource *rsc = fd_resource(trans->base.resource);
300 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
301 struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
302 enum pipe_format format = trans->base.resource->format;
303
304 float *depth = fd_bo_map(rsc->bo) + slice->offset +
305 fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
306 (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
307 uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
308 fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
309 (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
310
311 if (format != PIPE_FORMAT_X32_S8X24_UINT)
312 util_format_z32_float_s8x24_uint_unpack_z_float(
313 depth, slice->pitch * 4,
314 trans->staging, trans->base.stride,
315 box->width, box->height);
316
317 util_format_z32_float_s8x24_uint_unpack_s_8uint(
318 stencil, sslice->pitch,
319 trans->staging, trans->base.stride,
320 box->width, box->height);
321 }
322
323 static void
324 fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
325 {
326 struct fd_resource *rsc = fd_resource(trans->base.resource);
327 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
328 enum pipe_format format = trans->base.resource->format;
329
330 uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
331 fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
332 ((trans->base.box.y + box->y) * slice->pitch +
333 trans->base.box.x + box->x) * rsc->cpp;
334
335 uint8_t *source = trans->staging +
336 util_format_get_nblocksy(format, box->y) * trans->base.stride +
337 util_format_get_stride(format, box->x);
338
339 switch (format) {
340 case PIPE_FORMAT_RGTC1_UNORM:
341 case PIPE_FORMAT_RGTC1_SNORM:
342 case PIPE_FORMAT_LATC1_UNORM:
343 case PIPE_FORMAT_LATC1_SNORM:
344 util_format_rgtc1_unorm_unpack_rgba_8unorm(
345 data, slice->pitch * rsc->cpp,
346 source, trans->base.stride,
347 box->width, box->height);
348 break;
349 case PIPE_FORMAT_RGTC2_UNORM:
350 case PIPE_FORMAT_RGTC2_SNORM:
351 case PIPE_FORMAT_LATC2_UNORM:
352 case PIPE_FORMAT_LATC2_SNORM:
353 util_format_rgtc2_unorm_unpack_rgba_8unorm(
354 data, slice->pitch * rsc->cpp,
355 source, trans->base.stride,
356 box->width, box->height);
357 break;
358 default:
359 assert(!"Unexpected format\n");
360 break;
361 }
362 }
363
364 static void
365 fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
366 {
367 enum pipe_format format = trans->base.resource->format;
368
369 switch (format) {
370 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
371 case PIPE_FORMAT_X32_S8X24_UINT:
372 fd_resource_flush_z32s8(trans, box);
373 break;
374 case PIPE_FORMAT_RGTC1_UNORM:
375 case PIPE_FORMAT_RGTC1_SNORM:
376 case PIPE_FORMAT_RGTC2_UNORM:
377 case PIPE_FORMAT_RGTC2_SNORM:
378 case PIPE_FORMAT_LATC1_UNORM:
379 case PIPE_FORMAT_LATC1_SNORM:
380 case PIPE_FORMAT_LATC2_UNORM:
381 case PIPE_FORMAT_LATC2_SNORM:
382 fd_resource_flush_rgtc(trans, box);
383 break;
384 default:
385 assert(!"Unexpected staging transfer type");
386 break;
387 }
388 }
389
390 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
391 struct pipe_transfer *ptrans,
392 const struct pipe_box *box)
393 {
394 struct fd_resource *rsc = fd_resource(ptrans->resource);
395 struct fd_transfer *trans = fd_transfer(ptrans);
396
397 if (ptrans->resource->target == PIPE_BUFFER)
398 util_range_add(&rsc->valid_buffer_range,
399 ptrans->box.x + box->x,
400 ptrans->box.x + box->x + box->width);
401
402 if (trans->staging)
403 fd_resource_flush(trans, box);
404 }
405
406 static void
407 fd_resource_transfer_unmap(struct pipe_context *pctx,
408 struct pipe_transfer *ptrans)
409 {
410 struct fd_context *ctx = fd_context(pctx);
411 struct fd_resource *rsc = fd_resource(ptrans->resource);
412 struct fd_transfer *trans = fd_transfer(ptrans);
413
414 if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
415 struct pipe_box box;
416 u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
417 fd_resource_flush(trans, &box);
418 }
419
420 if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
421 fd_bo_cpu_fini(rsc->bo);
422 if (rsc->stencil)
423 fd_bo_cpu_fini(rsc->stencil->bo);
424 }
425
426 util_range_add(&rsc->valid_buffer_range,
427 ptrans->box.x,
428 ptrans->box.x + ptrans->box.width);
429
430 pipe_resource_reference(&ptrans->resource, NULL);
431 slab_free(&ctx->transfer_pool, ptrans);
432
433 free(trans->staging);
434 }
435
/**
 * transfer_map hook: map a box of one level of @prsc for CPU access.
 *
 * Steps, in order:
 *  1. Allocate and zero a transfer object from the context's slab pool and
 *     fill in level/usage/box/stride/layer_stride; it holds a reference to
 *     the resource.
 *  2. Synchronise with the GPU, unless the access pattern makes that
 *     unnecessary (whole-resource discard, write to a not-yet-valid buffer
 *     range, or an unsynchronized mapping).  When a write would stall and
 *     batch reordering is enabled, a shadow resource is tried first.
 *  3. Map the bo and compute the offset of the box.
 *  4. For Z32F_S8 / X32_S8 resources, and for RGTC formats that are stored
 *     in a different internal format, allocate a staging copy in the API
 *     format (filled from the real data unless the range is discarded) and
 *     return that instead of the bo mapping.
 *
 * @pptrans receives the transfer object on success.
 * Returns the CPU pointer for the box, or NULL on failure.
 */
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero, and 'trans' is the same allocation
	 * viewed as the driver's transfer struct:
	 */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	/* note: ptrans->usage keeps the caller's flags; the local 'usage'
	 * may gain PIPE_TRANSFER_UNSYNCHRONIZED just below.
	 */
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	/* while a shadow resource is being set up, non-read mappings must not
	 * wait on the resource:
	 */
	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	/* translate the access mode into cpu_prep flags: */
	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		/* old contents are not needed: swap in new bo(s) of the same
		 * size rather than waiting for the old ones, and re-emit any
		 * state that refers to the resource.
		 */
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			prsc->target == PIPE_BUFFER &&
			!util_ranges_intersect(&rsc->valid_buffer_range,
					box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		/* 'busy' also covers work that is already submitted: a non-zero
		 * result from the NOSYNC prep is treated as "bo still in use".
		 */
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could go down this path !reorder && !busy_for_read
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				/* rsc now has a fresh bo, nothing to wait for: */
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				/* writing: flush every batch that references the
				 * resource, then sync on the last one visited.
				 */
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				/* reading: only the batch writing the resource matters */
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	/* byte offset of the box within the bo, in units of format blocks: */
	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
			prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		/* depth and stencil live in two separate bo's, so the caller
		 * gets an interleaved staging copy instead of the bo mapping:
		 */
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
				box->y * sslice->pitch + box->x;

			/* X32_S8 exposes only stencil, so depth is not packed: */
			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		/* hand out the staging copy, which starts at the box origin: */
		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		/* RGTC resource stored in a different internal format: the
		 * caller gets a staging copy in the compressed API format.
		 */
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
						trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
				PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		/* hand out the staging copy, which starts at the box origin: */
		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	/* NOTE(review): cleanup is delegated to the regular unmap path, which
	 * also writes back any staging copy, ends CPU access and extends the
	 * valid buffer range -- confirm that this is intended for a mapping
	 * that was never handed out.
	 */
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}
665
666 static void
667 fd_resource_destroy(struct pipe_screen *pscreen,
668 struct pipe_resource *prsc)
669 {
670 struct fd_resource *rsc = fd_resource(prsc);
671 fd_bc_invalidate_resource(rsc, true);
672 if (rsc->bo)
673 fd_bo_del(rsc->bo);
674 util_range_destroy(&rsc->valid_buffer_range);
675 FREE(rsc);
676 }
677
678 static boolean
679 fd_resource_get_handle(struct pipe_screen *pscreen,
680 struct pipe_resource *prsc,
681 struct winsys_handle *handle)
682 {
683 struct fd_resource *rsc = fd_resource(prsc);
684
685 return fd_screen_bo_get_handle(pscreen, rsc->bo,
686 rsc->slices[0].pitch * rsc->cpp, handle);
687 }
688
689
690 static const struct u_resource_vtbl fd_resource_vtbl = {
691 .resource_get_handle = fd_resource_get_handle,
692 .resource_destroy = fd_resource_destroy,
693 .transfer_map = fd_resource_transfer_map,
694 .transfer_flush_region = fd_resource_transfer_flush_region,
695 .transfer_unmap = fd_resource_transfer_unmap,
696 };
697
698 static uint32_t
699 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
700 {
701 struct pipe_resource *prsc = &rsc->base.b;
702 enum util_format_layout layout = util_format_description(format)->layout;
703 uint32_t pitchalign = fd_screen(prsc->screen)->gmem_alignw;
704 uint32_t level, size = 0;
705 uint32_t width = prsc->width0;
706 uint32_t height = prsc->height0;
707 uint32_t depth = prsc->depth0;
708 /* in layer_first layout, the level (slice) contains just one
709 * layer (since in fact the layer contains the slices)
710 */
711 uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
712
713 for (level = 0; level <= prsc->last_level; level++) {
714 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
715 uint32_t blocks;
716
717 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
718 slice->pitch = width =
719 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
720 else
721 slice->pitch = width = align(width, pitchalign);
722 slice->offset = size;
723 blocks = util_format_get_nblocks(format, width, height);
724 /* 1d array and 2d array textures must all have the same layer size
725 * for each miplevel on a3xx. 3d textures can have different layer
726 * sizes for high levels, but the hw auto-sizer is buggy (or at least
727 * different than what this code does), so as soon as the layer size
728 * range gets into range, we stop reducing it.
729 */
730 if (prsc->target == PIPE_TEXTURE_3D && (
731 level == 1 ||
732 (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
733 slice->size0 = align(blocks * rsc->cpp, alignment);
734 else if (level == 0 || rsc->layer_first || alignment == 1)
735 slice->size0 = align(blocks * rsc->cpp, alignment);
736 else
737 slice->size0 = rsc->slices[level - 1].size0;
738
739 size += slice->size0 * depth * layers_in_level;
740
741 width = u_minify(width, 1);
742 height = u_minify(height, 1);
743 depth = u_minify(depth, 1);
744 }
745
746 return size;
747 }
748
749 static uint32_t
750 slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
751 {
752 /* on a3xx, 2d array and 3d textures seem to want their
753 * layers aligned to page boundaries:
754 */
755 switch (tmpl->target) {
756 case PIPE_TEXTURE_3D:
757 case PIPE_TEXTURE_1D_ARRAY:
758 case PIPE_TEXTURE_2D_ARRAY:
759 return 4096;
760 default:
761 return 1;
762 }
763 }
764
765 /* special case to resize query buf after allocated.. */
766 void
767 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
768 {
769 struct fd_resource *rsc = fd_resource(prsc);
770
771 debug_assert(prsc->width0 == 0);
772 debug_assert(prsc->target == PIPE_BUFFER);
773 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
774
775 prsc->width0 = sz;
776 realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
777 }
778
779 /**
780 * Create a new texture object, using the given template info.
781 */
782 static struct pipe_resource *
783 fd_resource_create(struct pipe_screen *pscreen,
784 const struct pipe_resource *tmpl)
785 {
786 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
787 struct pipe_resource *prsc = &rsc->base.b;
788 enum pipe_format format = tmpl->format;
789 uint32_t size, alignment;
790
791 DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
792 "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
793 tmpl->target, util_format_name(format),
794 tmpl->width0, tmpl->height0, tmpl->depth0,
795 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
796 tmpl->usage, tmpl->bind, tmpl->flags);
797
798 if (!rsc)
799 return NULL;
800
801 *prsc = *tmpl;
802
803 pipe_reference_init(&prsc->reference, 1);
804
805 prsc->screen = pscreen;
806
807 util_range_init(&rsc->valid_buffer_range);
808
809 rsc->base.vtbl = &fd_resource_vtbl;
810
811 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
812 format = PIPE_FORMAT_Z32_FLOAT;
813 else if (fd_screen(pscreen)->gpu_id < 400 &&
814 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
815 format = PIPE_FORMAT_R8G8B8A8_UNORM;
816 rsc->internal_format = format;
817 rsc->cpp = util_format_get_blocksize(format);
818
819 assert(rsc->cpp);
820
821 alignment = slice_alignment(pscreen, tmpl);
822 if (is_a4xx(fd_screen(pscreen))) {
823 switch (tmpl->target) {
824 case PIPE_TEXTURE_3D:
825 rsc->layer_first = false;
826 break;
827 default:
828 rsc->layer_first = true;
829 alignment = 1;
830 break;
831 }
832 }
833
834 size = setup_slices(rsc, alignment, format);
835
836 /* special case for hw-query buffer, which we need to allocate before we
837 * know the size:
838 */
839 if (size == 0) {
840 /* note, semi-intention == instead of & */
841 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
842 return prsc;
843 }
844
845 if (rsc->layer_first) {
846 rsc->layer_size = align(size, 4096);
847 size = rsc->layer_size * prsc->array_size;
848 }
849
850 realloc_bo(rsc, size);
851 if (!rsc->bo)
852 goto fail;
853
854 /* There is no native Z32F_S8 sampling or rendering format, so this must
855 * be emulated via two separate textures. The depth texture still keeps
856 * its Z32F_S8 format though, and we also keep a reference to a separate
857 * S8 texture.
858 */
859 if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
860 struct pipe_resource stencil = *tmpl;
861 stencil.format = PIPE_FORMAT_S8_UINT;
862 rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
863 if (!rsc->stencil)
864 goto fail;
865 }
866
867 return prsc;
868 fail:
869 fd_resource_destroy(pscreen, prsc);
870 return NULL;
871 }
872
873 /**
874 * Create a texture from a winsys_handle. The handle is often created in
875 * another process by first creating a pipe texture and then calling
876 * resource_get_handle.
877 */
878 static struct pipe_resource *
879 fd_resource_from_handle(struct pipe_screen *pscreen,
880 const struct pipe_resource *tmpl,
881 struct winsys_handle *handle, unsigned usage)
882 {
883 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
884 struct fd_resource_slice *slice = &rsc->slices[0];
885 struct pipe_resource *prsc = &rsc->base.b;
886 uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;
887
888 DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
889 "nr_samples=%u, usage=%u, bind=%x, flags=%x",
890 tmpl->target, util_format_name(tmpl->format),
891 tmpl->width0, tmpl->height0, tmpl->depth0,
892 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
893 tmpl->usage, tmpl->bind, tmpl->flags);
894
895 if (!rsc)
896 return NULL;
897
898 *prsc = *tmpl;
899
900 pipe_reference_init(&prsc->reference, 1);
901
902 prsc->screen = pscreen;
903
904 util_range_init(&rsc->valid_buffer_range);
905
906 rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
907 if (!rsc->bo)
908 goto fail;
909
910 rsc->base.vtbl = &fd_resource_vtbl;
911 rsc->cpp = util_format_get_blocksize(tmpl->format);
912 slice->pitch = handle->stride / rsc->cpp;
913 slice->offset = handle->offset;
914 slice->size0 = handle->stride * prsc->height0;
915
916 if ((slice->pitch < align(prsc->width0, pitchalign)) ||
917 (slice->pitch & (pitchalign - 1)))
918 goto fail;
919
920 assert(rsc->cpp);
921
922 return prsc;
923
924 fail:
925 fd_resource_destroy(pscreen, prsc);
926 return NULL;
927 }
928
929 /**
930 * _copy_region using pipe (3d engine)
931 */
932 static bool
933 fd_blitter_pipe_copy_region(struct fd_context *ctx,
934 struct pipe_resource *dst,
935 unsigned dst_level,
936 unsigned dstx, unsigned dsty, unsigned dstz,
937 struct pipe_resource *src,
938 unsigned src_level,
939 const struct pipe_box *src_box)
940 {
941 /* not until we allow rendertargets to be buffers */
942 if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
943 return false;
944
945 if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
946 return false;
947
948 /* TODO we could discard if dst box covers dst level fully.. */
949 fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
950 util_blitter_copy_texture(ctx->blitter,
951 dst, dst_level, dstx, dsty, dstz,
952 src, src_level, src_box);
953 fd_blitter_pipe_end(ctx);
954
955 return true;
956 }
957
/**
 * Copy a block of pixels from one resource to another.
 * The resource must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 *
 * The copy is attempted on the 3d pipe first; if that is not possible
 * it is done in software.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe, else fallback to pure sw: */
	if (!fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box)) {
		util_resource_copy_region(pctx,
				dst, dst_level, dstx, dsty, dstz,
				src, src_level, src_box);
	}
}
990
991 bool
992 fd_render_condition_check(struct pipe_context *pctx)
993 {
994 struct fd_context *ctx = fd_context(pctx);
995
996 if (!ctx->cond_query)
997 return true;
998
999 union pipe_query_result res = { 0 };
1000 bool wait =
1001 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
1002 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
1003
1004 if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
1005 return (bool)res.u64 != ctx->cond_cond;
1006
1007 return true;
1008 }
1009
1010 /**
1011 * Optimal hardware path for blitting pixels.
1012 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1013 */
1014 static void
1015 fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1016 {
1017 struct fd_context *ctx = fd_context(pctx);
1018 struct pipe_blit_info info = *blit_info;
1019 bool discard = false;
1020
1021 if (info.src.resource->nr_samples > 1 &&
1022 info.dst.resource->nr_samples <= 1 &&
1023 !util_format_is_depth_or_stencil(info.src.resource->format) &&
1024 !util_format_is_pure_integer(info.src.resource->format)) {
1025 DBG("color resolve unimplemented");
1026 return;
1027 }
1028
1029 if (info.render_condition_enable && !fd_render_condition_check(pctx))
1030 return;
1031
1032 if (!info.scissor_enable && !info.alpha_blend) {
1033 discard = util_texrange_covers_whole_level(info.dst.resource,
1034 info.dst.level, info.dst.box.x, info.dst.box.y,
1035 info.dst.box.z, info.dst.box.width,
1036 info.dst.box.height, info.dst.box.depth);
1037 }
1038
1039 if (util_try_blit_via_copy_region(pctx, &info)) {
1040 return; /* done */
1041 }
1042
1043 if (info.mask & PIPE_MASK_S) {
1044 DBG("cannot blit stencil, skipping");
1045 info.mask &= ~PIPE_MASK_S;
1046 }
1047
1048 if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
1049 DBG("blit unsupported %s -> %s",
1050 util_format_short_name(info.src.resource->format),
1051 util_format_short_name(info.dst.resource->format));
1052 return;
1053 }
1054
1055 fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
1056 util_blitter_blit(ctx->blitter, &info);
1057 fd_blitter_pipe_end(ctx);
1058 }
1059
1060 void
1061 fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
1062 enum fd_render_stage stage)
1063 {
1064 util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
1065 ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
1066 util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
1067 util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
1068 util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
1069 util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
1070 ctx->streamout.targets);
1071 util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
1072 util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
1073 util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
1074 util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
1075 util_blitter_save_blend(ctx->blitter, ctx->blend);
1076 util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
1077 util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
1078 util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
1079 util_blitter_save_framebuffer(ctx->blitter,
1080 ctx->batch ? &ctx->batch->framebuffer : NULL);
1081 util_blitter_save_fragment_sampler_states(ctx->blitter,
1082 ctx->fragtex.num_samplers,
1083 (void **)ctx->fragtex.samplers);
1084 util_blitter_save_fragment_sampler_views(ctx->blitter,
1085 ctx->fragtex.num_textures, ctx->fragtex.textures);
1086 if (!render_cond)
1087 util_blitter_save_render_condition(ctx->blitter,
1088 ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
1089
1090 if (ctx->batch)
1091 fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, stage);
1092
1093 ctx->in_blit = discard;
1094 }
1095
1096 void
1097 fd_blitter_pipe_end(struct fd_context *ctx)
1098 {
1099 if (ctx->batch)
1100 fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
1101 ctx->in_blit = false;
1102 }
1103
1104 static void
1105 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1106 {
1107 struct fd_resource *rsc = fd_resource(prsc);
1108
1109 if (rsc->write_batch)
1110 fd_batch_flush(rsc->write_batch, true);
1111
1112 assert(!rsc->write_batch);
1113 }
1114
1115 void
1116 fd_resource_screen_init(struct pipe_screen *pscreen)
1117 {
1118 pscreen->resource_create = fd_resource_create;
1119 pscreen->resource_from_handle = fd_resource_from_handle;
1120 pscreen->resource_get_handle = u_resource_get_handle_vtbl;
1121 pscreen->resource_destroy = u_resource_destroy_vtbl;
1122 }
1123
1124 void
1125 fd_resource_context_init(struct pipe_context *pctx)
1126 {
1127 pctx->transfer_map = u_transfer_map_vtbl;
1128 pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
1129 pctx->transfer_unmap = u_transfer_unmap_vtbl;
1130 pctx->buffer_subdata = u_default_buffer_subdata;
1131 pctx->texture_subdata = u_default_texture_subdata;
1132 pctx->create_surface = fd_create_surface;
1133 pctx->surface_destroy = fd_surface_destroy;
1134 pctx->resource_copy_region = fd_resource_copy_region;
1135 pctx->blit = fd_blit;
1136 pctx->flush_resource = fd_flush_resource;
1137 }