gallium: remove pipe_index_buffer and set_index_buffer
[mesa.git] / src / gallium / drivers / freedreno / freedreno_resource.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "util/u_format.h"
30 #include "util/u_format_rgtc.h"
31 #include "util/u_format_zs.h"
32 #include "util/u_inlines.h"
33 #include "util/u_transfer.h"
34 #include "util/u_string.h"
35 #include "util/u_surface.h"
36 #include "util/set.h"
37
38 #include "freedreno_resource.h"
39 #include "freedreno_batch_cache.h"
40 #include "freedreno_screen.h"
41 #include "freedreno_surface.h"
42 #include "freedreno_context.h"
43 #include "freedreno_query_hw.h"
44 #include "freedreno_util.h"
45
46 #include <errno.h>
47
48 /* XXX this should go away, needed for 'struct winsys_handle' */
49 #include "state_tracker/drm_driver.h"
50
51 static void
52 fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
53 {
54 /* Go through the entire state and see if the resource is bound
55 * anywhere. If it is, mark the relevant state as dirty. This is called on
56 * realloc_bo.
57 */
58
59 /* VBOs */
60 for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
61 if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
62 ctx->dirty |= FD_DIRTY_VTXBUF;
63 }
64
65 /* per-shader-stage resources: */
66 for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
67 /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
68 * cmdstream rather than by pointer..
69 */
70 const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
71 for (unsigned i = 1; i < num_ubos; i++) {
72 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
73 break;
74 if (ctx->constbuf[stage].cb[i].buffer == prsc)
75 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
76 }
77
78 /* Textures */
79 for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
80 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
81 break;
82 if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
83 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
84 }
85
86 /* SSBOs */
87 const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
88 for (unsigned i = 0; i < num_ssbos; i++) {
89 if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
90 break;
91 if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
92 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
93 }
94 }
95 }
96
97 static void
98 realloc_bo(struct fd_resource *rsc, uint32_t size)
99 {
100 struct fd_screen *screen = fd_screen(rsc->base.b.screen);
101 uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
102 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
103
104 /* if we start using things other than write-combine,
105 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
106 */
107
108 if (rsc->bo)
109 fd_bo_del(rsc->bo);
110
111 rsc->bo = fd_bo_new(screen->dev, size, flags);
112 rsc->timestamp = 0;
113 util_range_set_empty(&rsc->valid_buffer_range);
114 fd_bc_invalidate_resource(rsc, true);
115 }
116
117 static void
118 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
119 {
120 /* TODO size threshold too?? */
121 if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
122 /* do blit on gpu: */
123 fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
124 util_blitter_blit(ctx->blitter, blit);
125 fd_blitter_pipe_end(ctx);
126 } else {
127 /* do blit on cpu: */
128 util_resource_copy_region(&ctx->base,
129 blit->dst.resource, blit->dst.level, blit->dst.box.x,
130 blit->dst.box.y, blit->dst.box.z,
131 blit->src.resource, blit->src.level, &blit->src.box);
132 }
133 }
134
135 static bool
136 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
137 unsigned level, unsigned usage, const struct pipe_box *box)
138 {
139 struct pipe_context *pctx = &ctx->base;
140 struct pipe_resource *prsc = &rsc->base.b;
141 bool fallback = false;
142
143 if (prsc->next)
144 return false;
145
146 /* TODO: somehow munge dimensions and format to copy unsupported
147 * render target format to something that is supported?
148 */
149 if (!pctx->screen->is_format_supported(pctx->screen,
150 prsc->format, prsc->target, prsc->nr_samples,
151 PIPE_BIND_RENDER_TARGET))
152 fallback = true;
153
154 /* these cases should be handled elsewhere.. just for future
155 * reference in case this gets split into a more generic(ish)
156 * helper.
157 */
158 debug_assert(!(usage & PIPE_TRANSFER_READ));
159 debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
160
161 /* if we do a gpu blit to clone the whole resource, we'll just
162 * end up stalling on that.. so only allow if we can discard
163 * current range (and blit, possibly cpu or gpu, the rest)
164 */
165 if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
166 return false;
167
168 bool whole_level = util_texrange_covers_whole_level(prsc, level,
169 box->x, box->y, box->z, box->width, box->height, box->depth);
170
171 /* TODO need to be more clever about current level */
172 if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
173 return false;
174
175 struct pipe_resource *pshadow =
176 pctx->screen->resource_create(pctx->screen, prsc);
177
178 if (!pshadow)
179 return false;
180
181 assert(!ctx->in_shadow);
182 ctx->in_shadow = true;
183
184 /* get rid of any references that batch-cache might have to us (which
185 * should empty/destroy rsc->batches hashset)
186 */
187 fd_bc_invalidate_resource(rsc, false);
188
189 mtx_lock(&ctx->screen->lock);
190
191 /* Swap the backing bo's, so shadow becomes the old buffer,
192 * blit from shadow to new buffer. From here on out, we
193 * cannot fail.
194 *
195 * Note that we need to do it in this order, otherwise if
196 * we go down cpu blit path, the recursive transfer_map()
197 * sees the wrong status..
198 */
199 struct fd_resource *shadow = fd_resource(pshadow);
200
201 DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
202 shadow, shadow->base.b.reference.count);
203
204 /* TODO valid_buffer_range?? */
205 swap(rsc->bo, shadow->bo);
206 swap(rsc->timestamp, shadow->timestamp);
207 swap(rsc->write_batch, shadow->write_batch);
208
209 /* at this point, the newly created shadow buffer is not referenced
210 * by any batches, but the existing rsc (probably) is. We need to
211 * transfer those references over:
212 */
213 debug_assert(shadow->batch_mask == 0);
214 struct fd_batch *batch;
215 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
216 struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
217 _mesa_set_remove(batch->resources, entry);
218 _mesa_set_add(batch->resources, shadow);
219 }
220 swap(rsc->batch_mask, shadow->batch_mask);
221
222 mtx_unlock(&ctx->screen->lock);
223
224 struct pipe_blit_info blit = {0};
225 blit.dst.resource = prsc;
226 blit.dst.format = prsc->format;
227 blit.src.resource = pshadow;
228 blit.src.format = pshadow->format;
229 blit.mask = util_format_get_mask(prsc->format);
230 blit.filter = PIPE_TEX_FILTER_NEAREST;
231
232 #define set_box(field, val) do { \
233 blit.dst.field = (val); \
234 blit.src.field = (val); \
235 } while (0)
236
237 /* blit the other levels in their entirety: */
238 for (unsigned l = 0; l <= prsc->last_level; l++) {
239 if (l == level)
240 continue;
241
242 /* just blit whole level: */
243 set_box(level, l);
244 set_box(box.width, u_minify(prsc->width0, l));
245 set_box(box.height, u_minify(prsc->height0, l));
246 set_box(box.depth, u_minify(prsc->depth0, l));
247
248 do_blit(ctx, &blit, fallback);
249 }
250
251 /* deal w/ current level specially, since we might need to split
252 * it up into a couple blits:
253 */
254 if (!whole_level) {
255 set_box(level, level);
256
257 switch (prsc->target) {
258 case PIPE_BUFFER:
259 case PIPE_TEXTURE_1D:
260 set_box(box.y, 0);
261 set_box(box.z, 0);
262 set_box(box.height, 1);
263 set_box(box.depth, 1);
264
265 if (box->x > 0) {
266 set_box(box.x, 0);
267 set_box(box.width, box->x);
268
269 do_blit(ctx, &blit, fallback);
270 }
271 if ((box->x + box->width) < u_minify(prsc->width0, level)) {
272 set_box(box.x, box->x + box->width);
273 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
274
275 do_blit(ctx, &blit, fallback);
276 }
277 break;
278 case PIPE_TEXTURE_2D:
279 /* TODO */
280 default:
281 unreachable("TODO");
282 }
283 }
284
285 ctx->in_shadow = false;
286
287 pipe_resource_reference(&pshadow, NULL);
288
289 return true;
290 }
291
292 static unsigned
293 fd_resource_layer_offset(struct fd_resource *rsc,
294 struct fd_resource_slice *slice,
295 unsigned layer)
296 {
297 if (rsc->layer_first)
298 return layer * rsc->layer_size;
299 else
300 return layer * slice->size0;
301 }
302
303 static void
304 fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
305 {
306 struct fd_resource *rsc = fd_resource(trans->base.resource);
307 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
308 struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
309 enum pipe_format format = trans->base.resource->format;
310
311 float *depth = fd_bo_map(rsc->bo) + slice->offset +
312 fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
313 (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
314 uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
315 fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
316 (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
317
318 if (format != PIPE_FORMAT_X32_S8X24_UINT)
319 util_format_z32_float_s8x24_uint_unpack_z_float(
320 depth, slice->pitch * 4,
321 trans->staging, trans->base.stride,
322 box->width, box->height);
323
324 util_format_z32_float_s8x24_uint_unpack_s_8uint(
325 stencil, sslice->pitch,
326 trans->staging, trans->base.stride,
327 box->width, box->height);
328 }
329
330 static void
331 fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
332 {
333 struct fd_resource *rsc = fd_resource(trans->base.resource);
334 struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
335 enum pipe_format format = trans->base.resource->format;
336
337 uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
338 fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
339 ((trans->base.box.y + box->y) * slice->pitch +
340 trans->base.box.x + box->x) * rsc->cpp;
341
342 uint8_t *source = trans->staging +
343 util_format_get_nblocksy(format, box->y) * trans->base.stride +
344 util_format_get_stride(format, box->x);
345
346 switch (format) {
347 case PIPE_FORMAT_RGTC1_UNORM:
348 case PIPE_FORMAT_RGTC1_SNORM:
349 case PIPE_FORMAT_LATC1_UNORM:
350 case PIPE_FORMAT_LATC1_SNORM:
351 util_format_rgtc1_unorm_unpack_rgba_8unorm(
352 data, slice->pitch * rsc->cpp,
353 source, trans->base.stride,
354 box->width, box->height);
355 break;
356 case PIPE_FORMAT_RGTC2_UNORM:
357 case PIPE_FORMAT_RGTC2_SNORM:
358 case PIPE_FORMAT_LATC2_UNORM:
359 case PIPE_FORMAT_LATC2_SNORM:
360 util_format_rgtc2_unorm_unpack_rgba_8unorm(
361 data, slice->pitch * rsc->cpp,
362 source, trans->base.stride,
363 box->width, box->height);
364 break;
365 default:
366 assert(!"Unexpected format\n");
367 break;
368 }
369 }
370
371 static void
372 fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
373 {
374 enum pipe_format format = trans->base.resource->format;
375
376 switch (format) {
377 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
378 case PIPE_FORMAT_X32_S8X24_UINT:
379 fd_resource_flush_z32s8(trans, box);
380 break;
381 case PIPE_FORMAT_RGTC1_UNORM:
382 case PIPE_FORMAT_RGTC1_SNORM:
383 case PIPE_FORMAT_RGTC2_UNORM:
384 case PIPE_FORMAT_RGTC2_SNORM:
385 case PIPE_FORMAT_LATC1_UNORM:
386 case PIPE_FORMAT_LATC1_SNORM:
387 case PIPE_FORMAT_LATC2_UNORM:
388 case PIPE_FORMAT_LATC2_SNORM:
389 fd_resource_flush_rgtc(trans, box);
390 break;
391 default:
392 assert(!"Unexpected staging transfer type");
393 break;
394 }
395 }
396
397 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
398 struct pipe_transfer *ptrans,
399 const struct pipe_box *box)
400 {
401 struct fd_resource *rsc = fd_resource(ptrans->resource);
402 struct fd_transfer *trans = fd_transfer(ptrans);
403
404 if (ptrans->resource->target == PIPE_BUFFER)
405 util_range_add(&rsc->valid_buffer_range,
406 ptrans->box.x + box->x,
407 ptrans->box.x + box->x + box->width);
408
409 if (trans->staging)
410 fd_resource_flush(trans, box);
411 }
412
413 static void
414 fd_resource_transfer_unmap(struct pipe_context *pctx,
415 struct pipe_transfer *ptrans)
416 {
417 struct fd_context *ctx = fd_context(pctx);
418 struct fd_resource *rsc = fd_resource(ptrans->resource);
419 struct fd_transfer *trans = fd_transfer(ptrans);
420
421 if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
422 struct pipe_box box;
423 u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
424 fd_resource_flush(trans, &box);
425 }
426
427 if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
428 fd_bo_cpu_fini(rsc->bo);
429 if (rsc->stencil)
430 fd_bo_cpu_fini(rsc->stencil->bo);
431 }
432
433 util_range_add(&rsc->valid_buffer_range,
434 ptrans->box.x,
435 ptrans->box.x + ptrans->box.width);
436
437 pipe_resource_reference(&ptrans->resource, NULL);
438 slab_free(&ctx->transfer_pool, ptrans);
439
440 free(trans->staging);
441 }
442
443 static void *
444 fd_resource_transfer_map(struct pipe_context *pctx,
445 struct pipe_resource *prsc,
446 unsigned level, unsigned usage,
447 const struct pipe_box *box,
448 struct pipe_transfer **pptrans)
449 {
450 struct fd_context *ctx = fd_context(pctx);
451 struct fd_resource *rsc = fd_resource(prsc);
452 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
453 struct fd_transfer *trans;
454 struct pipe_transfer *ptrans;
455 enum pipe_format format = prsc->format;
456 uint32_t op = 0;
457 uint32_t offset;
458 char *buf;
459 int ret = 0;
460
461 DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
462 box->width, box->height, box->x, box->y);
463
464 ptrans = slab_alloc(&ctx->transfer_pool);
465 if (!ptrans)
466 return NULL;
467
468 /* slab_alloc_st() doesn't zero: */
469 trans = fd_transfer(ptrans);
470 memset(trans, 0, sizeof(*trans));
471
472 pipe_resource_reference(&ptrans->resource, prsc);
473 ptrans->level = level;
474 ptrans->usage = usage;
475 ptrans->box = *box;
476 ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
477 ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
478
479 if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
480 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
481
482 if (usage & PIPE_TRANSFER_READ)
483 op |= DRM_FREEDRENO_PREP_READ;
484
485 if (usage & PIPE_TRANSFER_WRITE)
486 op |= DRM_FREEDRENO_PREP_WRITE;
487
488 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
489 realloc_bo(rsc, fd_bo_size(rsc->bo));
490 if (rsc->stencil)
491 realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
492 fd_invalidate_resource(ctx, prsc);
493 } else if ((usage & PIPE_TRANSFER_WRITE) &&
494 prsc->target == PIPE_BUFFER &&
495 !util_ranges_intersect(&rsc->valid_buffer_range,
496 box->x, box->x + box->width)) {
497 /* We are trying to write to a previously uninitialized range. No need
498 * to wait.
499 */
500 } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
501 struct fd_batch *write_batch = NULL;
502
503 /* hold a reference, so it doesn't disappear under us: */
504 fd_batch_reference(&write_batch, rsc->write_batch);
505
506 if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
507 write_batch->back_blit) {
508 /* if only thing pending is a back-blit, we can discard it: */
509 fd_batch_reset(write_batch);
510 }
511
512 /* If the GPU is writing to the resource, or if it is reading from the
513 * resource and we're trying to write to it, flush the renders.
514 */
515 bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
516 bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
517 ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
518
519 /* if we need to flush/stall, see if we can make a shadow buffer
520 * to avoid this:
521 *
522 * TODO we could go down this path !reorder && !busy_for_read
523 * ie. we only *don't* want to go down this path if the blit
524 * will trigger a flush!
525 */
526 if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
527 if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
528 needs_flush = busy = false;
529 fd_invalidate_resource(ctx, prsc);
530 }
531 }
532
533 if (needs_flush) {
534 if (usage & PIPE_TRANSFER_WRITE) {
535 struct fd_batch *batch, *last_batch = NULL;
536 foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
537 fd_batch_reference(&last_batch, batch);
538 fd_batch_flush(batch, false);
539 }
540 if (last_batch) {
541 fd_batch_sync(last_batch);
542 fd_batch_reference(&last_batch, NULL);
543 }
544 assert(rsc->batch_mask == 0);
545 } else {
546 fd_batch_flush(write_batch, true);
547 }
548 assert(!rsc->write_batch);
549 }
550
551 fd_batch_reference(&write_batch, NULL);
552
553 /* The GPU keeps track of how the various bo's are being used, and
554 * will wait if necessary for the proper operation to have
555 * completed.
556 */
557 if (busy) {
558 ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
559 if (ret)
560 goto fail;
561 }
562 }
563
564 buf = fd_bo_map(rsc->bo);
565 if (!buf)
566 goto fail;
567
568 offset = slice->offset +
569 box->y / util_format_get_blockheight(format) * ptrans->stride +
570 box->x / util_format_get_blockwidth(format) * rsc->cpp +
571 fd_resource_layer_offset(rsc, slice, box->z);
572
573 if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
574 prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
575 assert(trans->base.box.depth == 1);
576
577 trans->base.stride = trans->base.box.width * rsc->cpp * 2;
578 trans->staging = malloc(trans->base.stride * trans->base.box.height);
579 if (!trans->staging)
580 goto fail;
581
582 /* if we're not discarding the whole range (or resource), we must copy
583 * the real data in.
584 */
585 if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
586 PIPE_TRANSFER_DISCARD_RANGE))) {
587 struct fd_resource_slice *sslice =
588 fd_resource_slice(rsc->stencil, level);
589 void *sbuf = fd_bo_map(rsc->stencil->bo);
590 if (!sbuf)
591 goto fail;
592
593 float *depth = (float *)(buf + slice->offset +
594 fd_resource_layer_offset(rsc, slice, box->z) +
595 box->y * slice->pitch * 4 + box->x * 4);
596 uint8_t *stencil = sbuf + sslice->offset +
597 fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
598 box->y * sslice->pitch + box->x;
599
600 if (format != PIPE_FORMAT_X32_S8X24_UINT)
601 util_format_z32_float_s8x24_uint_pack_z_float(
602 trans->staging, trans->base.stride,
603 depth, slice->pitch * 4,
604 box->width, box->height);
605
606 util_format_z32_float_s8x24_uint_pack_s_8uint(
607 trans->staging, trans->base.stride,
608 stencil, sslice->pitch,
609 box->width, box->height);
610 }
611
612 buf = trans->staging;
613 offset = 0;
614 } else if (rsc->internal_format != format &&
615 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
616 assert(trans->base.box.depth == 1);
617
618 trans->base.stride = util_format_get_stride(
619 format, trans->base.box.width);
620 trans->staging = malloc(
621 util_format_get_2d_size(format, trans->base.stride,
622 trans->base.box.height));
623 if (!trans->staging)
624 goto fail;
625
626 /* if we're not discarding the whole range (or resource), we must copy
627 * the real data in.
628 */
629 if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
630 PIPE_TRANSFER_DISCARD_RANGE))) {
631 uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
632 fd_resource_layer_offset(rsc, slice, box->z) +
633 box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
634
635 switch (format) {
636 case PIPE_FORMAT_RGTC1_UNORM:
637 case PIPE_FORMAT_RGTC1_SNORM:
638 case PIPE_FORMAT_LATC1_UNORM:
639 case PIPE_FORMAT_LATC1_SNORM:
640 util_format_rgtc1_unorm_pack_rgba_8unorm(
641 trans->staging, trans->base.stride,
642 rgba8, slice->pitch * rsc->cpp,
643 box->width, box->height);
644 break;
645 case PIPE_FORMAT_RGTC2_UNORM:
646 case PIPE_FORMAT_RGTC2_SNORM:
647 case PIPE_FORMAT_LATC2_UNORM:
648 case PIPE_FORMAT_LATC2_SNORM:
649 util_format_rgtc2_unorm_pack_rgba_8unorm(
650 trans->staging, trans->base.stride,
651 rgba8, slice->pitch * rsc->cpp,
652 box->width, box->height);
653 break;
654 default:
655 assert(!"Unexpected format");
656 break;
657 }
658 }
659
660 buf = trans->staging;
661 offset = 0;
662 }
663
664 *pptrans = ptrans;
665
666 return buf + offset;
667
668 fail:
669 fd_resource_transfer_unmap(pctx, ptrans);
670 return NULL;
671 }
672
673 static void
674 fd_resource_destroy(struct pipe_screen *pscreen,
675 struct pipe_resource *prsc)
676 {
677 struct fd_resource *rsc = fd_resource(prsc);
678 fd_bc_invalidate_resource(rsc, true);
679 if (rsc->bo)
680 fd_bo_del(rsc->bo);
681 util_range_destroy(&rsc->valid_buffer_range);
682 FREE(rsc);
683 }
684
685 static boolean
686 fd_resource_get_handle(struct pipe_screen *pscreen,
687 struct pipe_resource *prsc,
688 struct winsys_handle *handle)
689 {
690 struct fd_resource *rsc = fd_resource(prsc);
691
692 return fd_screen_bo_get_handle(pscreen, rsc->bo,
693 rsc->slices[0].pitch * rsc->cpp, handle);
694 }
695
696
697 static const struct u_resource_vtbl fd_resource_vtbl = {
698 .resource_get_handle = fd_resource_get_handle,
699 .resource_destroy = fd_resource_destroy,
700 .transfer_map = fd_resource_transfer_map,
701 .transfer_flush_region = fd_resource_transfer_flush_region,
702 .transfer_unmap = fd_resource_transfer_unmap,
703 };
704
705 static uint32_t
706 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
707 {
708 struct pipe_resource *prsc = &rsc->base.b;
709 enum util_format_layout layout = util_format_description(format)->layout;
710 uint32_t pitchalign = fd_screen(prsc->screen)->gmem_alignw;
711 uint32_t level, size = 0;
712 uint32_t width = prsc->width0;
713 uint32_t height = prsc->height0;
714 uint32_t depth = prsc->depth0;
715 /* in layer_first layout, the level (slice) contains just one
716 * layer (since in fact the layer contains the slices)
717 */
718 uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
719
720 for (level = 0; level <= prsc->last_level; level++) {
721 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
722 uint32_t blocks;
723
724 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
725 slice->pitch = width =
726 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
727 else
728 slice->pitch = width = align(width, pitchalign);
729 slice->offset = size;
730 blocks = util_format_get_nblocks(format, width, height);
731 /* 1d array and 2d array textures must all have the same layer size
732 * for each miplevel on a3xx. 3d textures can have different layer
733 * sizes for high levels, but the hw auto-sizer is buggy (or at least
734 * different than what this code does), so as soon as the layer size
735 * range gets into range, we stop reducing it.
736 */
737 if (prsc->target == PIPE_TEXTURE_3D && (
738 level == 1 ||
739 (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
740 slice->size0 = align(blocks * rsc->cpp, alignment);
741 else if (level == 0 || rsc->layer_first || alignment == 1)
742 slice->size0 = align(blocks * rsc->cpp, alignment);
743 else
744 slice->size0 = rsc->slices[level - 1].size0;
745
746 size += slice->size0 * depth * layers_in_level;
747
748 width = u_minify(width, 1);
749 height = u_minify(height, 1);
750 depth = u_minify(depth, 1);
751 }
752
753 return size;
754 }
755
756 static uint32_t
757 slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
758 {
759 /* on a3xx, 2d array and 3d textures seem to want their
760 * layers aligned to page boundaries:
761 */
762 switch (tmpl->target) {
763 case PIPE_TEXTURE_3D:
764 case PIPE_TEXTURE_1D_ARRAY:
765 case PIPE_TEXTURE_2D_ARRAY:
766 return 4096;
767 default:
768 return 1;
769 }
770 }
771
772 /* special case to resize query buf after allocated.. */
773 void
774 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
775 {
776 struct fd_resource *rsc = fd_resource(prsc);
777
778 debug_assert(prsc->width0 == 0);
779 debug_assert(prsc->target == PIPE_BUFFER);
780 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
781
782 prsc->width0 = sz;
783 realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
784 }
785
786 /**
787 * Create a new texture object, using the given template info.
788 */
789 static struct pipe_resource *
790 fd_resource_create(struct pipe_screen *pscreen,
791 const struct pipe_resource *tmpl)
792 {
793 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
794 struct pipe_resource *prsc = &rsc->base.b;
795 enum pipe_format format = tmpl->format;
796 uint32_t size, alignment;
797
798 DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
799 "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
800 tmpl->target, util_format_name(format),
801 tmpl->width0, tmpl->height0, tmpl->depth0,
802 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
803 tmpl->usage, tmpl->bind, tmpl->flags);
804
805 if (!rsc)
806 return NULL;
807
808 *prsc = *tmpl;
809
810 pipe_reference_init(&prsc->reference, 1);
811
812 prsc->screen = pscreen;
813
814 util_range_init(&rsc->valid_buffer_range);
815
816 rsc->base.vtbl = &fd_resource_vtbl;
817
818 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
819 format = PIPE_FORMAT_Z32_FLOAT;
820 else if (fd_screen(pscreen)->gpu_id < 400 &&
821 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
822 format = PIPE_FORMAT_R8G8B8A8_UNORM;
823 rsc->internal_format = format;
824 rsc->cpp = util_format_get_blocksize(format);
825
826 assert(rsc->cpp);
827
828 alignment = slice_alignment(pscreen, tmpl);
829 if (is_a4xx(fd_screen(pscreen)) || is_a5xx(fd_screen(pscreen))) {
830 switch (tmpl->target) {
831 case PIPE_TEXTURE_3D:
832 rsc->layer_first = false;
833 break;
834 default:
835 rsc->layer_first = true;
836 alignment = 1;
837 break;
838 }
839 }
840
841 size = setup_slices(rsc, alignment, format);
842
843 /* special case for hw-query buffer, which we need to allocate before we
844 * know the size:
845 */
846 if (size == 0) {
847 /* note, semi-intention == instead of & */
848 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
849 return prsc;
850 }
851
852 if (rsc->layer_first) {
853 rsc->layer_size = align(size, 4096);
854 size = rsc->layer_size * prsc->array_size;
855 }
856
857 realloc_bo(rsc, size);
858 if (!rsc->bo)
859 goto fail;
860
861 /* There is no native Z32F_S8 sampling or rendering format, so this must
862 * be emulated via two separate textures. The depth texture still keeps
863 * its Z32F_S8 format though, and we also keep a reference to a separate
864 * S8 texture.
865 */
866 if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
867 struct pipe_resource stencil = *tmpl;
868 stencil.format = PIPE_FORMAT_S8_UINT;
869 rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
870 if (!rsc->stencil)
871 goto fail;
872 }
873
874 return prsc;
875 fail:
876 fd_resource_destroy(pscreen, prsc);
877 return NULL;
878 }
879
880 /**
881 * Create a texture from a winsys_handle. The handle is often created in
882 * another process by first creating a pipe texture and then calling
883 * resource_get_handle.
884 */
885 static struct pipe_resource *
886 fd_resource_from_handle(struct pipe_screen *pscreen,
887 const struct pipe_resource *tmpl,
888 struct winsys_handle *handle, unsigned usage)
889 {
890 struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
891 struct fd_resource_slice *slice = &rsc->slices[0];
892 struct pipe_resource *prsc = &rsc->base.b;
893 uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;
894
895 DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
896 "nr_samples=%u, usage=%u, bind=%x, flags=%x",
897 tmpl->target, util_format_name(tmpl->format),
898 tmpl->width0, tmpl->height0, tmpl->depth0,
899 tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
900 tmpl->usage, tmpl->bind, tmpl->flags);
901
902 if (!rsc)
903 return NULL;
904
905 *prsc = *tmpl;
906
907 pipe_reference_init(&prsc->reference, 1);
908
909 prsc->screen = pscreen;
910
911 util_range_init(&rsc->valid_buffer_range);
912
913 rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
914 if (!rsc->bo)
915 goto fail;
916
917 rsc->base.vtbl = &fd_resource_vtbl;
918 rsc->cpp = util_format_get_blocksize(tmpl->format);
919 slice->pitch = handle->stride / rsc->cpp;
920 slice->offset = handle->offset;
921 slice->size0 = handle->stride * prsc->height0;
922
923 if ((slice->pitch < align(prsc->width0, pitchalign)) ||
924 (slice->pitch & (pitchalign - 1)))
925 goto fail;
926
927 assert(rsc->cpp);
928
929 return prsc;
930
931 fail:
932 fd_resource_destroy(pscreen, prsc);
933 return NULL;
934 }
935
936 /**
937 * _copy_region using pipe (3d engine)
938 */
939 static bool
940 fd_blitter_pipe_copy_region(struct fd_context *ctx,
941 struct pipe_resource *dst,
942 unsigned dst_level,
943 unsigned dstx, unsigned dsty, unsigned dstz,
944 struct pipe_resource *src,
945 unsigned src_level,
946 const struct pipe_box *src_box)
947 {
948 /* not until we allow rendertargets to be buffers */
949 if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
950 return false;
951
952 if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
953 return false;
954
955 /* TODO we could discard if dst box covers dst level fully.. */
956 fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
957 util_blitter_copy_texture(ctx->blitter,
958 dst, dst_level, dstx, dsty, dstz,
959 src, src_level, src_box);
960 fd_blitter_pipe_end(ctx);
961
962 return true;
963 }
964
/**
 * Copy a block of pixels from one resource to another.
 * The resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* the 3d pipe gets the first try, pure sw is the fallback: */
	const bool done = fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);

	if (!done) {
		util_resource_copy_region(pctx,
				dst, dst_level, dstx, dsty, dstz,
				src, src_level, src_box);
	}
}
997
998 bool
999 fd_render_condition_check(struct pipe_context *pctx)
1000 {
1001 struct fd_context *ctx = fd_context(pctx);
1002
1003 if (!ctx->cond_query)
1004 return true;
1005
1006 union pipe_query_result res = { 0 };
1007 bool wait =
1008 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
1009 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
1010
1011 if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
1012 return (bool)res.u64 != ctx->cond_cond;
1013
1014 return true;
1015 }
1016
1017 /**
1018 * Optimal hardware path for blitting pixels.
1019 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1020 */
1021 static void
1022 fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1023 {
1024 struct fd_context *ctx = fd_context(pctx);
1025 struct pipe_blit_info info = *blit_info;
1026 bool discard = false;
1027
1028 if (info.src.resource->nr_samples > 1 &&
1029 info.dst.resource->nr_samples <= 1 &&
1030 !util_format_is_depth_or_stencil(info.src.resource->format) &&
1031 !util_format_is_pure_integer(info.src.resource->format)) {
1032 DBG("color resolve unimplemented");
1033 return;
1034 }
1035
1036 if (info.render_condition_enable && !fd_render_condition_check(pctx))
1037 return;
1038
1039 if (!info.scissor_enable && !info.alpha_blend) {
1040 discard = util_texrange_covers_whole_level(info.dst.resource,
1041 info.dst.level, info.dst.box.x, info.dst.box.y,
1042 info.dst.box.z, info.dst.box.width,
1043 info.dst.box.height, info.dst.box.depth);
1044 }
1045
1046 if (util_try_blit_via_copy_region(pctx, &info)) {
1047 return; /* done */
1048 }
1049
1050 if (info.mask & PIPE_MASK_S) {
1051 DBG("cannot blit stencil, skipping");
1052 info.mask &= ~PIPE_MASK_S;
1053 }
1054
1055 if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
1056 DBG("blit unsupported %s -> %s",
1057 util_format_short_name(info.src.resource->format),
1058 util_format_short_name(info.dst.resource->format));
1059 return;
1060 }
1061
1062 fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
1063 util_blitter_blit(ctx->blitter, &info);
1064 fd_blitter_pipe_end(ctx);
1065 }
1066
1067 void
1068 fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
1069 enum fd_render_stage stage)
1070 {
1071 util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
1072 ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
1073 util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
1074 util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
1075 util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
1076 util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
1077 ctx->streamout.targets);
1078 util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
1079 util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
1080 util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
1081 util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
1082 util_blitter_save_blend(ctx->blitter, ctx->blend);
1083 util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
1084 util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
1085 util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
1086 util_blitter_save_framebuffer(ctx->blitter,
1087 ctx->batch ? &ctx->batch->framebuffer : NULL);
1088 util_blitter_save_fragment_sampler_states(ctx->blitter,
1089 ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers,
1090 (void **)ctx->tex[PIPE_SHADER_FRAGMENT].samplers);
1091 util_blitter_save_fragment_sampler_views(ctx->blitter,
1092 ctx->tex[PIPE_SHADER_FRAGMENT].num_textures,
1093 ctx->tex[PIPE_SHADER_FRAGMENT].textures);
1094 if (!render_cond)
1095 util_blitter_save_render_condition(ctx->blitter,
1096 ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
1097
1098 if (ctx->batch)
1099 fd_batch_set_stage(ctx->batch, stage);
1100
1101 ctx->in_blit = discard;
1102 }
1103
1104 void
1105 fd_blitter_pipe_end(struct fd_context *ctx)
1106 {
1107 if (ctx->batch)
1108 fd_batch_set_stage(ctx->batch, FD_STAGE_NULL);
1109 ctx->in_blit = false;
1110 }
1111
1112 static void
1113 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1114 {
1115 struct fd_resource *rsc = fd_resource(prsc);
1116
1117 if (rsc->write_batch)
1118 fd_batch_flush(rsc->write_batch, true);
1119
1120 assert(!rsc->write_batch);
1121 }
1122
1123 void
1124 fd_resource_screen_init(struct pipe_screen *pscreen)
1125 {
1126 pscreen->resource_create = fd_resource_create;
1127 pscreen->resource_from_handle = fd_resource_from_handle;
1128 pscreen->resource_get_handle = u_resource_get_handle_vtbl;
1129 pscreen->resource_destroy = u_resource_destroy_vtbl;
1130 }
1131
1132 void
1133 fd_resource_context_init(struct pipe_context *pctx)
1134 {
1135 pctx->transfer_map = u_transfer_map_vtbl;
1136 pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
1137 pctx->transfer_unmap = u_transfer_unmap_vtbl;
1138 pctx->buffer_subdata = u_default_buffer_subdata;
1139 pctx->texture_subdata = u_default_texture_subdata;
1140 pctx->create_surface = fd_create_surface;
1141 pctx->surface_destroy = fd_surface_destroy;
1142 pctx->resource_copy_region = fd_resource_copy_region;
1143 pctx->blit = fd_blit;
1144 pctx->flush_resource = fd_flush_resource;
1145 }