[mesa.git] src/gallium/drivers/ilo/ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50  *   mapping, and copy it over to the real bo with a pipelined blit on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61  * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer while it
62 * is mapped. Synchronization is done by defining memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67  *   GPU caches at the end of each batch buffer, the CPU always sees GPU updates.
68 * We could use a coherent mapping to make all persistent mappings
69 * coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
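
/*
 * For example, a busy buffer mapped with PIPE_TRANSFER_DISCARD_RANGE, and
 * without PIPE_TRANSFER_READ, PIPE_TRANSFER_MAP_DIRECTLY, or
 * PIPE_TRANSFER_PERSISTENT, can be mapped through a staging bo that is
 * copied over on unmapping, while the same map with
 * PIPE_TRANSFER_MAP_DIRECTLY added has to block (or fail, when
 * PIPE_TRANSFER_DONTBLOCK is also set).
 */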
74
75 /**
76  * Choose a transfer method suitable for the usage.  Mapping with the returned
77  * method correctly blocks when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex->image, transfer->level)) {
100 m = ILO_TRANSFER_MAP_SW_ZS;
101 need_convert = true;
102 }
103 } else if (tex->image_format != tex->base.format) {
104 m = ILO_TRANSFER_MAP_SW_CONVERT;
105 need_convert = true;
106 }
107
108 if (need_convert) {
109 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
110 return false;
111
112 *method = m;
113 return true;
114 }
115
116 tiled = (tex->image.tiling != GEN6_TILING_NONE);
117 }
118
119 if (tiled)
120 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
121 else if (is->dev.has_llc)
122 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
123 else if (usage & PIPE_TRANSFER_PERSISTENT)
124 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
125 else if (usage & PIPE_TRANSFER_READ)
126 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
127 else
128 m = ILO_TRANSFER_MAP_GTT;
129
130 *method = m;
131
132 return true;
133 }
134
135 /**
136  * Return true if the usage allows the use of a staging bo to avoid blocking.
137 */
138 static bool
139 usage_allows_staging_bo(unsigned usage)
140 {
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
143 PIPE_TRANSFER_DISCARD_RANGE |
144 PIPE_TRANSFER_FLUSH_EXPLICIT);
145 const unsigned reasons_against = (PIPE_TRANSFER_READ |
146 PIPE_TRANSFER_MAP_DIRECTLY |
147 PIPE_TRANSFER_PERSISTENT);
148
149 return (usage & can_writeback) && !(usage & reasons_against);
150 }
151
152 /**
153 * Allocate the staging resource. It is always linear and its size matches
154  * the transfer box, with proper padding.
155 */
156 static bool
157 xfer_alloc_staging_res(struct ilo_transfer *xfer)
158 {
159 const struct pipe_resource *res = xfer->base.resource;
160 const struct pipe_box *box = &xfer->base.box;
161 struct pipe_resource templ;
162
163 memset(&templ, 0, sizeof(templ));
164
165 templ.format = res->format;
166
167 if (res->target == PIPE_BUFFER) {
168 templ.target = PIPE_BUFFER;
169 templ.width0 =
170 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
171 }
172 else {
173 /* use 2D array for any texture target */
174 templ.target = PIPE_TEXTURE_2D_ARRAY;
175 templ.width0 = box->width;
176 }
177
178 templ.height0 = box->height;
179 templ.depth0 = 1;
180 templ.array_size = box->depth;
181 templ.nr_samples = 1;
182 templ.usage = PIPE_USAGE_STAGING;
183 templ.bind = PIPE_BIND_TRANSFER_WRITE;
184
185 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
186 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
187 PIPE_RESOURCE_FLAG_MAP_COHERENT;
188 }
189
190 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
191
192 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
193 assert(ilo_texture(xfer->staging.res)->image.tiling ==
194 GEN6_TILING_NONE);
195 }
196
197 return (xfer->staging.res != NULL);
198 }
199
200 /**
201 * Use an alternative transfer method or rename the resource to unblock an
202 * otherwise blocking transfer.
203 */
204 static bool
205 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
206 {
207 struct pipe_resource *res = xfer->base.resource;
208 bool unblocked = false, renamed = false;
209
210 switch (xfer->method) {
211 case ILO_TRANSFER_MAP_CPU:
212 case ILO_TRANSFER_MAP_GTT:
213 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
214 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
215 unblocked = true;
216 }
217 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
218 ilo_resource_rename_bo(res)) {
219 renamed = true;
220 unblocked = true;
221 }
222 else if (usage_allows_staging_bo(xfer->base.usage) &&
223 xfer_alloc_staging_res(xfer)) {
224 xfer->method = ILO_TRANSFER_MAP_STAGING;
225 unblocked = true;
226 }
227 break;
228 case ILO_TRANSFER_MAP_GTT_ASYNC:
229 case ILO_TRANSFER_MAP_STAGING:
230 unblocked = true;
231 break;
232 default:
233 break;
234 }
235
236 *resource_renamed = renamed;
237
238 return unblocked;
239 }
240
241 /**
242 * Allocate the staging system buffer based on the resource format and the
243 * transfer box.
244 */
245 static bool
246 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
247 {
248 const enum pipe_format format = xfer->base.resource->format;
249 const struct pipe_box *box = &xfer->base.box;
250 const unsigned alignment = 64;
251
252    /* report the layout of the staging system buffer to the caller */
253 xfer->base.stride =
254 align(util_format_get_stride(format, box->width), alignment);
255 xfer->base.layer_stride =
256 util_format_get_2d_size(format, xfer->base.stride, box->height);
257
258 xfer->staging.sys =
259 align_malloc(xfer->base.layer_stride * box->depth, alignment);
260
261 return (xfer->staging.sys != NULL);
262 }
263
264 /**
265 * Map according to the method. The staging system buffer should have been
266 * allocated if the method requires it.
267 */
268 static void *
269 xfer_map(struct ilo_transfer *xfer)
270 {
271 const struct ilo_vma *vma;
272 void *ptr;
273
274 switch (xfer->method) {
275 case ILO_TRANSFER_MAP_CPU:
276 vma = ilo_resource_get_vma(xfer->base.resource);
277 ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
278 break;
279 case ILO_TRANSFER_MAP_GTT:
280 vma = ilo_resource_get_vma(xfer->base.resource);
281 ptr = intel_bo_map_gtt(vma->bo);
282 break;
283 case ILO_TRANSFER_MAP_GTT_ASYNC:
284 vma = ilo_resource_get_vma(xfer->base.resource);
285 ptr = intel_bo_map_gtt_async(vma->bo);
286 break;
287 case ILO_TRANSFER_MAP_STAGING:
288 {
289 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
290
291 vma = ilo_resource_get_vma(xfer->staging.res);
292
293 /*
294 * We want a writable, optionally persistent and coherent, mapping
295           * for a linear bo.  We could call resource_get_transfer_method(), but
296           * the choice here is simple enough to make directly.
297 */
298 if (is->dev.has_llc)
299 ptr = intel_bo_map(vma->bo, true);
300 else
301 ptr = intel_bo_map_gtt(vma->bo);
302
303 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
304 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
305 }
306 break;
307 case ILO_TRANSFER_MAP_SW_CONVERT:
308 case ILO_TRANSFER_MAP_SW_ZS:
309 vma = NULL;
310 ptr = xfer->staging.sys;
311 break;
312 default:
313 assert(!"unknown mapping method");
314 vma = NULL;
315 ptr = NULL;
316 break;
317 }
318
319 if (ptr && vma)
320 ptr = (void *) ((char *) ptr + vma->bo_offset);
321
322 return ptr;
323 }
324
325 /**
326 * Unmap a transfer.
327 */
328 static void
329 xfer_unmap(struct ilo_transfer *xfer)
330 {
331 switch (xfer->method) {
332 case ILO_TRANSFER_MAP_CPU:
333 case ILO_TRANSFER_MAP_GTT:
334 case ILO_TRANSFER_MAP_GTT_ASYNC:
335 intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
336 break;
337 case ILO_TRANSFER_MAP_STAGING:
338 intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
339 break;
340 default:
341 break;
342 }
343 }
344
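/**
 * Return the position, in memory layout coordinates, of the transfer box
 * origin for the given level and slice.
 */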
345 static void
346 tex_get_box_origin(const struct ilo_texture *tex,
347 unsigned level, unsigned slice,
348 const struct pipe_box *box,
349 unsigned *mem_x, unsigned *mem_y)
350 {
351 unsigned x, y;
352
353 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
354 x += box->x;
355 y += box->y;
356
357 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
358 }
359
360 static unsigned
361 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
362 const struct pipe_box *box)
363 {
364 unsigned mem_x, mem_y;
365
366 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
367
368 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
369 }
370
371 static unsigned
372 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
373 {
374 return ilo_image_get_slice_stride(&tex->image, level);
375 }
376
377 static unsigned
378 tex_tile_x_swizzle(unsigned addr)
379 {
380 /*
381 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
382 *
383 * "As shown in the tiling algorithm, the new address bit[6] should be:
384 *
385 * Address bit[6] <= TiledAddr bit[6] XOR
386 * TiledAddr bit[9] XOR
387 * TiledAddr bit[10]"
388 */
389 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
390 }
391
392 static unsigned
393 tex_tile_y_swizzle(unsigned addr)
394 {
395 /*
396 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
397 *
398 * "As shown in the tiling algorithm, The new address bit[6] becomes:
399 *
400 * Address bit[6] <= TiledAddr bit[6] XOR
401 * TiledAddr bit[9]"
402 */
403 return addr ^ ((addr >> 3) & 0x40);
404 }
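
/*
 * In the two swizzle helpers above, (addr >> 3) & 0x40 isolates address
 * bit[9] and (addr >> 4) & 0x40 isolates address bit[10], each shifted down
 * to the bit[6] position before being XOR'ed into the address.
 */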
405
406 static unsigned
407 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
408 unsigned tiles_per_row, bool swizzle)
409 {
410 /*
411 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
412 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
413 * tiled region are numbered in row-major order, starting from zero. The
414 * tile number can thus be calculated as follows:
415 *
416 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
417 *
418 * OWords in that tile are also numbered in row-major order, starting from
419 * zero. The OWord number can thus be calculated as follows:
420 *
421 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
422 *
423 * and the tiled offset is
424 *
425 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
426 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
427 */
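   /*
    * For example, with a bo stride of 2048 bytes (tiles_per_row = 4),
    * mem_x = 1000 and mem_y = 10 give
    *
    *    tile   = (10 / 8) * 4 + (1000 / 512) = 5
    *    offset = 5 * 4096 + (10 % 8) * 512 + (1000 % 512) = 21992
    */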
428 unsigned tile, offset;
429
430 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
431 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
432
433 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
434 }
435
436 static unsigned
437 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
438 unsigned tiles_per_row, bool swizzle)
439 {
440 /*
441 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
442 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
443 * tiled region are numbered in row-major order, starting from zero. The
444 * tile number can thus be calculated as follows:
445 *
446 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
447 *
448 * OWords in that tile are numbered in column-major order, starting from
449 * zero. The OWord number can thus be calculated as follows:
450 *
451 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
452 *
453 * and the tiled offset is
454 *
455 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
456 */
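   /*
    * For example, with a bo stride of 2048 bytes (tiles_per_row = 16),
    * mem_x = 200 and mem_y = 37 give
    *
    *    tile   = (37 / 32) * 16 + (200 / 128) = 17
    *    oword  = ((200 % 128) / 16) * 32 + (37 % 32) = 133
    *    offset = 17 * 4096 + 133 * 16 + (200 % 16) = 71768
    */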
457 unsigned tile, oword, offset;
458
459 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
460 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
461 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
462
463 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
464 }
465
466 static unsigned
467 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
468 unsigned tiles_per_row, bool swizzle)
469 {
470 /*
471 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
472 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
473 * tiled region are numbered in row-major order, starting from zero. The
474 * tile number can thus be calculated as follows:
475 *
476 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
477 *
478 * 8x8-blocks in that tile are numbered in column-major order, starting
479 * from zero. The 8x8-block number can thus be calculated as follows:
480 *
481 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
482 *
483 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
484 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
485 * We have
486 *
487 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
488 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
489 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
490 *
491 * and the tiled offset is
492 *
493 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
494 */
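   /*
    * For example, with a bo stride of 128 bytes (tiles_per_row = 2),
    * mem_x = 70 and mem_y = 13 give
    *
    *    tile   = (13 / 64) * 2 + (70 / 64) = 1
    *    blk8   = ((70 % 64) / 8) * 8 + ((13 % 64) / 8) = 1
    *    blk4   = 1 * 2 + 1 = 3
    *    blk2   = 0 * 2 + 1 = 1
    *    blk1   = 1 * 2 + 0 = 2
    *    offset = 1 * 4096 + 1 * 64 + 3 * 16 + 1 * 4 + 2 = 4214
    */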
495 unsigned tile, blk8, blk4, blk2, blk1, offset;
496
497 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
498 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
499 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
500 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
501 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
502 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
503
504 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
505 }
506
507 static unsigned
508 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
509 unsigned tiles_per_row, bool swizzle)
510 {
511 return mem_y * tiles_per_row + mem_x;
512 }
513
514 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
515 unsigned tiles_per_row,
516 bool swizzle);
517
518 static tex_tile_offset_func
519 tex_tile_choose_offset_func(const struct ilo_texture *tex,
520 unsigned *tiles_per_row)
521 {
522 switch (tex->image.tiling) {
523 default:
524 assert(!"unknown tiling");
525 /* fall through */
526 case GEN6_TILING_NONE:
527 *tiles_per_row = tex->image.bo_stride;
528 return tex_tile_none_offset;
529 case GEN6_TILING_X:
530 *tiles_per_row = tex->image.bo_stride / 512;
531 return tex_tile_x_offset;
532 case GEN6_TILING_Y:
533 *tiles_per_row = tex->image.bo_stride / 128;
534 return tex_tile_y_offset;
535 case GEN8_TILING_W:
536 *tiles_per_row = tex->image.bo_stride / 64;
537 return tex_tile_w_offset;
538 }
539 }
540
541 static void *
542 tex_staging_sys_map_bo(struct ilo_texture *tex,
543 bool for_read_back,
544 bool linear_view)
545 {
546 const struct ilo_screen *is = ilo_screen(tex->base.screen);
547 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
548 void *ptr;
549
550 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
551 !linear_view))
552 ptr = intel_bo_map(tex->vma.bo, !for_read_back);
553 else
554 ptr = intel_bo_map_gtt(tex->vma.bo);
555
556 if (ptr)
557 ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
558
559 return ptr;
560 }
561
562 static void
563 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
564 {
565 intel_bo_unmap(tex->vma.bo);
566 }
567
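/**
 * Read the depth/stencil data back into the staging system buffer, untiling
 * it and interleaving the separate stencil data when there is any.
 */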
568 static bool
569 tex_staging_sys_zs_read(struct ilo_texture *tex,
570 const struct ilo_transfer *xfer)
571 {
572 const struct ilo_screen *is = ilo_screen(tex->base.screen);
573 const bool swizzle = is->dev.has_address_swizzling;
574 const struct pipe_box *box = &xfer->base.box;
575 const uint8_t *src;
576 tex_tile_offset_func tile_offset;
577 unsigned tiles_per_row;
578 int slice;
579
580 src = tex_staging_sys_map_bo(tex, true, false);
581 if (!src)
582 return false;
583
584 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
585
586 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
587
588 if (tex->separate_s8) {
589 struct ilo_texture *s8_tex = tex->separate_s8;
590 const uint8_t *s8_src;
591 tex_tile_offset_func s8_tile_offset;
592 unsigned s8_tiles_per_row;
593 int dst_cpp, dst_s8_pos, src_cpp_used;
594
595 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
596 if (!s8_src) {
597 tex_staging_sys_unmap_bo(tex);
598 return false;
599 }
600
601 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
602
603 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
604 assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
605
606 dst_cpp = 4;
607 dst_s8_pos = 3;
608 src_cpp_used = 3;
609 }
610 else {
611 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
612 assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
613
614 dst_cpp = 8;
615 dst_s8_pos = 4;
616 src_cpp_used = 4;
617 }
618
619 for (slice = 0; slice < box->depth; slice++) {
620 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
621 uint8_t *dst;
622 int i, j;
623
624 tex_get_box_origin(tex, xfer->base.level, slice,
625 box, &mem_x, &mem_y);
626 tex_get_box_origin(s8_tex, xfer->base.level, slice,
627 box, &s8_mem_x, &s8_mem_y);
628
629 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
630
631 for (i = 0; i < box->height; i++) {
632 unsigned x = mem_x, s8_x = s8_mem_x;
633 uint8_t *d = dst;
634
635 for (j = 0; j < box->width; j++) {
636 const unsigned offset =
637 tile_offset(x, mem_y, tiles_per_row, swizzle);
638 const unsigned s8_offset =
639 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
640
641 memcpy(d, src + offset, src_cpp_used);
642 d[dst_s8_pos] = s8_src[s8_offset];
643
644 d += dst_cpp;
645 x += tex->image.block_size;
646 s8_x++;
647 }
648
649 dst += xfer->base.stride;
650 mem_y++;
651 s8_mem_y++;
652 }
653 }
654
655 tex_staging_sys_unmap_bo(s8_tex);
656 }
657 else {
658 assert(tex->image_format == PIPE_FORMAT_S8_UINT);
659
660 for (slice = 0; slice < box->depth; slice++) {
661 unsigned mem_x, mem_y;
662 uint8_t *dst;
663 int i, j;
664
665 tex_get_box_origin(tex, xfer->base.level, slice,
666 box, &mem_x, &mem_y);
667
668 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
669
670 for (i = 0; i < box->height; i++) {
671 unsigned x = mem_x;
672 uint8_t *d = dst;
673
674 for (j = 0; j < box->width; j++) {
675 const unsigned offset =
676 tile_offset(x, mem_y, tiles_per_row, swizzle);
677
678 *d = src[offset];
679
680 d++;
681 x++;
682 }
683
684 dst += xfer->base.stride;
685 mem_y++;
686 }
687 }
688 }
689
690 tex_staging_sys_unmap_bo(tex);
691
692 return true;
693 }
694
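/**
 * Write the staging system buffer out to the depth/stencil bo(s), tiling the
 * data and splitting out the stencil component when there is a separate
 * stencil bo.
 */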
695 static bool
696 tex_staging_sys_zs_write(struct ilo_texture *tex,
697 const struct ilo_transfer *xfer)
698 {
699 const struct ilo_screen *is = ilo_screen(tex->base.screen);
700 const bool swizzle = is->dev.has_address_swizzling;
701 const struct pipe_box *box = &xfer->base.box;
702 uint8_t *dst;
703 tex_tile_offset_func tile_offset;
704 unsigned tiles_per_row;
705 int slice;
706
707 dst = tex_staging_sys_map_bo(tex, false, false);
708 if (!dst)
709 return false;
710
711 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
712
713 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
714
715 if (tex->separate_s8) {
716 struct ilo_texture *s8_tex = tex->separate_s8;
717 uint8_t *s8_dst;
718 tex_tile_offset_func s8_tile_offset;
719 unsigned s8_tiles_per_row;
720 int src_cpp, src_s8_pos, dst_cpp_used;
721
722 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
723 if (!s8_dst) {
724          tex_staging_sys_unmap_bo(tex);
725 return false;
726 }
727
728 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
729
730 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
731 assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
732
733 src_cpp = 4;
734 src_s8_pos = 3;
735 dst_cpp_used = 3;
736 }
737 else {
738 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
739 assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
740
741 src_cpp = 8;
742 src_s8_pos = 4;
743 dst_cpp_used = 4;
744 }
745
746 for (slice = 0; slice < box->depth; slice++) {
747 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
748 const uint8_t *src;
749 int i, j;
750
751 tex_get_box_origin(tex, xfer->base.level, slice,
752 box, &mem_x, &mem_y);
753 tex_get_box_origin(s8_tex, xfer->base.level, slice,
754 box, &s8_mem_x, &s8_mem_y);
755
756 src = xfer->staging.sys + xfer->base.layer_stride * slice;
757
758 for (i = 0; i < box->height; i++) {
759 unsigned x = mem_x, s8_x = s8_mem_x;
760 const uint8_t *s = src;
761
762 for (j = 0; j < box->width; j++) {
763 const unsigned offset =
764 tile_offset(x, mem_y, tiles_per_row, swizzle);
765 const unsigned s8_offset =
766 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
767
768 memcpy(dst + offset, s, dst_cpp_used);
769 s8_dst[s8_offset] = s[src_s8_pos];
770
771 s += src_cpp;
772 x += tex->image.block_size;
773 s8_x++;
774 }
775
776 src += xfer->base.stride;
777 mem_y++;
778 s8_mem_y++;
779 }
780 }
781
782 tex_staging_sys_unmap_bo(s8_tex);
783 }
784 else {
785 assert(tex->image_format == PIPE_FORMAT_S8_UINT);
786
787 for (slice = 0; slice < box->depth; slice++) {
788 unsigned mem_x, mem_y;
789 const uint8_t *src;
790 int i, j;
791
792 tex_get_box_origin(tex, xfer->base.level, slice,
793 box, &mem_x, &mem_y);
794
795 src = xfer->staging.sys + xfer->base.layer_stride * slice;
796
797 for (i = 0; i < box->height; i++) {
798 unsigned x = mem_x;
799 const uint8_t *s = src;
800
801 for (j = 0; j < box->width; j++) {
802 const unsigned offset =
803 tile_offset(x, mem_y, tiles_per_row, swizzle);
804
805 dst[offset] = *s;
806
807 s++;
808 x++;
809 }
810
811 src += xfer->base.stride;
812 mem_y++;
813 }
814 }
815 }
816
817 tex_staging_sys_unmap_bo(tex);
818
819 return true;
820 }
821
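/**
 * Write the staging system buffer out to the texture, converting from the
 * resource format to the image format on the fly when they differ.
 */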
822 static bool
823 tex_staging_sys_convert_write(struct ilo_texture *tex,
824 const struct ilo_transfer *xfer)
825 {
826 const struct pipe_box *box = &xfer->base.box;
827 unsigned dst_slice_stride;
828 void *dst;
829 int slice;
830
831 dst = tex_staging_sys_map_bo(tex, false, true);
832 if (!dst)
833 return false;
834
835 dst += tex_get_box_offset(tex, xfer->base.level, box);
836
837 /* slice stride is not always available */
838 if (box->depth > 1)
839 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
840 else
841 dst_slice_stride = 0;
842
843 if (unlikely(tex->image_format == tex->base.format)) {
844 util_copy_box(dst, tex->image_format, tex->image.bo_stride,
845 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
846 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
847 0, 0, 0);
848
849 tex_staging_sys_unmap_bo(tex);
850
851 return true;
852 }
853
854 switch (tex->base.format) {
855 case PIPE_FORMAT_ETC1_RGB8:
856 assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
857
858 for (slice = 0; slice < box->depth; slice++) {
859 const void *src =
860 xfer->staging.sys + xfer->base.layer_stride * slice;
861
862 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
863 tex->image.bo_stride, src, xfer->base.stride,
864 box->width, box->height);
865
866 dst += dst_slice_stride;
867 }
868 break;
869 default:
870 assert(!"unable to convert the staging data");
871 break;
872 }
873
874 tex_staging_sys_unmap_bo(tex);
875
876 return true;
877 }
878
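/**
 * Write the staging system buffer back to the texture if the transfer was
 * for writing.
 */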
879 static void
880 tex_staging_sys_writeback(struct ilo_transfer *xfer)
881 {
882 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
883 bool success;
884
885 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
886 return;
887
888 switch (xfer->method) {
889 case ILO_TRANSFER_MAP_SW_CONVERT:
890 success = tex_staging_sys_convert_write(tex, xfer);
891 break;
892 case ILO_TRANSFER_MAP_SW_ZS:
893 success = tex_staging_sys_zs_write(tex, xfer);
894 break;
895 default:
896 assert(!"unknown mapping method");
897 success = false;
898 break;
899 }
900
901 if (!success)
902 ilo_err("failed to map resource for moving staging data\n");
903 }
904
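/**
 * Read the texture back into the staging system buffer, unless the transfer
 * usage says the current contents can be discarded.
 */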
905 static bool
906 tex_staging_sys_readback(struct ilo_transfer *xfer)
907 {
908 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
909 bool read_back = false, success;
910
911 /* see if we need to read the resource back */
912 if (xfer->base.usage & PIPE_TRANSFER_READ) {
913 read_back = true;
914 }
915 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
916 const unsigned discard_flags =
917 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
918
919 if (!(xfer->base.usage & discard_flags))
920 read_back = true;
921 }
922
923 if (!read_back)
924 return true;
925
926 switch (xfer->method) {
927 case ILO_TRANSFER_MAP_SW_CONVERT:
928 assert(!"no on-the-fly format conversion for mapping");
929 success = false;
930 break;
931 case ILO_TRANSFER_MAP_SW_ZS:
932 success = tex_staging_sys_zs_read(tex, xfer);
933 break;
934 default:
935 assert(!"unknown mapping method");
936 success = false;
937 break;
938 }
939
940 return success;
941 }
942
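/**
 * Map a texture transfer with the chosen method and set up the transfer
 * strides.
 */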
943 static void *
944 tex_map(struct ilo_transfer *xfer)
945 {
946 void *ptr;
947
948 switch (xfer->method) {
949 case ILO_TRANSFER_MAP_CPU:
950 case ILO_TRANSFER_MAP_GTT:
951 case ILO_TRANSFER_MAP_GTT_ASYNC:
952 ptr = xfer_map(xfer);
953 if (ptr) {
954 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
955
956 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
957
958 /* stride is for a block row, not a texel row */
959 xfer->base.stride = tex->image.bo_stride;
960 /* note that slice stride is not always available */
961 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
962 tex_get_slice_stride(tex, xfer->base.level) : 0;
963 }
964 break;
965 case ILO_TRANSFER_MAP_STAGING:
966 ptr = xfer_map(xfer);
967 if (ptr) {
968 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
969 xfer->base.stride = staging->image.bo_stride;
970 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
971 }
972 break;
973 case ILO_TRANSFER_MAP_SW_CONVERT:
974 case ILO_TRANSFER_MAP_SW_ZS:
975 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
976 ptr = xfer_map(xfer);
977 else
978 ptr = NULL;
979 break;
980 default:
981 assert(!"unknown mapping method");
982 ptr = NULL;
983 break;
984 }
985
986 return ptr;
987 }
988
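/**
 * Map a buffer transfer.  Only the x and width of the transfer box are
 * meaningful.
 */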
989 static void *
990 buf_map(struct ilo_transfer *xfer)
991 {
992 void *ptr;
993
994 ptr = xfer_map(xfer);
995 if (!ptr)
996 return NULL;
997
998 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
999 ptr += xfer->base.box.x;
1000
1001 xfer->base.stride = 0;
1002 xfer->base.layer_stride = 0;
1003
1004 assert(xfer->base.level == 0);
1005 assert(xfer->base.box.y == 0);
1006 assert(xfer->base.box.z == 0);
1007 assert(xfer->base.box.height == 1);
1008 assert(xfer->base.box.depth == 1);
1009
1010 return ptr;
1011 }
1012
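/**
 * Copy the staging resource, or only the specified region of it, over to the
 * real resource with a pipelined blit.
 */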
1013 static void
1014 copy_staging_resource(struct ilo_context *ilo,
1015 struct ilo_transfer *xfer,
1016 const struct pipe_box *box)
1017 {
1018 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1019 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1020 struct pipe_box modified_box;
1021
1022 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1023
1024 if (!box) {
1025 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1026 xfer->base.box.depth, &modified_box);
1027 box = &modified_box;
1028 }
1029 else if (pad_x) {
1030 modified_box = *box;
1031 modified_box.x += pad_x;
1032 box = &modified_box;
1033 }
1034
1035 ilo_blitter_blt_copy_resource(ilo->blitter,
1036 xfer->base.resource, xfer->base.level,
1037 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1038 xfer->staging.res, 0, box);
1039 }
1040
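/**
 * Return true if the bo is busy or is referenced by the unsubmitted batch.
 * *need_submit tells whether a submit is needed before a blocking map or
 * pwrite can really block on the bo.
 */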
1041 static bool
1042 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1043 {
1044 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1045
1046 if (need_submit)
1047 *need_submit = referenced;
1048
1049 if (referenced)
1050 return true;
1051
1052 return intel_bo_is_busy(bo);
1053 }
1054
1055 /**
1056 * Choose the best mapping method, depending on the transfer usage and whether
1057 * the bo is busy.
1058 */
1059 static bool
1060 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1061 {
1062 struct pipe_resource *res = xfer->base.resource;
1063 bool need_submit;
1064
1065 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1066 return false;
1067
1068 /* see if we can avoid blocking */
1069 if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
1070 bool resource_renamed;
1071
1072 if (!xfer_unblock(xfer, &resource_renamed)) {
1073 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1074 return false;
1075
1076          /* submit so that the bo is really busy and map() correctly blocks */
1077 if (need_submit)
1078 ilo_cp_submit(ilo->cp, "syncing for transfers");
1079 }
1080
1081 if (resource_renamed)
1082 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1083 }
1084
1085 return true;
1086 }
1087
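/**
 * Write data into a buffer with pwrite(), renaming the bo or going through a
 * staging buffer and a pipelined blit to avoid blocking on a busy bo when
 * possible.
 */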
1088 static void
1089 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1090 unsigned usage, int offset, int size, const void *data)
1091 {
1092 struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
1093 bool need_submit;
1094
1095 /* see if we can avoid blocking */
1096 if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
1097 bool unblocked = false;
1098
1099 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1100 ilo_resource_rename_bo(res)) {
1101 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1102 unblocked = true;
1103 }
1104 else {
1105 struct pipe_resource templ, *staging;
1106
1107 /*
1108           * allocate a staging buffer to hold the data and copy it over with a
1109           * pipelined blit
1110 */
1111 templ = *res;
1112 templ.width0 = size;
1113 templ.usage = PIPE_USAGE_STAGING;
1114 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1115 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1116 if (staging) {
1117 const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
1118 struct pipe_box staging_box;
1119
1120 /* offset by staging_vma->bo_offset for pwrite */
1121 intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
1122 size, data);
1123
1124 u_box_1d(0, size, &staging_box);
1125 ilo_blitter_blt_copy_resource(ilo->blitter,
1126 res, 0, offset, 0, 0,
1127 staging, 0, &staging_box);
1128
1129 pipe_resource_reference(&staging, NULL);
1130
1131 return;
1132 }
1133 }
1134
1135       /* submit so that the bo is really busy and pwrite() correctly blocks */
1136 if (!unblocked && need_submit)
1137 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1138 }
1139
1140 /* offset by buf->vma.bo_offset for pwrite */
1141 intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
1142 }
1143
1144 static void
1145 ilo_transfer_flush_region(struct pipe_context *pipe,
1146 struct pipe_transfer *transfer,
1147 const struct pipe_box *box)
1148 {
1149 struct ilo_context *ilo = ilo_context(pipe);
1150 struct ilo_transfer *xfer = ilo_transfer(transfer);
1151
1152 /*
1153 * The staging resource is mapped persistently and coherently. We can copy
1154 * without unmapping.
1155 */
1156 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1157 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1158 copy_staging_resource(ilo, xfer, box);
1159 }
1160
1161 static void
1162 ilo_transfer_unmap(struct pipe_context *pipe,
1163 struct pipe_transfer *transfer)
1164 {
1165 struct ilo_context *ilo = ilo_context(pipe);
1166 struct ilo_transfer *xfer = ilo_transfer(transfer);
1167
1168 xfer_unmap(xfer);
1169
1170 switch (xfer->method) {
1171 case ILO_TRANSFER_MAP_STAGING:
1172 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1173 copy_staging_resource(ilo, xfer, NULL);
1174 pipe_resource_reference(&xfer->staging.res, NULL);
1175 break;
1176 case ILO_TRANSFER_MAP_SW_CONVERT:
1177 case ILO_TRANSFER_MAP_SW_ZS:
1178 tex_staging_sys_writeback(xfer);
1179 align_free(xfer->staging.sys);
1180 break;
1181 default:
1182 break;
1183 }
1184
1185 pipe_resource_reference(&xfer->base.resource, NULL);
1186
1187 util_slab_free(&ilo->transfer_mempool, xfer);
1188 }
1189
1190 static void *
1191 ilo_transfer_map(struct pipe_context *pipe,
1192 struct pipe_resource *res,
1193 unsigned level,
1194 unsigned usage,
1195 const struct pipe_box *box,
1196 struct pipe_transfer **transfer)
1197 {
1198 struct ilo_context *ilo = ilo_context(pipe);
1199 struct ilo_transfer *xfer;
1200 void *ptr;
1201
1202 /* note that xfer is not zero'd */
1203 xfer = util_slab_alloc(&ilo->transfer_mempool);
1204 if (!xfer) {
1205 *transfer = NULL;
1206 return NULL;
1207 }
1208
1209 xfer->base.resource = NULL;
1210 pipe_resource_reference(&xfer->base.resource, res);
1211 xfer->base.level = level;
1212 xfer->base.usage = usage;
1213 xfer->base.box = *box;
1214
1215 ilo_blit_resolve_transfer(ilo, &xfer->base);
1216
1217 if (choose_transfer_method(ilo, xfer)) {
1218 if (res->target == PIPE_BUFFER)
1219 ptr = buf_map(xfer);
1220 else
1221 ptr = tex_map(xfer);
1222 }
1223 else {
1224 ptr = NULL;
1225 }
1226
1227 if (!ptr) {
1228 pipe_resource_reference(&xfer->base.resource, NULL);
1229 util_slab_free(&ilo->transfer_mempool, xfer);
1230 *transfer = NULL;
1231 return NULL;
1232 }
1233
1234 *transfer = &xfer->base;
1235
1236 return ptr;
1237 }
1238
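/**
 * Upload boxed data to a resource.  Synchronized buffer uploads go through
 * buf_pwrite(); everything else takes the default path.
 */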
1239 static void
1240 ilo_transfer_inline_write(struct pipe_context *pipe,
1241 struct pipe_resource *res,
1242 unsigned level,
1243 unsigned usage,
1244 const struct pipe_box *box,
1245 const void *data,
1246 unsigned stride,
1247 unsigned layer_stride)
1248 {
1249 if (likely(res->target == PIPE_BUFFER) &&
1250 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1251 /* they should specify just an offset and a size */
1252 assert(level == 0);
1253 assert(box->y == 0);
1254 assert(box->z == 0);
1255 assert(box->height == 1);
1256 assert(box->depth == 1);
1257
1258 buf_pwrite(ilo_context(pipe), res,
1259 usage, box->x, box->width, data);
1260 }
1261 else {
1262 u_default_transfer_inline_write(pipe, res,
1263 level, usage, box, data, stride, layer_stride);
1264 }
1265 }
1266
1267 /**
1268 * Initialize transfer-related functions.
1269 */
1270 void
1271 ilo_init_transfer_functions(struct ilo_context *ilo)
1272 {
1273 ilo->base.transfer_map = ilo_transfer_map;
1274 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1275 ilo->base.transfer_unmap = ilo_transfer_unmap;
1276 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1277 }