ilo: add ilo_image_can_enable_aux()
src/gallium/drivers/ilo/ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
  43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
  61  * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer
  62  *   while it is mapped.  Synchronization is done by defining memory
  63  *   barriers, explicitly via memory_barrier() or implicitly via
  64  *   transfer_flush_region(), as well as GPU fences.
  65  * - When PIPE_TRANSFER_COHERENT is set, updates by either the CPU or the
  66  *   GPU should be made visible to the other side immediately.  Since the
  67  *   kernel flushes GPU caches at the end of each batch buffer, the CPU
  68  *   always sees GPU updates.  We could use a coherent mapping to make all
  69  *   persistent mappings coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
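/*
 * As a concrete illustration of how these flags combine (a hypothetical
 * caller; pipe_buffer_map_range() is the generic Gallium helper and the
 * buffer and sizes are made up, not taken from this driver):
 *
 *    struct pipe_transfer *xfer;
 *    void *ptr = pipe_buffer_map_range(pipe, buf, offset, size,
 *                                      PIPE_TRANSFER_WRITE |
 *                                      PIPE_TRANSFER_DISCARD_RANGE,
 *                                      &xfer);
 *    memcpy(ptr, data, size);
 *    pipe_buffer_unmap(pipe, xfer);
 *
 * If buf is busy, DISCARD_RANGE lets us map a staging bo here and copy it
 * over to the real bo on unmap.  Adding PIPE_TRANSFER_UNSYNCHRONIZED would
 * instead map the real bo asynchronously, and adding PIPE_TRANSFER_DONTBLOCK
 * would make the map fail rather than stall when neither trick applies.
 */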
74
75 /**
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex->image, transfer->level)) {
100 m = ILO_TRANSFER_MAP_SW_ZS;
101 need_convert = true;
102 }
103 } else if (tex->image.format != tex->base.format) {
104 m = ILO_TRANSFER_MAP_SW_CONVERT;
105 need_convert = true;
106 }
107
108 if (need_convert) {
109 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
110 return false;
111
112 *method = m;
113 return true;
114 }
115
116 tiled = (tex->image.tiling != GEN6_TILING_NONE);
117 }
118
119 if (tiled)
120 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
121 else if (is->dev.has_llc)
122 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
123 else if (usage & PIPE_TRANSFER_PERSISTENT)
124 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
125 else if (usage & PIPE_TRANSFER_READ)
126 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
127 else
128 m = ILO_TRANSFER_MAP_GTT;
129
130 *method = m;
131
132 return true;
133 }
134
135 /**
136 * Return true if usage allows the use of staging bo to avoid blocking.
137 */
138 static bool
139 usage_allows_staging_bo(unsigned usage)
140 {
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
143 PIPE_TRANSFER_DISCARD_RANGE |
144 PIPE_TRANSFER_FLUSH_EXPLICIT);
145 const unsigned reasons_against = (PIPE_TRANSFER_READ |
146 PIPE_TRANSFER_MAP_DIRECTLY |
147 PIPE_TRANSFER_PERSISTENT);
148
149 return (usage & can_writeback) && !(usage & reasons_against);
150 }
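/*
 * For example (purely illustrative): WRITE | DISCARD_RANGE qualifies for a
 * staging bo, while WRITE alone or WRITE | READ does not, because the
 * existing contents of the resource would have to be brought into the
 * staging bo first, which we do not do here.
 */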
151
152 /**
153 * Allocate the staging resource. It is always linear and its size matches
 154  * the transfer box, with proper padding.
155 */
156 static bool
157 xfer_alloc_staging_res(struct ilo_transfer *xfer)
158 {
159 const struct pipe_resource *res = xfer->base.resource;
160 const struct pipe_box *box = &xfer->base.box;
161 struct pipe_resource templ;
162
163 memset(&templ, 0, sizeof(templ));
164
165 templ.format = res->format;
166
167 if (res->target == PIPE_BUFFER) {
168 templ.target = PIPE_BUFFER;
169 templ.width0 =
170 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
171 }
172 else {
173 /* use 2D array for any texture target */
174 templ.target = PIPE_TEXTURE_2D_ARRAY;
175 templ.width0 = box->width;
176 }
177
178 templ.height0 = box->height;
179 templ.depth0 = 1;
180 templ.array_size = box->depth;
181 templ.nr_samples = 1;
182 templ.usage = PIPE_USAGE_STAGING;
183 templ.bind = PIPE_BIND_TRANSFER_WRITE;
184
185 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
186 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
187 PIPE_RESOURCE_FLAG_MAP_COHERENT;
188 }
189
190 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
191
192 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
193 assert(ilo_texture(xfer->staging.res)->image.tiling ==
194 GEN6_TILING_NONE);
195 }
196
197 return (xfer->staging.res != NULL);
198 }
199
200 /**
201 * Use an alternative transfer method or rename the resource to unblock an
202 * otherwise blocking transfer.
203 */
204 static bool
205 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
206 {
207 struct pipe_resource *res = xfer->base.resource;
208 bool unblocked = false, renamed = false;
209
210 switch (xfer->method) {
211 case ILO_TRANSFER_MAP_CPU:
212 case ILO_TRANSFER_MAP_GTT:
213 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
214 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
215 unblocked = true;
216 }
217 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
218 ilo_resource_rename_bo(res)) {
219 renamed = true;
220 unblocked = true;
221 }
222 else if (usage_allows_staging_bo(xfer->base.usage) &&
223 xfer_alloc_staging_res(xfer)) {
224 xfer->method = ILO_TRANSFER_MAP_STAGING;
225 unblocked = true;
226 }
227 break;
228 case ILO_TRANSFER_MAP_GTT_ASYNC:
229 case ILO_TRANSFER_MAP_STAGING:
230 unblocked = true;
231 break;
232 default:
233 break;
234 }
235
236 *resource_renamed = renamed;
237
238 return unblocked;
239 }
240
241 /**
242 * Allocate the staging system buffer based on the resource format and the
243 * transfer box.
244 */
245 static bool
246 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
247 {
248 const enum pipe_format format = xfer->base.resource->format;
249 const struct pipe_box *box = &xfer->base.box;
250 const unsigned alignment = 64;
251
252 /* need to tell the world the layout */
253 xfer->base.stride =
254 align(util_format_get_stride(format, box->width), alignment);
255 xfer->base.layer_stride =
256 util_format_get_2d_size(format, xfer->base.stride, box->height);
257
258 xfer->staging.sys =
259 align_malloc(xfer->base.layer_stride * box->depth, alignment);
260
261 return (xfer->staging.sys != NULL);
262 }
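/*
 * A worked example of the sizing above (the numbers are made up): for a
 * 100x60x1 box of PIPE_FORMAT_B8G8R8A8_UNORM, util_format_get_stride()
 * returns 100 * 4 = 400 bytes, which is aligned up to a stride of 448.
 * The layer stride is then 448 * 60 = 26880 bytes, and 26880 * 1 bytes are
 * allocated for the single layer.
 */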
263
264 /**
265 * Map according to the method. The staging system buffer should have been
266 * allocated if the method requires it.
267 */
268 static void *
269 xfer_map(struct ilo_transfer *xfer)
270 {
271 void *ptr;
272
273 switch (xfer->method) {
274 case ILO_TRANSFER_MAP_CPU:
275 ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
276 xfer->base.usage & PIPE_TRANSFER_WRITE);
277 break;
278 case ILO_TRANSFER_MAP_GTT:
279 ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
280 break;
281 case ILO_TRANSFER_MAP_GTT_ASYNC:
282 ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
283 break;
284 case ILO_TRANSFER_MAP_STAGING:
285 {
286 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
287 struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
288
289 /*
290 * We want a writable, optionally persistent and coherent, mapping
 291           * for a linear bo.  We could call resource_get_transfer_method(),
 292           * but the choice here turns out to be fairly simple.
293 */
294 if (is->dev.has_llc)
295 ptr = intel_bo_map(bo, true);
296 else
297 ptr = intel_bo_map_gtt(bo);
298
299 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
300 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
301
302 }
303 break;
304 case ILO_TRANSFER_MAP_SW_CONVERT:
305 case ILO_TRANSFER_MAP_SW_ZS:
306 ptr = xfer->staging.sys;
307 break;
308 default:
309 assert(!"unknown mapping method");
310 ptr = NULL;
311 break;
312 }
313
314 return ptr;
315 }
316
317 /**
318 * Unmap a transfer.
319 */
320 static void
321 xfer_unmap(struct ilo_transfer *xfer)
322 {
323 switch (xfer->method) {
324 case ILO_TRANSFER_MAP_CPU:
325 case ILO_TRANSFER_MAP_GTT:
326 case ILO_TRANSFER_MAP_GTT_ASYNC:
327 intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
328 break;
329 case ILO_TRANSFER_MAP_STAGING:
330 intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
331 break;
332 default:
333 break;
334 }
335 }
336
337 static void
338 tex_get_box_origin(const struct ilo_texture *tex,
339 unsigned level, unsigned slice,
340 const struct pipe_box *box,
341 unsigned *mem_x, unsigned *mem_y)
342 {
343 unsigned x, y;
344
345 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
346 x += box->x;
347 y += box->y;
348
349 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
350 }
351
352 static unsigned
353 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
354 const struct pipe_box *box)
355 {
356 unsigned mem_x, mem_y;
357
358 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
359
360 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
361 }
362
363 static unsigned
364 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
365 {
366 return ilo_image_get_slice_stride(&tex->image, level);
367 }
368
369 static unsigned
370 tex_tile_x_swizzle(unsigned addr)
371 {
372 /*
373 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
374 *
375 * "As shown in the tiling algorithm, the new address bit[6] should be:
376 *
377 * Address bit[6] <= TiledAddr bit[6] XOR
378 * TiledAddr bit[9] XOR
379 * TiledAddr bit[10]"
380 */
381 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
382 }
383
384 static unsigned
385 tex_tile_y_swizzle(unsigned addr)
386 {
387 /*
388 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
389 *
390 * "As shown in the tiling algorithm, The new address bit[6] becomes:
391 *
392 * Address bit[6] <= TiledAddr bit[6] XOR
393 * TiledAddr bit[9]"
394 */
395 return addr ^ ((addr >> 3) & 0x40);
396 }
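/*
 * A quick sanity check of the swizzles (illustrative values): for the tiled
 * offset 0x0240, bits 6 and 9 are set and bit 10 is clear, so both swizzles
 * flip bit 6 and return 0x0200.  For 0x0640, bits 9 and 10 cancel out under
 * the X swizzle (0x0640 is returned unchanged), while the Y swizzle still
 * flips bit 6 and returns 0x0600.
 */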
397
398 static unsigned
399 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
400 unsigned tiles_per_row, bool swizzle)
401 {
402 /*
403 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
404 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
405 * tiled region are numbered in row-major order, starting from zero. The
406 * tile number can thus be calculated as follows:
407 *
408 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
409 *
410 * OWords in that tile are also numbered in row-major order, starting from
411 * zero. The OWord number can thus be calculated as follows:
412 *
413 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
414 *
415 * and the tiled offset is
416 *
417 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
418 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
419 */
420 unsigned tile, offset;
421
422 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
423 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
424
425 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
426 }
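/*
 * A worked example (the values are made up): with bo_stride = 2048,
 * tiles_per_row is 2048 / 512 = 4, and the byte at (mem_x, mem_y) =
 * (1000, 10) lands at
 *
 *   tile   = (10 / 8) * 4 + (1000 / 512) = 5
 *   offset = 5 * 4096 + (10 % 8) * 512 + (1000 % 512) = 21992
 *
 * before any address swizzling is applied.
 */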
427
428 static unsigned
429 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
430 unsigned tiles_per_row, bool swizzle)
431 {
432 /*
433 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
434 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
435 * tiled region are numbered in row-major order, starting from zero. The
436 * tile number can thus be calculated as follows:
437 *
438 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
439 *
440 * OWords in that tile are numbered in column-major order, starting from
441 * zero. The OWord number can thus be calculated as follows:
442 *
443 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
444 *
445 * and the tiled offset is
446 *
447 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
448 */
449 unsigned tile, oword, offset;
450
451 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
452 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
453 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
454
455 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
456 }
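/*
 * A worked example (the values are made up): with bo_stride = 1024,
 * tiles_per_row is 1024 / 128 = 8, and the byte at (mem_x, mem_y) =
 * (300, 40) lands at
 *
 *   tile   = (40 / 32) * 8 + (300 / 128) = 10
 *   oword  = ((300 % 128) / 16) * 32 + (40 % 32) = 72
 *   offset = 10 * 4096 + 72 * 16 + (300 % 16) = 42124
 *
 * before any address swizzling is applied.
 */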
457
458 static unsigned
459 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
460 unsigned tiles_per_row, bool swizzle)
461 {
462 /*
463 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
464 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
465 * tiled region are numbered in row-major order, starting from zero. The
466 * tile number can thus be calculated as follows:
467 *
468 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
469 *
470 * 8x8-blocks in that tile are numbered in column-major order, starting
471 * from zero. The 8x8-block number can thus be calculated as follows:
472 *
473 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
474 *
475 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
476 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
477 * We have
478 *
479 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
480 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
481 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
482 *
483 * and the tiled offset is
484 *
485 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
486 */
487 unsigned tile, blk8, blk4, blk2, blk1, offset;
488
489 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
490 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
491 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
492 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
493 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
494 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
495
496 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
497 }
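/*
 * A worked example (the values are made up): with bo_stride = 512,
 * tiles_per_row is 512 / 64 = 8, and the stencil byte at (mem_x, mem_y) =
 * (70, 10) lands at
 *
 *   tile   = (10 / 64) * 8 + (70 / 64) = 1
 *   blk8   = ((70 % 64) / 8) * 8 + ((10 % 64) / 8) = 1
 *   blk4   = (((10 % 64) / 4) & 1) * 2 + (((70 % 64) / 4) & 1) = 1
 *   blk2   = (((10 % 64) / 2) & 1) * 2 + (((70 % 64) / 2) & 1) = 3
 *   blk1   = ((10 % 64) & 1) * 2 + ((70 % 64) & 1) = 0
 *   offset = 1 * 4096 + 1 * 64 + 1 * 16 + 3 * 4 + 0 = 4188
 *
 * before any address swizzling is applied.
 */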
498
499 static unsigned
500 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
501 unsigned tiles_per_row, bool swizzle)
502 {
503 return mem_y * tiles_per_row + mem_x;
504 }
505
506 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
507 unsigned tiles_per_row,
508 bool swizzle);
509
510 static tex_tile_offset_func
511 tex_tile_choose_offset_func(const struct ilo_texture *tex,
512 unsigned *tiles_per_row)
513 {
514 switch (tex->image.tiling) {
515 default:
516 assert(!"unknown tiling");
517 /* fall through */
518 case GEN6_TILING_NONE:
519 *tiles_per_row = tex->image.bo_stride;
520 return tex_tile_none_offset;
521 case GEN6_TILING_X:
522 *tiles_per_row = tex->image.bo_stride / 512;
523 return tex_tile_x_offset;
524 case GEN6_TILING_Y:
525 *tiles_per_row = tex->image.bo_stride / 128;
526 return tex_tile_y_offset;
527 case GEN8_TILING_W:
528 *tiles_per_row = tex->image.bo_stride / 64;
529 return tex_tile_w_offset;
530 }
531 }
532
533 static void *
534 tex_staging_sys_map_bo(struct ilo_texture *tex,
535 bool for_read_back,
536 bool linear_view)
537 {
538 const struct ilo_screen *is = ilo_screen(tex->base.screen);
539 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
540 void *ptr;
541
542 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
543 !linear_view))
544 ptr = intel_bo_map(tex->image.bo, !for_read_back);
545 else
546 ptr = intel_bo_map_gtt(tex->image.bo);
547
548 return ptr;
549 }
550
551 static void
552 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
553 {
554 intel_bo_unmap(tex->image.bo);
555 }
556
557 static bool
558 tex_staging_sys_zs_read(struct ilo_texture *tex,
559 const struct ilo_transfer *xfer)
560 {
561 const struct ilo_screen *is = ilo_screen(tex->base.screen);
562 const bool swizzle = is->dev.has_address_swizzling;
563 const struct pipe_box *box = &xfer->base.box;
564 const uint8_t *src;
565 tex_tile_offset_func tile_offset;
566 unsigned tiles_per_row;
567 int slice;
568
569 src = tex_staging_sys_map_bo(tex, true, false);
570 if (!src)
571 return false;
572
573 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
574
575 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
576
577 if (tex->separate_s8) {
578 struct ilo_texture *s8_tex = tex->separate_s8;
579 const uint8_t *s8_src;
580 tex_tile_offset_func s8_tile_offset;
581 unsigned s8_tiles_per_row;
582 int dst_cpp, dst_s8_pos, src_cpp_used;
583
584 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
585 if (!s8_src) {
586 tex_staging_sys_unmap_bo(tex);
587 return false;
588 }
589
590 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
591
592 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
593 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
594
595 dst_cpp = 4;
596 dst_s8_pos = 3;
597 src_cpp_used = 3;
598 }
599 else {
600 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
601 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
602
603 dst_cpp = 8;
604 dst_s8_pos = 4;
605 src_cpp_used = 4;
606 }
607
608 for (slice = 0; slice < box->depth; slice++) {
609 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
610 uint8_t *dst;
611 int i, j;
612
613 tex_get_box_origin(tex, xfer->base.level, slice,
614 box, &mem_x, &mem_y);
615 tex_get_box_origin(s8_tex, xfer->base.level, slice,
616 box, &s8_mem_x, &s8_mem_y);
617
618 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
619
620 for (i = 0; i < box->height; i++) {
621 unsigned x = mem_x, s8_x = s8_mem_x;
622 uint8_t *d = dst;
623
624 for (j = 0; j < box->width; j++) {
625 const unsigned offset =
626 tile_offset(x, mem_y, tiles_per_row, swizzle);
627 const unsigned s8_offset =
628 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
629
630 memcpy(d, src + offset, src_cpp_used);
631 d[dst_s8_pos] = s8_src[s8_offset];
632
633 d += dst_cpp;
634 x += tex->image.block_size;
635 s8_x++;
636 }
637
638 dst += xfer->base.stride;
639 mem_y++;
640 s8_mem_y++;
641 }
642 }
643
644 tex_staging_sys_unmap_bo(s8_tex);
645 }
646 else {
647 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
648
649 for (slice = 0; slice < box->depth; slice++) {
650 unsigned mem_x, mem_y;
651 uint8_t *dst;
652 int i, j;
653
654 tex_get_box_origin(tex, xfer->base.level, slice,
655 box, &mem_x, &mem_y);
656
657 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
658
659 for (i = 0; i < box->height; i++) {
660 unsigned x = mem_x;
661 uint8_t *d = dst;
662
663 for (j = 0; j < box->width; j++) {
664 const unsigned offset =
665 tile_offset(x, mem_y, tiles_per_row, swizzle);
666
667 *d = src[offset];
668
669 d++;
670 x++;
671 }
672
673 dst += xfer->base.stride;
674 mem_y++;
675 }
676 }
677 }
678
679 tex_staging_sys_unmap_bo(tex);
680
681 return true;
682 }
683
684 static bool
685 tex_staging_sys_zs_write(struct ilo_texture *tex,
686 const struct ilo_transfer *xfer)
687 {
688 const struct ilo_screen *is = ilo_screen(tex->base.screen);
689 const bool swizzle = is->dev.has_address_swizzling;
690 const struct pipe_box *box = &xfer->base.box;
691 uint8_t *dst;
692 tex_tile_offset_func tile_offset;
693 unsigned tiles_per_row;
694 int slice;
695
696 dst = tex_staging_sys_map_bo(tex, false, false);
697 if (!dst)
698 return false;
699
700 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
701
702 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
703
704 if (tex->separate_s8) {
705 struct ilo_texture *s8_tex = tex->separate_s8;
706 uint8_t *s8_dst;
707 tex_tile_offset_func s8_tile_offset;
708 unsigned s8_tiles_per_row;
709 int src_cpp, src_s8_pos, dst_cpp_used;
710
711 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
712 if (!s8_dst) {
 713          tex_staging_sys_unmap_bo(tex);
714 return false;
715 }
716
717 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
718
719 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
720 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
721
722 src_cpp = 4;
723 src_s8_pos = 3;
724 dst_cpp_used = 3;
725 }
726 else {
727 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
728 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
729
730 src_cpp = 8;
731 src_s8_pos = 4;
732 dst_cpp_used = 4;
733 }
734
735 for (slice = 0; slice < box->depth; slice++) {
736 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
737 const uint8_t *src;
738 int i, j;
739
740 tex_get_box_origin(tex, xfer->base.level, slice,
741 box, &mem_x, &mem_y);
742 tex_get_box_origin(s8_tex, xfer->base.level, slice,
743 box, &s8_mem_x, &s8_mem_y);
744
745 src = xfer->staging.sys + xfer->base.layer_stride * slice;
746
747 for (i = 0; i < box->height; i++) {
748 unsigned x = mem_x, s8_x = s8_mem_x;
749 const uint8_t *s = src;
750
751 for (j = 0; j < box->width; j++) {
752 const unsigned offset =
753 tile_offset(x, mem_y, tiles_per_row, swizzle);
754 const unsigned s8_offset =
755 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
756
757 memcpy(dst + offset, s, dst_cpp_used);
758 s8_dst[s8_offset] = s[src_s8_pos];
759
760 s += src_cpp;
761 x += tex->image.block_size;
762 s8_x++;
763 }
764
765 src += xfer->base.stride;
766 mem_y++;
767 s8_mem_y++;
768 }
769 }
770
771 tex_staging_sys_unmap_bo(s8_tex);
772 }
773 else {
774 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
775
776 for (slice = 0; slice < box->depth; slice++) {
777 unsigned mem_x, mem_y;
778 const uint8_t *src;
779 int i, j;
780
781 tex_get_box_origin(tex, xfer->base.level, slice,
782 box, &mem_x, &mem_y);
783
784 src = xfer->staging.sys + xfer->base.layer_stride * slice;
785
786 for (i = 0; i < box->height; i++) {
787 unsigned x = mem_x;
788 const uint8_t *s = src;
789
790 for (j = 0; j < box->width; j++) {
791 const unsigned offset =
792 tile_offset(x, mem_y, tiles_per_row, swizzle);
793
794 dst[offset] = *s;
795
796 s++;
797 x++;
798 }
799
800 src += xfer->base.stride;
801 mem_y++;
802 }
803 }
804 }
805
806 tex_staging_sys_unmap_bo(tex);
807
808 return true;
809 }
810
811 static bool
812 tex_staging_sys_convert_write(struct ilo_texture *tex,
813 const struct ilo_transfer *xfer)
814 {
815 const struct pipe_box *box = &xfer->base.box;
816 unsigned dst_slice_stride;
817 void *dst;
818 int slice;
819
820 dst = tex_staging_sys_map_bo(tex, false, true);
821 if (!dst)
822 return false;
823
824 dst += tex_get_box_offset(tex, xfer->base.level, box);
825
826 /* slice stride is not always available */
827 if (box->depth > 1)
828 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
829 else
830 dst_slice_stride = 0;
831
832 if (unlikely(tex->image.format == tex->base.format)) {
833 util_copy_box(dst, tex->image.format, tex->image.bo_stride,
834 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
835 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
836 0, 0, 0);
837
838 tex_staging_sys_unmap_bo(tex);
839
840 return true;
841 }
842
843 switch (tex->base.format) {
844 case PIPE_FORMAT_ETC1_RGB8:
845 assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
846
847 for (slice = 0; slice < box->depth; slice++) {
848 const void *src =
849 xfer->staging.sys + xfer->base.layer_stride * slice;
850
851 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
852 tex->image.bo_stride, src, xfer->base.stride,
853 box->width, box->height);
854
855 dst += dst_slice_stride;
856 }
857 break;
858 default:
859 assert(!"unable to convert the staging data");
860 break;
861 }
862
863 tex_staging_sys_unmap_bo(tex);
864
865 return true;
866 }
867
868 static void
869 tex_staging_sys_writeback(struct ilo_transfer *xfer)
870 {
871 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
872 bool success;
873
874 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
875 return;
876
877 switch (xfer->method) {
878 case ILO_TRANSFER_MAP_SW_CONVERT:
879 success = tex_staging_sys_convert_write(tex, xfer);
880 break;
881 case ILO_TRANSFER_MAP_SW_ZS:
882 success = tex_staging_sys_zs_write(tex, xfer);
883 break;
884 default:
885 assert(!"unknown mapping method");
886 success = false;
887 break;
888 }
889
890 if (!success)
891 ilo_err("failed to map resource for moving staging data\n");
892 }
893
894 static bool
895 tex_staging_sys_readback(struct ilo_transfer *xfer)
896 {
897 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
898 bool read_back = false, success;
899
900 /* see if we need to read the resource back */
901 if (xfer->base.usage & PIPE_TRANSFER_READ) {
902 read_back = true;
903 }
904 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
905 const unsigned discard_flags =
906 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
907
908 if (!(xfer->base.usage & discard_flags))
909 read_back = true;
910 }
911
912 if (!read_back)
913 return true;
914
915 switch (xfer->method) {
916 case ILO_TRANSFER_MAP_SW_CONVERT:
917 assert(!"no on-the-fly format conversion for mapping");
918 success = false;
919 break;
920 case ILO_TRANSFER_MAP_SW_ZS:
921 success = tex_staging_sys_zs_read(tex, xfer);
922 break;
923 default:
924 assert(!"unknown mapping method");
925 success = false;
926 break;
927 }
928
929 return success;
930 }
931
932 static void *
933 tex_map(struct ilo_transfer *xfer)
934 {
935 void *ptr;
936
937 switch (xfer->method) {
938 case ILO_TRANSFER_MAP_CPU:
939 case ILO_TRANSFER_MAP_GTT:
940 case ILO_TRANSFER_MAP_GTT_ASYNC:
941 ptr = xfer_map(xfer);
942 if (ptr) {
943 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
944
945 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
946
947 /* stride is for a block row, not a texel row */
948 xfer->base.stride = tex->image.bo_stride;
949 /* note that slice stride is not always available */
950 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
951 tex_get_slice_stride(tex, xfer->base.level) : 0;
952 }
953 break;
954 case ILO_TRANSFER_MAP_STAGING:
955 ptr = xfer_map(xfer);
956 if (ptr) {
957 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
958 xfer->base.stride = staging->image.bo_stride;
959 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
960 }
961 break;
962 case ILO_TRANSFER_MAP_SW_CONVERT:
963 case ILO_TRANSFER_MAP_SW_ZS:
964 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
965 ptr = xfer_map(xfer);
966 else
967 ptr = NULL;
968 break;
969 default:
970 assert(!"unknown mapping method");
971 ptr = NULL;
972 break;
973 }
974
975 return ptr;
976 }
977
978 static void *
979 buf_map(struct ilo_transfer *xfer)
980 {
981 void *ptr;
982
983 ptr = xfer_map(xfer);
984 if (!ptr)
985 return NULL;
986
987 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
988 ptr += xfer->base.box.x;
989
990 xfer->base.stride = 0;
991 xfer->base.layer_stride = 0;
992
993 assert(xfer->base.level == 0);
994 assert(xfer->base.box.y == 0);
995 assert(xfer->base.box.z == 0);
996 assert(xfer->base.box.height == 1);
997 assert(xfer->base.box.depth == 1);
998
999 return ptr;
1000 }
1001
1002 static void
1003 copy_staging_resource(struct ilo_context *ilo,
1004 struct ilo_transfer *xfer,
1005 const struct pipe_box *box)
1006 {
1007 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1008 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1009 struct pipe_box modified_box;
1010
1011 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1012
1013 if (!box) {
1014 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1015 xfer->base.box.depth, &modified_box);
1016 box = &modified_box;
1017 }
1018 else if (pad_x) {
1019 modified_box = *box;
1020 modified_box.x += pad_x;
1021 box = &modified_box;
1022 }
1023
1024 ilo_blitter_blt_copy_resource(ilo->blitter,
1025 xfer->base.resource, xfer->base.level,
1026 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1027 xfer->staging.res, 0, box);
1028 }
1029
1030 static bool
1031 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1032 {
1033 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1034
1035 if (need_submit)
1036 *need_submit = referenced;
1037
1038 if (referenced)
1039 return true;
1040
1041 return intel_bo_is_busy(bo);
1042 }
1043
1044 /**
1045 * Choose the best mapping method, depending on the transfer usage and whether
1046 * the bo is busy.
1047 */
1048 static bool
1049 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1050 {
1051 struct pipe_resource *res = xfer->base.resource;
1052 bool need_submit;
1053
1054 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1055 return false;
1056
1057 /* see if we can avoid blocking */
1058 if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
1059 bool resource_renamed;
1060
1061 if (!xfer_unblock(xfer, &resource_renamed)) {
1062 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1063 return false;
1064
1065          /* submit so that the bo is really busy and map() correctly blocks */
1066 if (need_submit)
1067 ilo_cp_submit(ilo->cp, "syncing for transfers");
1068 }
1069
1070 if (resource_renamed)
1071 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1072 }
1073
1074 return true;
1075 }
1076
1077 static void
1078 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1079 unsigned usage, int offset, int size, const void *data)
1080 {
1081 struct ilo_buffer *buf = ilo_buffer(res);
1082 bool need_submit;
1083
1084 /* see if we can avoid blocking */
1085 if (is_bo_busy(ilo, buf->bo, &need_submit)) {
1086 bool unblocked = false;
1087
1088 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1089 ilo_resource_rename_bo(res)) {
1090 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1091 unblocked = true;
1092 }
1093 else {
1094 struct pipe_resource templ, *staging;
1095
1096 /*
1097           * allocate a staging buffer to hold the data and do a pipelined
1098           * copy of it to the real buffer
1099 */
1100 templ = *res;
1101 templ.width0 = size;
1102 templ.usage = PIPE_USAGE_STAGING;
1103 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1104 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1105 if (staging) {
1106 struct pipe_box staging_box;
1107
1108 intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
1109
1110 u_box_1d(0, size, &staging_box);
1111 ilo_blitter_blt_copy_resource(ilo->blitter,
1112 res, 0, offset, 0, 0,
1113 staging, 0, &staging_box);
1114
1115 pipe_resource_reference(&staging, NULL);
1116
1117 return;
1118 }
1119 }
1120
1121       /* submit so that the bo is really busy and pwrite() correctly blocks */
1122 if (!unblocked && need_submit)
1123 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1124 }
1125
1126 intel_bo_pwrite(buf->bo, offset, size, data);
1127 }
1128
1129 static void
1130 ilo_transfer_flush_region(struct pipe_context *pipe,
1131 struct pipe_transfer *transfer,
1132 const struct pipe_box *box)
1133 {
1134 struct ilo_context *ilo = ilo_context(pipe);
1135 struct ilo_transfer *xfer = ilo_transfer(transfer);
1136
1137 /*
1138 * The staging resource is mapped persistently and coherently. We can copy
1139 * without unmapping.
1140 */
1141 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1142 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1143 copy_staging_resource(ilo, xfer, box);
1144 }
1145
1146 static void
1147 ilo_transfer_unmap(struct pipe_context *pipe,
1148 struct pipe_transfer *transfer)
1149 {
1150 struct ilo_context *ilo = ilo_context(pipe);
1151 struct ilo_transfer *xfer = ilo_transfer(transfer);
1152
1153 xfer_unmap(xfer);
1154
1155 switch (xfer->method) {
1156 case ILO_TRANSFER_MAP_STAGING:
1157 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1158 copy_staging_resource(ilo, xfer, NULL);
1159 pipe_resource_reference(&xfer->staging.res, NULL);
1160 break;
1161 case ILO_TRANSFER_MAP_SW_CONVERT:
1162 case ILO_TRANSFER_MAP_SW_ZS:
1163 tex_staging_sys_writeback(xfer);
1164 align_free(xfer->staging.sys);
1165 break;
1166 default:
1167 break;
1168 }
1169
1170 pipe_resource_reference(&xfer->base.resource, NULL);
1171
1172 util_slab_free(&ilo->transfer_mempool, xfer);
1173 }
1174
1175 static void *
1176 ilo_transfer_map(struct pipe_context *pipe,
1177 struct pipe_resource *res,
1178 unsigned level,
1179 unsigned usage,
1180 const struct pipe_box *box,
1181 struct pipe_transfer **transfer)
1182 {
1183 struct ilo_context *ilo = ilo_context(pipe);
1184 struct ilo_transfer *xfer;
1185 void *ptr;
1186
1187 /* note that xfer is not zero'd */
1188 xfer = util_slab_alloc(&ilo->transfer_mempool);
1189 if (!xfer) {
1190 *transfer = NULL;
1191 return NULL;
1192 }
1193
1194 xfer->base.resource = NULL;
1195 pipe_resource_reference(&xfer->base.resource, res);
1196 xfer->base.level = level;
1197 xfer->base.usage = usage;
1198 xfer->base.box = *box;
1199
1200 ilo_blit_resolve_transfer(ilo, &xfer->base);
1201
1202 if (choose_transfer_method(ilo, xfer)) {
1203 if (res->target == PIPE_BUFFER)
1204 ptr = buf_map(xfer);
1205 else
1206 ptr = tex_map(xfer);
1207 }
1208 else {
1209 ptr = NULL;
1210 }
1211
1212 if (!ptr) {
1213 pipe_resource_reference(&xfer->base.resource, NULL);
1214 util_slab_free(&ilo->transfer_mempool, xfer);
1215 *transfer = NULL;
1216 return NULL;
1217 }
1218
1219 *transfer = &xfer->base;
1220
1221 return ptr;
1222 }
1223
1224 static void
1225 ilo_transfer_inline_write(struct pipe_context *pipe,
1226 struct pipe_resource *res,
1227 unsigned level,
1228 unsigned usage,
1229 const struct pipe_box *box,
1230 const void *data,
1231 unsigned stride,
1232 unsigned layer_stride)
1233 {
1234 if (likely(res->target == PIPE_BUFFER) &&
1235 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1236 /* they should specify just an offset and a size */
1237 assert(level == 0);
1238 assert(box->y == 0);
1239 assert(box->z == 0);
1240 assert(box->height == 1);
1241 assert(box->depth == 1);
1242
1243 buf_pwrite(ilo_context(pipe), res,
1244 usage, box->x, box->width, data);
1245 }
1246 else {
1247 u_default_transfer_inline_write(pipe, res,
1248 level, usage, box, data, stride, layer_stride);
1249 }
1250 }
1251
1252 /**
1253 * Initialize transfer-related functions.
1254 */
1255 void
1256 ilo_init_transfer_functions(struct ilo_context *ilo)
1257 {
1258 ilo->base.transfer_map = ilo_transfer_map;
1259 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1260 ilo->base.transfer_unmap = ilo_transfer_unmap;
1261 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1262 }