[mesa.git] / src / gallium / drivers / ilo / ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43 * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo with a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61 * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer while it
62 * is mapped. Synchronization is done by defining memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67 * GPU caches at the end of each batch buffer, the CPU always sees GPU updates.
68 * We could use a coherent mapping to make all persistent mappings
69 * coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
74
75 /**
76 * Return a transfer method suitable for the usage. Mapping with the
77 * returned method will correctly block when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_texture_can_enable_hiz(tex, transfer->level,
100 transfer->box.z, transfer->box.depth)) {
101 m = ILO_TRANSFER_MAP_SW_ZS;
102 need_convert = true;
103 }
104 } else if (tex->image.format != tex->base.format) {
105 m = ILO_TRANSFER_MAP_SW_CONVERT;
106 need_convert = true;
107 }
108
109 if (need_convert) {
110 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
111 return false;
112
113 *method = m;
114 return true;
115 }
116
117 tiled = (tex->image.tiling != GEN6_TILING_NONE);
118 }
119
120 if (tiled)
121 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
122 else if (is->dev.has_llc)
123 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
124 else if (usage & PIPE_TRANSFER_PERSISTENT)
125 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
126 else if (usage & PIPE_TRANSFER_READ)
127 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
128 else
129 m = ILO_TRANSFER_MAP_GTT;
130
131 *method = m;
132
133 return true;
134 }
135
136 /**
137 * Return true if the usage allows the use of a staging bo to avoid blocking.
138 */
139 static bool
140 usage_allows_staging_bo(unsigned usage)
141 {
142 /* do we know how to write the data back to the resource? */
143 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
144 PIPE_TRANSFER_DISCARD_RANGE |
145 PIPE_TRANSFER_FLUSH_EXPLICIT);
146 const unsigned reasons_against = (PIPE_TRANSFER_READ |
147 PIPE_TRANSFER_MAP_DIRECTLY |
148 PIPE_TRANSFER_PERSISTENT);
149
150 return (usage & can_writeback) && !(usage & reasons_against);
151 }
152
153 /**
154 * Allocate the staging resource. It is always linear and its size matches
155 * the transfer box, with proper padding.
156 */
157 static bool
158 xfer_alloc_staging_res(struct ilo_transfer *xfer)
159 {
160 const struct pipe_resource *res = xfer->base.resource;
161 const struct pipe_box *box = &xfer->base.box;
162 struct pipe_resource templ;
163
164 memset(&templ, 0, sizeof(templ));
165
166 templ.format = res->format;
167
168 if (res->target == PIPE_BUFFER) {
169 templ.target = PIPE_BUFFER;
170 templ.width0 =
171 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
172 }
173 else {
174 /* use 2D array for any texture target */
175 templ.target = PIPE_TEXTURE_2D_ARRAY;
176 templ.width0 = box->width;
177 }
178
179 templ.height0 = box->height;
180 templ.depth0 = 1;
181 templ.array_size = box->depth;
182 templ.nr_samples = 1;
183 templ.usage = PIPE_USAGE_STAGING;
184 templ.bind = PIPE_BIND_TRANSFER_WRITE;
185
186 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
187 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
188 PIPE_RESOURCE_FLAG_MAP_COHERENT;
189 }
190
191 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
192
193 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
194 assert(ilo_texture(xfer->staging.res)->image.tiling ==
195 GEN6_TILING_NONE);
196 }
197
198 return (xfer->staging.res != NULL);
199 }
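/*
 * A note on the (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) padding above:
 * the staging buffer is over-allocated so that the data starts at the same
 * offset modulo the alignment as it does in the real buffer.  xfer_map()
 * advances the returned pointer by the same amount and
 * copy_staging_resource() begins the copy at that offset, presumably so
 * that the BLT copy sees matching alignment on both sides.  For
 * illustration only, with an alignment of 64 and box->x == 100, the pad is
 * 100 % 64 == 36 bytes.
 */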
200
201 /**
202 * Use an alternative transfer method or rename the resource to unblock an
203 * otherwise blocking transfer.
204 */
205 static bool
206 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
207 {
208 struct pipe_resource *res = xfer->base.resource;
209 bool unblocked = false, renamed = false;
210
211 switch (xfer->method) {
212 case ILO_TRANSFER_MAP_CPU:
213 case ILO_TRANSFER_MAP_GTT:
214 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
215 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
216 unblocked = true;
217 }
218 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
219 ilo_resource_rename_bo(res)) {
220 renamed = true;
221 unblocked = true;
222 }
223 else if (usage_allows_staging_bo(xfer->base.usage) &&
224 xfer_alloc_staging_res(xfer)) {
225 xfer->method = ILO_TRANSFER_MAP_STAGING;
226 unblocked = true;
227 }
228 break;
229 case ILO_TRANSFER_MAP_GTT_ASYNC:
230 case ILO_TRANSFER_MAP_STAGING:
231 unblocked = true;
232 break;
233 default:
234 break;
235 }
236
237 *resource_renamed = renamed;
238
239 return unblocked;
240 }
241
242 /**
243 * Allocate the staging system buffer based on the resource format and the
244 * transfer box.
245 */
246 static bool
247 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
248 {
249 const enum pipe_format format = xfer->base.resource->format;
250 const struct pipe_box *box = &xfer->base.box;
251 const unsigned alignment = 64;
252
253 /* need to tell the world the layout */
254 xfer->base.stride =
255 align(util_format_get_stride(format, box->width), alignment);
256 xfer->base.layer_stride =
257 util_format_get_2d_size(format, xfer->base.stride, box->height);
258
259 xfer->staging.sys =
260 align_malloc(xfer->base.layer_stride * box->depth, alignment);
261
262 return (xfer->staging.sys != NULL);
263 }
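/*
 * As an illustration of the layout computed above (format and box chosen
 * arbitrarily), mapping a PIPE_FORMAT_B8G8R8A8_UNORM texture with a
 * 100x50x2 box gives
 *
 *   stride       = align(100 * 4, 64) = 448 bytes per row
 *   layer_stride = 448 * 50           = 22400 bytes per slice
 *   allocation   = 22400 * 2          = 44800 bytes
 *
 * For compressed formats, util_format_get_stride() returns the size of a
 * block row, so the same math applies per block row rather than per texel
 * row.
 */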
264
265 /**
266 * Map according to the method. The staging system buffer should have been
267 * allocated if the method requires it.
268 */
269 static void *
270 xfer_map(struct ilo_transfer *xfer)
271 {
272 void *ptr;
273
274 switch (xfer->method) {
275 case ILO_TRANSFER_MAP_CPU:
276 ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
277 xfer->base.usage & PIPE_TRANSFER_WRITE);
278 break;
279 case ILO_TRANSFER_MAP_GTT:
280 ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
281 break;
282 case ILO_TRANSFER_MAP_GTT_ASYNC:
283 ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
284 break;
285 case ILO_TRANSFER_MAP_STAGING:
286 {
287 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
288 struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
289
290 /*
291 * We want a writable (and optionally persistent and coherent) mapping of
292 * a linear bo. We could call resource_get_transfer_method(), but the
293 * choice here is simple enough to make directly.
294 */
295 if (is->dev.has_llc)
296 ptr = intel_bo_map(bo, true);
297 else
298 ptr = intel_bo_map_gtt(bo);
299
300 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
301 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
302
303 }
304 break;
305 case ILO_TRANSFER_MAP_SW_CONVERT:
306 case ILO_TRANSFER_MAP_SW_ZS:
307 ptr = xfer->staging.sys;
308 break;
309 default:
310 assert(!"unknown mapping method");
311 ptr = NULL;
312 break;
313 }
314
315 return ptr;
316 }
317
318 /**
319 * Unmap a transfer.
320 */
321 static void
322 xfer_unmap(struct ilo_transfer *xfer)
323 {
324 switch (xfer->method) {
325 case ILO_TRANSFER_MAP_CPU:
326 case ILO_TRANSFER_MAP_GTT:
327 case ILO_TRANSFER_MAP_GTT_ASYNC:
328 intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
329 break;
330 case ILO_TRANSFER_MAP_STAGING:
331 intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
332 break;
333 default:
334 break;
335 }
336 }
337
338 static void
339 tex_get_box_origin(const struct ilo_texture *tex,
340 unsigned level, unsigned slice,
341 const struct pipe_box *box,
342 unsigned *mem_x, unsigned *mem_y)
343 {
344 unsigned x, y;
345
346 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
347 x += box->x;
348 y += box->y;
349
350 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
351 }
352
353 static unsigned
354 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
355 const struct pipe_box *box)
356 {
357 unsigned mem_x, mem_y;
358
359 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
360
361 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
362 }
363
364 static unsigned
365 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
366 {
367 return ilo_image_get_slice_stride(&tex->image, level);
368 }
369
370 static unsigned
371 tex_tile_x_swizzle(unsigned addr)
372 {
373 /*
374 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
375 *
376 * "As shown in the tiling algorithm, the new address bit[6] should be:
377 *
378 * Address bit[6] <= TiledAddr bit[6] XOR
379 * TiledAddr bit[9] XOR
380 * TiledAddr bit[10]"
381 */
382 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
383 }
384
385 static unsigned
386 tex_tile_y_swizzle(unsigned addr)
387 {
388 /*
389 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
390 *
391 * "As shown in the tiling algorithm, The new address bit[6] becomes:
392 *
393 * Address bit[6] <= TiledAddr bit[6] XOR
394 * TiledAddr bit[9]"
395 */
396 return addr ^ ((addr >> 3) & 0x40);
397 }
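/*
 * Both swizzle functions above implement the PRM formulas with shifts:
 * address bit 9 lands on bit 6 of (addr >> 3) and address bit 10 lands on
 * bit 6 of (addr >> 4), so masking with 0x40 isolates the XOR term and the
 * final XOR flips only address bit 6.  For example (address chosen
 * arbitrarily), addr = 0x5458 has bit 9 clear and bit 10 set, so X-tile
 * swizzling flips bit 6 and yields 0x5418.
 */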
398
399 static unsigned
400 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
401 unsigned tiles_per_row, bool swizzle)
402 {
403 /*
404 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
405 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
406 * tiled region are numbered in row-major order, starting from zero. The
407 * tile number can thus be calculated as follows:
408 *
409 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
410 *
411 * OWords in that tile are also numbered in row-major order, starting from
412 * zero. The OWord number can thus be calculated as follows:
413 *
414 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
415 *
416 * and the tiled offset is
417 *
418 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
419 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
420 */
421 unsigned tile, offset;
422
423 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
424 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
425
426 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
427 }
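/*
 * Worked example for the X-tiling offset above (numbers chosen
 * arbitrarily): with a bo stride of 2048 bytes there are 2048 / 512 = 4
 * tiles per row.  For mem_x = 600 and mem_y = 10,
 *
 *   tile   = (10 / 8) * 4 + (600 / 512)              = 5
 *   offset = 5 * 4096 + (10 % 8) * 512 + (600 % 512) = 21592
 *
 * before any address swizzling is applied.
 */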
428
429 static unsigned
430 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
431 unsigned tiles_per_row, bool swizzle)
432 {
433 /*
434 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
435 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
436 * tiled region are numbered in row-major order, starting from zero. The
437 * tile number can thus be calculated as follows:
438 *
439 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
440 *
441 * OWords in that tile are numbered in column-major order, starting from
442 * zero. The OWord number can thus be calculated as follows:
443 *
444 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
445 *
446 * and the tiled offset is
447 *
448 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
449 */
450 unsigned tile, oword, offset;
451
452 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
453 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
454 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
455
456 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
457 }
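/*
 * Worked example for the Y-tiling offset above (numbers chosen
 * arbitrarily): with a bo stride of 512 bytes there are 512 / 128 = 4
 * tiles per row.  For mem_x = 200 and mem_y = 40,
 *
 *   tile   = (40 / 32) * 4 + (200 / 128)         = 5
 *   oword  = ((200 % 128) / 16) * 32 + (40 % 32) = 136
 *   offset = 5 * 4096 + 136 * 16 + (200 % 16)    = 22664
 *
 * before any address swizzling is applied.
 */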
458
459 static unsigned
460 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
461 unsigned tiles_per_row, bool swizzle)
462 {
463 /*
464 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
465 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
466 * tiled region are numbered in row-major order, starting from zero. The
467 * tile number can thus be calculated as follows:
468 *
469 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
470 *
471 * 8x8-blocks in that tile are numbered in column-major order, starting
472 * from zero. The 8x8-block number can thus be calculated as follows:
473 *
474 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
475 *
476 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
477 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
478 * We have
479 *
480 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
481 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
482 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
483 *
484 * and the tiled offset is
485 *
486 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
487 */
488 unsigned tile, blk8, blk4, blk2, blk1, offset;
489
490 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
491 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
492 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
493 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
494 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
495 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
496
497 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
498 }
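/*
 * Worked example for the W-tiling offset above (numbers chosen
 * arbitrarily): with a bo stride of 128 bytes there are 128 / 64 = 2 tiles
 * per row.  For mem_x = 70 and mem_y = 5, the tile-local coordinates are
 * (6, 5) and
 *
 *   tile   = (5 / 64) * 2 + (70 / 64)          = 1
 *   blk8   = (6 / 8) * 8 + (5 / 8)             = 0
 *   blk4   = ((5 / 4) & 1) * 2 + ((6 / 4) & 1) = 3
 *   blk2   = ((5 / 2) & 1) * 2 + ((6 / 2) & 1) = 1
 *   blk1   = (5 & 1) * 2 + (6 & 1)             = 2
 *   offset = 1 * 4096 + 0 * 64 + 3 * 16 + 1 * 4 + 2 = 4150
 */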
499
500 static unsigned
501 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
502 unsigned tiles_per_row, bool swizzle)
503 {
504 return mem_y * tiles_per_row + mem_x;
505 }
506
507 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
508 unsigned tiles_per_row,
509 bool swizzle);
510
511 static tex_tile_offset_func
512 tex_tile_choose_offset_func(const struct ilo_texture *tex,
513 unsigned *tiles_per_row)
514 {
515 switch (tex->image.tiling) {
516 default:
517 assert(!"unknown tiling");
518 /* fall through */
519 case GEN6_TILING_NONE:
520 *tiles_per_row = tex->image.bo_stride;
521 return tex_tile_none_offset;
522 case GEN6_TILING_X:
523 *tiles_per_row = tex->image.bo_stride / 512;
524 return tex_tile_x_offset;
525 case GEN6_TILING_Y:
526 *tiles_per_row = tex->image.bo_stride / 128;
527 return tex_tile_y_offset;
528 case GEN8_TILING_W:
529 *tiles_per_row = tex->image.bo_stride / 64;
530 return tex_tile_w_offset;
531 }
532 }
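/*
 * Note that for GEN6_TILING_NONE the value returned in tiles_per_row is
 * really the bo stride in bytes, which is exactly what
 * tex_tile_none_offset() expects.
 */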
533
534 static void *
535 tex_staging_sys_map_bo(struct ilo_texture *tex,
536 bool for_read_back,
537 bool linear_view)
538 {
539 const struct ilo_screen *is = ilo_screen(tex->base.screen);
540 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
541 void *ptr;
542
543 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
544 !linear_view))
545 ptr = intel_bo_map(tex->image.bo, !for_read_back);
546 else
547 ptr = intel_bo_map_gtt(tex->image.bo);
548
549 return ptr;
550 }
551
552 static void
553 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
554 {
555 intel_bo_unmap(tex->image.bo);
556 }
557
558 static bool
559 tex_staging_sys_zs_read(struct ilo_texture *tex,
560 const struct ilo_transfer *xfer)
561 {
562 const struct ilo_screen *is = ilo_screen(tex->base.screen);
563 const bool swizzle = is->dev.has_address_swizzling;
564 const struct pipe_box *box = &xfer->base.box;
565 const uint8_t *src;
566 tex_tile_offset_func tile_offset;
567 unsigned tiles_per_row;
568 int slice;
569
570 src = tex_staging_sys_map_bo(tex, true, false);
571 if (!src)
572 return false;
573
574 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
575
576 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
577
578 if (tex->separate_s8) {
579 struct ilo_texture *s8_tex = tex->separate_s8;
580 const uint8_t *s8_src;
581 tex_tile_offset_func s8_tile_offset;
582 unsigned s8_tiles_per_row;
583 int dst_cpp, dst_s8_pos, src_cpp_used;
584
585 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
586 if (!s8_src) {
587 tex_staging_sys_unmap_bo(tex);
588 return false;
589 }
590
591 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
592
593 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
594 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
595
596 dst_cpp = 4;
597 dst_s8_pos = 3;
598 src_cpp_used = 3;
599 }
600 else {
601 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
602 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
603
604 dst_cpp = 8;
605 dst_s8_pos = 4;
606 src_cpp_used = 4;
607 }
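/*
 * In other words, each destination pixel is dst_cpp bytes: for
 * PIPE_FORMAT_Z24_UNORM_S8_UINT, bytes 0-2 come from the Z24X8 depth bo
 * and byte 3 from the separate S8 bo; for PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
 * bytes 0-3 hold the float depth value, byte 4 the stencil value, and
 * bytes 5-7 the undefined X24 padding.
 */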
608
609 for (slice = 0; slice < box->depth; slice++) {
610 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
611 uint8_t *dst;
612 int i, j;
613
614 tex_get_box_origin(tex, xfer->base.level, slice,
615 box, &mem_x, &mem_y);
616 tex_get_box_origin(s8_tex, xfer->base.level, slice,
617 box, &s8_mem_x, &s8_mem_y);
618
619 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
620
621 for (i = 0; i < box->height; i++) {
622 unsigned x = mem_x, s8_x = s8_mem_x;
623 uint8_t *d = dst;
624
625 for (j = 0; j < box->width; j++) {
626 const unsigned offset =
627 tile_offset(x, mem_y, tiles_per_row, swizzle);
628 const unsigned s8_offset =
629 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
630
631 memcpy(d, src + offset, src_cpp_used);
632 d[dst_s8_pos] = s8_src[s8_offset];
633
634 d += dst_cpp;
635 x += tex->image.block_size;
636 s8_x++;
637 }
638
639 dst += xfer->base.stride;
640 mem_y++;
641 s8_mem_y++;
642 }
643 }
644
645 tex_staging_sys_unmap_bo(s8_tex);
646 }
647 else {
648 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
649
650 for (slice = 0; slice < box->depth; slice++) {
651 unsigned mem_x, mem_y;
652 uint8_t *dst;
653 int i, j;
654
655 tex_get_box_origin(tex, xfer->base.level, slice,
656 box, &mem_x, &mem_y);
657
658 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
659
660 for (i = 0; i < box->height; i++) {
661 unsigned x = mem_x;
662 uint8_t *d = dst;
663
664 for (j = 0; j < box->width; j++) {
665 const unsigned offset =
666 tile_offset(x, mem_y, tiles_per_row, swizzle);
667
668 *d = src[offset];
669
670 d++;
671 x++;
672 }
673
674 dst += xfer->base.stride;
675 mem_y++;
676 }
677 }
678 }
679
680 tex_staging_sys_unmap_bo(tex);
681
682 return true;
683 }
684
685 static bool
686 tex_staging_sys_zs_write(struct ilo_texture *tex,
687 const struct ilo_transfer *xfer)
688 {
689 const struct ilo_screen *is = ilo_screen(tex->base.screen);
690 const bool swizzle = is->dev.has_address_swizzling;
691 const struct pipe_box *box = &xfer->base.box;
692 uint8_t *dst;
693 tex_tile_offset_func tile_offset;
694 unsigned tiles_per_row;
695 int slice;
696
697 dst = tex_staging_sys_map_bo(tex, false, false);
698 if (!dst)
699 return false;
700
701 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
702
703 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
704
705 if (tex->separate_s8) {
706 struct ilo_texture *s8_tex = tex->separate_s8;
707 uint8_t *s8_dst;
708 tex_tile_offset_func s8_tile_offset;
709 unsigned s8_tiles_per_row;
710 int src_cpp, src_s8_pos, dst_cpp_used;
711
712 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
713 if (!s8_dst) {
714 tex_staging_sys_unmap_bo(tex);
715 return false;
716 }
717
718 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
719
720 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
721 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
722
723 src_cpp = 4;
724 src_s8_pos = 3;
725 dst_cpp_used = 3;
726 }
727 else {
728 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
729 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
730
731 src_cpp = 8;
732 src_s8_pos = 4;
733 dst_cpp_used = 4;
734 }
735
736 for (slice = 0; slice < box->depth; slice++) {
737 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
738 const uint8_t *src;
739 int i, j;
740
741 tex_get_box_origin(tex, xfer->base.level, slice,
742 box, &mem_x, &mem_y);
743 tex_get_box_origin(s8_tex, xfer->base.level, slice,
744 box, &s8_mem_x, &s8_mem_y);
745
746 src = xfer->staging.sys + xfer->base.layer_stride * slice;
747
748 for (i = 0; i < box->height; i++) {
749 unsigned x = mem_x, s8_x = s8_mem_x;
750 const uint8_t *s = src;
751
752 for (j = 0; j < box->width; j++) {
753 const unsigned offset =
754 tile_offset(x, mem_y, tiles_per_row, swizzle);
755 const unsigned s8_offset =
756 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
757
758 memcpy(dst + offset, s, dst_cpp_used);
759 s8_dst[s8_offset] = s[src_s8_pos];
760
761 s += src_cpp;
762 x += tex->image.block_size;
763 s8_x++;
764 }
765
766 src += xfer->base.stride;
767 mem_y++;
768 s8_mem_y++;
769 }
770 }
771
772 tex_staging_sys_unmap_bo(s8_tex);
773 }
774 else {
775 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
776
777 for (slice = 0; slice < box->depth; slice++) {
778 unsigned mem_x, mem_y;
779 const uint8_t *src;
780 int i, j;
781
782 tex_get_box_origin(tex, xfer->base.level, slice,
783 box, &mem_x, &mem_y);
784
785 src = xfer->staging.sys + xfer->base.layer_stride * slice;
786
787 for (i = 0; i < box->height; i++) {
788 unsigned x = mem_x;
789 const uint8_t *s = src;
790
791 for (j = 0; j < box->width; j++) {
792 const unsigned offset =
793 tile_offset(x, mem_y, tiles_per_row, swizzle);
794
795 dst[offset] = *s;
796
797 s++;
798 x++;
799 }
800
801 src += xfer->base.stride;
802 mem_y++;
803 }
804 }
805 }
806
807 tex_staging_sys_unmap_bo(tex);
808
809 return true;
810 }
811
812 static bool
813 tex_staging_sys_convert_write(struct ilo_texture *tex,
814 const struct ilo_transfer *xfer)
815 {
816 const struct pipe_box *box = &xfer->base.box;
817 unsigned dst_slice_stride;
818 void *dst;
819 int slice;
820
821 dst = tex_staging_sys_map_bo(tex, false, true);
822 if (!dst)
823 return false;
824
825 dst += tex_get_box_offset(tex, xfer->base.level, box);
826
827 /* slice stride is not always available */
828 if (box->depth > 1)
829 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
830 else
831 dst_slice_stride = 0;
832
833 if (unlikely(tex->image.format == tex->base.format)) {
834 util_copy_box(dst, tex->image.format, tex->image.bo_stride,
835 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
836 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
837 0, 0, 0);
838
839 tex_staging_sys_unmap_bo(tex);
840
841 return true;
842 }
843
844 switch (tex->base.format) {
845 case PIPE_FORMAT_ETC1_RGB8:
846 assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
847
848 for (slice = 0; slice < box->depth; slice++) {
849 const void *src =
850 xfer->staging.sys + xfer->base.layer_stride * slice;
851
852 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
853 tex->image.bo_stride, src, xfer->base.stride,
854 box->width, box->height);
855
856 dst += dst_slice_stride;
857 }
858 break;
859 default:
860 assert(!"unable to convert the staging data");
861 break;
862 }
863
864 tex_staging_sys_unmap_bo(tex);
865
866 return true;
867 }
868
869 static void
870 tex_staging_sys_writeback(struct ilo_transfer *xfer)
871 {
872 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
873 bool success;
874
875 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
876 return;
877
878 switch (xfer->method) {
879 case ILO_TRANSFER_MAP_SW_CONVERT:
880 success = tex_staging_sys_convert_write(tex, xfer);
881 break;
882 case ILO_TRANSFER_MAP_SW_ZS:
883 success = tex_staging_sys_zs_write(tex, xfer);
884 break;
885 default:
886 assert(!"unknown mapping method");
887 success = false;
888 break;
889 }
890
891 if (!success)
892 ilo_err("failed to map resource for moving staging data\n");
893 }
894
895 static bool
896 tex_staging_sys_readback(struct ilo_transfer *xfer)
897 {
898 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
899 bool read_back = false, success;
900
901 /* see if we need to read the resource back */
902 if (xfer->base.usage & PIPE_TRANSFER_READ) {
903 read_back = true;
904 }
905 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
906 const unsigned discard_flags =
907 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
908
909 if (!(xfer->base.usage & discard_flags))
910 read_back = true;
911 }
912
913 if (!read_back)
914 return true;
915
916 switch (xfer->method) {
917 case ILO_TRANSFER_MAP_SW_CONVERT:
918 assert(!"no on-the-fly format conversion for mapping");
919 success = false;
920 break;
921 case ILO_TRANSFER_MAP_SW_ZS:
922 success = tex_staging_sys_zs_read(tex, xfer);
923 break;
924 default:
925 assert(!"unknown mapping method");
926 success = false;
927 break;
928 }
929
930 return success;
931 }
932
933 static void *
934 tex_map(struct ilo_transfer *xfer)
935 {
936 void *ptr;
937
938 switch (xfer->method) {
939 case ILO_TRANSFER_MAP_CPU:
940 case ILO_TRANSFER_MAP_GTT:
941 case ILO_TRANSFER_MAP_GTT_ASYNC:
942 ptr = xfer_map(xfer);
943 if (ptr) {
944 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
945
946 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
947
948 /* stride is for a block row, not a texel row */
949 xfer->base.stride = tex->image.bo_stride;
950 /* note that slice stride is not always available */
951 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
952 tex_get_slice_stride(tex, xfer->base.level) : 0;
953 }
954 break;
955 case ILO_TRANSFER_MAP_STAGING:
956 ptr = xfer_map(xfer);
957 if (ptr) {
958 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
959 xfer->base.stride = staging->image.bo_stride;
960 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
961 }
962 break;
963 case ILO_TRANSFER_MAP_SW_CONVERT:
964 case ILO_TRANSFER_MAP_SW_ZS:
965 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
966 ptr = xfer_map(xfer);
967 else
968 ptr = NULL;
969 break;
970 default:
971 assert(!"unknown mapping method");
972 ptr = NULL;
973 break;
974 }
975
976 return ptr;
977 }
978
979 static void *
980 buf_map(struct ilo_transfer *xfer)
981 {
982 void *ptr;
983
984 ptr = xfer_map(xfer);
985 if (!ptr)
986 return NULL;
987
988 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
989 ptr += xfer->base.box.x;
990
991 xfer->base.stride = 0;
992 xfer->base.layer_stride = 0;
993
994 assert(xfer->base.level == 0);
995 assert(xfer->base.box.y == 0);
996 assert(xfer->base.box.z == 0);
997 assert(xfer->base.box.height == 1);
998 assert(xfer->base.box.depth == 1);
999
1000 return ptr;
1001 }
1002
1003 static void
1004 copy_staging_resource(struct ilo_context *ilo,
1005 struct ilo_transfer *xfer,
1006 const struct pipe_box *box)
1007 {
1008 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1009 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1010 struct pipe_box modified_box;
1011
1012 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1013
1014 if (!box) {
1015 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1016 xfer->base.box.depth, &modified_box);
1017 box = &modified_box;
1018 }
1019 else if (pad_x) {
1020 modified_box = *box;
1021 modified_box.x += pad_x;
1022 box = &modified_box;
1023 }
1024
1025 ilo_blitter_blt_copy_resource(ilo->blitter,
1026 xfer->base.resource, xfer->base.level,
1027 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1028 xfer->staging.res, 0, box);
1029 }
1030
1031 static bool
1032 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1033 {
1034 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1035
1036 if (need_submit)
1037 *need_submit = referenced;
1038
1039 if (referenced)
1040 return true;
1041
1042 return intel_bo_is_busy(bo);
1043 }
1044
1045 /**
1046 * Choose the best mapping method, depending on the transfer usage and whether
1047 * the bo is busy.
1048 */
1049 static bool
1050 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1051 {
1052 struct pipe_resource *res = xfer->base.resource;
1053 bool need_submit;
1054
1055 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1056 return false;
1057
1058 /* see if we can avoid blocking */
1059 if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
1060 bool resource_renamed;
1061
1062 if (!xfer_unblock(xfer, &resource_renamed)) {
1063 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1064 return false;
1065
1066 /* submit so that the bo is really busy and map() correctly blocks */
1067 if (need_submit)
1068 ilo_cp_submit(ilo->cp, "syncing for transfers");
1069 }
1070
1071 if (resource_renamed)
1072 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1073 }
1074
1075 return true;
1076 }
1077
1078 static void
1079 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1080 unsigned usage, int offset, int size, const void *data)
1081 {
1082 struct ilo_buffer *buf = ilo_buffer(res);
1083 bool need_submit;
1084
1085 /* see if we can avoid blocking */
1086 if (is_bo_busy(ilo, buf->bo, &need_submit)) {
1087 bool unblocked = false;
1088
1089 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1090 ilo_resource_rename_bo(res)) {
1091 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1092 unblocked = true;
1093 }
1094 else {
1095 struct pipe_resource templ, *staging;
1096
1097 /*
1098 * allocate a staging buffer to hold the data and pipelined-copy it
1099 * over to the real bo
1100 */
1101 templ = *res;
1102 templ.width0 = size;
1103 templ.usage = PIPE_USAGE_STAGING;
1104 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1105 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1106 if (staging) {
1107 struct pipe_box staging_box;
1108
1109 intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
1110
1111 u_box_1d(0, size, &staging_box);
1112 ilo_blitter_blt_copy_resource(ilo->blitter,
1113 res, 0, offset, 0, 0,
1114 staging, 0, &staging_box);
1115
1116 pipe_resource_reference(&staging, NULL);
1117
1118 return;
1119 }
1120 }
1121
1122 /* submit so that the bo is really busy and pwrite() correctly blocks */
1123 if (!unblocked && need_submit)
1124 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1125 }
1126
1127 intel_bo_pwrite(buf->bo, offset, size, data);
1128 }
1129
1130 static void
1131 ilo_transfer_flush_region(struct pipe_context *pipe,
1132 struct pipe_transfer *transfer,
1133 const struct pipe_box *box)
1134 {
1135 struct ilo_context *ilo = ilo_context(pipe);
1136 struct ilo_transfer *xfer = ilo_transfer(transfer);
1137
1138 /*
1139 * The staging resource is mapped persistently and coherently. We can copy
1140 * without unmapping.
1141 */
1142 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1143 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1144 copy_staging_resource(ilo, xfer, box);
1145 }
1146
1147 static void
1148 ilo_transfer_unmap(struct pipe_context *pipe,
1149 struct pipe_transfer *transfer)
1150 {
1151 struct ilo_context *ilo = ilo_context(pipe);
1152 struct ilo_transfer *xfer = ilo_transfer(transfer);
1153
1154 xfer_unmap(xfer);
1155
1156 switch (xfer->method) {
1157 case ILO_TRANSFER_MAP_STAGING:
1158 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1159 copy_staging_resource(ilo, xfer, NULL);
1160 pipe_resource_reference(&xfer->staging.res, NULL);
1161 break;
1162 case ILO_TRANSFER_MAP_SW_CONVERT:
1163 case ILO_TRANSFER_MAP_SW_ZS:
1164 tex_staging_sys_writeback(xfer);
1165 align_free(xfer->staging.sys);
1166 break;
1167 default:
1168 break;
1169 }
1170
1171 pipe_resource_reference(&xfer->base.resource, NULL);
1172
1173 util_slab_free(&ilo->transfer_mempool, xfer);
1174 }
1175
1176 static void *
1177 ilo_transfer_map(struct pipe_context *pipe,
1178 struct pipe_resource *res,
1179 unsigned level,
1180 unsigned usage,
1181 const struct pipe_box *box,
1182 struct pipe_transfer **transfer)
1183 {
1184 struct ilo_context *ilo = ilo_context(pipe);
1185 struct ilo_transfer *xfer;
1186 void *ptr;
1187
1188 /* note that xfer is not zero'd */
1189 xfer = util_slab_alloc(&ilo->transfer_mempool);
1190 if (!xfer) {
1191 *transfer = NULL;
1192 return NULL;
1193 }
1194
1195 xfer->base.resource = NULL;
1196 pipe_resource_reference(&xfer->base.resource, res);
1197 xfer->base.level = level;
1198 xfer->base.usage = usage;
1199 xfer->base.box = *box;
1200
1201 ilo_blit_resolve_transfer(ilo, &xfer->base);
1202
1203 if (choose_transfer_method(ilo, xfer)) {
1204 if (res->target == PIPE_BUFFER)
1205 ptr = buf_map(xfer);
1206 else
1207 ptr = tex_map(xfer);
1208 }
1209 else {
1210 ptr = NULL;
1211 }
1212
1213 if (!ptr) {
1214 pipe_resource_reference(&xfer->base.resource, NULL);
1215 util_slab_free(&ilo->transfer_mempool, xfer);
1216 *transfer = NULL;
1217 return NULL;
1218 }
1219
1220 *transfer = &xfer->base;
1221
1222 return ptr;
1223 }
1224
1225 static void
1226 ilo_transfer_inline_write(struct pipe_context *pipe,
1227 struct pipe_resource *res,
1228 unsigned level,
1229 unsigned usage,
1230 const struct pipe_box *box,
1231 const void *data,
1232 unsigned stride,
1233 unsigned layer_stride)
1234 {
1235 if (likely(res->target == PIPE_BUFFER) &&
1236 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1237 /* the caller should specify just an offset and a size */
1238 assert(level == 0);
1239 assert(box->y == 0);
1240 assert(box->z == 0);
1241 assert(box->height == 1);
1242 assert(box->depth == 1);
1243
1244 buf_pwrite(ilo_context(pipe), res,
1245 usage, box->x, box->width, data);
1246 }
1247 else {
1248 u_default_transfer_inline_write(pipe, res,
1249 level, usage, box, data, stride, layer_stride);
1250 }
1251 }
1252
1253 /**
1254 * Initialize transfer-related functions.
1255 */
1256 void
1257 ilo_init_transfer_functions(struct ilo_context *ilo)
1258 {
1259 ilo->base.transfer_map = ilo_transfer_map;
1260 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1261 ilo->base.transfer_unmap = ilo_transfer_unmap;
1262 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1263 }