[mesa.git] / src / gallium / drivers / ilo / ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42  * those that are busy, we have to worry about synchronization.  We could wait
43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50  *    mapping, and do a pipelined copy to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61  *  - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer
62  *    while it is mapped.  Synchronization is done by inserting memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67  *    GPU caches at the end of each batch buffer, the CPU always sees GPU
68  *    updates.  We could use a coherent mapping to make all persistent
69  *    mappings coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
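/*
 * A minimal caller-side sketch (not part of this driver) of how the flags
 * above come into play; "ctx", "vbuf", "data", "offset", and "size" are
 * hypothetical names:
 *
 *    struct pipe_transfer *xfer;
 *    struct pipe_box box;
 *    void *ptr;
 *
 *    u_box_1d(offset, size, &box);
 *
 *    // WRITE | DISCARD_RANGE: if vbuf is busy, the scheme above lets the
 *    // driver map a staging bo here and do a pipelined copy to the real
 *    // bo when the transfer is unmapped.
 *    ptr = ctx->transfer_map(ctx, vbuf, 0,
 *                            PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
 *                            &box, &xfer);
 *    memcpy(ptr, data, size);
 *    ctx->transfer_unmap(ctx, xfer);
 */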
74
75 /**
76  * Return a transfer method suitable for the usage.  Mapping with the
77  * returned method blocks correctly when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_texture_can_enable_hiz(tex, transfer->level,
100 transfer->box.z, transfer->box.depth)) {
101 m = ILO_TRANSFER_MAP_SW_ZS;
102 need_convert = true;
103 }
104 } else if (tex->image.format != tex->base.format) {
105 m = ILO_TRANSFER_MAP_SW_CONVERT;
106 need_convert = true;
107 }
108
109 if (need_convert) {
110 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
111 return false;
112
113 *method = m;
114 return true;
115 }
116
117 tiled = (tex->image.tiling != GEN6_TILING_NONE);
118 }
119
120 if (tiled)
121 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
122 else if (is->dev.has_llc)
123 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
124 else if (usage & PIPE_TRANSFER_PERSISTENT)
125 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
126 else if (usage & PIPE_TRANSFER_READ)
127 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
128 else
129 m = ILO_TRANSFER_MAP_GTT;
130
131 *method = m;
132
133 return true;
134 }
135
136 /**
137 * Rename the bo of the resource.
138 */
139 static bool
140 resource_rename_bo(struct pipe_resource *res)
141 {
142 return (res->target == PIPE_BUFFER) ?
143 ilo_buffer_rename_bo(ilo_buffer(res)) :
144 ilo_texture_rename_bo(ilo_texture(res));
145 }
146
147 /**
148 * Return true if usage allows the use of staging bo to avoid blocking.
149 */
150 static bool
151 usage_allows_staging_bo(unsigned usage)
152 {
153 /* do we know how to write the data back to the resource? */
154 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
155 PIPE_TRANSFER_DISCARD_RANGE |
156 PIPE_TRANSFER_FLUSH_EXPLICIT);
157 const unsigned reasons_against = (PIPE_TRANSFER_READ |
158 PIPE_TRANSFER_MAP_DIRECTLY |
159 PIPE_TRANSFER_PERSISTENT);
160
161 return (usage & can_writeback) && !(usage & reasons_against);
162 }
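/*
 * A few illustrative usage combinations and what the check above decides
 * (flag prefixes shortened for brevity):
 *
 *    WRITE | DISCARD_RANGE                -> staging bo allowed
 *    WRITE | FLUSH_EXPLICIT               -> staging bo allowed
 *    READ | WRITE                         -> not allowed (data must be read back)
 *    WRITE | DISCARD_RANGE | MAP_DIRECTLY -> not allowed (staging forbidden)
 */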
163
164 /**
165 * Allocate the staging resource. It is always linear and its size matches
166  * the transfer box, with proper padding.
167 */
168 static bool
169 xfer_alloc_staging_res(struct ilo_transfer *xfer)
170 {
171 const struct pipe_resource *res = xfer->base.resource;
172 const struct pipe_box *box = &xfer->base.box;
173 struct pipe_resource templ;
174
175 memset(&templ, 0, sizeof(templ));
176
177 templ.format = res->format;
178
179 if (res->target == PIPE_BUFFER) {
180 templ.target = PIPE_BUFFER;
181 templ.width0 =
182 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
183 }
184 else {
185 /* use 2D array for any texture target */
186 templ.target = PIPE_TEXTURE_2D_ARRAY;
187 templ.width0 = box->width;
188 }
189
190 templ.height0 = box->height;
191 templ.depth0 = 1;
192 templ.array_size = box->depth;
193 templ.nr_samples = 1;
194 templ.usage = PIPE_USAGE_STAGING;
195 templ.bind = PIPE_BIND_TRANSFER_WRITE;
196
197 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
198 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
199 PIPE_RESOURCE_FLAG_MAP_COHERENT;
200 }
201
202 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
203
204 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
205 assert(ilo_texture(xfer->staging.res)->image.tiling ==
206 GEN6_TILING_NONE);
207 }
208
209 return (xfer->staging.res != NULL);
210 }
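/*
 * Worked example of the buffer padding above, assuming
 * ILO_TRANSFER_MAP_BUFFER_ALIGNMENT is 64 (the actual value is defined in
 * ilo_transfer.h): for box->x = 70 and box->width = 100, width0 becomes
 * 6 + 100 = 106.  xfer_map() later advances the CPU pointer by the same 6
 * bytes, and copy_staging_resource() starts the copy at x = 6, so the data
 * keeps the same offset modulo the alignment as it has in the real buffer.
 */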
211
212 /**
213 * Use an alternative transfer method or rename the resource to unblock an
214 * otherwise blocking transfer.
215 */
216 static bool
217 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
218 {
219 struct pipe_resource *res = xfer->base.resource;
220 bool unblocked = false, renamed = false;
221
222 switch (xfer->method) {
223 case ILO_TRANSFER_MAP_CPU:
224 case ILO_TRANSFER_MAP_GTT:
225 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
226 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
227 unblocked = true;
228 }
229 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
230 resource_rename_bo(res)) {
231 renamed = true;
232 unblocked = true;
233 }
234 else if (usage_allows_staging_bo(xfer->base.usage) &&
235 xfer_alloc_staging_res(xfer)) {
236 xfer->method = ILO_TRANSFER_MAP_STAGING;
237 unblocked = true;
238 }
239 break;
240 case ILO_TRANSFER_MAP_GTT_ASYNC:
241 case ILO_TRANSFER_MAP_STAGING:
242 unblocked = true;
243 break;
244 default:
245 break;
246 }
247
248 *resource_renamed = renamed;
249
250 return unblocked;
251 }
252
253 /**
254 * Allocate the staging system buffer based on the resource format and the
255 * transfer box.
256 */
257 static bool
258 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
259 {
260 const enum pipe_format format = xfer->base.resource->format;
261 const struct pipe_box *box = &xfer->base.box;
262 const unsigned alignment = 64;
263
264    /* the caller needs to know the layout of the staging buffer */
265 xfer->base.stride =
266 align(util_format_get_stride(format, box->width), alignment);
267 xfer->base.layer_stride =
268 util_format_get_2d_size(format, xfer->base.stride, box->height);
269
270 xfer->staging.sys =
271 align_malloc(xfer->base.layer_stride * box->depth, alignment);
272
273 return (xfer->staging.sys != NULL);
274 }
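/*
 * Worked example of the layout above (values are illustrative): for
 * PIPE_FORMAT_B8G8R8A8_UNORM (4 bytes per texel) and a 100x64x2 transfer box,
 *
 *    stride       = align(100 * 4, 64) = 448
 *    layer_stride = 448 * 64           = 28672
 *
 * and the staging system buffer is 28672 * 2 = 57344 bytes.  For compressed
 * formats, the two helpers operate on block rows rather than texel rows.
 */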
275
276 /**
277 * Map according to the method. The staging system buffer should have been
278 * allocated if the method requires it.
279 */
280 static void *
281 xfer_map(struct ilo_transfer *xfer)
282 {
283 void *ptr;
284
285 switch (xfer->method) {
286 case ILO_TRANSFER_MAP_CPU:
287 ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
288 xfer->base.usage & PIPE_TRANSFER_WRITE);
289 break;
290 case ILO_TRANSFER_MAP_GTT:
291 ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
292 break;
293 case ILO_TRANSFER_MAP_GTT_ASYNC:
294 ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
295 break;
296 case ILO_TRANSFER_MAP_STAGING:
297 {
298 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
299 struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
300
301 /*
302 * We want a writable, optionally persistent and coherent, mapping
303          * for a linear bo.  We could call resource_get_transfer_method(), but
304          * the decision here is simple enough to make inline.
305 */
306 if (is->dev.has_llc)
307 ptr = intel_bo_map(bo, true);
308 else
309 ptr = intel_bo_map_gtt(bo);
310
311 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
312 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
313
314 }
315 break;
316 case ILO_TRANSFER_MAP_SW_CONVERT:
317 case ILO_TRANSFER_MAP_SW_ZS:
318 ptr = xfer->staging.sys;
319 break;
320 default:
321 assert(!"unknown mapping method");
322 ptr = NULL;
323 break;
324 }
325
326 return ptr;
327 }
328
329 /**
330 * Unmap a transfer.
331 */
332 static void
333 xfer_unmap(struct ilo_transfer *xfer)
334 {
335 switch (xfer->method) {
336 case ILO_TRANSFER_MAP_CPU:
337 case ILO_TRANSFER_MAP_GTT:
338 case ILO_TRANSFER_MAP_GTT_ASYNC:
339 intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
340 break;
341 case ILO_TRANSFER_MAP_STAGING:
342 intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
343 break;
344 default:
345 break;
346 }
347 }
348
349 static void
350 tex_get_box_origin(const struct ilo_texture *tex,
351 unsigned level, unsigned slice,
352 const struct pipe_box *box,
353 unsigned *mem_x, unsigned *mem_y)
354 {
355 unsigned x, y;
356
357 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
358 x += box->x;
359 y += box->y;
360
361 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
362 }
363
364 static unsigned
365 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
366 const struct pipe_box *box)
367 {
368 unsigned mem_x, mem_y;
369
370 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
371
372 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
373 }
374
375 static unsigned
376 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
377 {
378 return ilo_image_get_slice_stride(&tex->image, level);
379 }
380
381 static unsigned
382 tex_tile_x_swizzle(unsigned addr)
383 {
384 /*
385 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
386 *
387 * "As shown in the tiling algorithm, the new address bit[6] should be:
388 *
389 * Address bit[6] <= TiledAddr bit[6] XOR
390 * TiledAddr bit[9] XOR
391 * TiledAddr bit[10]"
392 */
393 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
394 }
395
396 static unsigned
397 tex_tile_y_swizzle(unsigned addr)
398 {
399 /*
400 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
401 *
402 * "As shown in the tiling algorithm, The new address bit[6] becomes:
403 *
404 * Address bit[6] <= TiledAddr bit[6] XOR
405 * TiledAddr bit[9]"
406 */
407 return addr ^ ((addr >> 3) & 0x40);
408 }
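/*
 * In the two swizzle helpers above, (addr >> 3) & 0x40 isolates bit[9] of
 * the tiled address and, for X-major tiles, (addr >> 4) & 0x40 additionally
 * isolates bit[10]; the XOR folds those bits into bit[6].  For example,
 * addr = 0x200 (only bit[9] set) becomes 0x240 under either swizzle, since
 * bit[6] is flipped.
 */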
409
410 static unsigned
411 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
412 unsigned tiles_per_row, bool swizzle)
413 {
414 /*
415 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
416 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
417 * tiled region are numbered in row-major order, starting from zero. The
418 * tile number can thus be calculated as follows:
419 *
420 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
421 *
422 * OWords in that tile are also numbered in row-major order, starting from
423 * zero. The OWord number can thus be calculated as follows:
424 *
425 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
426 *
427 * and the tiled offset is
428 *
429 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
430 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
431 */
432 unsigned tile, offset;
433
434 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
435 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
436
437 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
438 }
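/*
 * Worked example (illustrative values): with a bo_stride of 2048 bytes,
 * tiles_per_row = 2048 / 512 = 4.  For mem_x = 520, mem_y = 10:
 *
 *    tile   = (10 / 8) * 4 + (520 / 512)           = 5
 *    offset = 5 * 4096 + (10 % 8) * 512 + (520 % 512)
 *           = 20480 + 1024 + 8                     = 21512
 *
 * before any address swizzling is applied.
 */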
439
440 static unsigned
441 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
442 unsigned tiles_per_row, bool swizzle)
443 {
444 /*
445 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
446 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
447 * tiled region are numbered in row-major order, starting from zero. The
448 * tile number can thus be calculated as follows:
449 *
450 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
451 *
452 * OWords in that tile are numbered in column-major order, starting from
453 * zero. The OWord number can thus be calculated as follows:
454 *
455 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
456 *
457 * and the tiled offset is
458 *
459 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
460 */
461 unsigned tile, oword, offset;
462
463 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
464 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
465 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
466
467 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
468 }
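/*
 * Worked example (illustrative values): with a bo_stride of 1024 bytes,
 * tiles_per_row = 1024 / 128 = 8.  For mem_x = 130, mem_y = 40:
 *
 *    tile   = (40 / 32) * 8 + (130 / 128)          = 9
 *    oword  = ((130 % 128) / 16) * 32 + (40 % 32)  = 8
 *    offset = 9 * 4096 + 8 * 16 + (130 % 16)       = 36994
 *
 * before any address swizzling is applied.
 */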
469
470 static unsigned
471 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
472 unsigned tiles_per_row, bool swizzle)
473 {
474 /*
475 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
476 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
477 * tiled region are numbered in row-major order, starting from zero. The
478 * tile number can thus be calculated as follows:
479 *
480 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
481 *
482 * 8x8-blocks in that tile are numbered in column-major order, starting
483 * from zero. The 8x8-block number can thus be calculated as follows:
484 *
485 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
486 *
487 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
488 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
489 * We have
490 *
491 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
492 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
493 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
494 *
495 * and the tiled offset is
496 *
497 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
498 */
499 unsigned tile, blk8, blk4, blk2, blk1, offset;
500
501 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
502 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
503 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
504 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
505 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
506 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
507
508 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
509 }
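/*
 * Worked example (illustrative values): for mem_x = 9, mem_y = 2 in the
 * first tile (tile = 0):
 *
 *    blk8   = ((9 % 64) / 8) * 8 + ((2 % 64) / 8)     = 8
 *    blk4   = 0, blk2 = 2, blk1 = 1
 *    offset = 0 * 4096 + 8 * 64 + 0 * 16 + 2 * 4 + 1  = 521
 *
 * before any address swizzling is applied.
 */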
510
511 static unsigned
512 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
513 unsigned tiles_per_row, bool swizzle)
514 {
515 return mem_y * tiles_per_row + mem_x;
516 }
517
518 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
519 unsigned tiles_per_row,
520 bool swizzle);
521
522 static tex_tile_offset_func
523 tex_tile_choose_offset_func(const struct ilo_texture *tex,
524 unsigned *tiles_per_row)
525 {
526 switch (tex->image.tiling) {
527 default:
528 assert(!"unknown tiling");
529 /* fall through */
530 case GEN6_TILING_NONE:
531 *tiles_per_row = tex->image.bo_stride;
532 return tex_tile_none_offset;
533 case GEN6_TILING_X:
534 *tiles_per_row = tex->image.bo_stride / 512;
535 return tex_tile_x_offset;
536 case GEN6_TILING_Y:
537 *tiles_per_row = tex->image.bo_stride / 128;
538 return tex_tile_y_offset;
539 case GEN8_TILING_W:
540 *tiles_per_row = tex->image.bo_stride / 64;
541 return tex_tile_w_offset;
542 }
543 }
544
545 static void *
546 tex_staging_sys_map_bo(struct ilo_texture *tex,
547 bool for_read_back,
548 bool linear_view)
549 {
550 const struct ilo_screen *is = ilo_screen(tex->base.screen);
551 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
552 void *ptr;
553
554 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
555 !linear_view))
556 ptr = intel_bo_map(tex->bo, !for_read_back);
557 else
558 ptr = intel_bo_map_gtt(tex->bo);
559
560 return ptr;
561 }
562
563 static void
564 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
565 {
566 intel_bo_unmap(tex->bo);
567 }
568
569 static bool
570 tex_staging_sys_zs_read(struct ilo_texture *tex,
571 const struct ilo_transfer *xfer)
572 {
573 const struct ilo_screen *is = ilo_screen(tex->base.screen);
574 const bool swizzle = is->dev.has_address_swizzling;
575 const struct pipe_box *box = &xfer->base.box;
576 const uint8_t *src;
577 tex_tile_offset_func tile_offset;
578 unsigned tiles_per_row;
579 int slice;
580
581 src = tex_staging_sys_map_bo(tex, true, false);
582 if (!src)
583 return false;
584
585 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
586
587 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
588
589 if (tex->separate_s8) {
590 struct ilo_texture *s8_tex = tex->separate_s8;
591 const uint8_t *s8_src;
592 tex_tile_offset_func s8_tile_offset;
593 unsigned s8_tiles_per_row;
594 int dst_cpp, dst_s8_pos, src_cpp_used;
595
596 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
597 if (!s8_src) {
598 tex_staging_sys_unmap_bo(tex);
599 return false;
600 }
601
602 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
603
604 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
605 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
606
607 dst_cpp = 4;
608 dst_s8_pos = 3;
609 src_cpp_used = 3;
610 }
611 else {
612 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
613 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
614
615 dst_cpp = 8;
616 dst_s8_pos = 4;
617 src_cpp_used = 4;
618 }
619
620 for (slice = 0; slice < box->depth; slice++) {
621 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
622 uint8_t *dst;
623 int i, j;
624
625 tex_get_box_origin(tex, xfer->base.level, slice,
626 box, &mem_x, &mem_y);
627 tex_get_box_origin(s8_tex, xfer->base.level, slice,
628 box, &s8_mem_x, &s8_mem_y);
629
630 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
631
632 for (i = 0; i < box->height; i++) {
633 unsigned x = mem_x, s8_x = s8_mem_x;
634 uint8_t *d = dst;
635
636 for (j = 0; j < box->width; j++) {
637 const unsigned offset =
638 tile_offset(x, mem_y, tiles_per_row, swizzle);
639 const unsigned s8_offset =
640 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
641
642 memcpy(d, src + offset, src_cpp_used);
643 d[dst_s8_pos] = s8_src[s8_offset];
644
645 d += dst_cpp;
646 x += tex->image.block_size;
647 s8_x++;
648 }
649
650 dst += xfer->base.stride;
651 mem_y++;
652 s8_mem_y++;
653 }
654 }
655
656 tex_staging_sys_unmap_bo(s8_tex);
657 }
658 else {
659 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
660
661 for (slice = 0; slice < box->depth; slice++) {
662 unsigned mem_x, mem_y;
663 uint8_t *dst;
664 int i, j;
665
666 tex_get_box_origin(tex, xfer->base.level, slice,
667 box, &mem_x, &mem_y);
668
669 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
670
671 for (i = 0; i < box->height; i++) {
672 unsigned x = mem_x;
673 uint8_t *d = dst;
674
675 for (j = 0; j < box->width; j++) {
676 const unsigned offset =
677 tile_offset(x, mem_y, tiles_per_row, swizzle);
678
679 *d = src[offset];
680
681 d++;
682 x++;
683 }
684
685 dst += xfer->base.stride;
686 mem_y++;
687 }
688 }
689 }
690
691 tex_staging_sys_unmap_bo(tex);
692
693 return true;
694 }
695
696 static bool
697 tex_staging_sys_zs_write(struct ilo_texture *tex,
698 const struct ilo_transfer *xfer)
699 {
700 const struct ilo_screen *is = ilo_screen(tex->base.screen);
701 const bool swizzle = is->dev.has_address_swizzling;
702 const struct pipe_box *box = &xfer->base.box;
703 uint8_t *dst;
704 tex_tile_offset_func tile_offset;
705 unsigned tiles_per_row;
706 int slice;
707
708 dst = tex_staging_sys_map_bo(tex, false, false);
709 if (!dst)
710 return false;
711
712 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
713
714 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
715
716 if (tex->separate_s8) {
717 struct ilo_texture *s8_tex = tex->separate_s8;
718 uint8_t *s8_dst;
719 tex_tile_offset_func s8_tile_offset;
720 unsigned s8_tiles_per_row;
721 int src_cpp, src_s8_pos, dst_cpp_used;
722
723 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
724 if (!s8_dst) {
725          tex_staging_sys_unmap_bo(tex);
726 return false;
727 }
728
729 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
730
731 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
732 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
733
734 src_cpp = 4;
735 src_s8_pos = 3;
736 dst_cpp_used = 3;
737 }
738 else {
739 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
740 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
741
742 src_cpp = 8;
743 src_s8_pos = 4;
744 dst_cpp_used = 4;
745 }
746
747 for (slice = 0; slice < box->depth; slice++) {
748 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
749 const uint8_t *src;
750 int i, j;
751
752 tex_get_box_origin(tex, xfer->base.level, slice,
753 box, &mem_x, &mem_y);
754 tex_get_box_origin(s8_tex, xfer->base.level, slice,
755 box, &s8_mem_x, &s8_mem_y);
756
757 src = xfer->staging.sys + xfer->base.layer_stride * slice;
758
759 for (i = 0; i < box->height; i++) {
760 unsigned x = mem_x, s8_x = s8_mem_x;
761 const uint8_t *s = src;
762
763 for (j = 0; j < box->width; j++) {
764 const unsigned offset =
765 tile_offset(x, mem_y, tiles_per_row, swizzle);
766 const unsigned s8_offset =
767 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
768
769 memcpy(dst + offset, s, dst_cpp_used);
770 s8_dst[s8_offset] = s[src_s8_pos];
771
772 s += src_cpp;
773 x += tex->image.block_size;
774 s8_x++;
775 }
776
777 src += xfer->base.stride;
778 mem_y++;
779 s8_mem_y++;
780 }
781 }
782
783 tex_staging_sys_unmap_bo(s8_tex);
784 }
785 else {
786 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
787
788 for (slice = 0; slice < box->depth; slice++) {
789 unsigned mem_x, mem_y;
790 const uint8_t *src;
791 int i, j;
792
793 tex_get_box_origin(tex, xfer->base.level, slice,
794 box, &mem_x, &mem_y);
795
796 src = xfer->staging.sys + xfer->base.layer_stride * slice;
797
798 for (i = 0; i < box->height; i++) {
799 unsigned x = mem_x;
800 const uint8_t *s = src;
801
802 for (j = 0; j < box->width; j++) {
803 const unsigned offset =
804 tile_offset(x, mem_y, tiles_per_row, swizzle);
805
806 dst[offset] = *s;
807
808 s++;
809 x++;
810 }
811
812 src += xfer->base.stride;
813 mem_y++;
814 }
815 }
816 }
817
818 tex_staging_sys_unmap_bo(tex);
819
820 return true;
821 }
822
823 static bool
824 tex_staging_sys_convert_write(struct ilo_texture *tex,
825 const struct ilo_transfer *xfer)
826 {
827 const struct pipe_box *box = &xfer->base.box;
828 unsigned dst_slice_stride;
829 void *dst;
830 int slice;
831
832 dst = tex_staging_sys_map_bo(tex, false, true);
833 if (!dst)
834 return false;
835
836 dst += tex_get_box_offset(tex, xfer->base.level, box);
837
838 /* slice stride is not always available */
839 if (box->depth > 1)
840 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
841 else
842 dst_slice_stride = 0;
843
844 if (unlikely(tex->image.format == tex->base.format)) {
845 util_copy_box(dst, tex->image.format, tex->image.bo_stride,
846 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
847 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
848 0, 0, 0);
849
850 tex_staging_sys_unmap_bo(tex);
851
852 return true;
853 }
854
855 switch (tex->base.format) {
856 case PIPE_FORMAT_ETC1_RGB8:
857 assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
858
859 for (slice = 0; slice < box->depth; slice++) {
860 const void *src =
861 xfer->staging.sys + xfer->base.layer_stride * slice;
862
863 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
864 tex->image.bo_stride, src, xfer->base.stride,
865 box->width, box->height);
866
867 dst += dst_slice_stride;
868 }
869 break;
870 default:
871 assert(!"unable to convert the staging data");
872 break;
873 }
874
875 tex_staging_sys_unmap_bo(tex);
876
877 return true;
878 }
879
880 static void
881 tex_staging_sys_writeback(struct ilo_transfer *xfer)
882 {
883 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
884 bool success;
885
886 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
887 return;
888
889 switch (xfer->method) {
890 case ILO_TRANSFER_MAP_SW_CONVERT:
891 success = tex_staging_sys_convert_write(tex, xfer);
892 break;
893 case ILO_TRANSFER_MAP_SW_ZS:
894 success = tex_staging_sys_zs_write(tex, xfer);
895 break;
896 default:
897 assert(!"unknown mapping method");
898 success = false;
899 break;
900 }
901
902 if (!success)
903 ilo_err("failed to map resource for moving staging data\n");
904 }
905
906 static bool
907 tex_staging_sys_readback(struct ilo_transfer *xfer)
908 {
909 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
910 bool read_back = false, success;
911
912 /* see if we need to read the resource back */
913 if (xfer->base.usage & PIPE_TRANSFER_READ) {
914 read_back = true;
915 }
916 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
917 const unsigned discard_flags =
918 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
919
920 if (!(xfer->base.usage & discard_flags))
921 read_back = true;
922 }
923
924 if (!read_back)
925 return true;
926
927 switch (xfer->method) {
928 case ILO_TRANSFER_MAP_SW_CONVERT:
929 assert(!"no on-the-fly format conversion for mapping");
930 success = false;
931 break;
932 case ILO_TRANSFER_MAP_SW_ZS:
933 success = tex_staging_sys_zs_read(tex, xfer);
934 break;
935 default:
936 assert(!"unknown mapping method");
937 success = false;
938 break;
939 }
940
941 return success;
942 }
943
944 static void *
945 tex_map(struct ilo_transfer *xfer)
946 {
947 void *ptr;
948
949 switch (xfer->method) {
950 case ILO_TRANSFER_MAP_CPU:
951 case ILO_TRANSFER_MAP_GTT:
952 case ILO_TRANSFER_MAP_GTT_ASYNC:
953 ptr = xfer_map(xfer);
954 if (ptr) {
955 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
956
957 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
958
959 /* stride is for a block row, not a texel row */
960 xfer->base.stride = tex->image.bo_stride;
961 /* note that slice stride is not always available */
962 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
963 tex_get_slice_stride(tex, xfer->base.level) : 0;
964 }
965 break;
966 case ILO_TRANSFER_MAP_STAGING:
967 ptr = xfer_map(xfer);
968 if (ptr) {
969 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
970 xfer->base.stride = staging->image.bo_stride;
971 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
972 }
973 break;
974 case ILO_TRANSFER_MAP_SW_CONVERT:
975 case ILO_TRANSFER_MAP_SW_ZS:
976 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
977 ptr = xfer_map(xfer);
978 else
979 ptr = NULL;
980 break;
981 default:
982 assert(!"unknown mapping method");
983 ptr = NULL;
984 break;
985 }
986
987 return ptr;
988 }
989
990 static void *
991 buf_map(struct ilo_transfer *xfer)
992 {
993 void *ptr;
994
995 ptr = xfer_map(xfer);
996 if (!ptr)
997 return NULL;
998
999 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
1000 ptr += xfer->base.box.x;
1001
1002 xfer->base.stride = 0;
1003 xfer->base.layer_stride = 0;
1004
1005 assert(xfer->base.level == 0);
1006 assert(xfer->base.box.y == 0);
1007 assert(xfer->base.box.z == 0);
1008 assert(xfer->base.box.height == 1);
1009 assert(xfer->base.box.depth == 1);
1010
1011 return ptr;
1012 }
1013
1014 static void
1015 copy_staging_resource(struct ilo_context *ilo,
1016 struct ilo_transfer *xfer,
1017 const struct pipe_box *box)
1018 {
1019 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1020 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1021 struct pipe_box modified_box;
1022
1023 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1024
1025 if (!box) {
1026 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1027 xfer->base.box.depth, &modified_box);
1028 box = &modified_box;
1029 }
1030 else if (pad_x) {
1031 modified_box = *box;
1032 modified_box.x += pad_x;
1033 box = &modified_box;
1034 }
1035
1036 ilo_blitter_blt_copy_resource(ilo->blitter,
1037 xfer->base.resource, xfer->base.level,
1038 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1039 xfer->staging.res, 0, box);
1040 }
1041
1042 static bool
1043 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1044 {
1045 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1046
1047 if (need_submit)
1048 *need_submit = referenced;
1049
1050 if (referenced)
1051 return true;
1052
1053 return intel_bo_is_busy(bo);
1054 }
1055
1056 /**
1057 * Choose the best mapping method, depending on the transfer usage and whether
1058 * the bo is busy.
1059 */
1060 static bool
1061 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1062 {
1063 struct pipe_resource *res = xfer->base.resource;
1064 bool need_submit;
1065
1066 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1067 return false;
1068
1069 /* see if we can avoid blocking */
1070 if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
1071 bool resource_renamed;
1072
1073 if (!xfer_unblock(xfer, &resource_renamed)) {
1074 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1075 return false;
1076
1077          /* submit so that the bo is really busy and map() blocks correctly */
1078 if (need_submit)
1079 ilo_cp_submit(ilo->cp, "syncing for transfers");
1080 }
1081
1082 if (resource_renamed)
1083 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1084 }
1085
1086 return true;
1087 }
1088
1089 static void
1090 buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf,
1091 unsigned usage, int offset, int size, const void *data)
1092 {
1093 bool need_submit;
1094
1095 /* see if we can avoid blocking */
1096 if (is_bo_busy(ilo, buf->bo, &need_submit)) {
1097 bool unblocked = false;
1098
1099 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1100 ilo_buffer_rename_bo(buf)) {
1101 ilo_state_vector_resource_renamed(&ilo->state_vector, &buf->base);
1102 unblocked = true;
1103 }
1104 else {
1105 struct pipe_resource templ, *staging;
1106
1107 /*
1108           * allocate a staging buffer to hold the data and do a pipelined copy
1109           * of it to the real bo
1110 */
1111 templ = buf->base;
1112 templ.width0 = size;
1113 templ.usage = PIPE_USAGE_STAGING;
1114 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1115 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1116 if (staging) {
1117 struct pipe_box staging_box;
1118
1119 intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
1120
1121 u_box_1d(0, size, &staging_box);
1122 ilo_blitter_blt_copy_resource(ilo->blitter,
1123 &buf->base, 0, offset, 0, 0,
1124 staging, 0, &staging_box);
1125
1126 pipe_resource_reference(&staging, NULL);
1127
1128 return;
1129 }
1130 }
1131
1132       /* submit so that the bo is really busy and pwrite() blocks correctly */
1133 if (!unblocked && need_submit)
1134 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1135 }
1136
1137 intel_bo_pwrite(buf->bo, offset, size, data);
1138 }
1139
1140 static void
1141 ilo_transfer_flush_region(struct pipe_context *pipe,
1142 struct pipe_transfer *transfer,
1143 const struct pipe_box *box)
1144 {
1145 struct ilo_context *ilo = ilo_context(pipe);
1146 struct ilo_transfer *xfer = ilo_transfer(transfer);
1147
1148 /*
1149 * The staging resource is mapped persistently and coherently. We can copy
1150 * without unmapping.
1151 */
1152 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1153 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1154 copy_staging_resource(ilo, xfer, box);
1155 }
1156
1157 static void
1158 ilo_transfer_unmap(struct pipe_context *pipe,
1159 struct pipe_transfer *transfer)
1160 {
1161 struct ilo_context *ilo = ilo_context(pipe);
1162 struct ilo_transfer *xfer = ilo_transfer(transfer);
1163
1164 xfer_unmap(xfer);
1165
1166 switch (xfer->method) {
1167 case ILO_TRANSFER_MAP_STAGING:
1168 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1169 copy_staging_resource(ilo, xfer, NULL);
1170 pipe_resource_reference(&xfer->staging.res, NULL);
1171 break;
1172 case ILO_TRANSFER_MAP_SW_CONVERT:
1173 case ILO_TRANSFER_MAP_SW_ZS:
1174 tex_staging_sys_writeback(xfer);
1175 align_free(xfer->staging.sys);
1176 break;
1177 default:
1178 break;
1179 }
1180
1181 pipe_resource_reference(&xfer->base.resource, NULL);
1182
1183 util_slab_free(&ilo->transfer_mempool, xfer);
1184 }
1185
1186 static void *
1187 ilo_transfer_map(struct pipe_context *pipe,
1188 struct pipe_resource *res,
1189 unsigned level,
1190 unsigned usage,
1191 const struct pipe_box *box,
1192 struct pipe_transfer **transfer)
1193 {
1194 struct ilo_context *ilo = ilo_context(pipe);
1195 struct ilo_transfer *xfer;
1196 void *ptr;
1197
1198    /* note that xfer is not zeroed */
1199 xfer = util_slab_alloc(&ilo->transfer_mempool);
1200 if (!xfer) {
1201 *transfer = NULL;
1202 return NULL;
1203 }
1204
1205 xfer->base.resource = NULL;
1206 pipe_resource_reference(&xfer->base.resource, res);
1207 xfer->base.level = level;
1208 xfer->base.usage = usage;
1209 xfer->base.box = *box;
1210
1211 ilo_blit_resolve_transfer(ilo, &xfer->base);
1212
1213 if (choose_transfer_method(ilo, xfer)) {
1214 if (res->target == PIPE_BUFFER)
1215 ptr = buf_map(xfer);
1216 else
1217 ptr = tex_map(xfer);
1218 }
1219 else {
1220 ptr = NULL;
1221 }
1222
1223 if (!ptr) {
1224 pipe_resource_reference(&xfer->base.resource, NULL);
1225 util_slab_free(&ilo->transfer_mempool, xfer);
1226 *transfer = NULL;
1227 return NULL;
1228 }
1229
1230 *transfer = &xfer->base;
1231
1232 return ptr;
1233 }
1234
1235 static void
1236 ilo_transfer_inline_write(struct pipe_context *pipe,
1237 struct pipe_resource *res,
1238 unsigned level,
1239 unsigned usage,
1240 const struct pipe_box *box,
1241 const void *data,
1242 unsigned stride,
1243 unsigned layer_stride)
1244 {
1245 if (likely(res->target == PIPE_BUFFER) &&
1246 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1247       /* the caller should specify just an offset and a size */
1248 assert(level == 0);
1249 assert(box->y == 0);
1250 assert(box->z == 0);
1251 assert(box->height == 1);
1252 assert(box->depth == 1);
1253
1254 buf_pwrite(ilo_context(pipe), ilo_buffer(res),
1255 usage, box->x, box->width, data);
1256 }
1257 else {
1258 u_default_transfer_inline_write(pipe, res,
1259 level, usage, box, data, stride, layer_stride);
1260 }
1261 }
1262
1263 /**
1264 * Initialize transfer-related functions.
1265 */
1266 void
1267 ilo_init_transfer_functions(struct ilo_context *ilo)
1268 {
1269 ilo->base.transfer_map = ilo_transfer_map;
1270 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1271 ilo->base.transfer_unmap = ilo_transfer_unmap;
1272 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1273 }