mesa.git: src/gallium/drivers/ilo/ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50  *   mapping, and do a pipelined copy to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61  * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer while
62  *   it is mapped.  Synchronization is done with memory barriers, issued
63  *   explicitly via memory_barrier() or implicitly via transfer_flush_region(),
64  *   together with GPU fences.
65  * - When PIPE_TRANSFER_COHERENT is set, updates by either the CPU or the GPU
66  *   should be made visible to the other side immediately.  Since the kernel
67  *   flushes GPU caches at the end of each batch buffer, the CPU always sees
68  *   GPU updates.  We could use a coherent mapping to make all persistent
69  *   mappings coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
74
75 /**
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex->image, transfer->level)) {
100 m = ILO_TRANSFER_MAP_SW_ZS;
101 need_convert = true;
102 }
103 } else if (tex->image_format != tex->base.format) {
104 m = ILO_TRANSFER_MAP_SW_CONVERT;
105 need_convert = true;
106 }
107
108 if (need_convert) {
109 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
110 return false;
111
112 *method = m;
113 return true;
114 }
115
116 tiled = (tex->image.tiling != GEN6_TILING_NONE);
117 }
118
119 if (tiled)
120 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
121 else if (is->dev.has_llc)
122 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
123 else if (usage & PIPE_TRANSFER_PERSISTENT)
124 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
125 else if (usage & PIPE_TRANSFER_READ)
126 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
127 else
128 m = ILO_TRANSFER_MAP_GTT;
129
130 *method = m;
131
132 return true;
133 }
134
135 /**
136  * Return true if the usage allows using a staging bo to avoid blocking.
137 */
138 static bool
139 usage_allows_staging_bo(unsigned usage)
140 {
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
143 PIPE_TRANSFER_DISCARD_RANGE |
144 PIPE_TRANSFER_FLUSH_EXPLICIT);
145 const unsigned reasons_against = (PIPE_TRANSFER_READ |
146 PIPE_TRANSFER_MAP_DIRECTLY |
147 PIPE_TRANSFER_PERSISTENT);
148
149 return (usage & can_writeback) && !(usage & reasons_against);
150 }
151
152 /**
153 * Allocate the staging resource. It is always linear and its size matches
154  * the transfer box, with proper padding.
155 */
156 static bool
157 xfer_alloc_staging_res(struct ilo_transfer *xfer)
158 {
159 const struct pipe_resource *res = xfer->base.resource;
160 const struct pipe_box *box = &xfer->base.box;
161 struct pipe_resource templ;
162
163 memset(&templ, 0, sizeof(templ));
164
165 templ.format = res->format;
166
167 if (res->target == PIPE_BUFFER) {
168 templ.target = PIPE_BUFFER;
169 templ.width0 =
170 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
171 }
172 else {
173 /* use 2D array for any texture target */
174 templ.target = PIPE_TEXTURE_2D_ARRAY;
175 templ.width0 = box->width;
176 }
177
178 templ.height0 = box->height;
179 templ.depth0 = 1;
180 templ.array_size = box->depth;
181 templ.nr_samples = 1;
182 templ.usage = PIPE_USAGE_STAGING;
183
184 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
185 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
186 PIPE_RESOURCE_FLAG_MAP_COHERENT;
187 }
188
189 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
190
191 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
192 assert(ilo_texture(xfer->staging.res)->image.tiling ==
193 GEN6_TILING_NONE);
194 }
195
196 return (xfer->staging.res != NULL);
197 }
198
199 /**
200 * Use an alternative transfer method or rename the resource to unblock an
201 * otherwise blocking transfer.
202 */
203 static bool
204 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
205 {
206 struct pipe_resource *res = xfer->base.resource;
207 bool unblocked = false, renamed = false;
208
209 switch (xfer->method) {
210 case ILO_TRANSFER_MAP_CPU:
211 case ILO_TRANSFER_MAP_GTT:
212 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
213 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
214 unblocked = true;
215 }
216 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
217 ilo_resource_rename_bo(res)) {
218 renamed = true;
219 unblocked = true;
220 }
221 else if (usage_allows_staging_bo(xfer->base.usage) &&
222 xfer_alloc_staging_res(xfer)) {
223 xfer->method = ILO_TRANSFER_MAP_STAGING;
224 unblocked = true;
225 }
226 break;
227 case ILO_TRANSFER_MAP_GTT_ASYNC:
228 case ILO_TRANSFER_MAP_STAGING:
229 unblocked = true;
230 break;
231 default:
232 break;
233 }
234
235 *resource_renamed = renamed;
236
237 return unblocked;
238 }
239
240 /**
241 * Allocate the staging system buffer based on the resource format and the
242 * transfer box.
243 */
244 static bool
245 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
246 {
247 const enum pipe_format format = xfer->base.resource->format;
248 const struct pipe_box *box = &xfer->base.box;
249 const unsigned alignment = 64;
250
251 /* need to tell the world the layout */
252 xfer->base.stride =
253 align(util_format_get_stride(format, box->width), alignment);
254 xfer->base.layer_stride =
255 util_format_get_2d_size(format, xfer->base.stride, box->height);
256
257 xfer->staging.sys =
258 align_malloc(xfer->base.layer_stride * box->depth, alignment);
259
260 return (xfer->staging.sys != NULL);
261 }
262
263 /**
264 * Map according to the method. The staging system buffer should have been
265 * allocated if the method requires it.
266 */
267 static void *
268 xfer_map(struct ilo_transfer *xfer)
269 {
270 const struct ilo_vma *vma;
271 void *ptr;
272
273 switch (xfer->method) {
274 case ILO_TRANSFER_MAP_CPU:
275 vma = ilo_resource_get_vma(xfer->base.resource);
276 ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
277 break;
278 case ILO_TRANSFER_MAP_GTT:
279 vma = ilo_resource_get_vma(xfer->base.resource);
280 ptr = intel_bo_map_gtt(vma->bo);
281 break;
282 case ILO_TRANSFER_MAP_GTT_ASYNC:
283 vma = ilo_resource_get_vma(xfer->base.resource);
284 ptr = intel_bo_map_gtt_async(vma->bo);
285 break;
286 case ILO_TRANSFER_MAP_STAGING:
287 {
288 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
289
290 vma = ilo_resource_get_vma(xfer->staging.res);
291
292 /*
293          * We want a writable, optionally persistent and coherent, mapping of a
294          * linear bo.  We could go through resource_get_transfer_method(), but the
295          * decision here is simple enough to make directly.
296 */
297 if (is->dev.has_llc)
298 ptr = intel_bo_map(vma->bo, true);
299 else
300 ptr = intel_bo_map_gtt(vma->bo);
301
302 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
303 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
304 }
305 break;
306 case ILO_TRANSFER_MAP_SW_CONVERT:
307 case ILO_TRANSFER_MAP_SW_ZS:
308 vma = NULL;
309 ptr = xfer->staging.sys;
310 break;
311 default:
312 assert(!"unknown mapping method");
313 vma = NULL;
314 ptr = NULL;
315 break;
316 }
317
318 if (ptr && vma)
319 ptr = (void *) ((char *) ptr + vma->bo_offset);
320
321 return ptr;
322 }
323
324 /**
325 * Unmap a transfer.
326 */
327 static void
328 xfer_unmap(struct ilo_transfer *xfer)
329 {
330 switch (xfer->method) {
331 case ILO_TRANSFER_MAP_CPU:
332 case ILO_TRANSFER_MAP_GTT:
333 case ILO_TRANSFER_MAP_GTT_ASYNC:
334 intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
335 break;
336 case ILO_TRANSFER_MAP_STAGING:
337 intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
338 break;
339 default:
340 break;
341 }
342 }
343
344 static void
345 tex_get_box_origin(const struct ilo_texture *tex,
346 unsigned level, unsigned slice,
347 const struct pipe_box *box,
348 unsigned *mem_x, unsigned *mem_y)
349 {
350 unsigned x, y;
351
352 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
353 x += box->x;
354 y += box->y;
355
356 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
357 }
358
359 static unsigned
360 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
361 const struct pipe_box *box)
362 {
363 unsigned mem_x, mem_y;
364
365 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
366
367 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
368 }
369
370 static unsigned
371 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
372 {
373 return ilo_image_get_slice_stride(&tex->image, level);
374 }
375
376 static unsigned
377 tex_tile_x_swizzle(unsigned addr)
378 {
379 /*
380 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
381 *
382 * "As shown in the tiling algorithm, the new address bit[6] should be:
383 *
384 * Address bit[6] <= TiledAddr bit[6] XOR
385 * TiledAddr bit[9] XOR
386 * TiledAddr bit[10]"
387 */
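   /*
    * For example (illustrative values): with addr = 0x240, bits 9 and 6 are
    * set and bit 10 is clear, so the new bit 6 is 1 ^ 1 ^ 0 = 0 and the
    * swizzled address is 0x200.
    */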
388 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
389 }
390
391 static unsigned
392 tex_tile_y_swizzle(unsigned addr)
393 {
394 /*
395 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
396 *
397 * "As shown in the tiling algorithm, The new address bit[6] becomes:
398 *
399 * Address bit[6] <= TiledAddr bit[6] XOR
400 * TiledAddr bit[9]"
401 */
402 return addr ^ ((addr >> 3) & 0x40);
403 }
404
405 static unsigned
406 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
407 unsigned tiles_per_row, bool swizzle)
408 {
409 /*
410     * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
411 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
412 * tiled region are numbered in row-major order, starting from zero. The
413 * tile number can thus be calculated as follows:
414 *
415 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
416 *
417 * OWords in that tile are also numbered in row-major order, starting from
418 * zero. The OWord number can thus be calculated as follows:
419 *
420 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
421 *
422 * and the tiled offset is
423 *
424 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
425 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
426 */
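   /*
    * A worked example with assumed values: if bo_stride is 1024 bytes,
    * tiles_per_row is 1024 / 512 = 2.  For (mem_x, mem_y) = (520, 10),
    *
    *   tile   = (10 / 8) * 2 + (520 / 512) = 3
    *   offset = 3 * 4096 + (10 % 8) * 512 + (520 % 512) = 13320
    */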
427 unsigned tile, offset;
428
429 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
430 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
431
432 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
433 }
434
435 static unsigned
436 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
437 unsigned tiles_per_row, bool swizzle)
438 {
439 /*
440 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
441 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
442 * tiled region are numbered in row-major order, starting from zero. The
443 * tile number can thus be calculated as follows:
444 *
445 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
446 *
447 * OWords in that tile are numbered in column-major order, starting from
448 * zero. The OWord number can thus be calculated as follows:
449 *
450 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
451 *
452 * and the tiled offset is
453 *
454 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
455 */
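   /*
    * A worked example with assumed values: if bo_stride is 1024 bytes,
    * tiles_per_row is 1024 / 128 = 8.  For (mem_x, mem_y) = (200, 70),
    *
    *   tile   = (70 / 32) * 8 + (200 / 128) = 17
    *   oword  = ((200 % 128) / 16) * 32 + (70 % 32) = 134
    *   offset = 17 * 4096 + 134 * 16 + (200 % 16) = 71784
    */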
456 unsigned tile, oword, offset;
457
458 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
459 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
460 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
461
462 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
463 }
464
465 static unsigned
466 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
467 unsigned tiles_per_row, bool swizzle)
468 {
469 /*
470 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
471 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
472 * tiled region are numbered in row-major order, starting from zero. The
473 * tile number can thus be calculated as follows:
474 *
475 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
476 *
477 * 8x8-blocks in that tile are numbered in column-major order, starting
478 * from zero. The 8x8-block number can thus be calculated as follows:
479 *
480 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
481 *
482 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
483 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
484 * We have
485 *
486 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
487 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
488 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
489 *
490 * and the tiled offset is
491 *
492 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
493 */
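   /*
    * A worked example with assumed values: if bo_stride is 256 bytes,
    * tiles_per_row is 256 / 64 = 4.  For (mem_x, mem_y) = (90, 140),
    *
    *   tile   = (140 / 64) * 4 + (90 / 64) = 9
    *   blk8   = ((90 % 64) / 8) * 8 + ((140 % 64) / 8) = 25
    *   blk4   = 2, blk2 = 1, blk1 = 0
    *   offset = 9 * 4096 + 25 * 64 + 2 * 16 + 1 * 4 + 0 = 38500
    */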
494 unsigned tile, blk8, blk4, blk2, blk1, offset;
495
496 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
497 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
498 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
499 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
500 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
501 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
502
503 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
504 }
505
506 static unsigned
507 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
508 unsigned tiles_per_row, bool swizzle)
509 {
510 return mem_y * tiles_per_row + mem_x;
511 }
512
513 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
514 unsigned tiles_per_row,
515 bool swizzle);
516
517 static tex_tile_offset_func
518 tex_tile_choose_offset_func(const struct ilo_texture *tex,
519 unsigned *tiles_per_row)
520 {
521 switch (tex->image.tiling) {
522 default:
523 assert(!"unknown tiling");
524 /* fall through */
525 case GEN6_TILING_NONE:
526 *tiles_per_row = tex->image.bo_stride;
527 return tex_tile_none_offset;
528 case GEN6_TILING_X:
529 *tiles_per_row = tex->image.bo_stride / 512;
530 return tex_tile_x_offset;
531 case GEN6_TILING_Y:
532 *tiles_per_row = tex->image.bo_stride / 128;
533 return tex_tile_y_offset;
534 case GEN8_TILING_W:
535 *tiles_per_row = tex->image.bo_stride / 64;
536 return tex_tile_w_offset;
537 }
538 }
539
540 static void *
541 tex_staging_sys_map_bo(struct ilo_texture *tex,
542 bool for_read_back,
543 bool linear_view)
544 {
545 const struct ilo_screen *is = ilo_screen(tex->base.screen);
546 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
547 void *ptr;
548
549 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
550 !linear_view))
551 ptr = intel_bo_map(tex->vma.bo, !for_read_back);
552 else
553 ptr = intel_bo_map_gtt(tex->vma.bo);
554
555 if (ptr)
556 ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
557
558 return ptr;
559 }
560
561 static void
562 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
563 {
564 intel_bo_unmap(tex->vma.bo);
565 }
566
567 static bool
568 tex_staging_sys_zs_read(struct ilo_texture *tex,
569 const struct ilo_transfer *xfer)
570 {
571 const struct ilo_screen *is = ilo_screen(tex->base.screen);
572 const bool swizzle = is->dev.has_address_swizzling;
573 const struct pipe_box *box = &xfer->base.box;
574 const uint8_t *src;
575 tex_tile_offset_func tile_offset;
576 unsigned tiles_per_row;
577 int slice;
578
579 src = tex_staging_sys_map_bo(tex, true, false);
580 if (!src)
581 return false;
582
583 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
584
585 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
586
587 if (tex->separate_s8) {
588 struct ilo_texture *s8_tex = tex->separate_s8;
589 const uint8_t *s8_src;
590 tex_tile_offset_func s8_tile_offset;
591 unsigned s8_tiles_per_row;
592 int dst_cpp, dst_s8_pos, src_cpp_used;
593
594 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
595 if (!s8_src) {
596 tex_staging_sys_unmap_bo(tex);
597 return false;
598 }
599
600 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
601
602 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
603 assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
604
605 dst_cpp = 4;
606 dst_s8_pos = 3;
607 src_cpp_used = 3;
608 }
609 else {
610 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
611 assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
612
613 dst_cpp = 8;
614 dst_s8_pos = 4;
615 src_cpp_used = 4;
616 }
617
618 for (slice = 0; slice < box->depth; slice++) {
619 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
620 uint8_t *dst;
621 int i, j;
622
623 tex_get_box_origin(tex, xfer->base.level, slice,
624 box, &mem_x, &mem_y);
625 tex_get_box_origin(s8_tex, xfer->base.level, slice,
626 box, &s8_mem_x, &s8_mem_y);
627
628 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
629
630 for (i = 0; i < box->height; i++) {
631 unsigned x = mem_x, s8_x = s8_mem_x;
632 uint8_t *d = dst;
633
634 for (j = 0; j < box->width; j++) {
635 const unsigned offset =
636 tile_offset(x, mem_y, tiles_per_row, swizzle);
637 const unsigned s8_offset =
638 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
639
640 memcpy(d, src + offset, src_cpp_used);
641 d[dst_s8_pos] = s8_src[s8_offset];
642
643 d += dst_cpp;
644 x += tex->image.block_size;
645 s8_x++;
646 }
647
648 dst += xfer->base.stride;
649 mem_y++;
650 s8_mem_y++;
651 }
652 }
653
654 tex_staging_sys_unmap_bo(s8_tex);
655 }
656 else {
657 assert(tex->image_format == PIPE_FORMAT_S8_UINT);
658
659 for (slice = 0; slice < box->depth; slice++) {
660 unsigned mem_x, mem_y;
661 uint8_t *dst;
662 int i, j;
663
664 tex_get_box_origin(tex, xfer->base.level, slice,
665 box, &mem_x, &mem_y);
666
667 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
668
669 for (i = 0; i < box->height; i++) {
670 unsigned x = mem_x;
671 uint8_t *d = dst;
672
673 for (j = 0; j < box->width; j++) {
674 const unsigned offset =
675 tile_offset(x, mem_y, tiles_per_row, swizzle);
676
677 *d = src[offset];
678
679 d++;
680 x++;
681 }
682
683 dst += xfer->base.stride;
684 mem_y++;
685 }
686 }
687 }
688
689 tex_staging_sys_unmap_bo(tex);
690
691 return true;
692 }
693
694 static bool
695 tex_staging_sys_zs_write(struct ilo_texture *tex,
696 const struct ilo_transfer *xfer)
697 {
698 const struct ilo_screen *is = ilo_screen(tex->base.screen);
699 const bool swizzle = is->dev.has_address_swizzling;
700 const struct pipe_box *box = &xfer->base.box;
701 uint8_t *dst;
702 tex_tile_offset_func tile_offset;
703 unsigned tiles_per_row;
704 int slice;
705
706 dst = tex_staging_sys_map_bo(tex, false, false);
707 if (!dst)
708 return false;
709
710 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
711
712 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
713
714 if (tex->separate_s8) {
715 struct ilo_texture *s8_tex = tex->separate_s8;
716 uint8_t *s8_dst;
717 tex_tile_offset_func s8_tile_offset;
718 unsigned s8_tiles_per_row;
719 int src_cpp, src_s8_pos, dst_cpp_used;
720
721 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
722 if (!s8_dst) {
723          tex_staging_sys_unmap_bo(tex);
724 return false;
725 }
726
727 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
728
729 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
730 assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
731
732 src_cpp = 4;
733 src_s8_pos = 3;
734 dst_cpp_used = 3;
735 }
736 else {
737 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
738 assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
739
740 src_cpp = 8;
741 src_s8_pos = 4;
742 dst_cpp_used = 4;
743 }
744
745 for (slice = 0; slice < box->depth; slice++) {
746 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
747 const uint8_t *src;
748 int i, j;
749
750 tex_get_box_origin(tex, xfer->base.level, slice,
751 box, &mem_x, &mem_y);
752 tex_get_box_origin(s8_tex, xfer->base.level, slice,
753 box, &s8_mem_x, &s8_mem_y);
754
755 src = xfer->staging.sys + xfer->base.layer_stride * slice;
756
757 for (i = 0; i < box->height; i++) {
758 unsigned x = mem_x, s8_x = s8_mem_x;
759 const uint8_t *s = src;
760
761 for (j = 0; j < box->width; j++) {
762 const unsigned offset =
763 tile_offset(x, mem_y, tiles_per_row, swizzle);
764 const unsigned s8_offset =
765 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
766
767 memcpy(dst + offset, s, dst_cpp_used);
768 s8_dst[s8_offset] = s[src_s8_pos];
769
770 s += src_cpp;
771 x += tex->image.block_size;
772 s8_x++;
773 }
774
775 src += xfer->base.stride;
776 mem_y++;
777 s8_mem_y++;
778 }
779 }
780
781 tex_staging_sys_unmap_bo(s8_tex);
782 }
783 else {
784 assert(tex->image_format == PIPE_FORMAT_S8_UINT);
785
786 for (slice = 0; slice < box->depth; slice++) {
787 unsigned mem_x, mem_y;
788 const uint8_t *src;
789 int i, j;
790
791 tex_get_box_origin(tex, xfer->base.level, slice,
792 box, &mem_x, &mem_y);
793
794 src = xfer->staging.sys + xfer->base.layer_stride * slice;
795
796 for (i = 0; i < box->height; i++) {
797 unsigned x = mem_x;
798 const uint8_t *s = src;
799
800 for (j = 0; j < box->width; j++) {
801 const unsigned offset =
802 tile_offset(x, mem_y, tiles_per_row, swizzle);
803
804 dst[offset] = *s;
805
806 s++;
807 x++;
808 }
809
810 src += xfer->base.stride;
811 mem_y++;
812 }
813 }
814 }
815
816 tex_staging_sys_unmap_bo(tex);
817
818 return true;
819 }
820
821 static bool
822 tex_staging_sys_convert_write(struct ilo_texture *tex,
823 const struct ilo_transfer *xfer)
824 {
825 const struct pipe_box *box = &xfer->base.box;
826 unsigned dst_slice_stride;
827 void *dst;
828 int slice;
829
830 dst = tex_staging_sys_map_bo(tex, false, true);
831 if (!dst)
832 return false;
833
834 dst += tex_get_box_offset(tex, xfer->base.level, box);
835
836 /* slice stride is not always available */
837 if (box->depth > 1)
838 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
839 else
840 dst_slice_stride = 0;
841
842 if (unlikely(tex->image_format == tex->base.format)) {
843 util_copy_box(dst, tex->image_format, tex->image.bo_stride,
844 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
845 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
846 0, 0, 0);
847
848 tex_staging_sys_unmap_bo(tex);
849
850 return true;
851 }
852
853 switch (tex->base.format) {
854 case PIPE_FORMAT_ETC1_RGB8:
855 assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
856
857 for (slice = 0; slice < box->depth; slice++) {
858 const void *src =
859 xfer->staging.sys + xfer->base.layer_stride * slice;
860
861 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
862 tex->image.bo_stride, src, xfer->base.stride,
863 box->width, box->height);
864
865 dst += dst_slice_stride;
866 }
867 break;
868 default:
869 assert(!"unable to convert the staging data");
870 break;
871 }
872
873 tex_staging_sys_unmap_bo(tex);
874
875 return true;
876 }
877
878 static void
879 tex_staging_sys_writeback(struct ilo_transfer *xfer)
880 {
881 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
882 bool success;
883
884 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
885 return;
886
887 switch (xfer->method) {
888 case ILO_TRANSFER_MAP_SW_CONVERT:
889 success = tex_staging_sys_convert_write(tex, xfer);
890 break;
891 case ILO_TRANSFER_MAP_SW_ZS:
892 success = tex_staging_sys_zs_write(tex, xfer);
893 break;
894 default:
895 assert(!"unknown mapping method");
896 success = false;
897 break;
898 }
899
900 if (!success)
901 ilo_err("failed to map resource for moving staging data\n");
902 }
903
904 static bool
905 tex_staging_sys_readback(struct ilo_transfer *xfer)
906 {
907 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
908 bool read_back = false, success;
909
910 /* see if we need to read the resource back */
911 if (xfer->base.usage & PIPE_TRANSFER_READ) {
912 read_back = true;
913 }
914 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
915 const unsigned discard_flags =
916 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
917
918 if (!(xfer->base.usage & discard_flags))
919 read_back = true;
920 }
921
922 if (!read_back)
923 return true;
924
925 switch (xfer->method) {
926 case ILO_TRANSFER_MAP_SW_CONVERT:
927 assert(!"no on-the-fly format conversion for mapping");
928 success = false;
929 break;
930 case ILO_TRANSFER_MAP_SW_ZS:
931 success = tex_staging_sys_zs_read(tex, xfer);
932 break;
933 default:
934 assert(!"unknown mapping method");
935 success = false;
936 break;
937 }
938
939 return success;
940 }
941
942 static void *
943 tex_map(struct ilo_transfer *xfer)
944 {
945 void *ptr;
946
947 switch (xfer->method) {
948 case ILO_TRANSFER_MAP_CPU:
949 case ILO_TRANSFER_MAP_GTT:
950 case ILO_TRANSFER_MAP_GTT_ASYNC:
951 ptr = xfer_map(xfer);
952 if (ptr) {
953 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
954
955 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
956
957 /* stride is for a block row, not a texel row */
958 xfer->base.stride = tex->image.bo_stride;
959 /* note that slice stride is not always available */
960 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
961 tex_get_slice_stride(tex, xfer->base.level) : 0;
962 }
963 break;
964 case ILO_TRANSFER_MAP_STAGING:
965 ptr = xfer_map(xfer);
966 if (ptr) {
967 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
968 xfer->base.stride = staging->image.bo_stride;
969 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
970 }
971 break;
972 case ILO_TRANSFER_MAP_SW_CONVERT:
973 case ILO_TRANSFER_MAP_SW_ZS:
974 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
975 ptr = xfer_map(xfer);
976 else
977 ptr = NULL;
978 break;
979 default:
980 assert(!"unknown mapping method");
981 ptr = NULL;
982 break;
983 }
984
985 return ptr;
986 }
987
988 static void *
989 buf_map(struct ilo_transfer *xfer)
990 {
991 void *ptr;
992
993 ptr = xfer_map(xfer);
994 if (!ptr)
995 return NULL;
996
997 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
998 ptr += xfer->base.box.x;
999
1000 xfer->base.stride = 0;
1001 xfer->base.layer_stride = 0;
1002
1003 assert(xfer->base.level == 0);
1004 assert(xfer->base.box.y == 0);
1005 assert(xfer->base.box.z == 0);
1006 assert(xfer->base.box.height == 1);
1007 assert(xfer->base.box.depth == 1);
1008
1009 return ptr;
1010 }
1011
1012 static void
1013 copy_staging_resource(struct ilo_context *ilo,
1014 struct ilo_transfer *xfer,
1015 const struct pipe_box *box)
1016 {
1017 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1018 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1019 struct pipe_box modified_box;
1020
1021 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1022
1023 if (!box) {
1024 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1025 xfer->base.box.depth, &modified_box);
1026 box = &modified_box;
1027 }
1028 else if (pad_x) {
1029 modified_box = *box;
1030 modified_box.x += pad_x;
1031 box = &modified_box;
1032 }
1033
1034 ilo_blitter_blt_copy_resource(ilo->blitter,
1035 xfer->base.resource, xfer->base.level,
1036 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1037 xfer->staging.res, 0, box);
1038 }
1039
1040 static bool
1041 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1042 {
1043 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1044
1045 if (need_submit)
1046 *need_submit = referenced;
1047
1048 if (referenced)
1049 return true;
1050
1051 return intel_bo_is_busy(bo);
1052 }
1053
1054 /**
1055 * Choose the best mapping method, depending on the transfer usage and whether
1056 * the bo is busy.
1057 */
1058 static bool
1059 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1060 {
1061 struct pipe_resource *res = xfer->base.resource;
1062 bool need_submit;
1063
1064 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1065 return false;
1066
1067 /* see if we can avoid blocking */
1068 if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
1069 bool resource_renamed;
1070
1071 if (!xfer_unblock(xfer, &resource_renamed)) {
1072 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1073 return false;
1074
1075          /* submit to make the bo really busy so that map() correctly blocks */
1076 if (need_submit)
1077 ilo_cp_submit(ilo->cp, "syncing for transfers");
1078 }
1079
1080 if (resource_renamed)
1081 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1082 }
1083
1084 return true;
1085 }
1086
1087 static void
1088 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1089 unsigned usage, int offset, int size, const void *data)
1090 {
1091 struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
1092 bool need_submit;
1093
1094 /* see if we can avoid blocking */
1095 if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
1096 bool unblocked = false;
1097
1098 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1099 ilo_resource_rename_bo(res)) {
1100 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1101 unblocked = true;
1102 }
1103 else {
1104 struct pipe_resource templ, *staging;
1105
1106 /*
1107           * allocate a staging buffer to hold the data and copy it over with a
1108           * pipelined blit
1109 */
1110 templ = *res;
1111 templ.width0 = size;
1112 templ.usage = PIPE_USAGE_STAGING;
1113 templ.bind = 0;
1114 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1115 if (staging) {
1116 const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
1117 struct pipe_box staging_box;
1118
1119 /* offset by staging_vma->bo_offset for pwrite */
1120 intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
1121 size, data);
1122
1123 u_box_1d(0, size, &staging_box);
1124 ilo_blitter_blt_copy_resource(ilo->blitter,
1125 res, 0, offset, 0, 0,
1126 staging, 0, &staging_box);
1127
1128 pipe_resource_reference(&staging, NULL);
1129
1130 return;
1131 }
1132 }
1133
1134        /* submit to make the bo really busy so that pwrite() correctly blocks */
1135 if (!unblocked && need_submit)
1136 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1137 }
1138
1139 /* offset by buf->vma.bo_offset for pwrite */
1140 intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
1141 }
1142
1143 static void
1144 ilo_transfer_flush_region(struct pipe_context *pipe,
1145 struct pipe_transfer *transfer,
1146 const struct pipe_box *box)
1147 {
1148 struct ilo_context *ilo = ilo_context(pipe);
1149 struct ilo_transfer *xfer = ilo_transfer(transfer);
1150
1151 /*
1152 * The staging resource is mapped persistently and coherently. We can copy
1153 * without unmapping.
1154 */
1155 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1156 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1157 copy_staging_resource(ilo, xfer, box);
1158 }
1159
1160 static void
1161 ilo_transfer_unmap(struct pipe_context *pipe,
1162 struct pipe_transfer *transfer)
1163 {
1164 struct ilo_context *ilo = ilo_context(pipe);
1165 struct ilo_transfer *xfer = ilo_transfer(transfer);
1166
1167 xfer_unmap(xfer);
1168
1169 switch (xfer->method) {
1170 case ILO_TRANSFER_MAP_STAGING:
1171 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1172 copy_staging_resource(ilo, xfer, NULL);
1173 pipe_resource_reference(&xfer->staging.res, NULL);
1174 break;
1175 case ILO_TRANSFER_MAP_SW_CONVERT:
1176 case ILO_TRANSFER_MAP_SW_ZS:
1177 tex_staging_sys_writeback(xfer);
1178 align_free(xfer->staging.sys);
1179 break;
1180 default:
1181 break;
1182 }
1183
1184 pipe_resource_reference(&xfer->base.resource, NULL);
1185
1186 slab_free_st(&ilo->transfer_mempool, xfer);
1187 }
1188
1189 static void *
1190 ilo_transfer_map(struct pipe_context *pipe,
1191 struct pipe_resource *res,
1192 unsigned level,
1193 unsigned usage,
1194 const struct pipe_box *box,
1195 struct pipe_transfer **transfer)
1196 {
1197 struct ilo_context *ilo = ilo_context(pipe);
1198 struct ilo_transfer *xfer;
1199 void *ptr;
1200
1201 /* note that xfer is not zero'd */
1202 xfer = slab_alloc_st(&ilo->transfer_mempool);
1203 if (!xfer) {
1204 *transfer = NULL;
1205 return NULL;
1206 }
1207
1208 xfer->base.resource = NULL;
1209 pipe_resource_reference(&xfer->base.resource, res);
1210 xfer->base.level = level;
1211 xfer->base.usage = usage;
1212 xfer->base.box = *box;
1213
1214 ilo_blit_resolve_transfer(ilo, &xfer->base);
1215
1216 if (choose_transfer_method(ilo, xfer)) {
1217 if (res->target == PIPE_BUFFER)
1218 ptr = buf_map(xfer);
1219 else
1220 ptr = tex_map(xfer);
1221 }
1222 else {
1223 ptr = NULL;
1224 }
1225
1226 if (!ptr) {
1227 pipe_resource_reference(&xfer->base.resource, NULL);
1228 slab_free_st(&ilo->transfer_mempool, xfer);
1229 *transfer = NULL;
1230 return NULL;
1231 }
1232
1233 *transfer = &xfer->base;
1234
1235 return ptr;
1236 }
1237
1238 static void ilo_buffer_subdata(struct pipe_context *pipe,
1239 struct pipe_resource *resource,
1240 unsigned usage, unsigned offset,
1241 unsigned size, const void *data)
1242 {
1243 if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
1244 u_default_buffer_subdata(pipe, resource, usage, offset, size, data);
1245 else
1246 buf_pwrite(ilo_context(pipe), resource, usage, offset, size, data);
1247 }
1248
1249 /**
1250 * Initialize transfer-related functions.
1251 */
1252 void
1253 ilo_init_transfer_functions(struct ilo_context *ilo)
1254 {
1255 ilo->base.transfer_map = ilo_transfer_map;
1256 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1257 ilo->base.transfer_unmap = ilo_transfer_unmap;
1258 ilo->base.buffer_subdata = ilo_buffer_subdata;
1259 ilo->base.texture_subdata = u_default_texture_subdata;
1260 }