2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
33 #include "ilo_blitter.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43 * for GPU to finish, but there are cases where we could avoid waiting.
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
55 * However, there are other flags to consider.
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61 * - When PIPE_TRANSFER_PERSISTENT is set, GPU may access the buffer while it
62 * is mapped. Synchronization is done by defining memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67 * GPU caches at the end of each batch buffer, CPU always sees GPU updates.
68 * We could use a coherent mapping to make all persistent mappings
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
80 resource_get_transfer_method(struct pipe_resource
*res
,
81 const struct pipe_transfer
*transfer
,
82 enum ilo_transfer_map_method
*method
)
84 const struct ilo_screen
*is
= ilo_screen(res
->screen
);
85 const unsigned usage
= transfer
->usage
;
86 enum ilo_transfer_map_method m
;
89 if (res
->target
== PIPE_BUFFER
) {
92 struct ilo_texture
*tex
= ilo_texture(res
);
93 bool need_convert
= false;
95 /* we may need to convert on the fly */
96 if (tex
->image
.tiling
== GEN8_TILING_W
|| tex
->separate_s8
) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is
->dev
) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex
->image
, transfer
->level
)) {
100 m
= ILO_TRANSFER_MAP_SW_ZS
;
103 } else if (tex
->image_format
!= tex
->base
.format
) {
104 m
= ILO_TRANSFER_MAP_SW_CONVERT
;
109 if (usage
& (PIPE_TRANSFER_MAP_DIRECTLY
| PIPE_TRANSFER_PERSISTENT
))
116 tiled
= (tex
->image
.tiling
!= GEN6_TILING_NONE
);
120 m
= ILO_TRANSFER_MAP_GTT
; /* to have a linear view */
121 else if (is
->dev
.has_llc
)
122 m
= ILO_TRANSFER_MAP_CPU
; /* fast and mostly coherent */
123 else if (usage
& PIPE_TRANSFER_PERSISTENT
)
124 m
= ILO_TRANSFER_MAP_GTT
; /* for coherency */
125 else if (usage
& PIPE_TRANSFER_READ
)
126 m
= ILO_TRANSFER_MAP_CPU
; /* gtt read is too slow */
128 m
= ILO_TRANSFER_MAP_GTT
;
136 * Return true if usage allows the use of staging bo to avoid blocking.
139 usage_allows_staging_bo(unsigned usage
)
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback
= (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
|
143 PIPE_TRANSFER_DISCARD_RANGE
|
144 PIPE_TRANSFER_FLUSH_EXPLICIT
);
145 const unsigned reasons_against
= (PIPE_TRANSFER_READ
|
146 PIPE_TRANSFER_MAP_DIRECTLY
|
147 PIPE_TRANSFER_PERSISTENT
);
149 return (usage
& can_writeback
) && !(usage
& reasons_against
);
153 * Allocate the staging resource. It is always linear and its size matches
154 * the transfer box, with proper paddings.
157 xfer_alloc_staging_res(struct ilo_transfer
*xfer
)
159 const struct pipe_resource
*res
= xfer
->base
.resource
;
160 const struct pipe_box
*box
= &xfer
->base
.box
;
161 struct pipe_resource templ
;
163 memset(&templ
, 0, sizeof(templ
));
165 templ
.format
= res
->format
;
167 if (res
->target
== PIPE_BUFFER
) {
168 templ
.target
= PIPE_BUFFER
;
170 (box
->x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
) + box
->width
;
173 /* use 2D array for any texture target */
174 templ
.target
= PIPE_TEXTURE_2D_ARRAY
;
175 templ
.width0
= box
->width
;
178 templ
.height0
= box
->height
;
180 templ
.array_size
= box
->depth
;
181 templ
.nr_samples
= 1;
182 templ
.usage
= PIPE_USAGE_STAGING
;
184 if (xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
) {
185 templ
.flags
= PIPE_RESOURCE_FLAG_MAP_PERSISTENT
|
186 PIPE_RESOURCE_FLAG_MAP_COHERENT
;
189 xfer
->staging
.res
= res
->screen
->resource_create(res
->screen
, &templ
);
191 if (xfer
->staging
.res
&& xfer
->staging
.res
->target
!= PIPE_BUFFER
) {
192 assert(ilo_texture(xfer
->staging
.res
)->image
.tiling
==
196 return (xfer
->staging
.res
!= NULL
);
200 * Use an alternative transfer method or rename the resource to unblock an
201 * otherwise blocking transfer.
204 xfer_unblock(struct ilo_transfer
*xfer
, bool *resource_renamed
)
206 struct pipe_resource
*res
= xfer
->base
.resource
;
207 bool unblocked
= false, renamed
= false;
209 switch (xfer
->method
) {
210 case ILO_TRANSFER_MAP_CPU
:
211 case ILO_TRANSFER_MAP_GTT
:
212 if (xfer
->base
.usage
& PIPE_TRANSFER_UNSYNCHRONIZED
) {
213 xfer
->method
= ILO_TRANSFER_MAP_GTT_ASYNC
;
216 else if ((xfer
->base
.usage
& PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
) &&
217 ilo_resource_rename_bo(res
)) {
221 else if (usage_allows_staging_bo(xfer
->base
.usage
) &&
222 xfer_alloc_staging_res(xfer
)) {
223 xfer
->method
= ILO_TRANSFER_MAP_STAGING
;
227 case ILO_TRANSFER_MAP_GTT_ASYNC
:
228 case ILO_TRANSFER_MAP_STAGING
:
235 *resource_renamed
= renamed
;
241 * Allocate the staging system buffer based on the resource format and the
245 xfer_alloc_staging_sys(struct ilo_transfer
*xfer
)
247 const enum pipe_format format
= xfer
->base
.resource
->format
;
248 const struct pipe_box
*box
= &xfer
->base
.box
;
249 const unsigned alignment
= 64;
251 /* need to tell the world the layout */
253 align(util_format_get_stride(format
, box
->width
), alignment
);
254 xfer
->base
.layer_stride
=
255 util_format_get_2d_size(format
, xfer
->base
.stride
, box
->height
);
258 align_malloc(xfer
->base
.layer_stride
* box
->depth
, alignment
);
260 return (xfer
->staging
.sys
!= NULL
);
264 * Map according to the method. The staging system buffer should have been
265 * allocated if the method requires it.
268 xfer_map(struct ilo_transfer
*xfer
)
270 const struct ilo_vma
*vma
;
273 switch (xfer
->method
) {
274 case ILO_TRANSFER_MAP_CPU
:
275 vma
= ilo_resource_get_vma(xfer
->base
.resource
);
276 ptr
= intel_bo_map(vma
->bo
, xfer
->base
.usage
& PIPE_TRANSFER_WRITE
);
278 case ILO_TRANSFER_MAP_GTT
:
279 vma
= ilo_resource_get_vma(xfer
->base
.resource
);
280 ptr
= intel_bo_map_gtt(vma
->bo
);
282 case ILO_TRANSFER_MAP_GTT_ASYNC
:
283 vma
= ilo_resource_get_vma(xfer
->base
.resource
);
284 ptr
= intel_bo_map_gtt_async(vma
->bo
);
286 case ILO_TRANSFER_MAP_STAGING
:
288 const struct ilo_screen
*is
= ilo_screen(xfer
->staging
.res
->screen
);
290 vma
= ilo_resource_get_vma(xfer
->staging
.res
);
293 * We want a writable, optionally persistent and coherent, mapping
294 * for a linear bo. We can call resource_get_transfer_method(), but
295 * this turns out to be fairly simple.
298 ptr
= intel_bo_map(vma
->bo
, true);
300 ptr
= intel_bo_map_gtt(vma
->bo
);
302 if (ptr
&& xfer
->staging
.res
->target
== PIPE_BUFFER
)
303 ptr
+= (xfer
->base
.box
.x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
);
306 case ILO_TRANSFER_MAP_SW_CONVERT
:
307 case ILO_TRANSFER_MAP_SW_ZS
:
309 ptr
= xfer
->staging
.sys
;
312 assert(!"unknown mapping method");
319 ptr
= (void *) ((char *) ptr
+ vma
->bo_offset
);
328 xfer_unmap(struct ilo_transfer
*xfer
)
330 switch (xfer
->method
) {
331 case ILO_TRANSFER_MAP_CPU
:
332 case ILO_TRANSFER_MAP_GTT
:
333 case ILO_TRANSFER_MAP_GTT_ASYNC
:
334 intel_bo_unmap(ilo_resource_get_vma(xfer
->base
.resource
)->bo
);
336 case ILO_TRANSFER_MAP_STAGING
:
337 intel_bo_unmap(ilo_resource_get_vma(xfer
->staging
.res
)->bo
);
345 tex_get_box_origin(const struct ilo_texture
*tex
,
346 unsigned level
, unsigned slice
,
347 const struct pipe_box
*box
,
348 unsigned *mem_x
, unsigned *mem_y
)
352 ilo_image_get_slice_pos(&tex
->image
, level
, box
->z
+ slice
, &x
, &y
);
356 ilo_image_pos_to_mem(&tex
->image
, x
, y
, mem_x
, mem_y
);
360 tex_get_box_offset(const struct ilo_texture
*tex
, unsigned level
,
361 const struct pipe_box
*box
)
363 unsigned mem_x
, mem_y
;
365 tex_get_box_origin(tex
, level
, 0, box
, &mem_x
, &mem_y
);
367 return ilo_image_mem_to_linear(&tex
->image
, mem_x
, mem_y
);
/* Thin wrapper: forward to ilo_image_get_slice_stride() for tex->image. */
static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   return ilo_image_get_slice_stride(&tex->image, level);
}
/**
 * Apply address bit-6 swizzling for X-major tiling.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, the new address bit[6] should be:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9] XOR
 *                        TiledAddr bit[10]"
 */
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /* bring bits 9 and 10 down to bit 6 and fold them in */
   const unsigned flip = ((addr >> 3) ^ (addr >> 4)) & 0x40;

   return addr ^ flip;
}
/**
 * Apply address bit-6 swizzling for Y-major tiling.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, The new address bit[6] becomes:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9]"
 */
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /* bring bit 9 down to bit 6 and fold it in */
   const unsigned flip = (addr >> 3) & 0x40;

   return addr ^ flip;
}
/**
 * Compute the tiled offset of (mem_x, mem_y) in an X-majorly tiled region.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 21: an X-major tile is
 * 8 rows tall and 32 OWord columns (512 bytes) wide, tiles are numbered in
 * row-major order, and OWords within a tile are row-major too.  That works
 * out to
 *
 *   tile   = (mem_y / 8) * tiles_per_row + (mem_x / 512)
 *   offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
 */
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   const unsigned offset =
      (tile << 12) | ((mem_y & 0x7) << 9) | (mem_x & 0x1ff);

   return swizzle ? tex_tile_x_swizzle(offset) : offset;
}
/**
 * Compute the tiled offset of (mem_x, mem_y) in a Y-majorly tiled region.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 22: a Y-major tile is
 * 32 rows tall and 8 OWord columns (128 bytes) wide, tiles are numbered in
 * row-major order, but OWords within a tile are column-major.  That works
 * out to
 *
 *   tile   = (mem_y / 32) * tiles_per_row + (mem_x / 128)
 *   oword  = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
 *   offset = tile * 4096 + oword * 16 + (mem_x % 16)
 */
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   const unsigned oword = ((mem_x & 0x70) << 1) | (mem_y & 0x1f);
   const unsigned offset = (tile << 12) | (oword << 4) | (mem_x & 0xf);

   return swizzle ? tex_tile_y_swizzle(offset) : offset;
}
/**
 * Compute the tiled offset of (mem_x, mem_y) in a W-majorly tiled region.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 23: a W-major tile is
 * 8x8 8x8-blocks; tiles are numbered in row-major order while 8x8-blocks
 * within a tile are column-major.  Each 8x8-block recursively splits into
 * row-major 4x4-, 2x2-, and 1x1-blocks:
 *
 *   tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
 *   blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
 *   blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
 *   blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
 *   blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
 *
 *   offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
 */
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   const unsigned blk8 =
      (((mem_x >> 3) & 0x7) << 3) | ((mem_y >> 3) & 0x7);
   const unsigned blk4 =
      (((mem_y >> 2) & 0x1) << 1) | ((mem_x >> 2) & 0x1);
   const unsigned blk2 =
      (((mem_y >> 1) & 0x1) << 1) | ((mem_x >> 1) & 0x1);
   const unsigned blk1 =
      ((mem_y & 0x1) << 1) | (mem_x & 0x1);
   const unsigned offset =
      (tile << 12) | (blk8 << 6) | (blk4 << 4) | (blk2 << 2) | blk1;

   /* W-major addresses are swizzled the same way as Y-major ones */
   return swizzle ? tex_tile_y_swizzle(offset) : offset;
}
/**
 * Compute the linear offset of (mem_x, mem_y) when there is no tiling; here
 * tiles_per_row is simply the row stride in bytes.  The swizzle flag is
 * ignored for linear layouts.
 */
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return tiles_per_row * mem_y + mem_x;
}
513 typedef unsigned (*tex_tile_offset_func
)(unsigned mem_x
, unsigned mem_y
,
514 unsigned tiles_per_row
,
517 static tex_tile_offset_func
518 tex_tile_choose_offset_func(const struct ilo_texture
*tex
,
519 unsigned *tiles_per_row
)
521 switch (tex
->image
.tiling
) {
523 assert(!"unknown tiling");
525 case GEN6_TILING_NONE
:
526 *tiles_per_row
= tex
->image
.bo_stride
;
527 return tex_tile_none_offset
;
529 *tiles_per_row
= tex
->image
.bo_stride
/ 512;
530 return tex_tile_x_offset
;
532 *tiles_per_row
= tex
->image
.bo_stride
/ 128;
533 return tex_tile_y_offset
;
535 *tiles_per_row
= tex
->image
.bo_stride
/ 64;
536 return tex_tile_w_offset
;
541 tex_staging_sys_map_bo(struct ilo_texture
*tex
,
545 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
546 const bool prefer_cpu
= (is
->dev
.has_llc
|| for_read_back
);
549 if (prefer_cpu
&& (tex
->image
.tiling
== GEN6_TILING_NONE
||
551 ptr
= intel_bo_map(tex
->vma
.bo
, !for_read_back
);
553 ptr
= intel_bo_map_gtt(tex
->vma
.bo
);
556 ptr
= (void *) ((char *) ptr
+ tex
->vma
.bo_offset
);
/* Unmap tex->vma.bo; counterpart of tex_staging_sys_map_bo(). */
static void
tex_staging_sys_unmap_bo(struct ilo_texture *tex)
{
   intel_bo_unmap(tex->vma.bo);
}
568 tex_staging_sys_zs_read(struct ilo_texture
*tex
,
569 const struct ilo_transfer
*xfer
)
571 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
572 const bool swizzle
= is
->dev
.has_address_swizzling
;
573 const struct pipe_box
*box
= &xfer
->base
.box
;
575 tex_tile_offset_func tile_offset
;
576 unsigned tiles_per_row
;
579 src
= tex_staging_sys_map_bo(tex
, true, false);
583 tile_offset
= tex_tile_choose_offset_func(tex
, &tiles_per_row
);
585 assert(tex
->image
.block_width
== 1 && tex
->image
.block_height
== 1);
587 if (tex
->separate_s8
) {
588 struct ilo_texture
*s8_tex
= tex
->separate_s8
;
589 const uint8_t *s8_src
;
590 tex_tile_offset_func s8_tile_offset
;
591 unsigned s8_tiles_per_row
;
592 int dst_cpp
, dst_s8_pos
, src_cpp_used
;
594 s8_src
= tex_staging_sys_map_bo(s8_tex
, true, false);
596 tex_staging_sys_unmap_bo(tex
);
600 s8_tile_offset
= tex_tile_choose_offset_func(s8_tex
, &s8_tiles_per_row
);
602 if (tex
->base
.format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
) {
603 assert(tex
->image_format
== PIPE_FORMAT_Z24X8_UNORM
);
610 assert(tex
->base
.format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
611 assert(tex
->image_format
== PIPE_FORMAT_Z32_FLOAT
);
618 for (slice
= 0; slice
< box
->depth
; slice
++) {
619 unsigned mem_x
, mem_y
, s8_mem_x
, s8_mem_y
;
623 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
624 box
, &mem_x
, &mem_y
);
625 tex_get_box_origin(s8_tex
, xfer
->base
.level
, slice
,
626 box
, &s8_mem_x
, &s8_mem_y
);
628 dst
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
630 for (i
= 0; i
< box
->height
; i
++) {
631 unsigned x
= mem_x
, s8_x
= s8_mem_x
;
634 for (j
= 0; j
< box
->width
; j
++) {
635 const unsigned offset
=
636 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
637 const unsigned s8_offset
=
638 s8_tile_offset(s8_x
, s8_mem_y
, s8_tiles_per_row
, swizzle
);
640 memcpy(d
, src
+ offset
, src_cpp_used
);
641 d
[dst_s8_pos
] = s8_src
[s8_offset
];
644 x
+= tex
->image
.block_size
;
648 dst
+= xfer
->base
.stride
;
654 tex_staging_sys_unmap_bo(s8_tex
);
657 assert(tex
->image_format
== PIPE_FORMAT_S8_UINT
);
659 for (slice
= 0; slice
< box
->depth
; slice
++) {
660 unsigned mem_x
, mem_y
;
664 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
665 box
, &mem_x
, &mem_y
);
667 dst
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
669 for (i
= 0; i
< box
->height
; i
++) {
673 for (j
= 0; j
< box
->width
; j
++) {
674 const unsigned offset
=
675 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
683 dst
+= xfer
->base
.stride
;
689 tex_staging_sys_unmap_bo(tex
);
695 tex_staging_sys_zs_write(struct ilo_texture
*tex
,
696 const struct ilo_transfer
*xfer
)
698 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
699 const bool swizzle
= is
->dev
.has_address_swizzling
;
700 const struct pipe_box
*box
= &xfer
->base
.box
;
702 tex_tile_offset_func tile_offset
;
703 unsigned tiles_per_row
;
706 dst
= tex_staging_sys_map_bo(tex
, false, false);
710 tile_offset
= tex_tile_choose_offset_func(tex
, &tiles_per_row
);
712 assert(tex
->image
.block_width
== 1 && tex
->image
.block_height
== 1);
714 if (tex
->separate_s8
) {
715 struct ilo_texture
*s8_tex
= tex
->separate_s8
;
717 tex_tile_offset_func s8_tile_offset
;
718 unsigned s8_tiles_per_row
;
719 int src_cpp
, src_s8_pos
, dst_cpp_used
;
721 s8_dst
= tex_staging_sys_map_bo(s8_tex
, false, false);
723 tex_staging_sys_unmap_bo(s8_tex
);
727 s8_tile_offset
= tex_tile_choose_offset_func(s8_tex
, &s8_tiles_per_row
);
729 if (tex
->base
.format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
) {
730 assert(tex
->image_format
== PIPE_FORMAT_Z24X8_UNORM
);
737 assert(tex
->base
.format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
738 assert(tex
->image_format
== PIPE_FORMAT_Z32_FLOAT
);
745 for (slice
= 0; slice
< box
->depth
; slice
++) {
746 unsigned mem_x
, mem_y
, s8_mem_x
, s8_mem_y
;
750 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
751 box
, &mem_x
, &mem_y
);
752 tex_get_box_origin(s8_tex
, xfer
->base
.level
, slice
,
753 box
, &s8_mem_x
, &s8_mem_y
);
755 src
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
757 for (i
= 0; i
< box
->height
; i
++) {
758 unsigned x
= mem_x
, s8_x
= s8_mem_x
;
759 const uint8_t *s
= src
;
761 for (j
= 0; j
< box
->width
; j
++) {
762 const unsigned offset
=
763 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
764 const unsigned s8_offset
=
765 s8_tile_offset(s8_x
, s8_mem_y
, s8_tiles_per_row
, swizzle
);
767 memcpy(dst
+ offset
, s
, dst_cpp_used
);
768 s8_dst
[s8_offset
] = s
[src_s8_pos
];
771 x
+= tex
->image
.block_size
;
775 src
+= xfer
->base
.stride
;
781 tex_staging_sys_unmap_bo(s8_tex
);
784 assert(tex
->image_format
== PIPE_FORMAT_S8_UINT
);
786 for (slice
= 0; slice
< box
->depth
; slice
++) {
787 unsigned mem_x
, mem_y
;
791 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
792 box
, &mem_x
, &mem_y
);
794 src
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
796 for (i
= 0; i
< box
->height
; i
++) {
798 const uint8_t *s
= src
;
800 for (j
= 0; j
< box
->width
; j
++) {
801 const unsigned offset
=
802 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
810 src
+= xfer
->base
.stride
;
816 tex_staging_sys_unmap_bo(tex
);
822 tex_staging_sys_convert_write(struct ilo_texture
*tex
,
823 const struct ilo_transfer
*xfer
)
825 const struct pipe_box
*box
= &xfer
->base
.box
;
826 unsigned dst_slice_stride
;
830 dst
= tex_staging_sys_map_bo(tex
, false, true);
834 dst
+= tex_get_box_offset(tex
, xfer
->base
.level
, box
);
836 /* slice stride is not always available */
838 dst_slice_stride
= tex_get_slice_stride(tex
, xfer
->base
.level
);
840 dst_slice_stride
= 0;
842 if (unlikely(tex
->image_format
== tex
->base
.format
)) {
843 util_copy_box(dst
, tex
->image_format
, tex
->image
.bo_stride
,
844 dst_slice_stride
, 0, 0, 0, box
->width
, box
->height
, box
->depth
,
845 xfer
->staging
.sys
, xfer
->base
.stride
, xfer
->base
.layer_stride
,
848 tex_staging_sys_unmap_bo(tex
);
853 switch (tex
->base
.format
) {
854 case PIPE_FORMAT_ETC1_RGB8
:
855 assert(tex
->image_format
== PIPE_FORMAT_R8G8B8X8_UNORM
);
857 for (slice
= 0; slice
< box
->depth
; slice
++) {
859 xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
861 util_format_etc1_rgb8_unpack_rgba_8unorm(dst
,
862 tex
->image
.bo_stride
, src
, xfer
->base
.stride
,
863 box
->width
, box
->height
);
865 dst
+= dst_slice_stride
;
869 assert(!"unable to convert the staging data");
873 tex_staging_sys_unmap_bo(tex
);
879 tex_staging_sys_writeback(struct ilo_transfer
*xfer
)
881 struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
884 if (!(xfer
->base
.usage
& PIPE_TRANSFER_WRITE
))
887 switch (xfer
->method
) {
888 case ILO_TRANSFER_MAP_SW_CONVERT
:
889 success
= tex_staging_sys_convert_write(tex
, xfer
);
891 case ILO_TRANSFER_MAP_SW_ZS
:
892 success
= tex_staging_sys_zs_write(tex
, xfer
);
895 assert(!"unknown mapping method");
901 ilo_err("failed to map resource for moving staging data\n");
905 tex_staging_sys_readback(struct ilo_transfer
*xfer
)
907 struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
908 bool read_back
= false, success
;
910 /* see if we need to read the resource back */
911 if (xfer
->base
.usage
& PIPE_TRANSFER_READ
) {
914 else if (xfer
->base
.usage
& PIPE_TRANSFER_WRITE
) {
915 const unsigned discard_flags
=
916 (PIPE_TRANSFER_DISCARD_RANGE
| PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
);
918 if (!(xfer
->base
.usage
& discard_flags
))
925 switch (xfer
->method
) {
926 case ILO_TRANSFER_MAP_SW_CONVERT
:
927 assert(!"no on-the-fly format conversion for mapping");
930 case ILO_TRANSFER_MAP_SW_ZS
:
931 success
= tex_staging_sys_zs_read(tex
, xfer
);
934 assert(!"unknown mapping method");
943 tex_map(struct ilo_transfer
*xfer
)
947 switch (xfer
->method
) {
948 case ILO_TRANSFER_MAP_CPU
:
949 case ILO_TRANSFER_MAP_GTT
:
950 case ILO_TRANSFER_MAP_GTT_ASYNC
:
951 ptr
= xfer_map(xfer
);
953 const struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
955 ptr
+= tex_get_box_offset(tex
, xfer
->base
.level
, &xfer
->base
.box
);
957 /* stride is for a block row, not a texel row */
958 xfer
->base
.stride
= tex
->image
.bo_stride
;
959 /* note that slice stride is not always available */
960 xfer
->base
.layer_stride
= (xfer
->base
.box
.depth
> 1) ?
961 tex_get_slice_stride(tex
, xfer
->base
.level
) : 0;
964 case ILO_TRANSFER_MAP_STAGING
:
965 ptr
= xfer_map(xfer
);
967 const struct ilo_texture
*staging
= ilo_texture(xfer
->staging
.res
);
968 xfer
->base
.stride
= staging
->image
.bo_stride
;
969 xfer
->base
.layer_stride
= tex_get_slice_stride(staging
, 0);
972 case ILO_TRANSFER_MAP_SW_CONVERT
:
973 case ILO_TRANSFER_MAP_SW_ZS
:
974 if (xfer_alloc_staging_sys(xfer
) && tex_staging_sys_readback(xfer
))
975 ptr
= xfer_map(xfer
);
980 assert(!"unknown mapping method");
989 buf_map(struct ilo_transfer
*xfer
)
993 ptr
= xfer_map(xfer
);
997 if (xfer
->method
!= ILO_TRANSFER_MAP_STAGING
)
998 ptr
+= xfer
->base
.box
.x
;
1000 xfer
->base
.stride
= 0;
1001 xfer
->base
.layer_stride
= 0;
1003 assert(xfer
->base
.level
== 0);
1004 assert(xfer
->base
.box
.y
== 0);
1005 assert(xfer
->base
.box
.z
== 0);
1006 assert(xfer
->base
.box
.height
== 1);
1007 assert(xfer
->base
.box
.depth
== 1);
1013 copy_staging_resource(struct ilo_context
*ilo
,
1014 struct ilo_transfer
*xfer
,
1015 const struct pipe_box
*box
)
1017 const unsigned pad_x
= (xfer
->staging
.res
->target
== PIPE_BUFFER
) ?
1018 xfer
->base
.box
.x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
: 0;
1019 struct pipe_box modified_box
;
1021 assert(xfer
->method
== ILO_TRANSFER_MAP_STAGING
&& xfer
->staging
.res
);
1024 u_box_3d(pad_x
, 0, 0, xfer
->base
.box
.width
, xfer
->base
.box
.height
,
1025 xfer
->base
.box
.depth
, &modified_box
);
1026 box
= &modified_box
;
1029 modified_box
= *box
;
1030 modified_box
.x
+= pad_x
;
1031 box
= &modified_box
;
1034 ilo_blitter_blt_copy_resource(ilo
->blitter
,
1035 xfer
->base
.resource
, xfer
->base
.level
,
1036 xfer
->base
.box
.x
, xfer
->base
.box
.y
, xfer
->base
.box
.z
,
1037 xfer
->staging
.res
, 0, box
);
1041 is_bo_busy(struct ilo_context
*ilo
, struct intel_bo
*bo
, bool *need_submit
)
1043 const bool referenced
= ilo_builder_has_reloc(&ilo
->cp
->builder
, bo
);
1046 *need_submit
= referenced
;
1051 return intel_bo_is_busy(bo
);
1055 * Choose the best mapping method, depending on the transfer usage and whether
1059 choose_transfer_method(struct ilo_context
*ilo
, struct ilo_transfer
*xfer
)
1061 struct pipe_resource
*res
= xfer
->base
.resource
;
1064 if (!resource_get_transfer_method(res
, &xfer
->base
, &xfer
->method
))
1067 /* see if we can avoid blocking */
1068 if (is_bo_busy(ilo
, ilo_resource_get_vma(res
)->bo
, &need_submit
)) {
1069 bool resource_renamed
;
1071 if (!xfer_unblock(xfer
, &resource_renamed
)) {
1072 if (xfer
->base
.usage
& PIPE_TRANSFER_DONTBLOCK
)
1075 /* submit to make bo really busy and map() correctly blocks */
1077 ilo_cp_submit(ilo
->cp
, "syncing for transfers");
1080 if (resource_renamed
)
1081 ilo_state_vector_resource_renamed(&ilo
->state_vector
, res
);
1088 buf_pwrite(struct ilo_context
*ilo
, struct pipe_resource
*res
,
1089 unsigned usage
, int offset
, int size
, const void *data
)
1091 struct ilo_buffer_resource
*buf
= ilo_buffer_resource(res
);
1094 /* see if we can avoid blocking */
1095 if (is_bo_busy(ilo
, buf
->vma
.bo
, &need_submit
)) {
1096 bool unblocked
= false;
1098 if ((usage
& PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
) &&
1099 ilo_resource_rename_bo(res
)) {
1100 ilo_state_vector_resource_renamed(&ilo
->state_vector
, res
);
1104 struct pipe_resource templ
, *staging
;
1107 * allocate a staging buffer to hold the data and pipelined copy it
1111 templ
.width0
= size
;
1112 templ
.usage
= PIPE_USAGE_STAGING
;
1114 staging
= ilo
->base
.screen
->resource_create(ilo
->base
.screen
, &templ
);
1116 const struct ilo_vma
*staging_vma
= ilo_resource_get_vma(staging
);
1117 struct pipe_box staging_box
;
1119 /* offset by staging_vma->bo_offset for pwrite */
1120 intel_bo_pwrite(staging_vma
->bo
, staging_vma
->bo_offset
,
1123 u_box_1d(0, size
, &staging_box
);
1124 ilo_blitter_blt_copy_resource(ilo
->blitter
,
1125 res
, 0, offset
, 0, 0,
1126 staging
, 0, &staging_box
);
1128 pipe_resource_reference(&staging
, NULL
);
1134 /* submit to make bo really busy and pwrite() correctly blocks */
1135 if (!unblocked
&& need_submit
)
1136 ilo_cp_submit(ilo
->cp
, "syncing for pwrites");
1139 /* offset by buf->vma.bo_offset for pwrite */
1140 intel_bo_pwrite(buf
->vma
.bo
, buf
->vma
.bo_offset
+ offset
, size
, data
);
/* pipe_context::transfer_flush_region hook */
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   /*
    * The staging resource is mapped persistently and coherently.  We can copy
    * without unmapping.
    */
   if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
       (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
      copy_staging_resource(ilo, xfer, box);
}
1161 ilo_transfer_unmap(struct pipe_context
*pipe
,
1162 struct pipe_transfer
*transfer
)
1164 struct ilo_context
*ilo
= ilo_context(pipe
);
1165 struct ilo_transfer
*xfer
= ilo_transfer(transfer
);
1169 switch (xfer
->method
) {
1170 case ILO_TRANSFER_MAP_STAGING
:
1171 if (!(xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
))
1172 copy_staging_resource(ilo
, xfer
, NULL
);
1173 pipe_resource_reference(&xfer
->staging
.res
, NULL
);
1175 case ILO_TRANSFER_MAP_SW_CONVERT
:
1176 case ILO_TRANSFER_MAP_SW_ZS
:
1177 tex_staging_sys_writeback(xfer
);
1178 align_free(xfer
->staging
.sys
);
1184 pipe_resource_reference(&xfer
->base
.resource
, NULL
);
1186 slab_free_st(&ilo
->transfer_mempool
, xfer
);
1190 ilo_transfer_map(struct pipe_context
*pipe
,
1191 struct pipe_resource
*res
,
1194 const struct pipe_box
*box
,
1195 struct pipe_transfer
**transfer
)
1197 struct ilo_context
*ilo
= ilo_context(pipe
);
1198 struct ilo_transfer
*xfer
;
1201 /* note that xfer is not zero'd */
1202 xfer
= slab_alloc_st(&ilo
->transfer_mempool
);
1208 xfer
->base
.resource
= NULL
;
1209 pipe_resource_reference(&xfer
->base
.resource
, res
);
1210 xfer
->base
.level
= level
;
1211 xfer
->base
.usage
= usage
;
1212 xfer
->base
.box
= *box
;
1214 ilo_blit_resolve_transfer(ilo
, &xfer
->base
);
1216 if (choose_transfer_method(ilo
, xfer
)) {
1217 if (res
->target
== PIPE_BUFFER
)
1218 ptr
= buf_map(xfer
);
1220 ptr
= tex_map(xfer
);
1227 pipe_resource_reference(&xfer
->base
.resource
, NULL
);
1228 slab_free_st(&ilo
->transfer_mempool
, xfer
);
1233 *transfer
= &xfer
->base
;
/* pipe_context::buffer_subdata hook */
static void ilo_buffer_subdata(struct pipe_context *pipe,
                               struct pipe_resource *resource,
                               unsigned usage, unsigned offset,
                               unsigned size, const void *data)
{
   /* unsynchronized writes need no blocking-avoidance; use the generic path */
   if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
      u_default_buffer_subdata(pipe, resource, usage, offset, size, data);
   else
      buf_pwrite(ilo_context(pipe), resource, usage, offset, size, data);
}
/**
 * Initialize transfer-related functions.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.buffer_subdata = ilo_buffer_subdata;
   /* no driver-specific texture upload path; fall back to the util helper */
   ilo->base.texture_subdata = u_default_texture_subdata;
}