2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
33 #include "ilo_blitter.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43 * for GPU to finish, but there are cases where we could avoid waiting.
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
55 * However, there are other flags to consider.
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61 * - When PIPE_TRANSFER_PERSISTENT is set, GPU may access the buffer while it
62 * is mapped. Synchronization is done by defining memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67 * GPU caches at the end of each batch buffer, CPU always sees GPU updates.
 * We could use a coherent mapping to make all persistent mappings
 * coherent.
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
/**
 * Return a transfer method suitable for the usage.  The returned method will
 * correctly block when the resource is busy.
 *
 * NOTE(review): this chunk is missing several source lines (the function's
 * return type, some braces, `break`s, and parts of the control flow); the
 * text below preserves only the visible tokens — confirm against the
 * complete file before relying on the structure.
 */
resource_get_transfer_method(struct pipe_resource *res,
                             const struct pipe_transfer *transfer,
                             enum ilo_transfer_map_method *method)
   const struct ilo_screen *is = ilo_screen(res->screen);
   const unsigned usage = transfer->usage;
   enum ilo_transfer_map_method m;

   if (res->target == PIPE_BUFFER) {
      /* NOTE(review): the buffer-specific path is missing from this chunk */
      struct ilo_texture *tex = ilo_texture(res);
      bool need_convert = false;

      /* we may need to convert on the fly */
      if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
         /* on GEN6, separate stencil is enabled only when HiZ is */
         if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
             ilo_texture_can_enable_hiz(tex, transfer->level,
                transfer->box.z, transfer->box.depth)) {
            /* W-tiled or separate-stencil surfaces need a software Z/S path */
            m = ILO_TRANSFER_MAP_SW_ZS;
      } else if (tex->image.format != tex->base.format) {
         /* pipe format differs from hardware format: convert in software */
         m = ILO_TRANSFER_MAP_SW_CONVERT;

      /* NOTE(review): the failure handling for these flags is missing here */
      if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))

      tiled = (tex->image.tiling != GEN6_TILING_NONE);

         m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
      else if (is->dev.has_llc)
         m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
      else if (usage & PIPE_TRANSFER_PERSISTENT)
         m = ILO_TRANSFER_MAP_GTT; /* for coherency */
      else if (usage & PIPE_TRANSFER_READ)
         m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */

      m = ILO_TRANSFER_MAP_GTT;
137 * Return true if usage allows the use of staging bo to avoid blocking.
140 usage_allows_staging_bo(unsigned usage
)
142 /* do we know how to write the data back to the resource? */
143 const unsigned can_writeback
= (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
|
144 PIPE_TRANSFER_DISCARD_RANGE
|
145 PIPE_TRANSFER_FLUSH_EXPLICIT
);
146 const unsigned reasons_against
= (PIPE_TRANSFER_READ
|
147 PIPE_TRANSFER_MAP_DIRECTLY
|
148 PIPE_TRANSFER_PERSISTENT
);
150 return (usage
& can_writeback
) && !(usage
& reasons_against
);
/**
 * Allocate the staging resource.  It is always linear and its size matches
 * the transfer box, with proper paddings.
 *
 * NOTE(review): several source lines are missing from this chunk (the
 * return type, some lvalues/braces, and the tail of the final assert);
 * only visible tokens are preserved below.
 */
xfer_alloc_staging_res(struct ilo_transfer *xfer)
   const struct pipe_resource *res = xfer->base.resource;
   const struct pipe_box *box = &xfer->base.box;
   struct pipe_resource templ;

   memset(&templ, 0, sizeof(templ));

   templ.format = res->format;

   if (res->target == PIPE_BUFFER) {
      templ.target = PIPE_BUFFER;
      /* NOTE(review): the `templ.width0 =` lvalue for this expression is
       * missing from this chunk */
      (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
      /* use 2D array for any texture target */
      templ.target = PIPE_TEXTURE_2D_ARRAY;
      templ.width0 = box->width;

   templ.height0 = box->height;
   templ.array_size = box->depth;
   templ.nr_samples = 1;
   templ.usage = PIPE_USAGE_STAGING;
   templ.bind = PIPE_BIND_TRANSFER_WRITE;

   if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
      /* persistent+coherent so flushed regions can be copied without
       * unmapping (see ilo_transfer_flush_region()) */
      templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                    PIPE_RESOURCE_FLAG_MAP_COHERENT;

   xfer->staging.res = res->screen->resource_create(res->screen, &templ);

   if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
      /* NOTE(review): the expected tiling value is missing from this chunk */
      assert(ilo_texture(xfer->staging.res)->image.tiling ==

   return (xfer->staging.res != NULL);
/**
 * Use an alternative transfer method or rename the resource to unblock an
 * otherwise blocking transfer.
 *
 * NOTE(review): return type, case bodies for renaming, `break`s, and the
 * final return are missing from this chunk; only visible tokens kept.
 */
xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
   struct pipe_resource *res = xfer->base.resource;
   bool unblocked = false, renamed = false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
      if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* caller does not care about synchronization: map async */
         xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
      else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
               ilo_resource_rename_bo(res)) {
         /* whole contents discardable: swap in a fresh bo (renaming) */
      else if (usage_allows_staging_bo(xfer->base.usage) &&
               xfer_alloc_staging_res(xfer)) {
         /* map a staging bo now and pipeline-copy on unmap */
         xfer->method = ILO_TRANSFER_MAP_STAGING;
   case ILO_TRANSFER_MAP_GTT_ASYNC:
   case ILO_TRANSFER_MAP_STAGING:
   /* report renaming so the state vector can be updated */
   *resource_renamed = renamed;
/**
 * Allocate the staging system buffer based on the resource format and the
 * transfer box.  NOTE(review): the rest of this comment and the function's
 * return type are missing from this chunk.
 */
xfer_alloc_staging_sys(struct ilo_transfer *xfer)
   const enum pipe_format format = xfer->base.resource->format;
   const struct pipe_box *box = &xfer->base.box;
   const unsigned alignment = 64;

   /* need to tell the world the layout */
   /* NOTE(review): the `xfer->base.stride =` lvalue is missing here */
   align(util_format_get_stride(format, box->width), alignment);
   xfer->base.layer_stride =
      util_format_get_2d_size(format, xfer->base.stride, box->height);

   /* NOTE(review): the `xfer->staging.sys =` lvalue is missing here */
   align_malloc(xfer->base.layer_stride * box->depth, alignment);

   return (xfer->staging.sys != NULL);
/**
 * Map according to the method.  The staging system buffer should have been
 * allocated if the method requires it.
 *
 * NOTE(review): return type, `break`s, and the persistent/llc condition in
 * the staging case are missing from this chunk; only visible tokens kept.
 */
xfer_map(struct ilo_transfer *xfer)
   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
      ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
            xfer->base.usage & PIPE_TRANSFER_WRITE);
   case ILO_TRANSFER_MAP_GTT:
      ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
   case ILO_TRANSFER_MAP_STAGING:
      const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
      struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);

      /*
       * We want a writable, optionally persistent and coherent, mapping
       * for a linear bo.  We can call resource_get_transfer_method(), but
       * this turns out to be fairly simple.
       */
      /* NOTE(review): the condition choosing between these two map calls is
       * missing from this chunk */
      ptr = intel_bo_map(bo, true);
      ptr = intel_bo_map_gtt(bo);

      /* buffers are staged with an alignment pad at the front */
      if (ptr && xfer->staging.res->target == PIPE_BUFFER)
         ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);

   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      /* software paths hand out the staging system buffer directly */
      ptr = xfer->staging.sys;
      assert(!"unknown mapping method");
/* Undo xfer_map(): unmap the real or staging bo depending on the method.
 * NOTE(review): return type, `break`s, and the default case are missing
 * from this chunk. */
xfer_unmap(struct ilo_transfer *xfer)
   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
   case ILO_TRANSFER_MAP_STAGING:
      intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
/* Compute the memory-space (x, y) origin of slice `box->z + slice` of the
 * transfer box at the given mip level.
 * NOTE(review): the declarations of x/y and the adjustments between the two
 * ilo_image_* calls are missing from this chunk. */
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
   ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
   ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
354 tex_get_box_offset(const struct ilo_texture
*tex
, unsigned level
,
355 const struct pipe_box
*box
)
357 unsigned mem_x
, mem_y
;
359 tex_get_box_origin(tex
, level
, 0, box
, &mem_x
, &mem_y
);
361 return ilo_image_mem_to_linear(&tex
->image
, mem_x
, mem_y
);
365 tex_get_slice_stride(const struct ilo_texture
*tex
, unsigned level
)
367 return ilo_image_get_slice_stride(&tex
->image
, level
);
/**
 * Apply address swizzling for an X-tiled offset.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, the new address bit[6] should be:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9] XOR
 *                        TiledAddr bit[10]"
 */
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /* bring bit 9 and bit 10 down to bit-6 position */
   const unsigned bit9 = (addr >> 3) & 0x40;
   const unsigned bit10 = (addr >> 4) & 0x40;

   return addr ^ bit9 ^ bit10;
}
/**
 * Apply address swizzling for a Y-tiled offset.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, The new address bit[6] becomes:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9]"
 */
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /* bring bit 9 down to bit-6 position */
   const unsigned bit9 = (addr >> 3) & 0x40;

   return addr ^ bit9;
}
/**
 * Convert a memory-space (mem_x, mem_y) position to an X-tiled byte offset.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 21: an X-major tile is
 * 4KB, 8 rows tall and 512 bytes wide, and tiles are numbered row-major.
 * So
 *
 *   tile   = (mem_y / 8) * tiles_per_row + (mem_x / 512)
 *   offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
 */
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /* row-major tile index */
   const unsigned tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   /* 4KB per tile, 512 bytes per row within the tile */
   const unsigned offset =
      tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   if (swizzle)
      return tex_tile_x_swizzle(offset);

   return offset;
}
/**
 * Convert a memory-space (mem_x, mem_y) position to a Y-tiled byte offset.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 22: a Y-major tile is
 * 4KB, 32 rows tall and 8 OWords (128 bytes) wide; tiles are numbered
 * row-major, while OWords within a tile are numbered column-major.  So
 *
 *   tile   = (mem_y / 32) * tiles_per_row + (mem_x / 128)
 *   oword  = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
 *   offset = tile * 4096 + oword * 16 + (mem_x % 16)
 */
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /* row-major tile index */
   const unsigned tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   /* column-major OWord index within the tile */
   const unsigned oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   const unsigned offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   if (swizzle)
      return tex_tile_y_swizzle(offset);

   return offset;
}
/**
 * Convert a memory-space (mem_x, mem_y) position to a W-tiled byte offset.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 23: a W-major tile is
 * 64x64, made of 8x8 blocks of 8x8-byte blocks, numbered column-major;
 * each 8x8 block recursively splits row-major into 4x4 and 2x2 blocks:
 *
 *   tile   = (mem_y / 64) * tiles_per_row + (mem_x / 64)
 *   blk8   = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
 *   blk4   = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
 *   blk2   = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
 *   blk1   = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
 *   offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
 */
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   const unsigned blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   const unsigned blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   const unsigned blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   const unsigned blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   const unsigned offset =
      tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   /* W tiles use the Y swizzling function */
   if (swizzle)
      return tex_tile_y_swizzle(offset);

   return offset;
}
/**
 * Convert a memory-space (mem_x, mem_y) position to a linear byte offset.
 *
 * For the untiled case, \p tiles_per_row is the bo row stride in bytes and
 * \p swizzle is ignored.
 */
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   (void) swizzle; /* linear layout never swizzles */

   return tiles_per_row * mem_y + mem_x;
}
/* function-pointer type shared by the four tile-offset helpers above
 * NOTE(review): the final `bool swizzle);` parameter line is missing from
 * this chunk */
typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,

/* Pick the offset function matching the texture's tiling and report the
 * per-row unit count the function expects.
 * NOTE(review): the GEN6_TILING_X/Y and GEN8_TILING_W case labels are
 * missing from this chunk; the divisors (512/128/64 bytes per tile row)
 * identify the X, Y, and W cases respectively. */
static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
   switch (tex->image.tiling) {
      assert(!"unknown tiling");
   case GEN6_TILING_NONE:
      /* linear: "tiles_per_row" is simply the byte stride */
      *tiles_per_row = tex->image.bo_stride;
      return tex_tile_none_offset;
      *tiles_per_row = tex->image.bo_stride / 512;
      return tex_tile_x_offset;
      *tiles_per_row = tex->image.bo_stride / 128;
      return tex_tile_y_offset;
      *tiles_per_row = tex->image.bo_stride / 64;
      return tex_tile_w_offset;
/* Map the texture's backing bo for the software Z/S and convert paths.
 * NOTE(review): the remaining parameters (apparently including
 * `for_read_back`), the rest of the `prefer_cpu` condition, and the return
 * statement are missing from this chunk. */
tex_staging_sys_map_bo(struct ilo_texture *tex,
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   /* CPU maps are preferred when they are coherent (LLC) or when we only
    * read the bo back */
   const bool prefer_cpu = (is->dev.has_llc || for_read_back);

   if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
      ptr = intel_bo_map(tex->image.bo, !for_read_back);
      ptr = intel_bo_map_gtt(tex->image.bo);
553 tex_staging_sys_unmap_bo(struct ilo_texture
*tex
)
555 intel_bo_unmap(tex
->image
.bo
);
/* Read a Z/S texture back into the staging system buffer, untiling and
 * (for separate stencil) re-interleaving stencil bytes texel by texel.
 * NOTE(review): this chunk is missing many source lines — the return type,
 * loop-variable declarations (slice/i/j/x/d), per-format cpp setup, braces,
 * and the plain-depth fallback path.  Only visible tokens are kept. */
tex_staging_sys_zs_read(struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool swizzle = is->dev.has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;

   src = tex_staging_sys_map_bo(tex, true, false);

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* per-texel addressing below assumes non-compressed formats */
   assert(tex->image.block_width == 1 && tex->image.block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src;
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
      /* NOTE(review): the error path unmapping tex on s8 map failure is
       * partially missing here */
      tex_staging_sys_unmap_bo(tex);

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
         /* NOTE(review): dst_cpp/dst_s8_pos/src_cpp_used assignments for
          * both format cases are missing from this chunk */
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               /* copy depth bytes, then splice in the stencil byte */
               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               x += tex->image.block_size;

            dst += xfer->base.stride;

      tex_staging_sys_unmap_bo(s8_tex);
      /* stencil-only resource */
      assert(tex->image.format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

            dst += xfer->base.stride;

   tex_staging_sys_unmap_bo(tex);
/* Write the staging system buffer back into a Z/S texture, tiling and (for
 * separate stencil) de-interleaving stencil bytes texel by texel — the
 * mirror of tex_staging_sys_zs_read().
 * NOTE(review): this chunk is missing many source lines — the return type,
 * loop-variable declarations, per-format cpp setup, braces, and parts of
 * the error paths.  Only visible tokens are kept. */
tex_staging_sys_zs_write(struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool swizzle = is->dev.has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;

   dst = tex_staging_sys_map_bo(tex, false, false);

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* per-texel addressing below assumes non-compressed formats */
   assert(tex->image.block_width == 1 && tex->image.block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
      /* NOTE(review): this unmap appears to belong to a failure path whose
       * condition is missing from this chunk */
      tex_staging_sys_unmap_bo(s8_tex);

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
         /* NOTE(review): src_cpp/src_s8_pos/dst_cpp_used assignments for
          * both format cases are missing from this chunk */
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               /* copy depth bytes, then peel off the stencil byte */
               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               x += tex->image.block_size;

            src += xfer->base.stride;

      tex_staging_sys_unmap_bo(s8_tex);
      /* stencil-only resource */
      assert(tex->image.format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

            src += xfer->base.stride;

   tex_staging_sys_unmap_bo(tex);
/* Write the staging system buffer back, converting from the pipe format to
 * the hardware image format (currently ETC1 -> RGBX8888 unpacking).
 * NOTE(review): the return type, the src declaration, the if/else around
 * the slice-stride lines, trailing util_copy_box() arguments, braces, and
 * `break`/return statements are missing from this chunk. */
tex_staging_sys_convert_write(struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;

   dst = tex_staging_sys_map_bo(tex, false, true);

   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   /* NOTE(review): the condition selecting between these two assignments is
    * missing from this chunk */
   dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   dst_slice_stride = 0;

   if (unlikely(tex->image.format == tex->base.format)) {
      /* formats happen to match: straight copy, no conversion needed */
      util_copy_box(dst, tex->image.format, tex->image.bo_stride,
            dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
            xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
      tex_staging_sys_unmap_bo(tex);

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         /* NOTE(review): the `const uint8_t *src =` lvalue for this
          * expression is missing from this chunk */
         xfer->staging.sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->image.bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;

      assert(!"unable to convert the staging data");

   tex_staging_sys_unmap_bo(tex);
/* Flush the staging system buffer back to the texture on unmap, using the
 * software conversion or Z/S path that was chosen at map time.
 * NOTE(review): the return type, the `success` declaration, early-return
 * for read-only maps, `break`s, and the failure condition guarding the
 * ilo_err() call are missing from this chunk. */
tex_staging_sys_writeback(struct ilo_transfer *xfer)
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);

   /* nothing to write back for read-only maps */
   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_convert_write(tex, xfer);
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_zs_write(tex, xfer);
      assert(!"unknown mapping method");

   ilo_err("failed to map resource for moving staging data\n");
/* Populate the staging system buffer from the texture when the transfer
 * may read existing contents.
 * NOTE(review): the return type, the `read_back = true` branch, the early
 * returns, `break`s, and the final return are missing from this chunk. */
tex_staging_sys_readback(struct ilo_transfer *xfer)
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool read_back = false, success;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      /* a write that does not discard must preserve existing texels */
      if (!(xfer->base.usage & discard_flags))

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      /* the convert path is write-only */
      assert(!"no on-the-fly format conversion for mapping");
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_zs_read(tex, xfer);
      assert(!"unknown mapping method");
/* Map a texture transfer and fill in stride/layer_stride according to the
 * chosen method.
 * NOTE(review): the return type, the `ptr` declaration, the braces around
 * the case-local declarations, `break`s, and the failure paths are missing
 * from this chunk. */
tex_map(struct ilo_transfer *xfer)
   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      ptr = xfer_map(xfer);
      const struct ilo_texture *tex = ilo_texture(xfer->base.resource);

      /* direct maps point at the box origin within the real bo */
      ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

      /* stride is for a block row, not a texel row */
      xfer->base.stride = tex->image.bo_stride;
      /* note that slice stride is not always available */
      xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
         tex_get_slice_stride(tex, xfer->base.level) : 0;

   case ILO_TRANSFER_MAP_STAGING:
      ptr = xfer_map(xfer);
      const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
      /* staging resource starts at the box origin, so no offset needed */
      xfer->base.stride = staging->image.bo_stride;
      xfer->base.layer_stride = tex_get_slice_stride(staging, 0);

   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      /* read back into the system buffer first, then hand it out */
      if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
         ptr = xfer_map(xfer);
      assert(!"unknown mapping method");
/* Map a buffer transfer; buffers have no stride and a 1x1xW box.
 * NOTE(review): the return type, the `ptr` declaration, the NULL check,
 * and the return statement are missing from this chunk. */
buf_map(struct ilo_transfer *xfer)
   ptr = xfer_map(xfer);

   /* the staging path already accounts for box.x (see xfer_map()) */
   if (xfer->method != ILO_TRANSFER_MAP_STAGING)
      ptr += xfer->base.box.x;

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   /* buffer transfers must describe a simple 1D range */
   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);
/* Blit (part of) the staging resource back into the real resource.  A NULL
 * box means the whole transfer box.
 * NOTE(review): the return type and the if/else that selects between the
 * u_box_3d() path (box == NULL) and the copy-and-shift path are missing
 * from this chunk; both bodies appear back to back below. */
copy_staging_resource(struct ilo_context *ilo,
                      struct ilo_transfer *xfer,
                      const struct pipe_box *box)
   /* staged buffers carry an alignment pad at the front (see
    * xfer_alloc_staging_res()) */
   const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
      xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
   struct pipe_box modified_box;

   assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);

   u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
            xfer->base.box.depth, &modified_box);
   box = &modified_box;

   modified_box = *box;
   modified_box.x += pad_x;
   box = &modified_box;

   ilo_blitter_blt_copy_resource(ilo->blitter,
         xfer->base.resource, xfer->base.level,
         xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
         xfer->staging.res, 0, box);
/* Return true if the bo is busy: either referenced by the unsubmitted batch
 * or still in use by the GPU.
 * NOTE(review): the return type and the `if` conditions guarding the
 * *need_submit store and the early return for referenced bos are missing
 * from this chunk. */
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
   const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);

      /* a referenced bo only becomes "really" busy after submission */
      *need_submit = referenced;

   return intel_bo_is_busy(bo);
/**
 * Choose the best mapping method, depending on the transfer usage and whether
 * the bo is busy.  NOTE(review): the rest of this comment, the return type,
 * local declarations (`need_submit`), and the early returns are missing
 * from this chunk.
 */
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
   struct pipe_resource *res = xfer->base.resource;

   if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))

   /* see if we can avoid blocking */
   if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
      bool resource_renamed;

      if (!xfer_unblock(xfer, &resource_renamed)) {
         /* must block: honor DONTBLOCK by failing instead */
         if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)

         /* submit to make bo really busy and map() correctly blocks */
         ilo_cp_submit(ilo->cp, "syncing for transfers");

      if (resource_renamed)
         ilo_state_vector_resource_renamed(&ilo->state_vector, res);
/* Write a range of a buffer, avoiding blocking on a busy bo by renaming or
 * by a pipelined staging copy when the usage allows it.
 * NOTE(review): the return type, the `need_submit` declaration, the memset
 * of templ, and the if/else structure around the staging path are missing
 * from this chunk. */
buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
           unsigned usage, int offset, int size, const void *data)
   struct ilo_buffer *buf = ilo_buffer(res);

   /* see if we can avoid blocking */
   if (is_bo_busy(ilo, buf->bo, &need_submit)) {
      bool unblocked = false;

      if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
          ilo_resource_rename_bo(res)) {
         /* whole buffer discardable: swap in a fresh bo */
         ilo_state_vector_resource_renamed(&ilo->state_vector, res);
         struct pipe_resource templ, *staging;

         /*
          * allocate a staging buffer to hold the data and pipelined copy it
          * NOTE(review): trailing words of this comment are missing
          */
         templ.width0 = size;
         templ.usage = PIPE_USAGE_STAGING;
         templ.bind = PIPE_BIND_TRANSFER_WRITE;
         staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
         struct pipe_box staging_box;

         /* write into the idle staging bo, then blit it over */
         intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);

         u_box_1d(0, size, &staging_box);
         ilo_blitter_blt_copy_resource(ilo->blitter,
               res, 0, offset, 0, 0,
               staging, 0, &staging_box);

         pipe_resource_reference(&staging, NULL);

      /* submit to make bo really busy and pwrite() correctly blocks */
      if (!unblocked && need_submit)
         ilo_cp_submit(ilo->cp, "syncing for pwrites");

   intel_bo_pwrite(buf->bo, offset, size, data);
1131 ilo_transfer_flush_region(struct pipe_context
*pipe
,
1132 struct pipe_transfer
*transfer
,
1133 const struct pipe_box
*box
)
1135 struct ilo_context
*ilo
= ilo_context(pipe
);
1136 struct ilo_transfer
*xfer
= ilo_transfer(transfer
);
1139 * The staging resource is mapped persistently and coherently. We can copy
1140 * without unmapping.
1142 if (xfer
->method
== ILO_TRANSFER_MAP_STAGING
&&
1143 (xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
))
1144 copy_staging_resource(ilo
, xfer
, box
);
/* pipe_context::transfer_unmap() hook: unmap, write staging data back, and
 * free the transfer object.
 * NOTE(review): the return type, the xfer_unmap() call, `break`s, and the
 * default case are missing from this chunk. */
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_STAGING:
      /* without FLUSH_EXPLICIT, the whole box is copied back on unmap */
      if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
         copy_staging_resource(ilo, xfer, NULL);
      pipe_resource_reference(&xfer->staging.res, NULL);
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      tex_staging_sys_writeback(xfer);
      align_free(xfer->staging.sys);

   pipe_resource_reference(&xfer->base.resource, NULL);

   util_slab_free(&ilo->transfer_mempool, xfer);
/* pipe_context::transfer_map() hook.
 * NOTE(review): the return type, the `level`/`usage` parameters, the `ptr`
 * declaration, NULL checks, and the success/failure control flow are
 * missing from this chunk. */
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;

   /* note that xfer is not zero'd */
   xfer = util_slab_alloc(&ilo->transfer_mempool);

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   /* resolve HiZ/MCS before CPU access */
   ilo_blit_resolve_transfer(ilo, &xfer->base);

   if (choose_transfer_method(ilo, xfer)) {
      if (res->target == PIPE_BUFFER)
         ptr = buf_map(xfer);
         ptr = tex_map(xfer);

      /* failure path: drop the reference and free the transfer */
      pipe_resource_reference(&xfer->base.resource, NULL);
      util_slab_free(&ilo->transfer_mempool, xfer);

   *transfer = &xfer->base;
/* pipe_context::transfer_inline_write() hook: fast pwrite path for
 * synchronized buffer writes, default path otherwise.
 * NOTE(review): the return type, the `level`/`usage`/`data`/`stride`
 * parameters, and the else between the two branches are missing from this
 * chunk. */
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *res,
                          const struct pipe_box *box,
                          unsigned layer_stride)
   if (likely(res->target == PIPE_BUFFER) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* they should specify just an offset and a size */
      assert(box->y == 0);
      assert(box->z == 0);
      assert(box->height == 1);
      assert(box->depth == 1);

      buf_pwrite(ilo_context(pipe), res,
                 usage, box->x, box->width, data);

      u_default_transfer_inline_write(pipe, res,
            level, usage, box, data, stride, layer_stride);
1254 * Initialize transfer-related functions.
1257 ilo_init_transfer_functions(struct ilo_context
*ilo
)
1259 ilo
->base
.transfer_map
= ilo_transfer_map
;
1260 ilo
->base
.transfer_flush_region
= ilo_transfer_flush_region
;
1261 ilo
->base
.transfer_unmap
= ilo_transfer_unmap
;
1262 ilo
->base
.transfer_inline_write
= ilo_transfer_inline_write
;