/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_format_etc.h"

#include "ilo_blit.h"
#include "ilo_blitter.h"
#include "ilo_cp.h"
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_state.h"
#include "ilo_transfer.h"
/*
 * For buffers that are not busy, we want to map/unmap them directly.  For
 * those that are busy, we have to worry about synchronization.  We could
 * wait for the GPU to finish, but there are cases where waiting can be
 * avoided:
 *
 *  - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
 *    buffer can be discarded.  We can replace the backing bo by a new one
 *    of the same size (renaming).
 *  - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
 *    range can be discarded.  We can allocate and map a staging bo on
 *    mapping, and (pipelined-)copy it over to the real bo on unmapping.
 *  - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
 *    flushed regions need to be written.  We can still allocate and map a
 *    staging bo, but should copy only the flushed regions over.
 *
 * However, there are other flags to consider.
 *
 *  - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry
 *    about synchronization at all on mapping.
 *  - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
 *  - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to
 *    block.
 *  - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer
 *    while it is mapped.  Synchronization is done by defining memory
 *    barriers, explicitly via memory_barrier() or implicitly via
 *    transfer_flush_region(), as well as GPU fences.
 *  - When PIPE_TRANSFER_COHERENT is set, updates by either the CPU or the
 *    GPU should be made visible to the other side immediately.  Since the
 *    kernel flushes GPU caches at the end of each batch buffer, the CPU
 *    always sees GPU updates.  We could use a coherent mapping to make all
 *    persistent mappings coherent.
 *
 * These also apply to textures, except that we may additionally need to do
 * format conversion or tiling/untiling.
 */
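/*
 * As an illustration of the rules above: for a busy buffer, a map with
 * PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE can proceed immediately after
 * renaming the backing bo, a write with PIPE_TRANSFER_DISCARD_RANGE can go
 * through a staging bo and a pipelined copy, while a plain
 * PIPE_TRANSFER_READ map has to wait for the GPU unless
 * PIPE_TRANSFER_UNSYNCHRONIZED is also set.
 */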
/**
 * Return a transfer method suitable for the usage.  The returned method
 * will correctly block when the resource is busy.
 */
static bool
resource_get_transfer_method(struct pipe_resource *res,
                             const struct pipe_transfer *transfer,
                             enum ilo_transfer_map_method *method)
{
   const struct ilo_screen *is = ilo_screen(res->screen);
   const unsigned usage = transfer->usage;
   enum ilo_transfer_map_method m;
   bool tiled;

   if (res->target == PIPE_BUFFER) {
      tiled = false;
   } else {
      struct ilo_texture *tex = ilo_texture(res);
      bool need_convert = false;

      /* we may need to convert on the fly */
      if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
         /* on GEN6, separate stencil is enabled only when HiZ is */
         if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
             ilo_image_can_enable_aux(&tex->image, transfer->level)) {
            m = ILO_TRANSFER_MAP_SW_ZS;
            need_convert = true;
         }
      } else if (tex->image_format != tex->base.format) {
         m = ILO_TRANSFER_MAP_SW_CONVERT;
         need_convert = true;
      }

      if (need_convert) {
         /* no direct mapping or persistent mapping with a sw conversion */
         if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
            return false;

         *method = m;
         return true;
      }

      tiled = (tex->image.tiling != GEN6_TILING_NONE);
   }

   if (tiled)
      m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
   else if (is->dev.has_llc)
      m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
   else if (usage & PIPE_TRANSFER_PERSISTENT)
      m = ILO_TRANSFER_MAP_GTT; /* for coherency */
   else if (usage & PIPE_TRANSFER_READ)
      m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
   else
      m = ILO_TRANSFER_MAP_GTT;

   *method = m;

   return true;
}
/**
 * Return true if usage allows the use of staging bo to avoid blocking.
 */
static bool
usage_allows_staging_bo(unsigned usage)
{
   /* do we know how to write the data back to the resource? */
   const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
                                   PIPE_TRANSFER_DISCARD_RANGE |
                                   PIPE_TRANSFER_FLUSH_EXPLICIT);
   const unsigned reasons_against = (PIPE_TRANSFER_READ |
                                     PIPE_TRANSFER_MAP_DIRECTLY |
                                     PIPE_TRANSFER_PERSISTENT);

   return (usage & can_writeback) && !(usage & reasons_against);
}
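/*
 * For example, (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE) allows
 * a staging bo, but adding PIPE_TRANSFER_READ does not: there would be no
 * way to populate the staging bo with the resource's current contents.
 */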
/**
 * Allocate the staging resource.  It is always linear and its size matches
 * the transfer box, with proper paddings.
 */
static bool
xfer_alloc_staging_res(struct ilo_transfer *xfer)
{
   const struct pipe_resource *res = xfer->base.resource;
   const struct pipe_box *box = &xfer->base.box;
   struct pipe_resource templ;

   memset(&templ, 0, sizeof(templ));

   templ.format = res->format;

   if (res->target == PIPE_BUFFER) {
      templ.target = PIPE_BUFFER;
      templ.width0 =
         (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
   }
   else {
      /* use 2D array for any texture target */
      templ.target = PIPE_TEXTURE_2D_ARRAY;
      templ.width0 = box->width;
   }

   templ.height0 = box->height;
   templ.depth0 = 1;
   templ.array_size = box->depth;
   templ.nr_samples = 1;
   templ.usage = PIPE_USAGE_STAGING;
   templ.bind = PIPE_BIND_TRANSFER_WRITE;

   if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
      templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                    PIPE_RESOURCE_FLAG_MAP_COHERENT;
   }

   xfer->staging.res = res->screen->resource_create(res->screen, &templ);

   if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
      assert(ilo_texture(xfer->staging.res)->image.tiling ==
             GEN6_TILING_NONE);
   }

   return (xfer->staging.res != NULL);
}
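/*
 * Illustration: for a buffer transfer with box.x == 100 and an alignment
 * of 64 (the actual value of ILO_TRANSFER_MAP_BUFFER_ALIGNMENT lives in
 * ilo_transfer.h; 64 is only assumed here), the staging buffer is
 * (100 % 64) + box.width == 36 + box.width bytes wide, so the mapping
 * keeps the same sub-alignment offset as the real bo and the copy back
 * needs no shifting.
 */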
/**
 * Use an alternative transfer method or rename the resource to unblock an
 * otherwise blocking transfer.
 */
static bool
xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
{
   struct pipe_resource *res = xfer->base.resource;
   bool unblocked = false, renamed = false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
      if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
         unblocked = true;
      }
      else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
               ilo_resource_rename_bo(res)) {
         renamed = true;
         unblocked = true;
      }
      else if (usage_allows_staging_bo(xfer->base.usage) &&
               xfer_alloc_staging_res(xfer)) {
         xfer->method = ILO_TRANSFER_MAP_STAGING;
         unblocked = true;
      }
      break;
   case ILO_TRANSFER_MAP_GTT_ASYNC:
   case ILO_TRANSFER_MAP_STAGING:
      unblocked = true;
      break;
   default:
      break;
   }

   *resource_renamed = renamed;

   return unblocked;
}
/**
 * Allocate the staging system buffer based on the resource format and the
 * transfer box.
 */
static bool
xfer_alloc_staging_sys(struct ilo_transfer *xfer)
{
   const enum pipe_format format = xfer->base.resource->format;
   const struct pipe_box *box = &xfer->base.box;
   const unsigned alignment = 64;

   /* need to tell the world the layout */
   xfer->base.stride =
      align(util_format_get_stride(format, box->width), alignment);
   xfer->base.layer_stride =
      util_format_get_2d_size(format, xfer->base.stride, box->height);

   xfer->staging.sys =
      align_malloc(xfer->base.layer_stride * box->depth, alignment);

   return (xfer->staging.sys != NULL);
}
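/*
 * Illustration: a 100x7x3 box of PIPE_FORMAT_B8G8R8A8_UNORM (4 bytes per
 * texel) gives stride = align(400, 64) = 448, layer_stride = 448 * 7 =
 * 3136, and an allocation of 3136 * 3 = 9408 bytes.
 */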
/**
 * Map according to the method.  The staging system buffer should have been
 * allocated if the method requires it.
 */
static void *
xfer_map(struct ilo_transfer *xfer)
{
   const struct ilo_vma *vma;
   void *ptr;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
      vma = ilo_resource_get_vma(xfer->base.resource);
      ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
      break;
   case ILO_TRANSFER_MAP_GTT:
      vma = ilo_resource_get_vma(xfer->base.resource);
      ptr = intel_bo_map_gtt(vma->bo);
      break;
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      vma = ilo_resource_get_vma(xfer->base.resource);
      ptr = intel_bo_map_gtt_async(vma->bo);
      break;
   case ILO_TRANSFER_MAP_STAGING:
      {
         const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);

         vma = ilo_resource_get_vma(xfer->staging.res);

         /*
          * We want a writable, optionally persistent and coherent, mapping
          * for a linear bo.  We can call resource_get_transfer_method(),
          * but this turns out to be fairly simple.
          */
         if (is->dev.has_llc)
            ptr = intel_bo_map(vma->bo, true);
         else
            ptr = intel_bo_map_gtt(vma->bo);

         if (ptr && xfer->staging.res->target == PIPE_BUFFER)
            ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
      }
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      vma = NULL;
      ptr = xfer->staging.sys;
      break;
   default:
      assert(!"unknown mapping method");
      vma = NULL;
      ptr = NULL;
      break;
   }

   if (ptr && vma)
      ptr = (void *) ((char *) ptr + vma->bo_offset);

   return ptr;
}
static void
xfer_unmap(struct ilo_transfer *xfer)
{
   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
      break;
   case ILO_TRANSFER_MAP_STAGING:
      intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
      break;
   default:
      break;
   }
}
static void
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
{
   unsigned x, y;

   ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
   x += box->x;
   y += box->y;

   ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
}
static unsigned
tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
                   const struct pipe_box *box)
{
   unsigned mem_x, mem_y;

   tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);

   return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
}
static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   return ilo_image_get_slice_stride(&tex->image, level);
}
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *     "As shown in the tiling algorithm, the new address bit[6] should
    *      be:
    *
    *        Address bit[6] <= TiledAddr bit[6] XOR
    *                          TiledAddr bit[9] XOR
    *                          TiledAddr bit[10]"
    */
   return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
}
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *     "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *        Address bit[6] <= TiledAddr bit[6] XOR
    *                          TiledAddr bit[9]"
    */
   return addr ^ ((addr >> 3) & 0x40);
}
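/*
 * Worked example for the two swizzle functions above: for the tiled offset
 * 0x200 (bit 9 set, bits 6 and 10 clear), bit[9] XOR bit[10] is 1, so
 * tex_tile_x_swizzle(0x200) flips bit 6 and returns 0x240;
 * tex_tile_y_swizzle(0x200) XORs bit 9 into bit 6 and also returns 0x240.
 */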
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting
    * from zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   unsigned tile, offset;

   tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}
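/*
 * Worked example for tex_tile_x_offset() above: with mem_x = 520,
 * mem_y = 10, and tiles_per_row = 4, tile = (10 / 8) * 4 + (520 / 512) = 5
 * and offset = 5 * 4096 + (10 % 8) * 512 + (520 % 512) = 21512, which is
 * exactly tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff).
 */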
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   unsigned tile, oword, offset;

   tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
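/*
 * Worked example for tex_tile_y_offset() above: with mem_x = 50,
 * mem_y = 40, and tiles_per_row = 4, tile = (40 / 32) * 4 + (50 / 128) = 4,
 * oword = ((50 % 128) / 16) * 32 + (40 % 32) = 104, and
 * offset = 4 * 4096 + 104 * 16 + (50 % 16) = 18050.
 */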
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.
    * Each 4x4-block is further divided into 4 2x2-blocks, also in
    * row-major order.  The numbers of the sub-blocks are
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   unsigned tile, blk8, blk4, blk2, blk1, offset;

   tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   /* W-major tiles use the same bit-6 address swizzling as Y-major tiles */
   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
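/*
 * Worked example for tex_tile_w_offset() above: with mem_x = 9, mem_y = 70,
 * and tiles_per_row = 2, tile = (70 / 64) * 2 + (9 / 64) = 2, blk8 = 8,
 * blk4 = 2, blk2 = 2, blk1 = 1, so
 * offset = 2 * 4096 + 8 * 64 + 2 * 16 + 2 * 4 + 1 = 8745.
 */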
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return mem_y * tiles_per_row + mem_x;
}
typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,
                                         bool swizzle);
static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
{
   switch (tex->image.tiling) {
   default:
      assert(!"unknown tiling");
      /* fall through */
   case GEN6_TILING_NONE:
      *tiles_per_row = tex->image.bo_stride;
      return tex_tile_none_offset;
   case GEN6_TILING_X:
      *tiles_per_row = tex->image.bo_stride / 512;
      return tex_tile_x_offset;
   case GEN6_TILING_Y:
      *tiles_per_row = tex->image.bo_stride / 128;
      return tex_tile_y_offset;
   case GEN8_TILING_W:
      *tiles_per_row = tex->image.bo_stride / 64;
      return tex_tile_w_offset;
   }
}
static void *
tex_staging_sys_map_bo(struct ilo_texture *tex,
                       bool for_read_back,
                       bool linear_view)
{
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool prefer_cpu = (is->dev.has_llc || for_read_back);
   void *ptr;

   if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
                      !linear_view))
      ptr = intel_bo_map(tex->vma.bo, !for_read_back);
   else
      ptr = intel_bo_map_gtt(tex->vma.bo);

   if (ptr)
      ptr = (void *) ((char *) ptr + tex->vma.bo_offset);

   return ptr;
}
static void
tex_staging_sys_unmap_bo(struct ilo_texture *tex)
{
   intel_bo_unmap(tex->vma.bo);
}
static bool
tex_staging_sys_zs_read(struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool swizzle = is->dev.has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src;
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   src = tex_staging_sys_map_bo(tex, true, false);
   if (!src)
      return false;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->image.block_width == 1 && tex->image.block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src;
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
      if (!s8_src) {
         tex_staging_sys_unmap_bo(tex);
         return false;
      }

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);

         /* 4 bytes per pixel, stencil interleaved into the last byte */
         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);

         /* 8 bytes per pixel, stencil interleaved into the fifth byte */
         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->image.block_size;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }

      tex_staging_sys_unmap_bo(s8_tex);
   }
   else {
      assert(tex->image_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }

   tex_staging_sys_unmap_bo(tex);

   return true;
}
static bool
tex_staging_sys_zs_write(struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const struct ilo_screen *is = ilo_screen(tex->base.screen);
   const bool swizzle = is->dev.has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst;
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   dst = tex_staging_sys_map_bo(tex, false, false);
   if (!dst)
      return false;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->image.block_width == 1 && tex->image.block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst;
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
      if (!s8_dst) {
         /* unmap the depth bo mapped above before bailing */
         tex_staging_sys_unmap_bo(tex);
         return false;
      }

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);

         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);

         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->image.block_size;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }

      tex_staging_sys_unmap_bo(s8_tex);
   }
   else {
      assert(tex->image_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging.sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }

   tex_staging_sys_unmap_bo(tex);

   return true;
}
static bool
tex_staging_sys_convert_write(struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = tex_staging_sys_map_bo(tex, false, true);
   if (!dst)
      return false;

   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   if (unlikely(tex->image_format == tex->base.format)) {
      util_copy_box(dst, tex->image_format, tex->image.bo_stride,
            dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
            xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
            0, 0, 0);

      tex_staging_sys_unmap_bo(tex);

      return true;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging.sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->image.bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }

   tex_staging_sys_unmap_bo(tex);

   return true;
}
static void
tex_staging_sys_writeback(struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool success;

   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
      return;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_convert_write(tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_zs_write(tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   if (!success)
      ilo_err("failed to map resource for moving staging data\n");
}
static bool
tex_staging_sys_readback(struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool read_back = false, success;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
      read_back = true;
   }
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      if (!(xfer->base.usage & discard_flags))
         read_back = true;
   }

   if (!read_back)
      return true;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      assert(!"no on-the-fly format conversion for mapping");
      success = false;
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_zs_read(tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}
static void *
tex_map(struct ilo_transfer *xfer)
{
   void *ptr;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_GTT_ASYNC:
      ptr = xfer_map(xfer);
      if (ptr) {
         const struct ilo_texture *tex = ilo_texture(xfer->base.resource);

         ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

         /* stride is for a block row, not a texel row */
         xfer->base.stride = tex->image.bo_stride;
         /* note that slice stride is not always available */
         xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
            tex_get_slice_stride(tex, xfer->base.level) : 0;
      }
      break;
   case ILO_TRANSFER_MAP_STAGING:
      ptr = xfer_map(xfer);
      if (ptr) {
         const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
         xfer->base.stride = staging->image.bo_stride;
         xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
      }
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
         ptr = xfer_map(xfer);
      else
         ptr = NULL;
      break;
   default:
      assert(!"unknown mapping method");
      ptr = NULL;
      break;
   }

   return ptr;
}
static void *
buf_map(struct ilo_transfer *xfer)
{
   void *ptr;

   ptr = xfer_map(xfer);
   if (!ptr)
      return NULL;

   if (xfer->method != ILO_TRANSFER_MAP_STAGING)
      ptr += xfer->base.box.x;

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);

   return ptr;
}
static void
copy_staging_resource(struct ilo_context *ilo,
                      struct ilo_transfer *xfer,
                      const struct pipe_box *box)
{
   const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
      xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
   struct pipe_box modified_box;

   assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);

   if (!box) {
      u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
               xfer->base.box.depth, &modified_box);
      box = &modified_box;
   }
   else if (pad_x) {
      modified_box = *box;
      modified_box.x += pad_x;
      box = &modified_box;
   }

   ilo_blitter_blt_copy_resource(ilo->blitter,
         xfer->base.resource, xfer->base.level,
         xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
         xfer->staging.res, 0, box);
}
static bool
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
{
   const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);

   if (need_submit)
      *need_submit = referenced;

   if (referenced)
      return true;

   return intel_bo_is_busy(bo);
}
/**
 * Choose the best mapping method, depending on the transfer usage and
 * whether the bo is busy.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   bool need_submit;

   if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
      return false;

   /* see if we can avoid blocking */
   if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
      bool resource_renamed;

      if (!xfer_unblock(xfer, &resource_renamed)) {
         if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* submit to make the bo really busy so that map() correctly blocks */
         if (need_submit)
            ilo_cp_submit(ilo->cp, "syncing for transfers");
      }

      if (resource_renamed)
         ilo_state_vector_resource_renamed(&ilo->state_vector, res);
   }

   return true;
}
static void
buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
           unsigned usage, int offset, int size, const void *data)
{
   struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
   bool need_submit;

   /* see if we can avoid blocking */
   if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
      bool unblocked = false;

      if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
          ilo_resource_rename_bo(res)) {
         ilo_state_vector_resource_renamed(&ilo->state_vector, res);
         unblocked = true;
      }
      else {
         struct pipe_resource templ, *staging;

         /*
          * allocate a staging buffer to hold the data and pipelined-copy
          * it over
          */
         templ = *res;
         templ.width0 = size;
         templ.usage = PIPE_USAGE_STAGING;
         templ.bind = PIPE_BIND_TRANSFER_WRITE;
         staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
         if (staging) {
            const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
            struct pipe_box staging_box;

            /* offset by staging_vma->bo_offset for pwrite */
            intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
                            size, data);

            u_box_1d(0, size, &staging_box);
            ilo_blitter_blt_copy_resource(ilo->blitter,
                  res, 0, offset, 0, 0,
                  staging, 0, &staging_box);

            pipe_resource_reference(&staging, NULL);

            return;
         }
      }

      /* submit to make the bo really busy so that pwrite() correctly blocks */
      if (!unblocked && need_submit)
         ilo_cp_submit(ilo->cp, "syncing for pwrites");
   }

   /* offset by buf->vma.bo_offset for pwrite */
   intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
}
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   /*
    * The staging resource is mapped persistently and coherently.  We can
    * copy without unmapping.
    */
   if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
       (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
      copy_staging_resource(ilo, xfer, box);
}
static void
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   xfer_unmap(xfer);

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_STAGING:
      if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
         copy_staging_resource(ilo, xfer, NULL);
      pipe_resource_reference(&xfer->staging.res, NULL);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      tex_staging_sys_writeback(xfer);
      align_free(xfer->staging.sys);
      break;
   default:
      break;
   }

   pipe_resource_reference(&xfer->base.resource, NULL);

   util_slab_free(&ilo->transfer_mempool, xfer);
}
static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;
   void *ptr;

   /* note that xfer is not zero'd */
   xfer = util_slab_alloc(&ilo->transfer_mempool);
   if (!xfer) {
      *transfer = NULL;
      return NULL;
   }

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   ilo_blit_resolve_transfer(ilo, &xfer->base);

   if (choose_transfer_method(ilo, xfer)) {
      if (res->target == PIPE_BUFFER)
         ptr = buf_map(xfer);
      else
         ptr = tex_map(xfer);
   }
   else {
      ptr = NULL;
   }

   if (!ptr) {
      pipe_resource_reference(&xfer->base.resource, NULL);
      util_slab_free(&ilo->transfer_mempool, xfer);
      *transfer = NULL;
      return NULL;
   }

   *transfer = &xfer->base;

   return ptr;
}
static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *res,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   if (likely(res->target == PIPE_BUFFER) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* they should specify just an offset and a size */
      assert(level == 0);
      assert(box->y == 0);
      assert(box->z == 0);
      assert(box->height == 1);
      assert(box->depth == 1);

      buf_pwrite(ilo_context(pipe), res,
                 usage, box->x, box->width, data);
   }
   else {
      u_default_transfer_inline_write(pipe, res,
            level, usage, box, data, stride, layer_stride);
   }
}
/**
 * Initialize transfer-related functions.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.transfer_inline_write = ilo_transfer_inline_write;
}