/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
33 #include "ilo_blitter.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
/*
 * For buffers that are not busy, we want to map/unmap them directly.  For
 * those that are busy, we have to worry about synchronization.  We could
 * wait for GPU to finish, but there are cases where we could avoid waiting.
 *
 *  - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
 *    buffer can be discarded.  We can replace the backing bo by a new one of
 *    the same size (renaming).
 *  - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
 *    range can be discarded.  We can allocate and map a staging bo on
 *    mapping, and (pipelined-)copy it over to the real bo on unmapping.
 *  - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
 *    flushed regions need to be written.  We can still allocate and map a
 *    staging bo, but should copy only the flushed regions over.
 *
 * However, there are other flags to consider.
 *
 *  - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
 *    synchronization at all on mapping.
 *  - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
 *  - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
 *  - When PIPE_TRANSFER_PERSISTENT is set, GPU may access the buffer while it
 *    is mapped.  Synchronization is done by defining memory barriers,
 *    explicitly via memory_barrier() or implicitly via
 *    transfer_flush_region(), as well as GPU fences.
 *  - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
 *    be made visible to the other side immediately.  Since the kernel flushes
 *    GPU caches at the end of each batch buffer, CPU always sees GPU updates.
 *    We could use a coherent mapping to make all persistent mappings
 *    coherent.
 *
 * These also apply to textures, except that we may additionally need to do
 * format conversion or tiling/untiling.
 */
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
80 resource_get_transfer_method(struct pipe_resource
*res
,
81 const struct pipe_transfer
*transfer
,
82 enum ilo_transfer_map_method
*method
)
84 const struct ilo_screen
*is
= ilo_screen(res
->screen
);
85 const unsigned usage
= transfer
->usage
;
86 enum ilo_transfer_map_method m
;
89 if (res
->target
== PIPE_BUFFER
) {
92 struct ilo_texture
*tex
= ilo_texture(res
);
93 bool need_convert
= false;
95 /* we may need to convert on the fly */
96 if (tex
->image
.tiling
== GEN8_TILING_W
|| tex
->separate_s8
) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is
->dev
) >= ILO_GEN(7) ||
99 ilo_texture_can_enable_hiz(tex
, transfer
->level
,
100 transfer
->box
.z
, transfer
->box
.depth
)) {
101 m
= ILO_TRANSFER_MAP_SW_ZS
;
104 } else if (tex
->image
.format
!= tex
->base
.format
) {
105 m
= ILO_TRANSFER_MAP_SW_CONVERT
;
110 if (usage
& (PIPE_TRANSFER_MAP_DIRECTLY
| PIPE_TRANSFER_PERSISTENT
))
117 tiled
= (tex
->image
.tiling
!= GEN6_TILING_NONE
);
121 m
= ILO_TRANSFER_MAP_GTT
; /* to have a linear view */
122 else if (is
->dev
.has_llc
)
123 m
= ILO_TRANSFER_MAP_CPU
; /* fast and mostly coherent */
124 else if (usage
& PIPE_TRANSFER_PERSISTENT
)
125 m
= ILO_TRANSFER_MAP_GTT
; /* for coherency */
126 else if (usage
& PIPE_TRANSFER_READ
)
127 m
= ILO_TRANSFER_MAP_CPU
; /* gtt read is too slow */
129 m
= ILO_TRANSFER_MAP_GTT
;
137 * Rename the bo of the resource.
140 resource_rename_bo(struct pipe_resource
*res
)
142 return (res
->target
== PIPE_BUFFER
) ?
143 ilo_buffer_rename_bo(ilo_buffer(res
)) :
144 ilo_texture_rename_bo(ilo_texture(res
));
148 * Return true if usage allows the use of staging bo to avoid blocking.
151 usage_allows_staging_bo(unsigned usage
)
153 /* do we know how to write the data back to the resource? */
154 const unsigned can_writeback
= (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
|
155 PIPE_TRANSFER_DISCARD_RANGE
|
156 PIPE_TRANSFER_FLUSH_EXPLICIT
);
157 const unsigned reasons_against
= (PIPE_TRANSFER_READ
|
158 PIPE_TRANSFER_MAP_DIRECTLY
|
159 PIPE_TRANSFER_PERSISTENT
);
161 return (usage
& can_writeback
) && !(usage
& reasons_against
);
165 * Allocate the staging resource. It is always linear and its size matches
166 * the transfer box, with proper paddings.
169 xfer_alloc_staging_res(struct ilo_transfer
*xfer
)
171 const struct pipe_resource
*res
= xfer
->base
.resource
;
172 const struct pipe_box
*box
= &xfer
->base
.box
;
173 struct pipe_resource templ
;
175 memset(&templ
, 0, sizeof(templ
));
177 templ
.format
= res
->format
;
179 if (res
->target
== PIPE_BUFFER
) {
180 templ
.target
= PIPE_BUFFER
;
182 (box
->x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
) + box
->width
;
185 /* use 2D array for any texture target */
186 templ
.target
= PIPE_TEXTURE_2D_ARRAY
;
187 templ
.width0
= box
->width
;
190 templ
.height0
= box
->height
;
192 templ
.array_size
= box
->depth
;
193 templ
.nr_samples
= 1;
194 templ
.usage
= PIPE_USAGE_STAGING
;
195 templ
.bind
= PIPE_BIND_TRANSFER_WRITE
;
197 if (xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
) {
198 templ
.flags
= PIPE_RESOURCE_FLAG_MAP_PERSISTENT
|
199 PIPE_RESOURCE_FLAG_MAP_COHERENT
;
202 xfer
->staging
.res
= res
->screen
->resource_create(res
->screen
, &templ
);
204 if (xfer
->staging
.res
&& xfer
->staging
.res
->target
!= PIPE_BUFFER
) {
205 assert(ilo_texture(xfer
->staging
.res
)->image
.tiling
==
209 return (xfer
->staging
.res
!= NULL
);
213 * Use an alternative transfer method or rename the resource to unblock an
214 * otherwise blocking transfer.
217 xfer_unblock(struct ilo_transfer
*xfer
, bool *resource_renamed
)
219 struct pipe_resource
*res
= xfer
->base
.resource
;
220 bool unblocked
= false, renamed
= false;
222 switch (xfer
->method
) {
223 case ILO_TRANSFER_MAP_CPU
:
224 case ILO_TRANSFER_MAP_GTT
:
225 if (xfer
->base
.usage
& PIPE_TRANSFER_UNSYNCHRONIZED
) {
226 xfer
->method
= ILO_TRANSFER_MAP_GTT_ASYNC
;
229 else if ((xfer
->base
.usage
& PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
) &&
230 resource_rename_bo(res
)) {
234 else if (usage_allows_staging_bo(xfer
->base
.usage
) &&
235 xfer_alloc_staging_res(xfer
)) {
236 xfer
->method
= ILO_TRANSFER_MAP_STAGING
;
240 case ILO_TRANSFER_MAP_GTT_ASYNC
:
241 case ILO_TRANSFER_MAP_STAGING
:
248 *resource_renamed
= renamed
;
254 * Allocate the staging system buffer based on the resource format and the
258 xfer_alloc_staging_sys(struct ilo_transfer
*xfer
)
260 const enum pipe_format format
= xfer
->base
.resource
->format
;
261 const struct pipe_box
*box
= &xfer
->base
.box
;
262 const unsigned alignment
= 64;
264 /* need to tell the world the layout */
266 align(util_format_get_stride(format
, box
->width
), alignment
);
267 xfer
->base
.layer_stride
=
268 util_format_get_2d_size(format
, xfer
->base
.stride
, box
->height
);
271 align_malloc(xfer
->base
.layer_stride
* box
->depth
, alignment
);
273 return (xfer
->staging
.sys
!= NULL
);
277 * Map according to the method. The staging system buffer should have been
278 * allocated if the method requires it.
281 xfer_map(struct ilo_transfer
*xfer
)
285 switch (xfer
->method
) {
286 case ILO_TRANSFER_MAP_CPU
:
287 ptr
= intel_bo_map(ilo_resource_get_bo(xfer
->base
.resource
),
288 xfer
->base
.usage
& PIPE_TRANSFER_WRITE
);
290 case ILO_TRANSFER_MAP_GTT
:
291 ptr
= intel_bo_map_gtt(ilo_resource_get_bo(xfer
->base
.resource
));
293 case ILO_TRANSFER_MAP_GTT_ASYNC
:
294 ptr
= intel_bo_map_gtt_async(ilo_resource_get_bo(xfer
->base
.resource
));
296 case ILO_TRANSFER_MAP_STAGING
:
298 const struct ilo_screen
*is
= ilo_screen(xfer
->staging
.res
->screen
);
299 struct intel_bo
*bo
= ilo_resource_get_bo(xfer
->staging
.res
);
302 * We want a writable, optionally persistent and coherent, mapping
303 * for a linear bo. We can call resource_get_transfer_method(), but
304 * this turns out to be fairly simple.
307 ptr
= intel_bo_map(bo
, true);
309 ptr
= intel_bo_map_gtt(bo
);
311 if (ptr
&& xfer
->staging
.res
->target
== PIPE_BUFFER
)
312 ptr
+= (xfer
->base
.box
.x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
);
316 case ILO_TRANSFER_MAP_SW_CONVERT
:
317 case ILO_TRANSFER_MAP_SW_ZS
:
318 ptr
= xfer
->staging
.sys
;
321 assert(!"unknown mapping method");
333 xfer_unmap(struct ilo_transfer
*xfer
)
335 switch (xfer
->method
) {
336 case ILO_TRANSFER_MAP_CPU
:
337 case ILO_TRANSFER_MAP_GTT
:
338 case ILO_TRANSFER_MAP_GTT_ASYNC
:
339 intel_bo_unmap(ilo_resource_get_bo(xfer
->base
.resource
));
341 case ILO_TRANSFER_MAP_STAGING
:
342 intel_bo_unmap(ilo_resource_get_bo(xfer
->staging
.res
));
350 tex_get_box_origin(const struct ilo_texture
*tex
,
351 unsigned level
, unsigned slice
,
352 const struct pipe_box
*box
,
353 unsigned *mem_x
, unsigned *mem_y
)
357 ilo_image_get_slice_pos(&tex
->image
, level
, box
->z
+ slice
, &x
, &y
);
361 ilo_image_pos_to_mem(&tex
->image
, x
, y
, mem_x
, mem_y
);
365 tex_get_box_offset(const struct ilo_texture
*tex
, unsigned level
,
366 const struct pipe_box
*box
)
368 unsigned mem_x
, mem_y
;
370 tex_get_box_origin(tex
, level
, 0, box
, &mem_x
, &mem_y
);
372 return ilo_image_mem_to_linear(&tex
->image
, mem_x
, mem_y
);
376 tex_get_slice_stride(const struct ilo_texture
*tex
, unsigned level
)
378 return ilo_image_get_slice_stride(&tex
->image
, level
);
/*
 * Tiled offset helpers.  Given the memory position (mem_x in bytes, mem_y in
 * rows) of a texel, return its byte offset within the tiled bo.  When
 * "swizzle" is set, bit 6 of the returned offset is additionally XOR'ed
 * with higher address bits, as required when address swizzling is enabled.
 */

static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, the new address bit[6] should be:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9] XOR
    *                        TiledAddr bit[10]"
    */
   return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
}

static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9]"
    */
   return addr ^ ((addr >> 3) & 0x40);
}

static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21: an X-major tile
    * has 8 rows and 32 OWord columns (512 bytes).  Tiles in the tiled
    * region are numbered in row-major order, starting from zero:
    *
    *   tile   = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order:
    *
    *   oword  = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *   offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *          = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   const unsigned tile_idx = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   const unsigned off = tile_idx << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   return (swizzle) ? tex_tile_x_swizzle(off) : off;
}

static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22: a Y-major tile
    * has 32 rows and 8 OWord columns (128 bytes).  Tiles in the tiled
    * region are numbered in row-major order, and OWords within a tile in
    * column-major order:
    *
    *   tile   = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *   oword  = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *   offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   const unsigned tile_idx = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   const unsigned oword_idx = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   const unsigned off = tile_idx << 12 | oword_idx << 4 | (mem_x & 0xf);

   return (swizzle) ? tex_tile_y_swizzle(off) : off;
}

static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23: a W-major tile
    * has 8 8x8-block rows and 8 8x8-block columns.  Tiles in the tiled
    * region are numbered in row-major order; 8x8-blocks within a tile are
    * numbered in column-major order; each 8x8-block recursively splits into
    * row-major 4x4-blocks and 2x2-blocks:
    *
    *   tile   = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *   blk8   = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *   blk4   = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *   blk2   = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *   blk1   = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *   offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   const unsigned tile_idx = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   const unsigned b8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   const unsigned b4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   const unsigned b2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   const unsigned b1 = (mem_y & 0x1) << 1 | (mem_x & 0x1);
   const unsigned off = tile_idx << 12 | b8 << 6 | b4 << 4 | b2 << 2 | b1;

   /* W-tiled swizzling applies the same bit-6 XOR as Y-tiled */
   return (swizzle) ? tex_tile_y_swizzle(off) : off;
}

static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   /* linear layout: tiles_per_row is simply the bo stride; no swizzling */
   return mem_y * tiles_per_row + mem_x;
}
518 typedef unsigned (*tex_tile_offset_func
)(unsigned mem_x
, unsigned mem_y
,
519 unsigned tiles_per_row
,
522 static tex_tile_offset_func
523 tex_tile_choose_offset_func(const struct ilo_texture
*tex
,
524 unsigned *tiles_per_row
)
526 switch (tex
->image
.tiling
) {
528 assert(!"unknown tiling");
530 case GEN6_TILING_NONE
:
531 *tiles_per_row
= tex
->image
.bo_stride
;
532 return tex_tile_none_offset
;
534 *tiles_per_row
= tex
->image
.bo_stride
/ 512;
535 return tex_tile_x_offset
;
537 *tiles_per_row
= tex
->image
.bo_stride
/ 128;
538 return tex_tile_y_offset
;
540 *tiles_per_row
= tex
->image
.bo_stride
/ 64;
541 return tex_tile_w_offset
;
546 tex_staging_sys_map_bo(struct ilo_texture
*tex
,
550 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
551 const bool prefer_cpu
= (is
->dev
.has_llc
|| for_read_back
);
554 if (prefer_cpu
&& (tex
->image
.tiling
== GEN6_TILING_NONE
||
556 ptr
= intel_bo_map(tex
->bo
, !for_read_back
);
558 ptr
= intel_bo_map_gtt(tex
->bo
);
564 tex_staging_sys_unmap_bo(struct ilo_texture
*tex
)
566 intel_bo_unmap(tex
->bo
);
570 tex_staging_sys_zs_read(struct ilo_texture
*tex
,
571 const struct ilo_transfer
*xfer
)
573 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
574 const bool swizzle
= is
->dev
.has_address_swizzling
;
575 const struct pipe_box
*box
= &xfer
->base
.box
;
577 tex_tile_offset_func tile_offset
;
578 unsigned tiles_per_row
;
581 src
= tex_staging_sys_map_bo(tex
, true, false);
585 tile_offset
= tex_tile_choose_offset_func(tex
, &tiles_per_row
);
587 assert(tex
->image
.block_width
== 1 && tex
->image
.block_height
== 1);
589 if (tex
->separate_s8
) {
590 struct ilo_texture
*s8_tex
= tex
->separate_s8
;
591 const uint8_t *s8_src
;
592 tex_tile_offset_func s8_tile_offset
;
593 unsigned s8_tiles_per_row
;
594 int dst_cpp
, dst_s8_pos
, src_cpp_used
;
596 s8_src
= tex_staging_sys_map_bo(s8_tex
, true, false);
598 tex_staging_sys_unmap_bo(tex
);
602 s8_tile_offset
= tex_tile_choose_offset_func(s8_tex
, &s8_tiles_per_row
);
604 if (tex
->base
.format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
) {
605 assert(tex
->image
.format
== PIPE_FORMAT_Z24X8_UNORM
);
612 assert(tex
->base
.format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
613 assert(tex
->image
.format
== PIPE_FORMAT_Z32_FLOAT
);
620 for (slice
= 0; slice
< box
->depth
; slice
++) {
621 unsigned mem_x
, mem_y
, s8_mem_x
, s8_mem_y
;
625 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
626 box
, &mem_x
, &mem_y
);
627 tex_get_box_origin(s8_tex
, xfer
->base
.level
, slice
,
628 box
, &s8_mem_x
, &s8_mem_y
);
630 dst
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
632 for (i
= 0; i
< box
->height
; i
++) {
633 unsigned x
= mem_x
, s8_x
= s8_mem_x
;
636 for (j
= 0; j
< box
->width
; j
++) {
637 const unsigned offset
=
638 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
639 const unsigned s8_offset
=
640 s8_tile_offset(s8_x
, s8_mem_y
, s8_tiles_per_row
, swizzle
);
642 memcpy(d
, src
+ offset
, src_cpp_used
);
643 d
[dst_s8_pos
] = s8_src
[s8_offset
];
646 x
+= tex
->image
.block_size
;
650 dst
+= xfer
->base
.stride
;
656 tex_staging_sys_unmap_bo(s8_tex
);
659 assert(tex
->image
.format
== PIPE_FORMAT_S8_UINT
);
661 for (slice
= 0; slice
< box
->depth
; slice
++) {
662 unsigned mem_x
, mem_y
;
666 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
667 box
, &mem_x
, &mem_y
);
669 dst
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
671 for (i
= 0; i
< box
->height
; i
++) {
675 for (j
= 0; j
< box
->width
; j
++) {
676 const unsigned offset
=
677 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
685 dst
+= xfer
->base
.stride
;
691 tex_staging_sys_unmap_bo(tex
);
697 tex_staging_sys_zs_write(struct ilo_texture
*tex
,
698 const struct ilo_transfer
*xfer
)
700 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
701 const bool swizzle
= is
->dev
.has_address_swizzling
;
702 const struct pipe_box
*box
= &xfer
->base
.box
;
704 tex_tile_offset_func tile_offset
;
705 unsigned tiles_per_row
;
708 dst
= tex_staging_sys_map_bo(tex
, false, false);
712 tile_offset
= tex_tile_choose_offset_func(tex
, &tiles_per_row
);
714 assert(tex
->image
.block_width
== 1 && tex
->image
.block_height
== 1);
716 if (tex
->separate_s8
) {
717 struct ilo_texture
*s8_tex
= tex
->separate_s8
;
719 tex_tile_offset_func s8_tile_offset
;
720 unsigned s8_tiles_per_row
;
721 int src_cpp
, src_s8_pos
, dst_cpp_used
;
723 s8_dst
= tex_staging_sys_map_bo(s8_tex
, false, false);
725 tex_staging_sys_unmap_bo(s8_tex
);
729 s8_tile_offset
= tex_tile_choose_offset_func(s8_tex
, &s8_tiles_per_row
);
731 if (tex
->base
.format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
) {
732 assert(tex
->image
.format
== PIPE_FORMAT_Z24X8_UNORM
);
739 assert(tex
->base
.format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
740 assert(tex
->image
.format
== PIPE_FORMAT_Z32_FLOAT
);
747 for (slice
= 0; slice
< box
->depth
; slice
++) {
748 unsigned mem_x
, mem_y
, s8_mem_x
, s8_mem_y
;
752 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
753 box
, &mem_x
, &mem_y
);
754 tex_get_box_origin(s8_tex
, xfer
->base
.level
, slice
,
755 box
, &s8_mem_x
, &s8_mem_y
);
757 src
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
759 for (i
= 0; i
< box
->height
; i
++) {
760 unsigned x
= mem_x
, s8_x
= s8_mem_x
;
761 const uint8_t *s
= src
;
763 for (j
= 0; j
< box
->width
; j
++) {
764 const unsigned offset
=
765 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
766 const unsigned s8_offset
=
767 s8_tile_offset(s8_x
, s8_mem_y
, s8_tiles_per_row
, swizzle
);
769 memcpy(dst
+ offset
, s
, dst_cpp_used
);
770 s8_dst
[s8_offset
] = s
[src_s8_pos
];
773 x
+= tex
->image
.block_size
;
777 src
+= xfer
->base
.stride
;
783 tex_staging_sys_unmap_bo(s8_tex
);
786 assert(tex
->image
.format
== PIPE_FORMAT_S8_UINT
);
788 for (slice
= 0; slice
< box
->depth
; slice
++) {
789 unsigned mem_x
, mem_y
;
793 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
794 box
, &mem_x
, &mem_y
);
796 src
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
798 for (i
= 0; i
< box
->height
; i
++) {
800 const uint8_t *s
= src
;
802 for (j
= 0; j
< box
->width
; j
++) {
803 const unsigned offset
=
804 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
812 src
+= xfer
->base
.stride
;
818 tex_staging_sys_unmap_bo(tex
);
824 tex_staging_sys_convert_write(struct ilo_texture
*tex
,
825 const struct ilo_transfer
*xfer
)
827 const struct pipe_box
*box
= &xfer
->base
.box
;
828 unsigned dst_slice_stride
;
832 dst
= tex_staging_sys_map_bo(tex
, false, true);
836 dst
+= tex_get_box_offset(tex
, xfer
->base
.level
, box
);
838 /* slice stride is not always available */
840 dst_slice_stride
= tex_get_slice_stride(tex
, xfer
->base
.level
);
842 dst_slice_stride
= 0;
844 if (unlikely(tex
->image
.format
== tex
->base
.format
)) {
845 util_copy_box(dst
, tex
->image
.format
, tex
->image
.bo_stride
,
846 dst_slice_stride
, 0, 0, 0, box
->width
, box
->height
, box
->depth
,
847 xfer
->staging
.sys
, xfer
->base
.stride
, xfer
->base
.layer_stride
,
850 tex_staging_sys_unmap_bo(tex
);
855 switch (tex
->base
.format
) {
856 case PIPE_FORMAT_ETC1_RGB8
:
857 assert(tex
->image
.format
== PIPE_FORMAT_R8G8B8X8_UNORM
);
859 for (slice
= 0; slice
< box
->depth
; slice
++) {
861 xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
863 util_format_etc1_rgb8_unpack_rgba_8unorm(dst
,
864 tex
->image
.bo_stride
, src
, xfer
->base
.stride
,
865 box
->width
, box
->height
);
867 dst
+= dst_slice_stride
;
871 assert(!"unable to convert the staging data");
875 tex_staging_sys_unmap_bo(tex
);
881 tex_staging_sys_writeback(struct ilo_transfer
*xfer
)
883 struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
886 if (!(xfer
->base
.usage
& PIPE_TRANSFER_WRITE
))
889 switch (xfer
->method
) {
890 case ILO_TRANSFER_MAP_SW_CONVERT
:
891 success
= tex_staging_sys_convert_write(tex
, xfer
);
893 case ILO_TRANSFER_MAP_SW_ZS
:
894 success
= tex_staging_sys_zs_write(tex
, xfer
);
897 assert(!"unknown mapping method");
903 ilo_err("failed to map resource for moving staging data\n");
907 tex_staging_sys_readback(struct ilo_transfer
*xfer
)
909 struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
910 bool read_back
= false, success
;
912 /* see if we need to read the resource back */
913 if (xfer
->base
.usage
& PIPE_TRANSFER_READ
) {
916 else if (xfer
->base
.usage
& PIPE_TRANSFER_WRITE
) {
917 const unsigned discard_flags
=
918 (PIPE_TRANSFER_DISCARD_RANGE
| PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
);
920 if (!(xfer
->base
.usage
& discard_flags
))
927 switch (xfer
->method
) {
928 case ILO_TRANSFER_MAP_SW_CONVERT
:
929 assert(!"no on-the-fly format conversion for mapping");
932 case ILO_TRANSFER_MAP_SW_ZS
:
933 success
= tex_staging_sys_zs_read(tex
, xfer
);
936 assert(!"unknown mapping method");
945 tex_map(struct ilo_transfer
*xfer
)
949 switch (xfer
->method
) {
950 case ILO_TRANSFER_MAP_CPU
:
951 case ILO_TRANSFER_MAP_GTT
:
952 case ILO_TRANSFER_MAP_GTT_ASYNC
:
953 ptr
= xfer_map(xfer
);
955 const struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
957 ptr
+= tex_get_box_offset(tex
, xfer
->base
.level
, &xfer
->base
.box
);
959 /* stride is for a block row, not a texel row */
960 xfer
->base
.stride
= tex
->image
.bo_stride
;
961 /* note that slice stride is not always available */
962 xfer
->base
.layer_stride
= (xfer
->base
.box
.depth
> 1) ?
963 tex_get_slice_stride(tex
, xfer
->base
.level
) : 0;
966 case ILO_TRANSFER_MAP_STAGING
:
967 ptr
= xfer_map(xfer
);
969 const struct ilo_texture
*staging
= ilo_texture(xfer
->staging
.res
);
970 xfer
->base
.stride
= staging
->image
.bo_stride
;
971 xfer
->base
.layer_stride
= tex_get_slice_stride(staging
, 0);
974 case ILO_TRANSFER_MAP_SW_CONVERT
:
975 case ILO_TRANSFER_MAP_SW_ZS
:
976 if (xfer_alloc_staging_sys(xfer
) && tex_staging_sys_readback(xfer
))
977 ptr
= xfer_map(xfer
);
982 assert(!"unknown mapping method");
991 buf_map(struct ilo_transfer
*xfer
)
995 ptr
= xfer_map(xfer
);
999 if (xfer
->method
!= ILO_TRANSFER_MAP_STAGING
)
1000 ptr
+= xfer
->base
.box
.x
;
1002 xfer
->base
.stride
= 0;
1003 xfer
->base
.layer_stride
= 0;
1005 assert(xfer
->base
.level
== 0);
1006 assert(xfer
->base
.box
.y
== 0);
1007 assert(xfer
->base
.box
.z
== 0);
1008 assert(xfer
->base
.box
.height
== 1);
1009 assert(xfer
->base
.box
.depth
== 1);
1015 copy_staging_resource(struct ilo_context
*ilo
,
1016 struct ilo_transfer
*xfer
,
1017 const struct pipe_box
*box
)
1019 const unsigned pad_x
= (xfer
->staging
.res
->target
== PIPE_BUFFER
) ?
1020 xfer
->base
.box
.x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
: 0;
1021 struct pipe_box modified_box
;
1023 assert(xfer
->method
== ILO_TRANSFER_MAP_STAGING
&& xfer
->staging
.res
);
1026 u_box_3d(pad_x
, 0, 0, xfer
->base
.box
.width
, xfer
->base
.box
.height
,
1027 xfer
->base
.box
.depth
, &modified_box
);
1028 box
= &modified_box
;
1031 modified_box
= *box
;
1032 modified_box
.x
+= pad_x
;
1033 box
= &modified_box
;
1036 ilo_blitter_blt_copy_resource(ilo
->blitter
,
1037 xfer
->base
.resource
, xfer
->base
.level
,
1038 xfer
->base
.box
.x
, xfer
->base
.box
.y
, xfer
->base
.box
.z
,
1039 xfer
->staging
.res
, 0, box
);
1043 is_bo_busy(struct ilo_context
*ilo
, struct intel_bo
*bo
, bool *need_submit
)
1045 const bool referenced
= ilo_builder_has_reloc(&ilo
->cp
->builder
, bo
);
1048 *need_submit
= referenced
;
1053 return intel_bo_is_busy(bo
);
1057 * Choose the best mapping method, depending on the transfer usage and whether
1061 choose_transfer_method(struct ilo_context
*ilo
, struct ilo_transfer
*xfer
)
1063 struct pipe_resource
*res
= xfer
->base
.resource
;
1066 if (!resource_get_transfer_method(res
, &xfer
->base
, &xfer
->method
))
1069 /* see if we can avoid blocking */
1070 if (is_bo_busy(ilo
, ilo_resource_get_bo(res
), &need_submit
)) {
1071 bool resource_renamed
;
1073 if (!xfer_unblock(xfer
, &resource_renamed
)) {
1074 if (xfer
->base
.usage
& PIPE_TRANSFER_DONTBLOCK
)
1077 /* submit to make bo really busy and map() correctly blocks */
1079 ilo_cp_submit(ilo
->cp
, "syncing for transfers");
1082 if (resource_renamed
)
1083 ilo_state_vector_resource_renamed(&ilo
->state_vector
, res
);
1090 buf_pwrite(struct ilo_context
*ilo
, struct ilo_buffer
*buf
,
1091 unsigned usage
, int offset
, int size
, const void *data
)
1095 /* see if we can avoid blocking */
1096 if (is_bo_busy(ilo
, buf
->bo
, &need_submit
)) {
1097 bool unblocked
= false;
1099 if ((usage
& PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
) &&
1100 ilo_buffer_rename_bo(buf
)) {
1101 ilo_state_vector_resource_renamed(&ilo
->state_vector
, &buf
->base
);
1105 struct pipe_resource templ
, *staging
;
1108 * allocate a staging buffer to hold the data and pipelined copy it
1112 templ
.width0
= size
;
1113 templ
.usage
= PIPE_USAGE_STAGING
;
1114 templ
.bind
= PIPE_BIND_TRANSFER_WRITE
;
1115 staging
= ilo
->base
.screen
->resource_create(ilo
->base
.screen
, &templ
);
1117 struct pipe_box staging_box
;
1119 intel_bo_pwrite(ilo_buffer(staging
)->bo
, 0, size
, data
);
1121 u_box_1d(0, size
, &staging_box
);
1122 ilo_blitter_blt_copy_resource(ilo
->blitter
,
1123 &buf
->base
, 0, offset
, 0, 0,
1124 staging
, 0, &staging_box
);
1126 pipe_resource_reference(&staging
, NULL
);
1132 /* submit to make bo really busy and pwrite() correctly blocks */
1133 if (!unblocked
&& need_submit
)
1134 ilo_cp_submit(ilo
->cp
, "syncing for pwrites");
1137 intel_bo_pwrite(buf
->bo
, offset
, size
, data
);
1141 ilo_transfer_flush_region(struct pipe_context
*pipe
,
1142 struct pipe_transfer
*transfer
,
1143 const struct pipe_box
*box
)
1145 struct ilo_context
*ilo
= ilo_context(pipe
);
1146 struct ilo_transfer
*xfer
= ilo_transfer(transfer
);
1149 * The staging resource is mapped persistently and coherently. We can copy
1150 * without unmapping.
1152 if (xfer
->method
== ILO_TRANSFER_MAP_STAGING
&&
1153 (xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
))
1154 copy_staging_resource(ilo
, xfer
, box
);
1158 ilo_transfer_unmap(struct pipe_context
*pipe
,
1159 struct pipe_transfer
*transfer
)
1161 struct ilo_context
*ilo
= ilo_context(pipe
);
1162 struct ilo_transfer
*xfer
= ilo_transfer(transfer
);
1166 switch (xfer
->method
) {
1167 case ILO_TRANSFER_MAP_STAGING
:
1168 if (!(xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
))
1169 copy_staging_resource(ilo
, xfer
, NULL
);
1170 pipe_resource_reference(&xfer
->staging
.res
, NULL
);
1172 case ILO_TRANSFER_MAP_SW_CONVERT
:
1173 case ILO_TRANSFER_MAP_SW_ZS
:
1174 tex_staging_sys_writeback(xfer
);
1175 align_free(xfer
->staging
.sys
);
1181 pipe_resource_reference(&xfer
->base
.resource
, NULL
);
1183 util_slab_free(&ilo
->transfer_mempool
, xfer
);
1187 ilo_transfer_map(struct pipe_context
*pipe
,
1188 struct pipe_resource
*res
,
1191 const struct pipe_box
*box
,
1192 struct pipe_transfer
**transfer
)
1194 struct ilo_context
*ilo
= ilo_context(pipe
);
1195 struct ilo_transfer
*xfer
;
1198 /* note that xfer is not zero'd */
1199 xfer
= util_slab_alloc(&ilo
->transfer_mempool
);
1205 xfer
->base
.resource
= NULL
;
1206 pipe_resource_reference(&xfer
->base
.resource
, res
);
1207 xfer
->base
.level
= level
;
1208 xfer
->base
.usage
= usage
;
1209 xfer
->base
.box
= *box
;
1211 ilo_blit_resolve_transfer(ilo
, &xfer
->base
);
1213 if (choose_transfer_method(ilo
, xfer
)) {
1214 if (res
->target
== PIPE_BUFFER
)
1215 ptr
= buf_map(xfer
);
1217 ptr
= tex_map(xfer
);
1224 pipe_resource_reference(&xfer
->base
.resource
, NULL
);
1225 util_slab_free(&ilo
->transfer_mempool
, xfer
);
1230 *transfer
= &xfer
->base
;
1236 ilo_transfer_inline_write(struct pipe_context
*pipe
,
1237 struct pipe_resource
*res
,
1240 const struct pipe_box
*box
,
1243 unsigned layer_stride
)
1245 if (likely(res
->target
== PIPE_BUFFER
) &&
1246 !(usage
& PIPE_TRANSFER_UNSYNCHRONIZED
)) {
1247 /* they should specify just an offset and a size */
1249 assert(box
->y
== 0);
1250 assert(box
->z
== 0);
1251 assert(box
->height
== 1);
1252 assert(box
->depth
== 1);
1254 buf_pwrite(ilo_context(pipe
), ilo_buffer(res
),
1255 usage
, box
->x
, box
->width
, data
);
1258 u_default_transfer_inline_write(pipe
, res
,
1259 level
, usage
, box
, data
, stride
, layer_stride
);
1264 * Initialize transfer-related functions.
1267 ilo_init_transfer_functions(struct ilo_context
*ilo
)
1269 ilo
->base
.transfer_map
= ilo_transfer_map
;
1270 ilo
->base
.transfer_flush_region
= ilo_transfer_flush_region
;
1271 ilo
->base
.transfer_unmap
= ilo_transfer_unmap
;
1272 ilo
->base
.transfer_inline_write
= ilo_transfer_inline_write
;