2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
33 #include "ilo_blitter.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43 * for GPU to finish, but there are cases where we could avoid waiting.
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
55 * However, there are other flags to consider.
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61 * - When PIPE_TRANSFER_PERSISTENT is set, GPU may access the buffer while it
62 * is mapped. Synchronization is done by defining memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67 * GPU caches at the end of each batch buffer, CPU always sees GPU updates.
68 * We could use a coherent mapping to make all persistent mappings
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
80 resource_get_transfer_method(struct pipe_resource
*res
,
81 const struct pipe_transfer
*transfer
,
82 enum ilo_transfer_map_method
*method
)
84 const struct ilo_screen
*is
= ilo_screen(res
->screen
);
85 const unsigned usage
= transfer
->usage
;
86 enum ilo_transfer_map_method m
;
89 if (res
->target
== PIPE_BUFFER
) {
92 struct ilo_texture
*tex
= ilo_texture(res
);
93 bool need_convert
= false;
95 /* we may need to convert on the fly */
96 if (tex
->image
.tiling
== GEN8_TILING_W
|| tex
->separate_s8
) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is
->dev
) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex
->image
, transfer
->level
)) {
100 m
= ILO_TRANSFER_MAP_SW_ZS
;
103 } else if (tex
->image
.format
!= tex
->base
.format
) {
104 m
= ILO_TRANSFER_MAP_SW_CONVERT
;
109 if (usage
& (PIPE_TRANSFER_MAP_DIRECTLY
| PIPE_TRANSFER_PERSISTENT
))
116 tiled
= (tex
->image
.tiling
!= GEN6_TILING_NONE
);
120 m
= ILO_TRANSFER_MAP_GTT
; /* to have a linear view */
121 else if (is
->dev
.has_llc
)
122 m
= ILO_TRANSFER_MAP_CPU
; /* fast and mostly coherent */
123 else if (usage
& PIPE_TRANSFER_PERSISTENT
)
124 m
= ILO_TRANSFER_MAP_GTT
; /* for coherency */
125 else if (usage
& PIPE_TRANSFER_READ
)
126 m
= ILO_TRANSFER_MAP_CPU
; /* gtt read is too slow */
128 m
= ILO_TRANSFER_MAP_GTT
;
136 * Return true if usage allows the use of staging bo to avoid blocking.
139 usage_allows_staging_bo(unsigned usage
)
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback
= (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
|
143 PIPE_TRANSFER_DISCARD_RANGE
|
144 PIPE_TRANSFER_FLUSH_EXPLICIT
);
145 const unsigned reasons_against
= (PIPE_TRANSFER_READ
|
146 PIPE_TRANSFER_MAP_DIRECTLY
|
147 PIPE_TRANSFER_PERSISTENT
);
149 return (usage
& can_writeback
) && !(usage
& reasons_against
);
153 * Allocate the staging resource. It is always linear and its size matches
154 * the transfer box, with proper paddings.
157 xfer_alloc_staging_res(struct ilo_transfer
*xfer
)
159 const struct pipe_resource
*res
= xfer
->base
.resource
;
160 const struct pipe_box
*box
= &xfer
->base
.box
;
161 struct pipe_resource templ
;
163 memset(&templ
, 0, sizeof(templ
));
165 templ
.format
= res
->format
;
167 if (res
->target
== PIPE_BUFFER
) {
168 templ
.target
= PIPE_BUFFER
;
170 (box
->x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
) + box
->width
;
173 /* use 2D array for any texture target */
174 templ
.target
= PIPE_TEXTURE_2D_ARRAY
;
175 templ
.width0
= box
->width
;
178 templ
.height0
= box
->height
;
180 templ
.array_size
= box
->depth
;
181 templ
.nr_samples
= 1;
182 templ
.usage
= PIPE_USAGE_STAGING
;
183 templ
.bind
= PIPE_BIND_TRANSFER_WRITE
;
185 if (xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
) {
186 templ
.flags
= PIPE_RESOURCE_FLAG_MAP_PERSISTENT
|
187 PIPE_RESOURCE_FLAG_MAP_COHERENT
;
190 xfer
->staging
.res
= res
->screen
->resource_create(res
->screen
, &templ
);
192 if (xfer
->staging
.res
&& xfer
->staging
.res
->target
!= PIPE_BUFFER
) {
193 assert(ilo_texture(xfer
->staging
.res
)->image
.tiling
==
197 return (xfer
->staging
.res
!= NULL
);
201 * Use an alternative transfer method or rename the resource to unblock an
202 * otherwise blocking transfer.
205 xfer_unblock(struct ilo_transfer
*xfer
, bool *resource_renamed
)
207 struct pipe_resource
*res
= xfer
->base
.resource
;
208 bool unblocked
= false, renamed
= false;
210 switch (xfer
->method
) {
211 case ILO_TRANSFER_MAP_CPU
:
212 case ILO_TRANSFER_MAP_GTT
:
213 if (xfer
->base
.usage
& PIPE_TRANSFER_UNSYNCHRONIZED
) {
214 xfer
->method
= ILO_TRANSFER_MAP_GTT_ASYNC
;
217 else if ((xfer
->base
.usage
& PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
) &&
218 ilo_resource_rename_bo(res
)) {
222 else if (usage_allows_staging_bo(xfer
->base
.usage
) &&
223 xfer_alloc_staging_res(xfer
)) {
224 xfer
->method
= ILO_TRANSFER_MAP_STAGING
;
228 case ILO_TRANSFER_MAP_GTT_ASYNC
:
229 case ILO_TRANSFER_MAP_STAGING
:
236 *resource_renamed
= renamed
;
242 * Allocate the staging system buffer based on the resource format and the
246 xfer_alloc_staging_sys(struct ilo_transfer
*xfer
)
248 const enum pipe_format format
= xfer
->base
.resource
->format
;
249 const struct pipe_box
*box
= &xfer
->base
.box
;
250 const unsigned alignment
= 64;
252 /* need to tell the world the layout */
254 align(util_format_get_stride(format
, box
->width
), alignment
);
255 xfer
->base
.layer_stride
=
256 util_format_get_2d_size(format
, xfer
->base
.stride
, box
->height
);
259 align_malloc(xfer
->base
.layer_stride
* box
->depth
, alignment
);
261 return (xfer
->staging
.sys
!= NULL
);
265 * Map according to the method. The staging system buffer should have been
266 * allocated if the method requires it.
269 xfer_map(struct ilo_transfer
*xfer
)
273 switch (xfer
->method
) {
274 case ILO_TRANSFER_MAP_CPU
:
275 ptr
= intel_bo_map(ilo_resource_get_bo(xfer
->base
.resource
),
276 xfer
->base
.usage
& PIPE_TRANSFER_WRITE
);
278 case ILO_TRANSFER_MAP_GTT
:
279 ptr
= intel_bo_map_gtt(ilo_resource_get_bo(xfer
->base
.resource
));
281 case ILO_TRANSFER_MAP_GTT_ASYNC
:
282 ptr
= intel_bo_map_gtt_async(ilo_resource_get_bo(xfer
->base
.resource
));
284 case ILO_TRANSFER_MAP_STAGING
:
286 const struct ilo_screen
*is
= ilo_screen(xfer
->staging
.res
->screen
);
287 struct intel_bo
*bo
= ilo_resource_get_bo(xfer
->staging
.res
);
290 * We want a writable, optionally persistent and coherent, mapping
291 * for a linear bo. We can call resource_get_transfer_method(), but
292 * this turns out to be fairly simple.
295 ptr
= intel_bo_map(bo
, true);
297 ptr
= intel_bo_map_gtt(bo
);
299 if (ptr
&& xfer
->staging
.res
->target
== PIPE_BUFFER
)
300 ptr
+= (xfer
->base
.box
.x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
);
304 case ILO_TRANSFER_MAP_SW_CONVERT
:
305 case ILO_TRANSFER_MAP_SW_ZS
:
306 ptr
= xfer
->staging
.sys
;
309 assert(!"unknown mapping method");
321 xfer_unmap(struct ilo_transfer
*xfer
)
323 switch (xfer
->method
) {
324 case ILO_TRANSFER_MAP_CPU
:
325 case ILO_TRANSFER_MAP_GTT
:
326 case ILO_TRANSFER_MAP_GTT_ASYNC
:
327 intel_bo_unmap(ilo_resource_get_bo(xfer
->base
.resource
));
329 case ILO_TRANSFER_MAP_STAGING
:
330 intel_bo_unmap(ilo_resource_get_bo(xfer
->staging
.res
));
338 tex_get_box_origin(const struct ilo_texture
*tex
,
339 unsigned level
, unsigned slice
,
340 const struct pipe_box
*box
,
341 unsigned *mem_x
, unsigned *mem_y
)
345 ilo_image_get_slice_pos(&tex
->image
, level
, box
->z
+ slice
, &x
, &y
);
349 ilo_image_pos_to_mem(&tex
->image
, x
, y
, mem_x
, mem_y
);
353 tex_get_box_offset(const struct ilo_texture
*tex
, unsigned level
,
354 const struct pipe_box
*box
)
356 unsigned mem_x
, mem_y
;
358 tex_get_box_origin(tex
, level
, 0, box
, &mem_x
, &mem_y
);
360 return ilo_image_mem_to_linear(&tex
->image
, mem_x
, mem_y
);
364 tex_get_slice_stride(const struct ilo_texture
*tex
, unsigned level
)
366 return ilo_image_get_slice_stride(&tex
->image
, level
);
/**
 * Apply X-tiling address swizzling to a tiled offset.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, the new address bit[6] should be:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9] XOR
 *                        TiledAddr bit[10]"
 */
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   const unsigned bit6 = (addr >> 6) & 0x1;
   const unsigned bit9 = (addr >> 9) & 0x1;
   const unsigned bit10 = (addr >> 10) & 0x1;

   /* replace bit 6 with the XOR of bits 6, 9, and 10 */
   return (addr & ~0x40u) | ((bit6 ^ bit9 ^ bit10) << 6);
}
/**
 * Apply Y-tiling address swizzling to a tiled offset.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, The new address bit[6] becomes:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9]"
 */
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   const unsigned bit6 = (addr >> 6) & 0x1;
   const unsigned bit9 = (addr >> 9) & 0x1;

   /* replace bit 6 with the XOR of bits 6 and 9 */
   return (addr & ~0x40u) | ((bit6 ^ bit9) << 6);
}
/**
 * Return the tiled offset of byte (mem_x, mem_y) in an X-majorly tiled
 * region.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 21: an X-major tile has 8
 * rows and 32 OWord columns (512 bytes).  Tiles are numbered in row-major
 * order, starting from zero, and bytes within a tile are also row-major:
 *
 *   tile   = (mem_y / 8) * tiles_per_row + (mem_x / 512)
 *   offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
 */
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y / 8) * tiles_per_row + (mem_x / 512);
   const unsigned offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}
/**
 * Return the tiled offset of byte (mem_x, mem_y) in a Y-majorly tiled
 * region.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 22: a Y-major tile has 32
 * rows and 8 OWord columns (128 bytes).  Tiles are numbered in row-major
 * order, while OWords within a tile are numbered in column-major order:
 *
 *   tile   = (mem_y / 32) * tiles_per_row + (mem_x / 128)
 *   oword  = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
 *   offset = tile * 4096 + oword * 16 + (mem_x % 16)
 */
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y / 32) * tiles_per_row + (mem_x / 128);
   const unsigned oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32);
   const unsigned offset = tile * 4096 + oword * 16 + (mem_x % 16);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
/**
 * Return the tiled offset of byte (mem_x, mem_y) in a W-majorly tiled
 * region.
 *
 * From the Sandy Bridge PRM, volume 1 part 2, page 23: a W-major tile has 8
 * 8x8-block rows and 8 8x8-block columns.  Tiles are numbered in row-major
 * order, 8x8-blocks within a tile in column-major order, and each 8x8-block
 * recursively decomposes into 4x4-, 2x2-, and 1x1-blocks in row-major order:
 *
 *   tile   = (mem_y / 64) * tiles_per_row + (mem_x / 64)
 *   blk8   = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
 *   blk4   = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
 *   blk2   = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
 *   blk1   = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
 *   offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
 */
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y / 64) * tiles_per_row + (mem_x / 64);
   const unsigned blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8);
   const unsigned blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1);
   const unsigned blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1);
   const unsigned blk1 = ((mem_y % 64) & 1) * 2 + ((mem_x % 64) & 1);
   const unsigned offset =
      tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1;

   /* W tiles reuse the Y-tiling bit-6 swizzle, as in the original code */
   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
/**
 * Return the linear offset of byte (mem_x, mem_y) in an untiled region.
 * Here \p tiles_per_row is the row stride in bytes, and \p swizzle does not
 * apply.
 */
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   (void) swizzle;

   return tiles_per_row * mem_y + mem_x;
}
506 typedef unsigned (*tex_tile_offset_func
)(unsigned mem_x
, unsigned mem_y
,
507 unsigned tiles_per_row
,
510 static tex_tile_offset_func
511 tex_tile_choose_offset_func(const struct ilo_texture
*tex
,
512 unsigned *tiles_per_row
)
514 switch (tex
->image
.tiling
) {
516 assert(!"unknown tiling");
518 case GEN6_TILING_NONE
:
519 *tiles_per_row
= tex
->image
.bo_stride
;
520 return tex_tile_none_offset
;
522 *tiles_per_row
= tex
->image
.bo_stride
/ 512;
523 return tex_tile_x_offset
;
525 *tiles_per_row
= tex
->image
.bo_stride
/ 128;
526 return tex_tile_y_offset
;
528 *tiles_per_row
= tex
->image
.bo_stride
/ 64;
529 return tex_tile_w_offset
;
534 tex_staging_sys_map_bo(struct ilo_texture
*tex
,
538 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
539 const bool prefer_cpu
= (is
->dev
.has_llc
|| for_read_back
);
542 if (prefer_cpu
&& (tex
->image
.tiling
== GEN6_TILING_NONE
||
544 ptr
= intel_bo_map(tex
->image
.bo
, !for_read_back
);
546 ptr
= intel_bo_map_gtt(tex
->image
.bo
);
552 tex_staging_sys_unmap_bo(struct ilo_texture
*tex
)
554 intel_bo_unmap(tex
->image
.bo
);
558 tex_staging_sys_zs_read(struct ilo_texture
*tex
,
559 const struct ilo_transfer
*xfer
)
561 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
562 const bool swizzle
= is
->dev
.has_address_swizzling
;
563 const struct pipe_box
*box
= &xfer
->base
.box
;
565 tex_tile_offset_func tile_offset
;
566 unsigned tiles_per_row
;
569 src
= tex_staging_sys_map_bo(tex
, true, false);
573 tile_offset
= tex_tile_choose_offset_func(tex
, &tiles_per_row
);
575 assert(tex
->image
.block_width
== 1 && tex
->image
.block_height
== 1);
577 if (tex
->separate_s8
) {
578 struct ilo_texture
*s8_tex
= tex
->separate_s8
;
579 const uint8_t *s8_src
;
580 tex_tile_offset_func s8_tile_offset
;
581 unsigned s8_tiles_per_row
;
582 int dst_cpp
, dst_s8_pos
, src_cpp_used
;
584 s8_src
= tex_staging_sys_map_bo(s8_tex
, true, false);
586 tex_staging_sys_unmap_bo(tex
);
590 s8_tile_offset
= tex_tile_choose_offset_func(s8_tex
, &s8_tiles_per_row
);
592 if (tex
->base
.format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
) {
593 assert(tex
->image
.format
== PIPE_FORMAT_Z24X8_UNORM
);
600 assert(tex
->base
.format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
601 assert(tex
->image
.format
== PIPE_FORMAT_Z32_FLOAT
);
608 for (slice
= 0; slice
< box
->depth
; slice
++) {
609 unsigned mem_x
, mem_y
, s8_mem_x
, s8_mem_y
;
613 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
614 box
, &mem_x
, &mem_y
);
615 tex_get_box_origin(s8_tex
, xfer
->base
.level
, slice
,
616 box
, &s8_mem_x
, &s8_mem_y
);
618 dst
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
620 for (i
= 0; i
< box
->height
; i
++) {
621 unsigned x
= mem_x
, s8_x
= s8_mem_x
;
624 for (j
= 0; j
< box
->width
; j
++) {
625 const unsigned offset
=
626 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
627 const unsigned s8_offset
=
628 s8_tile_offset(s8_x
, s8_mem_y
, s8_tiles_per_row
, swizzle
);
630 memcpy(d
, src
+ offset
, src_cpp_used
);
631 d
[dst_s8_pos
] = s8_src
[s8_offset
];
634 x
+= tex
->image
.block_size
;
638 dst
+= xfer
->base
.stride
;
644 tex_staging_sys_unmap_bo(s8_tex
);
647 assert(tex
->image
.format
== PIPE_FORMAT_S8_UINT
);
649 for (slice
= 0; slice
< box
->depth
; slice
++) {
650 unsigned mem_x
, mem_y
;
654 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
655 box
, &mem_x
, &mem_y
);
657 dst
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
659 for (i
= 0; i
< box
->height
; i
++) {
663 for (j
= 0; j
< box
->width
; j
++) {
664 const unsigned offset
=
665 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
673 dst
+= xfer
->base
.stride
;
679 tex_staging_sys_unmap_bo(tex
);
685 tex_staging_sys_zs_write(struct ilo_texture
*tex
,
686 const struct ilo_transfer
*xfer
)
688 const struct ilo_screen
*is
= ilo_screen(tex
->base
.screen
);
689 const bool swizzle
= is
->dev
.has_address_swizzling
;
690 const struct pipe_box
*box
= &xfer
->base
.box
;
692 tex_tile_offset_func tile_offset
;
693 unsigned tiles_per_row
;
696 dst
= tex_staging_sys_map_bo(tex
, false, false);
700 tile_offset
= tex_tile_choose_offset_func(tex
, &tiles_per_row
);
702 assert(tex
->image
.block_width
== 1 && tex
->image
.block_height
== 1);
704 if (tex
->separate_s8
) {
705 struct ilo_texture
*s8_tex
= tex
->separate_s8
;
707 tex_tile_offset_func s8_tile_offset
;
708 unsigned s8_tiles_per_row
;
709 int src_cpp
, src_s8_pos
, dst_cpp_used
;
711 s8_dst
= tex_staging_sys_map_bo(s8_tex
, false, false);
713 tex_staging_sys_unmap_bo(s8_tex
);
717 s8_tile_offset
= tex_tile_choose_offset_func(s8_tex
, &s8_tiles_per_row
);
719 if (tex
->base
.format
== PIPE_FORMAT_Z24_UNORM_S8_UINT
) {
720 assert(tex
->image
.format
== PIPE_FORMAT_Z24X8_UNORM
);
727 assert(tex
->base
.format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
728 assert(tex
->image
.format
== PIPE_FORMAT_Z32_FLOAT
);
735 for (slice
= 0; slice
< box
->depth
; slice
++) {
736 unsigned mem_x
, mem_y
, s8_mem_x
, s8_mem_y
;
740 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
741 box
, &mem_x
, &mem_y
);
742 tex_get_box_origin(s8_tex
, xfer
->base
.level
, slice
,
743 box
, &s8_mem_x
, &s8_mem_y
);
745 src
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
747 for (i
= 0; i
< box
->height
; i
++) {
748 unsigned x
= mem_x
, s8_x
= s8_mem_x
;
749 const uint8_t *s
= src
;
751 for (j
= 0; j
< box
->width
; j
++) {
752 const unsigned offset
=
753 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
754 const unsigned s8_offset
=
755 s8_tile_offset(s8_x
, s8_mem_y
, s8_tiles_per_row
, swizzle
);
757 memcpy(dst
+ offset
, s
, dst_cpp_used
);
758 s8_dst
[s8_offset
] = s
[src_s8_pos
];
761 x
+= tex
->image
.block_size
;
765 src
+= xfer
->base
.stride
;
771 tex_staging_sys_unmap_bo(s8_tex
);
774 assert(tex
->image
.format
== PIPE_FORMAT_S8_UINT
);
776 for (slice
= 0; slice
< box
->depth
; slice
++) {
777 unsigned mem_x
, mem_y
;
781 tex_get_box_origin(tex
, xfer
->base
.level
, slice
,
782 box
, &mem_x
, &mem_y
);
784 src
= xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
786 for (i
= 0; i
< box
->height
; i
++) {
788 const uint8_t *s
= src
;
790 for (j
= 0; j
< box
->width
; j
++) {
791 const unsigned offset
=
792 tile_offset(x
, mem_y
, tiles_per_row
, swizzle
);
800 src
+= xfer
->base
.stride
;
806 tex_staging_sys_unmap_bo(tex
);
812 tex_staging_sys_convert_write(struct ilo_texture
*tex
,
813 const struct ilo_transfer
*xfer
)
815 const struct pipe_box
*box
= &xfer
->base
.box
;
816 unsigned dst_slice_stride
;
820 dst
= tex_staging_sys_map_bo(tex
, false, true);
824 dst
+= tex_get_box_offset(tex
, xfer
->base
.level
, box
);
826 /* slice stride is not always available */
828 dst_slice_stride
= tex_get_slice_stride(tex
, xfer
->base
.level
);
830 dst_slice_stride
= 0;
832 if (unlikely(tex
->image
.format
== tex
->base
.format
)) {
833 util_copy_box(dst
, tex
->image
.format
, tex
->image
.bo_stride
,
834 dst_slice_stride
, 0, 0, 0, box
->width
, box
->height
, box
->depth
,
835 xfer
->staging
.sys
, xfer
->base
.stride
, xfer
->base
.layer_stride
,
838 tex_staging_sys_unmap_bo(tex
);
843 switch (tex
->base
.format
) {
844 case PIPE_FORMAT_ETC1_RGB8
:
845 assert(tex
->image
.format
== PIPE_FORMAT_R8G8B8X8_UNORM
);
847 for (slice
= 0; slice
< box
->depth
; slice
++) {
849 xfer
->staging
.sys
+ xfer
->base
.layer_stride
* slice
;
851 util_format_etc1_rgb8_unpack_rgba_8unorm(dst
,
852 tex
->image
.bo_stride
, src
, xfer
->base
.stride
,
853 box
->width
, box
->height
);
855 dst
+= dst_slice_stride
;
859 assert(!"unable to convert the staging data");
863 tex_staging_sys_unmap_bo(tex
);
869 tex_staging_sys_writeback(struct ilo_transfer
*xfer
)
871 struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
874 if (!(xfer
->base
.usage
& PIPE_TRANSFER_WRITE
))
877 switch (xfer
->method
) {
878 case ILO_TRANSFER_MAP_SW_CONVERT
:
879 success
= tex_staging_sys_convert_write(tex
, xfer
);
881 case ILO_TRANSFER_MAP_SW_ZS
:
882 success
= tex_staging_sys_zs_write(tex
, xfer
);
885 assert(!"unknown mapping method");
891 ilo_err("failed to map resource for moving staging data\n");
895 tex_staging_sys_readback(struct ilo_transfer
*xfer
)
897 struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
898 bool read_back
= false, success
;
900 /* see if we need to read the resource back */
901 if (xfer
->base
.usage
& PIPE_TRANSFER_READ
) {
904 else if (xfer
->base
.usage
& PIPE_TRANSFER_WRITE
) {
905 const unsigned discard_flags
=
906 (PIPE_TRANSFER_DISCARD_RANGE
| PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
);
908 if (!(xfer
->base
.usage
& discard_flags
))
915 switch (xfer
->method
) {
916 case ILO_TRANSFER_MAP_SW_CONVERT
:
917 assert(!"no on-the-fly format conversion for mapping");
920 case ILO_TRANSFER_MAP_SW_ZS
:
921 success
= tex_staging_sys_zs_read(tex
, xfer
);
924 assert(!"unknown mapping method");
933 tex_map(struct ilo_transfer
*xfer
)
937 switch (xfer
->method
) {
938 case ILO_TRANSFER_MAP_CPU
:
939 case ILO_TRANSFER_MAP_GTT
:
940 case ILO_TRANSFER_MAP_GTT_ASYNC
:
941 ptr
= xfer_map(xfer
);
943 const struct ilo_texture
*tex
= ilo_texture(xfer
->base
.resource
);
945 ptr
+= tex_get_box_offset(tex
, xfer
->base
.level
, &xfer
->base
.box
);
947 /* stride is for a block row, not a texel row */
948 xfer
->base
.stride
= tex
->image
.bo_stride
;
949 /* note that slice stride is not always available */
950 xfer
->base
.layer_stride
= (xfer
->base
.box
.depth
> 1) ?
951 tex_get_slice_stride(tex
, xfer
->base
.level
) : 0;
954 case ILO_TRANSFER_MAP_STAGING
:
955 ptr
= xfer_map(xfer
);
957 const struct ilo_texture
*staging
= ilo_texture(xfer
->staging
.res
);
958 xfer
->base
.stride
= staging
->image
.bo_stride
;
959 xfer
->base
.layer_stride
= tex_get_slice_stride(staging
, 0);
962 case ILO_TRANSFER_MAP_SW_CONVERT
:
963 case ILO_TRANSFER_MAP_SW_ZS
:
964 if (xfer_alloc_staging_sys(xfer
) && tex_staging_sys_readback(xfer
))
965 ptr
= xfer_map(xfer
);
970 assert(!"unknown mapping method");
979 buf_map(struct ilo_transfer
*xfer
)
983 ptr
= xfer_map(xfer
);
987 if (xfer
->method
!= ILO_TRANSFER_MAP_STAGING
)
988 ptr
+= xfer
->base
.box
.x
;
990 xfer
->base
.stride
= 0;
991 xfer
->base
.layer_stride
= 0;
993 assert(xfer
->base
.level
== 0);
994 assert(xfer
->base
.box
.y
== 0);
995 assert(xfer
->base
.box
.z
== 0);
996 assert(xfer
->base
.box
.height
== 1);
997 assert(xfer
->base
.box
.depth
== 1);
1003 copy_staging_resource(struct ilo_context
*ilo
,
1004 struct ilo_transfer
*xfer
,
1005 const struct pipe_box
*box
)
1007 const unsigned pad_x
= (xfer
->staging
.res
->target
== PIPE_BUFFER
) ?
1008 xfer
->base
.box
.x
% ILO_TRANSFER_MAP_BUFFER_ALIGNMENT
: 0;
1009 struct pipe_box modified_box
;
1011 assert(xfer
->method
== ILO_TRANSFER_MAP_STAGING
&& xfer
->staging
.res
);
1014 u_box_3d(pad_x
, 0, 0, xfer
->base
.box
.width
, xfer
->base
.box
.height
,
1015 xfer
->base
.box
.depth
, &modified_box
);
1016 box
= &modified_box
;
1019 modified_box
= *box
;
1020 modified_box
.x
+= pad_x
;
1021 box
= &modified_box
;
1024 ilo_blitter_blt_copy_resource(ilo
->blitter
,
1025 xfer
->base
.resource
, xfer
->base
.level
,
1026 xfer
->base
.box
.x
, xfer
->base
.box
.y
, xfer
->base
.box
.z
,
1027 xfer
->staging
.res
, 0, box
);
1031 is_bo_busy(struct ilo_context
*ilo
, struct intel_bo
*bo
, bool *need_submit
)
1033 const bool referenced
= ilo_builder_has_reloc(&ilo
->cp
->builder
, bo
);
1036 *need_submit
= referenced
;
1041 return intel_bo_is_busy(bo
);
1045 * Choose the best mapping method, depending on the transfer usage and whether
1049 choose_transfer_method(struct ilo_context
*ilo
, struct ilo_transfer
*xfer
)
1051 struct pipe_resource
*res
= xfer
->base
.resource
;
1054 if (!resource_get_transfer_method(res
, &xfer
->base
, &xfer
->method
))
1057 /* see if we can avoid blocking */
1058 if (is_bo_busy(ilo
, ilo_resource_get_bo(res
), &need_submit
)) {
1059 bool resource_renamed
;
1061 if (!xfer_unblock(xfer
, &resource_renamed
)) {
1062 if (xfer
->base
.usage
& PIPE_TRANSFER_DONTBLOCK
)
1065 /* submit to make bo really busy and map() correctly blocks */
1067 ilo_cp_submit(ilo
->cp
, "syncing for transfers");
1070 if (resource_renamed
)
1071 ilo_state_vector_resource_renamed(&ilo
->state_vector
, res
);
1078 buf_pwrite(struct ilo_context
*ilo
, struct pipe_resource
*res
,
1079 unsigned usage
, int offset
, int size
, const void *data
)
1081 struct ilo_buffer
*buf
= ilo_buffer(res
);
1084 /* see if we can avoid blocking */
1085 if (is_bo_busy(ilo
, buf
->bo
, &need_submit
)) {
1086 bool unblocked
= false;
1088 if ((usage
& PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE
) &&
1089 ilo_resource_rename_bo(res
)) {
1090 ilo_state_vector_resource_renamed(&ilo
->state_vector
, res
);
1094 struct pipe_resource templ
, *staging
;
1097 * allocate a staging buffer to hold the data and pipelined copy it
1101 templ
.width0
= size
;
1102 templ
.usage
= PIPE_USAGE_STAGING
;
1103 templ
.bind
= PIPE_BIND_TRANSFER_WRITE
;
1104 staging
= ilo
->base
.screen
->resource_create(ilo
->base
.screen
, &templ
);
1106 struct pipe_box staging_box
;
1108 intel_bo_pwrite(ilo_buffer(staging
)->bo
, 0, size
, data
);
1110 u_box_1d(0, size
, &staging_box
);
1111 ilo_blitter_blt_copy_resource(ilo
->blitter
,
1112 res
, 0, offset
, 0, 0,
1113 staging
, 0, &staging_box
);
1115 pipe_resource_reference(&staging
, NULL
);
1121 /* submit to make bo really busy and pwrite() correctly blocks */
1122 if (!unblocked
&& need_submit
)
1123 ilo_cp_submit(ilo
->cp
, "syncing for pwrites");
1126 intel_bo_pwrite(buf
->bo
, offset
, size
, data
);
1130 ilo_transfer_flush_region(struct pipe_context
*pipe
,
1131 struct pipe_transfer
*transfer
,
1132 const struct pipe_box
*box
)
1134 struct ilo_context
*ilo
= ilo_context(pipe
);
1135 struct ilo_transfer
*xfer
= ilo_transfer(transfer
);
1138 * The staging resource is mapped persistently and coherently. We can copy
1139 * without unmapping.
1141 if (xfer
->method
== ILO_TRANSFER_MAP_STAGING
&&
1142 (xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
))
1143 copy_staging_resource(ilo
, xfer
, box
);
1147 ilo_transfer_unmap(struct pipe_context
*pipe
,
1148 struct pipe_transfer
*transfer
)
1150 struct ilo_context
*ilo
= ilo_context(pipe
);
1151 struct ilo_transfer
*xfer
= ilo_transfer(transfer
);
1155 switch (xfer
->method
) {
1156 case ILO_TRANSFER_MAP_STAGING
:
1157 if (!(xfer
->base
.usage
& PIPE_TRANSFER_FLUSH_EXPLICIT
))
1158 copy_staging_resource(ilo
, xfer
, NULL
);
1159 pipe_resource_reference(&xfer
->staging
.res
, NULL
);
1161 case ILO_TRANSFER_MAP_SW_CONVERT
:
1162 case ILO_TRANSFER_MAP_SW_ZS
:
1163 tex_staging_sys_writeback(xfer
);
1164 align_free(xfer
->staging
.sys
);
1170 pipe_resource_reference(&xfer
->base
.resource
, NULL
);
1172 util_slab_free(&ilo
->transfer_mempool
, xfer
);
1176 ilo_transfer_map(struct pipe_context
*pipe
,
1177 struct pipe_resource
*res
,
1180 const struct pipe_box
*box
,
1181 struct pipe_transfer
**transfer
)
1183 struct ilo_context
*ilo
= ilo_context(pipe
);
1184 struct ilo_transfer
*xfer
;
1187 /* note that xfer is not zero'd */
1188 xfer
= util_slab_alloc(&ilo
->transfer_mempool
);
1194 xfer
->base
.resource
= NULL
;
1195 pipe_resource_reference(&xfer
->base
.resource
, res
);
1196 xfer
->base
.level
= level
;
1197 xfer
->base
.usage
= usage
;
1198 xfer
->base
.box
= *box
;
1200 ilo_blit_resolve_transfer(ilo
, &xfer
->base
);
1202 if (choose_transfer_method(ilo
, xfer
)) {
1203 if (res
->target
== PIPE_BUFFER
)
1204 ptr
= buf_map(xfer
);
1206 ptr
= tex_map(xfer
);
1213 pipe_resource_reference(&xfer
->base
.resource
, NULL
);
1214 util_slab_free(&ilo
->transfer_mempool
, xfer
);
1219 *transfer
= &xfer
->base
;
1225 ilo_transfer_inline_write(struct pipe_context
*pipe
,
1226 struct pipe_resource
*res
,
1229 const struct pipe_box
*box
,
1232 unsigned layer_stride
)
1234 if (likely(res
->target
== PIPE_BUFFER
) &&
1235 !(usage
& PIPE_TRANSFER_UNSYNCHRONIZED
)) {
1236 /* they should specify just an offset and a size */
1238 assert(box
->y
== 0);
1239 assert(box
->z
== 0);
1240 assert(box
->height
== 1);
1241 assert(box
->depth
== 1);
1243 buf_pwrite(ilo_context(pipe
), res
,
1244 usage
, box
->x
, box
->width
, data
);
1247 u_default_transfer_inline_write(pipe
, res
,
1248 level
, usage
, box
, data
, stride
, layer_stride
);
1253 * Initialize transfer-related functions.
1256 ilo_init_transfer_functions(struct ilo_context
*ilo
)
1258 ilo
->base
.transfer_map
= ilo_transfer_map
;
1259 ilo
->base
.transfer_flush_region
= ilo_transfer_flush_region
;
1260 ilo
->base
.transfer_unmap
= ilo_transfer_unmap
;
1261 ilo
->base
.transfer_inline_write
= ilo_transfer_inline_write
;