/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2014,2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
30 #include "radeon/r600_cs.h"
32 #include "util/u_format.h"
34 static void cik_sdma_do_copy_buffer(struct si_context
*ctx
,
35 struct pipe_resource
*dst
,
36 struct pipe_resource
*src
,
41 struct radeon_winsys_cs
*cs
= ctx
->b
.dma
.cs
;
42 unsigned i
, ncopy
, csize
;
43 struct r600_resource
*rdst
= (struct r600_resource
*)dst
;
44 struct r600_resource
*rsrc
= (struct r600_resource
*)src
;
46 dst_offset
+= r600_resource(dst
)->gpu_address
;
47 src_offset
+= r600_resource(src
)->gpu_address
;
49 ncopy
= (size
+ CIK_SDMA_COPY_MAX_SIZE
- 1) / CIK_SDMA_COPY_MAX_SIZE
;
50 r600_need_dma_space(&ctx
->b
, ncopy
* 7);
52 radeon_add_to_buffer_list(&ctx
->b
, &ctx
->b
.dma
, rsrc
, RADEON_USAGE_READ
,
53 RADEON_PRIO_SDMA_BUFFER
);
54 radeon_add_to_buffer_list(&ctx
->b
, &ctx
->b
.dma
, rdst
, RADEON_USAGE_WRITE
,
55 RADEON_PRIO_SDMA_BUFFER
);
57 for (i
= 0; i
< ncopy
; i
++) {
58 csize
= size
< CIK_SDMA_COPY_MAX_SIZE
? size
: CIK_SDMA_COPY_MAX_SIZE
;
59 cs
->buf
[cs
->cdw
++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY
,
60 CIK_SDMA_COPY_SUB_OPCODE_LINEAR
,
62 cs
->buf
[cs
->cdw
++] = csize
;
63 cs
->buf
[cs
->cdw
++] = 0; /* src/dst endian swap */
64 cs
->buf
[cs
->cdw
++] = src_offset
;
65 cs
->buf
[cs
->cdw
++] = src_offset
>> 32;
66 cs
->buf
[cs
->cdw
++] = dst_offset
;
67 cs
->buf
[cs
->cdw
++] = dst_offset
>> 32;
74 static void cik_sdma_copy_buffer(struct si_context
*ctx
,
75 struct pipe_resource
*dst
,
76 struct pipe_resource
*src
,
81 struct r600_resource
*rdst
= (struct r600_resource
*)dst
;
83 /* Mark the buffer range of destination as valid (initialized),
84 * so that transfer_map knows it should wait for the GPU when mapping
86 util_range_add(&rdst
->valid_buffer_range
, dst_offset
,
89 cik_sdma_do_copy_buffer(ctx
, dst
, src
, dst_offset
, src_offset
, size
);
92 static void cik_sdma_copy_tile(struct si_context
*ctx
,
93 struct pipe_resource
*dst
,
95 struct pipe_resource
*src
,
103 struct radeon_winsys_cs
*cs
= ctx
->b
.dma
.cs
;
104 struct r600_texture
*rsrc
= (struct r600_texture
*)src
;
105 struct r600_texture
*rdst
= (struct r600_texture
*)dst
;
106 unsigned dst_mode
= rdst
->surface
.level
[dst_level
].mode
;
107 unsigned src_mode
= rsrc
->surface
.level
[src_level
].mode
;
108 bool detile
= dst_mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
;
109 struct r600_texture
*rlinear
= detile
? rdst
: rsrc
;
110 struct r600_texture
*rtiled
= detile
? rsrc
: rdst
;
111 unsigned linear_lvl
= detile
? dst_level
: src_level
;
112 unsigned tiled_lvl
= detile
? src_level
: dst_level
;
113 struct radeon_info
*info
= &ctx
->screen
->b
.info
;
114 unsigned index
= rtiled
->surface
.tiling_index
[tiled_lvl
];
115 unsigned macro_index
= rtiled
->surface
.macro_tile_index
;
116 unsigned tile_mode
= info
->si_tile_mode_array
[index
];
117 unsigned macro_mode
= info
->cik_macrotile_mode_array
[macro_index
];
118 unsigned array_mode
, lbpe
, pitch_tile_max
, slice_tile_max
, size
;
119 unsigned ncopy
, height
, cheight
, i
;
120 unsigned sub_op
, bank_h
, bank_w
, mt_aspect
, nbanks
, tile_split
, mt
;
122 unsigned pipe_config
;
124 assert(dst_mode
!= src_mode
);
125 assert(src_mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
|| dst_mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
);
127 sub_op
= CIK_SDMA_COPY_SUB_OPCODE_TILED
;
128 lbpe
= util_logbase2(bpe
);
129 pitch_tile_max
= ((pitch
/ bpe
) / 8) - 1;
131 assert(!util_format_is_depth_and_stencil(rtiled
->resource
.b
.b
.format
));
133 array_mode
= G_009910_ARRAY_MODE(tile_mode
);
134 slice_tile_max
= (rtiled
->surface
.level
[tiled_lvl
].nblk_x
*
135 rtiled
->surface
.level
[tiled_lvl
].nblk_y
) / (8*8) - 1;
136 height
= rlinear
->surface
.level
[linear_lvl
].nblk_y
;
137 base
= rtiled
->surface
.level
[tiled_lvl
].offset
;
138 addr
= rlinear
->surface
.level
[linear_lvl
].offset
;
139 bank_h
= G_009990_BANK_HEIGHT(macro_mode
);
140 bank_w
= G_009990_BANK_WIDTH(macro_mode
);
141 mt_aspect
= G_009990_MACRO_TILE_ASPECT(macro_mode
);
142 /* Non-depth modes don't have TILE_SPLIT set. */
143 tile_split
= util_logbase2(rtiled
->surface
.tile_split
>> 6);
144 nbanks
= G_009990_NUM_BANKS(macro_mode
);
145 base
+= rtiled
->resource
.gpu_address
;
146 addr
+= rlinear
->resource
.gpu_address
;
148 pipe_config
= G_009910_PIPE_CONFIG(tile_mode
);
149 mt
= G_009910_MICRO_TILE_MODE_NEW(tile_mode
);
151 size
= (copy_height
* pitch
) / 4;
152 cheight
= copy_height
;
153 if (((cheight
* pitch
) / 4) > CIK_SDMA_COPY_MAX_SIZE
) {
154 cheight
= (CIK_SDMA_COPY_MAX_SIZE
* 4) / pitch
;
155 cheight
&= ~(y_align
- 1);
157 ncopy
= (copy_height
+ cheight
- 1) / cheight
;
158 r600_need_dma_space(&ctx
->b
, ncopy
* 12);
160 radeon_add_to_buffer_list(&ctx
->b
, &ctx
->b
.dma
, &rsrc
->resource
,
161 RADEON_USAGE_READ
, RADEON_PRIO_SDMA_TEXTURE
);
162 radeon_add_to_buffer_list(&ctx
->b
, &ctx
->b
.dma
, &rdst
->resource
,
163 RADEON_USAGE_WRITE
, RADEON_PRIO_SDMA_TEXTURE
);
165 copy_height
= size
* 4 / pitch
;
166 for (i
= 0; i
< ncopy
; i
++) {
167 cheight
= copy_height
;
168 if (((cheight
* pitch
) / 4) > CIK_SDMA_COPY_MAX_SIZE
) {
169 cheight
= (CIK_SDMA_COPY_MAX_SIZE
* 4) / pitch
;
170 cheight
&= ~(y_align
- 1);
172 size
= (cheight
* pitch
) / 4;
174 cs
->buf
[cs
->cdw
++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY
,
175 sub_op
, detile
<< 15);
176 cs
->buf
[cs
->cdw
++] = base
;
177 cs
->buf
[cs
->cdw
++] = base
>> 32;
178 cs
->buf
[cs
->cdw
++] = ((height
- 1) << 16) | pitch_tile_max
;
179 cs
->buf
[cs
->cdw
++] = slice_tile_max
;
180 cs
->buf
[cs
->cdw
++] = (pipe_config
<< 26) | (mt_aspect
<< 24) |
181 (nbanks
<< 21) | (bank_h
<< 18) | (bank_w
<< 15) |
182 (tile_split
<< 11) | (mt
<< 8) | (array_mode
<< 3) |
184 cs
->buf
[cs
->cdw
++] = y
<< 16; /* | x */
185 cs
->buf
[cs
->cdw
++] = 0; /* z */
186 cs
->buf
[cs
->cdw
++] = addr
& 0xfffffffc;
187 cs
->buf
[cs
->cdw
++] = addr
>> 32;
188 cs
->buf
[cs
->cdw
++] = (pitch
/ bpe
) - 1;
189 cs
->buf
[cs
->cdw
++] = size
;
191 copy_height
-= cheight
;
196 void cik_sdma_copy(struct pipe_context
*ctx
,
197 struct pipe_resource
*dst
,
199 unsigned dstx
, unsigned dsty
, unsigned dstz
,
200 struct pipe_resource
*src
,
202 const struct pipe_box
*src_box
)
204 struct si_context
*sctx
= (struct si_context
*)ctx
;
205 struct r600_texture
*rsrc
= (struct r600_texture
*)src
;
206 struct r600_texture
*rdst
= (struct r600_texture
*)dst
;
207 unsigned dst_pitch
, src_pitch
, bpe
, dst_mode
, src_mode
;
208 unsigned src_w
, dst_w
;
209 unsigned src_x
, src_y
;
210 unsigned copy_height
, y_align
;
211 unsigned dst_x
= dstx
, dst_y
= dsty
, dst_z
= dstz
;
213 if (sctx
->b
.dma
.cs
== NULL
) {
217 if (dst
->target
== PIPE_BUFFER
&& src
->target
== PIPE_BUFFER
) {
218 cik_sdma_copy_buffer(sctx
, dst
, src
, dst_x
, src_box
->x
, src_box
->width
);
222 /* Before re-enabling this, please make sure you can hit all newly
223 * enabled paths in your testing, preferably with both piglit (in
224 * particular the streaming-texture-leak test) and real world apps
225 * (e.g. the UE4 Elemental demo).
229 if (!r600_prepare_for_dma_blit(&sctx
->b
, rdst
, dst_level
, dstx
, dsty
,
230 dstz
, rsrc
, src_level
, src_box
))
233 src_x
= util_format_get_nblocksx(src
->format
, src_box
->x
);
234 dst_x
= util_format_get_nblocksx(src
->format
, dst_x
);
235 src_y
= util_format_get_nblocksy(src
->format
, src_box
->y
);
236 dst_y
= util_format_get_nblocksy(src
->format
, dst_y
);
238 dst_pitch
= rdst
->surface
.level
[dst_level
].pitch_bytes
;
239 src_pitch
= rsrc
->surface
.level
[src_level
].pitch_bytes
;
240 src_w
= rsrc
->surface
.level
[src_level
].npix_x
;
241 dst_w
= rdst
->surface
.level
[dst_level
].npix_x
;
243 if (src_pitch
!= dst_pitch
|| src_box
->x
|| dst_x
|| src_w
!= dst_w
||
244 src_box
->width
!= src_w
||
245 rsrc
->surface
.level
[src_level
].nblk_y
!=
246 rdst
->surface
.level
[dst_level
].nblk_y
) {
247 /* FIXME CIK can do partial blit */
251 bpe
= rdst
->surface
.bpe
;
252 copy_height
= src_box
->height
/ rsrc
->surface
.blk_h
;
253 dst_mode
= rdst
->surface
.level
[dst_level
].mode
;
254 src_mode
= rsrc
->surface
.level
[src_level
].mode
;
256 /* Dimensions must be aligned to (macro)tiles */
257 switch (src_mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
? dst_mode
: src_mode
) {
258 case RADEON_SURF_MODE_1D
:
259 if ((src_x
% 8) || (src_y
% 8) || (dst_x
% 8) || (dst_y
% 8) ||
264 case RADEON_SURF_MODE_2D
: {
265 unsigned mtilew
, mtileh
;
266 struct radeon_info
*info
= &sctx
->screen
->b
.info
;
267 unsigned macro_index
= rsrc
->surface
.macro_tile_index
;
268 unsigned macro_mode
= info
->cik_macrotile_mode_array
[macro_index
];
269 unsigned num_banks
= 2 << G_009990_NUM_BANKS(macro_mode
);
271 mtilew
= (8 * rsrc
->surface
.bankw
*
272 sctx
->screen
->b
.info
.num_tile_pipes
) *
273 rsrc
->surface
.mtilea
;
274 assert(!(mtilew
& (mtilew
- 1)));
275 mtileh
= (8 * rsrc
->surface
.bankh
* num_banks
) /
276 rsrc
->surface
.mtilea
;
277 assert(!(mtileh
& (mtileh
- 1)));
279 if ((src_x
& (mtilew
- 1)) || (src_y
& (mtileh
- 1)) ||
280 (dst_x
& (mtilew
- 1)) || (dst_y
& (mtileh
- 1)) ||
281 (copy_height
& (mtileh
- 1)))
291 if (src_mode
== dst_mode
) {
292 uint64_t dst_offset
, src_offset
;
293 unsigned src_h
, dst_h
;
295 src_h
= rsrc
->surface
.level
[src_level
].npix_y
;
296 dst_h
= rdst
->surface
.level
[dst_level
].npix_y
;
298 if (src_box
->depth
> 1 &&
299 (src_y
|| dst_y
|| src_h
!= dst_h
|| src_box
->height
!= src_h
))
302 /* simple dma blit would do NOTE code here assume :
303 * dst_pitch == src_pitch
305 src_offset
= rsrc
->surface
.level
[src_level
].offset
;
306 src_offset
+= rsrc
->surface
.level
[src_level
].slice_size
* src_box
->z
;
307 src_offset
+= src_y
* src_pitch
+ src_x
* bpe
;
308 dst_offset
= rdst
->surface
.level
[dst_level
].offset
;
309 dst_offset
+= rdst
->surface
.level
[dst_level
].slice_size
* dst_z
;
310 dst_offset
+= dst_y
* dst_pitch
+ dst_x
* bpe
;
311 cik_sdma_do_copy_buffer(sctx
, dst
, src
, dst_offset
, src_offset
,
313 rsrc
->surface
.level
[src_level
].slice_size
);
315 if (dst_y
!= src_y
|| src_box
->depth
> 1 || src_box
->z
|| dst_z
)
318 cik_sdma_copy_tile(sctx
, dst
, dst_level
, src
, src_level
,
319 src_y
, copy_height
, y_align
, dst_pitch
, bpe
);
324 si_resource_copy_region(ctx
, dst
, dst_level
, dstx
, dsty
, dstz
,
325 src
, src_level
, src_box
);