2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 /* This file implements randomized SDMA texture blit tests. */
29 #include "util/u_surface.h"
30 #include "util/rand_xor.h"
/* Seed state for the xorshift128+ PRNG (util/rand_xor.h) that generates
 * the random source-pixel data. */
static uint64_t seed_xorshift128plus[2];

/* One random number is 8 bytes; CPU texture row strides are aligned to
 * this so rows can be filled with whole 64-bit random values. */
#define RAND_NUM_SIZE 8

/* The GPU blits are emulated on the CPU using these CPU textures. */

struct cpu_texture {
	uint8_t *ptr;		/* malloc'd pixel storage */
	uint64_t size;		/* total allocation size in bytes */
	uint64_t layer_stride;	/* bytes between consecutive array layers */
	unsigned stride;	/* bytes per row, aligned to RAND_NUM_SIZE */
};
45 static void alloc_cpu_texture(struct cpu_texture
*tex
,
46 struct pipe_resource
*templ
, int bpp
)
48 tex
->stride
= align(templ
->width0
* bpp
, RAND_NUM_SIZE
);
49 tex
->layer_stride
= (uint64_t)tex
->stride
* templ
->height0
;
50 tex
->size
= tex
->layer_stride
* templ
->array_size
;
51 tex
->ptr
= malloc(tex
->size
);
55 static void set_random_pixels(struct pipe_context
*ctx
,
56 struct pipe_resource
*tex
,
57 struct cpu_texture
*cpu
)
59 struct pipe_transfer
*t
;
63 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_WRITE
,
64 0, 0, 0, tex
->width0
, tex
->height0
,
68 for (z
= 0; z
< tex
->array_size
; z
++) {
69 for (y
= 0; y
< tex
->height0
; y
++) {
70 uint64_t *ptr
= (uint64_t*)
71 (map
+ t
->layer_stride
*z
+ t
->stride
*y
);
72 uint64_t *ptr_cpu
= (uint64_t*)
73 (cpu
->ptr
+ cpu
->layer_stride
*z
+ cpu
->stride
*y
);
74 unsigned size
= cpu
->stride
/ RAND_NUM_SIZE
;
76 assert(t
->stride
% RAND_NUM_SIZE
== 0);
77 assert(cpu
->stride
% RAND_NUM_SIZE
== 0);
79 for (x
= 0; x
< size
; x
++) {
81 rand_xorshift128plus(seed_xorshift128plus
);
86 pipe_transfer_unmap(ctx
, t
);
89 static bool compare_textures(struct pipe_context
*ctx
,
90 struct pipe_resource
*tex
,
91 struct cpu_texture
*cpu
, int bpp
)
93 struct pipe_transfer
*t
;
98 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_READ
,
99 0, 0, 0, tex
->width0
, tex
->height0
,
100 tex
->array_size
, &t
);
103 for (z
= 0; z
< tex
->array_size
; z
++) {
104 for (y
= 0; y
< tex
->height0
; y
++) {
105 uint8_t *ptr
= map
+ t
->layer_stride
*z
+ t
->stride
*y
;
106 uint8_t *cpu_ptr
= cpu
->ptr
+
107 cpu
->layer_stride
*z
+ cpu
->stride
*y
;
109 if (memcmp(ptr
, cpu_ptr
, tex
->width0
* bpp
)) {
116 pipe_transfer_unmap(ctx
, t
);
120 static enum pipe_format
get_format_from_bpp(int bpp
)
124 return PIPE_FORMAT_R8_UINT
;
126 return PIPE_FORMAT_R16_UINT
;
128 return PIPE_FORMAT_R32_UINT
;
130 return PIPE_FORMAT_R32G32_UINT
;
132 return PIPE_FORMAT_R32G32B32A32_UINT
;
135 return PIPE_FORMAT_NONE
;
139 static const char *array_mode_to_string(struct si_screen
*sscreen
,
140 struct radeon_surf
*surf
)
142 if (sscreen
->info
.chip_class
>= GFX9
) {
143 switch (surf
->u
.gfx9
.surf
.swizzle_mode
) {
155 printf("Unhandled swizzle mode = %u\n",
156 surf
->u
.gfx9
.surf
.swizzle_mode
);
160 switch (surf
->u
.legacy
.level
[0].mode
) {
161 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
162 return "LINEAR_ALIGNED";
163 case RADEON_SURF_MODE_1D
:
164 return "1D_TILED_THIN1";
165 case RADEON_SURF_MODE_2D
:
166 return "2D_TILED_THIN1";
/* Randomly pick an upper bound for generated texture dimensions, biasing
 * the distribution toward interesting tiling cases.
 *
 * \param max_tex_side  the hardware maximum 2D texture side
 * \return max_tex_side (1/4 of cases), 128 (1/4, favors 1D tiling), or
 *         2048 (2/4, common sizes)
 */
static unsigned generate_max_tex_side(unsigned max_tex_side)
{
	switch (rand() % 4) {
	case 0:
		/* Try to hit large sizes in 1/4 of the cases. */
		return max_tex_side;
	case 1:
		/* Try to hit 1D tiling in 1/4 of the cases. */
		return 128;
	default:
		/* Try to hit common sizes in 2/4 of the cases. */
		return 2048;
	}
}
189 void si_test_dma(struct si_screen
*sscreen
)
191 struct pipe_screen
*screen
= &sscreen
->b
;
192 struct pipe_context
*ctx
= screen
->context_create(screen
, NULL
, 0);
193 struct si_context
*sctx
= (struct si_context
*)ctx
;
194 uint64_t max_alloc_size
;
195 unsigned i
, iterations
, num_partial_copies
, max_tex_side
;
196 unsigned num_pass
= 0, num_fail
= 0;
198 max_tex_side
= screen
->get_param(screen
, PIPE_CAP_MAX_TEXTURE_2D_SIZE
);
200 /* Max 128 MB allowed for both textures. */
201 max_alloc_size
= 128 * 1024 * 1024;
203 /* the seed for random test parameters */
205 /* the seed for random pixel data */
206 s_rand_xorshift128plus(seed_xorshift128plus
, false);
208 iterations
= 1000000000; /* just kill it when you are bored */
209 num_partial_copies
= 30;
211 /* These parameters are randomly generated per test:
212 * - whether to do one whole-surface copy or N partial copies per test
213 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
214 * - which texture dimensions to use
215 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
217 * - random initial pixels in src
218 * - generate random subrectangle copies for partial blits
220 for (i
= 0; i
< iterations
; i
++) {
221 struct pipe_resource tsrc
= {}, tdst
= {}, *src
, *dst
;
222 struct si_texture
*sdst
;
223 struct si_texture
*ssrc
;
224 struct cpu_texture src_cpu
, dst_cpu
;
225 unsigned bpp
, max_width
, max_height
, max_depth
, j
, num
;
226 unsigned gfx_blits
= 0, dma_blits
= 0, cs_blits
= 0, max_tex_side_gen
;
227 unsigned max_tex_layers
;
229 bool do_partial_copies
= rand() & 1;
231 /* generate a random test case */
232 tsrc
.target
= tdst
.target
= PIPE_TEXTURE_2D_ARRAY
;
233 tsrc
.depth0
= tdst
.depth0
= 1;
235 bpp
= 1 << (rand() % 5);
236 tsrc
.format
= tdst
.format
= get_format_from_bpp(bpp
);
238 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
239 max_tex_layers
= rand() % 4 ? 1 : 5;
241 tsrc
.width0
= (rand() % max_tex_side_gen
) + 1;
242 tsrc
.height0
= (rand() % max_tex_side_gen
) + 1;
243 tsrc
.array_size
= (rand() % max_tex_layers
) + 1;
245 /* Have a 1/4 chance of getting power-of-two dimensions. */
246 if (rand() % 4 == 0) {
247 tsrc
.width0
= util_next_power_of_two(tsrc
.width0
);
248 tsrc
.height0
= util_next_power_of_two(tsrc
.height0
);
251 if (!do_partial_copies
) {
252 /* whole-surface copies only, same dimensions */
255 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
256 max_tex_layers
= rand() % 4 ? 1 : 5;
258 /* many partial copies, dimensions can be different */
259 tdst
.width0
= (rand() % max_tex_side_gen
) + 1;
260 tdst
.height0
= (rand() % max_tex_side_gen
) + 1;
261 tdst
.array_size
= (rand() % max_tex_layers
) + 1;
263 /* Have a 1/4 chance of getting power-of-two dimensions. */
264 if (rand() % 4 == 0) {
265 tdst
.width0
= util_next_power_of_two(tdst
.width0
);
266 tdst
.height0
= util_next_power_of_two(tdst
.height0
);
270 /* check texture sizes */
271 if ((uint64_t)tsrc
.width0
* tsrc
.height0
* tsrc
.array_size
* bpp
+
272 (uint64_t)tdst
.width0
* tdst
.height0
* tdst
.array_size
* bpp
>
274 /* too large, try again */
279 /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
280 * or GTT + linear only (1/4 of cases)
282 tsrc
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
283 tdst
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
285 /* Allocate textures (both the GPU and CPU copies).
286 * The CPU will emulate what the GPU should be doing.
288 src
= screen
->resource_create(screen
, &tsrc
);
289 dst
= screen
->resource_create(screen
, &tdst
);
292 sdst
= (struct si_texture
*)dst
;
293 ssrc
= (struct si_texture
*)src
;
294 alloc_cpu_texture(&src_cpu
, &tsrc
, bpp
);
295 alloc_cpu_texture(&dst_cpu
, &tdst
, bpp
);
297 printf("%4u: dst = (%5u x %5u x %u, %s), "
298 " src = (%5u x %5u x %u, %s), bpp = %2u, ",
299 i
, tdst
.width0
, tdst
.height0
, tdst
.array_size
,
300 array_mode_to_string(sscreen
, &sdst
->surface
),
301 tsrc
.width0
, tsrc
.height0
, tsrc
.array_size
,
302 array_mode_to_string(sscreen
, &ssrc
->surface
), bpp
);
306 set_random_pixels(ctx
, src
, &src_cpu
);
308 /* clear dst pixels */
310 si_clear_buffer(sctx
, dst
, 0, sdst
->surface
.surf_size
, &zero
, 4,
311 SI_COHERENCY_SHADER
, false);
312 memset(dst_cpu
.ptr
, 0, dst_cpu
.layer_stride
* tdst
.array_size
);
315 max_width
= MIN2(tsrc
.width0
, tdst
.width0
);
316 max_height
= MIN2(tsrc
.height0
, tdst
.height0
);
317 max_depth
= MIN2(tsrc
.array_size
, tdst
.array_size
);
319 num
= do_partial_copies
? num_partial_copies
: 1;
320 for (j
= 0; j
< num
; j
++) {
321 int width
, height
, depth
;
322 int srcx
, srcy
, srcz
, dstx
, dsty
, dstz
;
324 unsigned old_num_draw_calls
= sctx
->num_draw_calls
;
325 unsigned old_num_dma_calls
= sctx
->num_dma_calls
;
326 unsigned old_num_cs_calls
= sctx
->num_compute_calls
;
328 if (!do_partial_copies
) {
329 /* copy whole src to dst */
334 srcx
= srcy
= srcz
= dstx
= dsty
= dstz
= 0;
336 /* random sub-rectangle copies from src to dst */
337 depth
= (rand() % max_depth
) + 1;
338 srcz
= rand() % (tsrc
.array_size
- depth
+ 1);
339 dstz
= rand() % (tdst
.array_size
- depth
+ 1);
341 /* special code path to hit the tiled partial copies */
342 if (!ssrc
->surface
.is_linear
&&
343 !sdst
->surface
.is_linear
&&
345 if (max_width
< 8 || max_height
< 8)
347 width
= ((rand() % (max_width
/ 8)) + 1) * 8;
348 height
= ((rand() % (max_height
/ 8)) + 1) * 8;
350 srcx
= rand() % (tsrc
.width0
- width
+ 1) & ~0x7;
351 srcy
= rand() % (tsrc
.height0
- height
+ 1) & ~0x7;
353 dstx
= rand() % (tdst
.width0
- width
+ 1) & ~0x7;
354 dsty
= rand() % (tdst
.height0
- height
+ 1) & ~0x7;
356 /* just make sure that it doesn't divide by zero */
357 assert(max_width
> 0 && max_height
> 0);
359 width
= (rand() % max_width
) + 1;
360 height
= (rand() % max_height
) + 1;
362 srcx
= rand() % (tsrc
.width0
- width
+ 1);
363 srcy
= rand() % (tsrc
.height0
- height
+ 1);
365 dstx
= rand() % (tdst
.width0
- width
+ 1);
366 dsty
= rand() % (tdst
.height0
- height
+ 1);
369 /* special code path to hit out-of-bounds reads in L2T */
370 if (ssrc
->surface
.is_linear
&&
371 !sdst
->surface
.is_linear
&&
380 u_box_3d(srcx
, srcy
, srcz
, width
, height
, depth
, &box
);
381 sctx
->dma_copy(ctx
, dst
, 0, dstx
, dsty
, dstz
, src
, 0, &box
);
383 /* See which engine was used. */
384 gfx_blits
+= sctx
->num_draw_calls
> old_num_draw_calls
;
385 dma_blits
+= sctx
->num_dma_calls
> old_num_dma_calls
;
386 cs_blits
+= sctx
->num_compute_calls
> old_num_cs_calls
;
389 util_copy_box(dst_cpu
.ptr
, tdst
.format
, dst_cpu
.stride
,
390 dst_cpu
.layer_stride
,
391 dstx
, dsty
, dstz
, width
, height
, depth
,
392 src_cpu
.ptr
, src_cpu
.stride
,
393 src_cpu
.layer_stride
,
397 pass
= compare_textures(ctx
, dst
, &dst_cpu
, bpp
);
403 printf("BLITs: GFX = %2u, DMA = %2u, CS = %2u, %s [%u/%u]\n",
404 gfx_blits
, dma_blits
, cs_blits
, pass
? "pass" : "fail",
405 num_pass
, num_pass
+num_fail
);
408 pipe_resource_reference(&src
, NULL
);
409 pipe_resource_reference(&dst
, NULL
);