2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 /* This file implements randomized SDMA texture blit tests. */
29 #include "util/rand_xor.h"
30 #include "util/u_surface.h"
/* 128-bit state for the xorshift128+ PRNG used to generate random pixel
 * data (seeded in si_test_dma via s_rand_xorshift128plus). */
32 static uint64_t seed_xorshift128plus
[2];
/* Each rand_xorshift128plus() call produces 8 random bytes; CPU texture
 * strides are aligned to this so rows can be filled with whole uint64_t's. */
34 #define RAND_NUM_SIZE 8
36 /* The GPU blits are emulated on the CPU using these CPU textures. */
/* NOTE(review): only the tail of struct cpu_texture is visible in this chunk;
 * the opening lines (presumably the ptr/stride/size fields used by
 * alloc_cpu_texture below) are missing from this view — confirm against the
 * full file. */
41 uint64_t layer_stride
;
/* Allocate a CPU-side shadow texture whose layout mirrors *templ.
 * The row stride is padded to a multiple of RAND_NUM_SIZE (8 bytes) so the
 * random-fill loop in set_random_pixels can write whole 64-bit words.
 * NOTE(review): the function's closing lines (and any allocation-failure
 * check on tex->ptr) are missing from this view. */
45 static void alloc_cpu_texture(struct cpu_texture
*tex
, struct pipe_resource
*templ
)
/* Bytes per row for the format at width0, rounded up for 64-bit writes. */
47 tex
->stride
= align(util_format_get_stride(templ
->format
, templ
->width0
), RAND_NUM_SIZE
);
/* Bytes per 2D layer (stride * height). */
48 tex
->layer_stride
= (uint64_t)tex
->stride
* templ
->height0
;
/* Total allocation covers every array layer. */
49 tex
->size
= tex
->layer_stride
* templ
->array_size
;
/* NOTE(review): malloc result does not appear to be checked within the
 * visible lines — the elided tail may assert on it. */
50 tex
->ptr
= malloc(tex
->size
);
/* Fill the GPU texture *tex and its CPU shadow *cpu with identical random
 * 64-bit values, so later GPU blits can be verified against the CPU emulation.
 * NOTE(review): the declarations of map/x/y/z and the loop-closing braces are
 * missing from this view. */
54 static void set_random_pixels(struct pipe_context
*ctx
, struct pipe_resource
*tex
,
55 struct cpu_texture
*cpu
)
57 struct pipe_transfer
*t
;
/* Map the whole texture (all layers) for CPU writes. */
61 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_WRITE
, 0, 0, 0, tex
->width0
, tex
->height0
,
/* Walk every layer and row of the mapped GPU texture. */
65 for (z
= 0; z
< tex
->array_size
; z
++) {
66 for (y
= 0; y
< tex
->height0
; y
++) {
/* Row start in the GPU mapping (GPU strides from the transfer). */
67 uint64_t *ptr
= (uint64_t *)(map
+ t
->layer_stride
* z
+ t
->stride
* y
);
/* Row start in the CPU shadow (CPU strides from cpu_texture). */
68 uint64_t *ptr_cpu
= (uint64_t *)(cpu
->ptr
+ cpu
->layer_stride
* z
+ cpu
->stride
* y
);
/* Number of 64-bit words per CPU row. */
69 unsigned size
= cpu
->stride
/ RAND_NUM_SIZE
;
/* Both strides must be 8-byte multiples for the word-wise fill. */
71 assert(t
->stride
% RAND_NUM_SIZE
== 0);
72 assert(cpu
->stride
% RAND_NUM_SIZE
== 0);
/* Write the same random word to the GPU map and the CPU shadow. */
74 for (x
= 0; x
< size
; x
++) {
75 *ptr
++ = *ptr_cpu
++ = rand_xorshift128plus(seed_xorshift128plus
);
80 pipe_transfer_unmap(ctx
, t
);
/* Compare the GPU texture *tex against its CPU shadow *cpu row by row.
 * Returns whether they match (the return statements and the mismatch-handling
 * branch body are missing from this view).
 * Only the unpadded row width ("stride" below) is compared, not the
 * RAND_NUM_SIZE-padded CPU stride. */
83 static bool compare_textures(struct pipe_context
*ctx
, struct pipe_resource
*tex
,
84 struct cpu_texture
*cpu
)
86 struct pipe_transfer
*t
;
/* Meaningful bytes per row (without alignment padding). */
90 unsigned stride
= util_format_get_stride(tex
->format
, tex
->width0
);
/* Map the whole texture for CPU reads. */
92 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_READ
, 0, 0, 0, tex
->width0
, tex
->height0
,
96 for (z
= 0; z
< tex
->array_size
; z
++) {
97 for (y
= 0; y
< tex
->height0
; y
++) {
/* Row start in the GPU mapping. */
98 uint8_t *ptr
= map
+ t
->layer_stride
* z
+ t
->stride
* y
;
/* Row start in the CPU shadow. */
99 uint8_t *cpu_ptr
= cpu
->ptr
+ cpu
->layer_stride
* z
+ cpu
->stride
* y
;
/* Any byte difference within the row means the blit was wrong;
 * NOTE(review): the body of this branch is elided here. */
101 if (memcmp(ptr
, cpu_ptr
, stride
)) {
108 pipe_transfer_unmap(ctx
, t
);
/* Pick a random pixel format for a test iteration, uniformly from the
 * list below (uint formats of various sizes plus a subsampled format). */
112 static enum pipe_format
choose_format()
114 enum pipe_format formats
[] = {
115 PIPE_FORMAT_R8_UINT
, PIPE_FORMAT_R16_UINT
, PIPE_FORMAT_R32_UINT
,
116 PIPE_FORMAT_R32G32_UINT
, PIPE_FORMAT_R32G32B32A32_UINT
, PIPE_FORMAT_G8R8_B8R8_UNORM
,
118 return formats
[rand() % ARRAY_SIZE(formats
)];
/* Return a human-readable name for the surface's tiling/array mode, used in
 * the per-iteration printf. GFX9+ decodes the swizzle mode; older chips use
 * the legacy per-level mode enum.
 * NOTE(review): the GFX9 case labels/returns and the default/fallthrough
 * handling are missing from this view. */
121 static const char *array_mode_to_string(struct si_screen
*sscreen
, struct radeon_surf
*surf
)
123 if (sscreen
->info
.chip_class
>= GFX9
) {
124 switch (surf
->u
.gfx9
.surf
.swizzle_mode
) {
/* Unknown swizzle mode: report it rather than crash. */
136 printf("Unhandled swizzle mode = %u\n", surf
->u
.gfx9
.surf
.swizzle_mode
);
/* Pre-GFX9: legacy tiling mode of the base mip level. */
140 switch (surf
->u
.legacy
.level
[0].mode
) {
141 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
142 return "LINEAR_ALIGNED";
143 case RADEON_SURF_MODE_1D
:
144 return "1D_TILED_THIN1";
145 case RADEON_SURF_MODE_2D
:
146 return "2D_TILED_THIN1";
/* Randomly shrink the screen's maximum 2D texture side to bias the test
 * toward different size classes (and hence different tiling modes).
 * NOTE(review): the case bodies/returns are missing from this view; only the
 * category comments remain. */
154 static unsigned generate_max_tex_side(unsigned max_tex_side
)
156 switch (rand() % 4) {
158 /* Try to hit large sizes in 1/4 of the cases. */
161 /* Try to hit 1D tiling in 1/4 of the cases. */
164 /* Try to hit common sizes in 2/4 of the cases. */
/* Main entry point: run randomized GPU blit tests forever, verifying each
 * GPU copy (via sctx->dma_copy) against a CPU emulation (util_copy_box) of
 * the same operation. Prints a pass/fail line and running totals per
 * iteration.
 * NOTE(review): many lines are elided from this view — among them the
 * srand() call for test parameters, the "continue" on oversized textures,
 * the size-equalizing branch for whole-surface copies, the special L2T
 * clamp block, and the per-iteration frees/flush — so the control flow
 * documented here is partial. */
169 void si_test_dma(struct si_screen
*sscreen
)
171 struct pipe_screen
*screen
= &sscreen
->b
;
172 struct pipe_context
*ctx
= screen
->context_create(screen
, NULL
, 0);
173 struct si_context
*sctx
= (struct si_context
*)ctx
;
174 uint64_t max_alloc_size
;
175 unsigned i
, iterations
, num_partial_copies
, max_tex_side
;
176 unsigned num_pass
= 0, num_fail
= 0;
/* Hardware limit on 2D texture side length. */
178 max_tex_side
= screen
->get_param(screen
, PIPE_CAP_MAX_TEXTURE_2D_SIZE
);
180 /* Max 128 MB allowed for both textures. */
181 max_alloc_size
= 128 * 1024 * 1024;
183 /* the seed for random test parameters */
185 /* the seed for random pixel data */
186 s_rand_xorshift128plus(seed_xorshift128plus
, false);
188 iterations
= 1000000000; /* just kill it when you are bored */
189 num_partial_copies
= 30;
191 /* These parameters are randomly generated per test:
192 * - whether to do one whole-surface copy or N partial copies per test
193 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
194 * - which texture dimensions to use
195 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
197 * - random initial pixels in src
198 * - generate random subrectangle copies for partial blits
200 for (i
= 0; i
< iterations
; i
++) {
201 struct pipe_resource tsrc
= {}, tdst
= {}, *src
, *dst
;
202 struct si_texture
*sdst
;
203 struct si_texture
*ssrc
;
204 struct cpu_texture src_cpu
, dst_cpu
;
205 unsigned max_width
, max_height
, max_depth
, j
, num
;
/* Counters for which engine serviced each blit this iteration. */
206 unsigned gfx_blits
= 0, dma_blits
= 0, cs_blits
= 0, max_tex_side_gen
;
207 unsigned max_tex_layers
;
/* 50/50: one whole-surface copy vs. many partial copies. */
209 bool do_partial_copies
= rand() & 1;
211 /* generate a random test case */
212 tsrc
.target
= tdst
.target
= PIPE_TEXTURE_2D_ARRAY
;
213 tsrc
.depth0
= tdst
.depth0
= 1;
215 tsrc
.format
= tdst
.format
= choose_format();
217 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
218 max_tex_layers
= rand() % 4 ? 1 : 5;
220 tsrc
.width0
= (rand() % max_tex_side_gen
) + 1;
221 tsrc
.height0
= (rand() % max_tex_side_gen
) + 1;
222 tsrc
.array_size
= (rand() % max_tex_layers
) + 1;
/* Subsampled format: width must be even. */
224 if (tsrc
.format
== PIPE_FORMAT_G8R8_B8R8_UNORM
)
225 tsrc
.width0
= align(tsrc
.width0
, 2);
227 /* Have a 1/4 chance of getting power-of-two dimensions. */
228 if (rand() % 4 == 0) {
229 tsrc
.width0
= util_next_power_of_two(tsrc
.width0
);
230 tsrc
.height0
= util_next_power_of_two(tsrc
.height0
);
233 if (!do_partial_copies
) {
234 /* whole-surface copies only, same dimensions */
/* else: re-roll independent dimensions for dst. */
237 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
238 max_tex_layers
= rand() % 4 ? 1 : 5;
240 /* many partial copies, dimensions can be different */
241 tdst
.width0
= (rand() % max_tex_side_gen
) + 1;
242 tdst
.height0
= (rand() % max_tex_side_gen
) + 1;
243 tdst
.array_size
= (rand() % max_tex_layers
) + 1;
245 /* Have a 1/4 chance of getting power-of-two dimensions. */
246 if (rand() % 4 == 0) {
247 tdst
.width0
= util_next_power_of_two(tdst
.width0
);
248 tdst
.height0
= util_next_power_of_two(tdst
.height0
);
252 /* check texture sizes */
/* Combined src+dst byte size vs. the 128 MB budget above;
 * NOTE(review): the comparison's right-hand side and the skip path are
 * elided here. */
253 if ((uint64_t)util_format_get_nblocks(tsrc
.format
, tsrc
.width0
, tsrc
.height0
) *
254 tsrc
.array_size
* util_format_get_blocksize(tsrc
.format
) +
255 (uint64_t)util_format_get_nblocks(tdst
.format
, tdst
.width0
, tdst
.height0
) *
256 tdst
.array_size
* util_format_get_blocksize(tdst
.format
) >
258 /* too large, try again */
263 /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
264 * or GTT + linear only (1/4 of cases)
266 tsrc
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
267 tdst
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
269 /* Allocate textures (both the GPU and CPU copies).
270 * The CPU will emulate what the GPU should be doing.
272 src
= screen
->resource_create(screen
, &tsrc
);
273 dst
= screen
->resource_create(screen
, &tdst
);
276 sdst
= (struct si_texture
*)dst
;
277 ssrc
= (struct si_texture
*)src
;
278 alloc_cpu_texture(&src_cpu
, &tsrc
);
279 alloc_cpu_texture(&dst_cpu
, &tdst
);
281 printf("%4u: dst = (%5u x %5u x %u, %s), "
282 " src = (%5u x %5u x %u, %s), format = %s, ",
283 i
, tdst
.width0
, tdst
.height0
, tdst
.array_size
,
284 array_mode_to_string(sscreen
, &sdst
->surface
), tsrc
.width0
, tsrc
.height0
,
285 tsrc
.array_size
, array_mode_to_string(sscreen
, &ssrc
->surface
),
286 util_format_description(tsrc
.format
)->name
);
/* Fill src (GPU + CPU shadow) with identical random data. */
290 set_random_pixels(ctx
, src
, &src_cpu
);
292 /* clear dst pixels */
294 si_clear_buffer(sctx
, dst
, 0, sdst
->surface
.surf_size
, &zero
, 4, SI_COHERENCY_SHADER
, false);
295 memset(dst_cpu
.ptr
, 0, dst_cpu
.layer_stride
* tdst
.array_size
);
/* Largest copy rectangle that fits in both textures. */
298 max_width
= MIN2(tsrc
.width0
, tdst
.width0
);
299 max_height
= MIN2(tsrc
.height0
, tdst
.height0
);
300 max_depth
= MIN2(tsrc
.array_size
, tdst
.array_size
);
302 num
= do_partial_copies
? num_partial_copies
: 1;
303 for (j
= 0; j
< num
; j
++) {
304 int width
, height
, depth
;
305 int srcx
, srcy
, srcz
, dstx
, dsty
, dstz
;
/* Snapshot engine counters to detect which engine did the blit. */
307 unsigned old_num_draw_calls
= sctx
->num_draw_calls
;
308 unsigned old_num_dma_calls
= sctx
->num_dma_calls
;
309 unsigned old_num_cs_calls
= sctx
->num_compute_calls
;
311 if (!do_partial_copies
) {
312 /* copy whole src to dst */
/* NOTE(review): the width/height/depth assignments for the
 * whole-surface case are elided here. */
317 srcx
= srcy
= srcz
= dstx
= dsty
= dstz
= 0;
319 /* random sub-rectangle copies from src to dst */
320 depth
= (rand() % max_depth
) + 1;
321 srcz
= rand() % (tsrc
.array_size
- depth
+ 1);
322 dstz
= rand() % (tdst
.array_size
- depth
+ 1);
324 /* special code path to hit the tiled partial copies */
325 if (!ssrc
->surface
.is_linear
&& !sdst
->surface
.is_linear
&& rand() & 1) {
/* NOTE(review): the statement taken when the surface is too
 * small (presumably a skip/continue) is elided here. */
326 if (max_width
< 8 || max_height
< 8)
/* 8-pixel-aligned sizes and offsets to exercise tiled fast paths. */
328 width
= ((rand() % (max_width
/ 8)) + 1) * 8;
329 height
= ((rand() % (max_height
/ 8)) + 1) * 8;
331 srcx
= rand() % (tsrc
.width0
- width
+ 1) & ~0x7;
332 srcy
= rand() % (tsrc
.height0
- height
+ 1) & ~0x7;
334 dstx
= rand() % (tdst
.width0
- width
+ 1) & ~0x7;
335 dsty
= rand() % (tdst
.height0
- height
+ 1) & ~0x7;
337 /* just make sure that it doesn't divide by zero */
338 assert(max_width
> 0 && max_height
> 0);
340 width
= (rand() % max_width
) + 1;
341 height
= (rand() % max_height
) + 1;
343 srcx
= rand() % (tsrc
.width0
- width
+ 1);
344 srcy
= rand() % (tsrc
.height0
- height
+ 1);
346 dstx
= rand() % (tdst
.width0
- width
+ 1);
347 dsty
= rand() % (tdst
.height0
- height
+ 1);
350 /* special code path to hit out-of-bounds reads in L2T */
/* NOTE(review): this branch's body (linear-to-tiled edge handling)
 * is elided here. */
351 if (ssrc
->surface
.is_linear
&& !sdst
->surface
.is_linear
&& rand() % 4 == 0) {
/* Issue the GPU copy for this sub-box. */
359 u_box_3d(srcx
, srcy
, srcz
, width
, height
, depth
, &box
);
360 sctx
->dma_copy(ctx
, dst
, 0, dstx
, dsty
, dstz
, src
, 0, &box
);
362 /* See which engine was used. */
363 gfx_blits
+= sctx
->num_draw_calls
> old_num_draw_calls
;
364 dma_blits
+= sctx
->num_dma_calls
> old_num_dma_calls
;
365 cs_blits
+= sctx
->num_compute_calls
> old_num_cs_calls
;
/* Emulate the same copy on the CPU shadow textures. */
368 util_copy_box(dst_cpu
.ptr
, tdst
.format
, dst_cpu
.stride
, dst_cpu
.layer_stride
, dstx
, dsty
,
369 dstz
, width
, height
, depth
, src_cpu
.ptr
, src_cpu
.stride
,
370 src_cpu
.layer_stride
, srcx
, srcy
, srcz
);
/* GPU result must match the CPU emulation. */
373 pass
= compare_textures(ctx
, dst
, &dst_cpu
);
379 printf("BLITs: GFX = %2u, DMA = %2u, CS = %2u, %s [%u/%u]\n", gfx_blits
, dma_blits
, cs_blits
,
380 pass
? "pass" : "fail", num_pass
, num_pass
+ num_fail
);
/* Release the GPU textures; NOTE(review): the frees of the CPU
 * shadow allocations are elided from this view. */
383 pipe_resource_reference(&src
, NULL
);
384 pipe_resource_reference(&dst
, NULL
);