2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 /* This file implements randomized SDMA texture blit tests. */
28 #include "util/u_surface.h"
29 #include "util/rand_xor.h"
/* State of the xorshift128+ PRNG used for pixel data; seeded via
 * s_rand_xorshift128plus() in si_test_dma(). */
static uint64_t seed_xorshift128plus[2];

/* Size in bytes of one generated random number (one uint64_t per call);
 * CPU texture strides are aligned to this so rows can be filled with
 * whole 64-bit values. */
#define RAND_NUM_SIZE 8
35 /* The GPU blits are emulated on the CPU using these CPU textures. */
40 uint64_t layer_stride
;
44 static void alloc_cpu_texture(struct cpu_texture
*tex
,
45 struct pipe_resource
*templ
, int bpp
)
47 tex
->stride
= align(templ
->width0
* bpp
, RAND_NUM_SIZE
);
48 tex
->layer_stride
= (uint64_t)tex
->stride
* templ
->height0
;
49 tex
->size
= tex
->layer_stride
* templ
->array_size
;
50 tex
->ptr
= malloc(tex
->size
);
54 static void set_random_pixels(struct pipe_context
*ctx
,
55 struct pipe_resource
*tex
,
56 struct cpu_texture
*cpu
)
58 struct pipe_transfer
*t
;
62 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_WRITE
,
63 0, 0, 0, tex
->width0
, tex
->height0
,
67 for (z
= 0; z
< tex
->array_size
; z
++) {
68 for (y
= 0; y
< tex
->height0
; y
++) {
69 uint64_t *ptr
= (uint64_t*)
70 (map
+ t
->layer_stride
*z
+ t
->stride
*y
);
71 uint64_t *ptr_cpu
= (uint64_t*)
72 (cpu
->ptr
+ cpu
->layer_stride
*z
+ cpu
->stride
*y
);
73 unsigned size
= cpu
->stride
/ RAND_NUM_SIZE
;
75 assert(t
->stride
% RAND_NUM_SIZE
== 0);
76 assert(cpu
->stride
% RAND_NUM_SIZE
== 0);
78 for (x
= 0; x
< size
; x
++) {
80 rand_xorshift128plus(seed_xorshift128plus
);
85 pipe_transfer_unmap(ctx
, t
);
88 static bool compare_textures(struct pipe_context
*ctx
,
89 struct pipe_resource
*tex
,
90 struct cpu_texture
*cpu
, int bpp
)
92 struct pipe_transfer
*t
;
97 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_READ
,
98 0, 0, 0, tex
->width0
, tex
->height0
,
102 for (z
= 0; z
< tex
->array_size
; z
++) {
103 for (y
= 0; y
< tex
->height0
; y
++) {
104 uint8_t *ptr
= map
+ t
->layer_stride
*z
+ t
->stride
*y
;
105 uint8_t *cpu_ptr
= cpu
->ptr
+
106 cpu
->layer_stride
*z
+ cpu
->stride
*y
;
108 if (memcmp(ptr
, cpu_ptr
, tex
->width0
* bpp
)) {
115 pipe_transfer_unmap(ctx
, t
);
119 static enum pipe_format
get_format_from_bpp(int bpp
)
123 return PIPE_FORMAT_R8_UINT
;
125 return PIPE_FORMAT_R16_UINT
;
127 return PIPE_FORMAT_R32_UINT
;
129 return PIPE_FORMAT_R32G32_UINT
;
131 return PIPE_FORMAT_R32G32B32A32_UINT
;
134 return PIPE_FORMAT_NONE
;
138 static const char *array_mode_to_string(struct si_screen
*sscreen
,
139 struct radeon_surf
*surf
)
141 if (sscreen
->info
.chip_class
>= GFX9
) {
142 switch (surf
->u
.gfx9
.surf
.swizzle_mode
) {
154 printf("Unhandled swizzle mode = %u\n",
155 surf
->u
.gfx9
.surf
.swizzle_mode
);
159 switch (surf
->u
.legacy
.level
[0].mode
) {
160 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
161 return "LINEAR_ALIGNED";
162 case RADEON_SURF_MODE_1D
:
163 return "1D_TILED_THIN1";
164 case RADEON_SURF_MODE_2D
:
165 return "2D_TILED_THIN1";
/* Pick a maximum texture dimension for one randomized test case.
 *
 * The distribution is deliberately skewed (per the original comments):
 *   1/4 -> the hardware maximum (hits very large sizes)
 *   1/4 -> 128 (small enough to exercise 1D tiling)
 *   2/4 -> 2048 (common texture sizes)
 *
 * Fix: the return statements were missing, so control fell off the end
 * of a value-returning function (undefined behavior).
 */
static unsigned generate_max_tex_side(unsigned max_tex_side)
{
	switch (rand() % 4) {
	case 0:
		/* Try to hit large sizes in 1/4 of the cases. */
		return max_tex_side;
	case 1:
		/* Try to hit 1D tiling in 1/4 of the cases. */
		return 128;
	default:
		/* Try to hit common sizes in 2/4 of the cases. */
		return 2048;
	}
}
188 void si_test_dma(struct si_screen
*sscreen
)
190 struct pipe_screen
*screen
= &sscreen
->b
;
191 struct pipe_context
*ctx
= screen
->context_create(screen
, NULL
, 0);
192 struct si_context
*sctx
= (struct si_context
*)ctx
;
193 uint64_t max_alloc_size
;
194 unsigned i
, iterations
, num_partial_copies
, max_levels
, max_tex_side
;
195 unsigned num_pass
= 0, num_fail
= 0;
197 max_levels
= screen
->get_param(screen
, PIPE_CAP_MAX_TEXTURE_2D_LEVELS
);
198 max_tex_side
= 1 << (max_levels
- 1);
200 /* Max 128 MB allowed for both textures. */
201 max_alloc_size
= 128 * 1024 * 1024;
203 /* the seed for random test parameters */
205 /* the seed for random pixel data */
206 s_rand_xorshift128plus(seed_xorshift128plus
, false);
208 iterations
= 1000000000; /* just kill it when you are bored */
209 num_partial_copies
= 30;
211 /* These parameters are randomly generated per test:
212 * - whether to do one whole-surface copy or N partial copies per test
213 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
214 * - which texture dimensions to use
215 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
217 * - random initial pixels in src
218 * - generate random subrectangle copies for partial blits
220 for (i
= 0; i
< iterations
; i
++) {
221 struct pipe_resource tsrc
= {}, tdst
= {}, *src
, *dst
;
222 struct r600_texture
*rdst
;
223 struct r600_texture
*rsrc
;
224 struct cpu_texture src_cpu
, dst_cpu
;
225 unsigned bpp
, max_width
, max_height
, max_depth
, j
, num
;
226 unsigned gfx_blits
= 0, dma_blits
= 0, max_tex_side_gen
;
227 unsigned max_tex_layers
;
229 bool do_partial_copies
= rand() & 1;
231 /* generate a random test case */
232 tsrc
.target
= tdst
.target
= PIPE_TEXTURE_2D_ARRAY
;
233 tsrc
.depth0
= tdst
.depth0
= 1;
235 bpp
= 1 << (rand() % 5);
236 tsrc
.format
= tdst
.format
= get_format_from_bpp(bpp
);
238 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
239 max_tex_layers
= rand() % 4 ? 1 : 5;
241 tsrc
.width0
= (rand() % max_tex_side_gen
) + 1;
242 tsrc
.height0
= (rand() % max_tex_side_gen
) + 1;
243 tsrc
.array_size
= (rand() % max_tex_layers
) + 1;
245 /* Have a 1/4 chance of getting power-of-two dimensions. */
246 if (rand() % 4 == 0) {
247 tsrc
.width0
= util_next_power_of_two(tsrc
.width0
);
248 tsrc
.height0
= util_next_power_of_two(tsrc
.height0
);
251 if (!do_partial_copies
) {
252 /* whole-surface copies only, same dimensions */
255 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
256 max_tex_layers
= rand() % 4 ? 1 : 5;
258 /* many partial copies, dimensions can be different */
259 tdst
.width0
= (rand() % max_tex_side_gen
) + 1;
260 tdst
.height0
= (rand() % max_tex_side_gen
) + 1;
261 tdst
.array_size
= (rand() % max_tex_layers
) + 1;
263 /* Have a 1/4 chance of getting power-of-two dimensions. */
264 if (rand() % 4 == 0) {
265 tdst
.width0
= util_next_power_of_two(tdst
.width0
);
266 tdst
.height0
= util_next_power_of_two(tdst
.height0
);
270 /* check texture sizes */
271 if ((uint64_t)tsrc
.width0
* tsrc
.height0
* tsrc
.array_size
* bpp
+
272 (uint64_t)tdst
.width0
* tdst
.height0
* tdst
.array_size
* bpp
>
274 /* too large, try again */
279 /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
280 * or GTT + linear only (1/4 of cases)
282 tsrc
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
283 tdst
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
285 /* Allocate textures (both the GPU and CPU copies).
286 * The CPU will emulate what the GPU should be doing.
288 src
= screen
->resource_create(screen
, &tsrc
);
289 dst
= screen
->resource_create(screen
, &tdst
);
292 rdst
= (struct r600_texture
*)dst
;
293 rsrc
= (struct r600_texture
*)src
;
294 alloc_cpu_texture(&src_cpu
, &tsrc
, bpp
);
295 alloc_cpu_texture(&dst_cpu
, &tdst
, bpp
);
297 printf("%4u: dst = (%5u x %5u x %u, %s), "
298 " src = (%5u x %5u x %u, %s), bpp = %2u, ",
299 i
, tdst
.width0
, tdst
.height0
, tdst
.array_size
,
300 array_mode_to_string(sscreen
, &rdst
->surface
),
301 tsrc
.width0
, tsrc
.height0
, tsrc
.array_size
,
302 array_mode_to_string(sscreen
, &rsrc
->surface
), bpp
);
306 set_random_pixels(ctx
, src
, &src_cpu
);
308 /* clear dst pixels */
309 si_clear_buffer(ctx
, dst
, 0, rdst
->surface
.surf_size
, 0, true);
310 memset(dst_cpu
.ptr
, 0, dst_cpu
.layer_stride
* tdst
.array_size
);
313 max_width
= MIN2(tsrc
.width0
, tdst
.width0
);
314 max_height
= MIN2(tsrc
.height0
, tdst
.height0
);
315 max_depth
= MIN2(tsrc
.array_size
, tdst
.array_size
);
317 num
= do_partial_copies
? num_partial_copies
: 1;
318 for (j
= 0; j
< num
; j
++) {
319 int width
, height
, depth
;
320 int srcx
, srcy
, srcz
, dstx
, dsty
, dstz
;
322 unsigned old_num_draw_calls
= sctx
->b
.num_draw_calls
;
323 unsigned old_num_dma_calls
= sctx
->b
.num_dma_calls
;
325 if (!do_partial_copies
) {
326 /* copy whole src to dst */
331 srcx
= srcy
= srcz
= dstx
= dsty
= dstz
= 0;
333 /* random sub-rectangle copies from src to dst */
334 depth
= (rand() % max_depth
) + 1;
335 srcz
= rand() % (tsrc
.array_size
- depth
+ 1);
336 dstz
= rand() % (tdst
.array_size
- depth
+ 1);
338 /* special code path to hit the tiled partial copies */
339 if (!rsrc
->surface
.is_linear
&&
340 !rdst
->surface
.is_linear
&&
342 if (max_width
< 8 || max_height
< 8)
344 width
= ((rand() % (max_width
/ 8)) + 1) * 8;
345 height
= ((rand() % (max_height
/ 8)) + 1) * 8;
347 srcx
= rand() % (tsrc
.width0
- width
+ 1) & ~0x7;
348 srcy
= rand() % (tsrc
.height0
- height
+ 1) & ~0x7;
350 dstx
= rand() % (tdst
.width0
- width
+ 1) & ~0x7;
351 dsty
= rand() % (tdst
.height0
- height
+ 1) & ~0x7;
353 /* just make sure that it doesn't divide by zero */
354 assert(max_width
> 0 && max_height
> 0);
356 width
= (rand() % max_width
) + 1;
357 height
= (rand() % max_height
) + 1;
359 srcx
= rand() % (tsrc
.width0
- width
+ 1);
360 srcy
= rand() % (tsrc
.height0
- height
+ 1);
362 dstx
= rand() % (tdst
.width0
- width
+ 1);
363 dsty
= rand() % (tdst
.height0
- height
+ 1);
366 /* special code path to hit out-of-bounds reads in L2T */
367 if (rsrc
->surface
.is_linear
&&
368 !rdst
->surface
.is_linear
&&
377 u_box_3d(srcx
, srcy
, srcz
, width
, height
, depth
, &box
);
378 sctx
->b
.dma_copy(ctx
, dst
, 0, dstx
, dsty
, dstz
, src
, 0, &box
);
380 /* See which engine was used. */
381 gfx_blits
+= sctx
->b
.num_draw_calls
> old_num_draw_calls
;
382 dma_blits
+= sctx
->b
.num_dma_calls
> old_num_dma_calls
;
385 util_copy_box(dst_cpu
.ptr
, tdst
.format
, dst_cpu
.stride
,
386 dst_cpu
.layer_stride
,
387 dstx
, dsty
, dstz
, width
, height
, depth
,
388 src_cpu
.ptr
, src_cpu
.stride
,
389 src_cpu
.layer_stride
,
393 pass
= compare_textures(ctx
, dst
, &dst_cpu
, bpp
);
399 printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
400 gfx_blits
, dma_blits
, pass
? "pass" : "fail",
401 num_pass
, num_pass
+num_fail
);
404 pipe_resource_reference(&src
, NULL
);
405 pipe_resource_reference(&dst
, NULL
);