/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* This file implements randomized SDMA texture blit tests. */
27 #include "r600_pipe_common.h"
28 #include "util/u_surface.h"
29 #include "util/rand_xor.h"
31 static uint64_t seed_xorshift128plus
[2];
33 #define RAND_NUM_SIZE 8
35 /* The GPU blits are emulated on the CPU using these CPU textures. */
40 uint64_t layer_stride
;
44 static void alloc_cpu_texture(struct cpu_texture
*tex
,
45 struct pipe_resource
*templ
, int bpp
)
47 tex
->stride
= align(templ
->width0
* bpp
, RAND_NUM_SIZE
);
48 tex
->layer_stride
= (uint64_t)tex
->stride
* templ
->height0
;
49 tex
->size
= tex
->layer_stride
* templ
->array_size
;
50 tex
->ptr
= malloc(tex
->size
);
54 static void set_random_pixels(struct pipe_context
*ctx
,
55 struct pipe_resource
*tex
,
56 struct cpu_texture
*cpu
)
58 struct pipe_transfer
*t
;
62 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_WRITE
,
63 0, 0, 0, tex
->width0
, tex
->height0
,
67 for (z
= 0; z
< tex
->array_size
; z
++) {
68 for (y
= 0; y
< tex
->height0
; y
++) {
69 uint64_t *ptr
= (uint64_t*)
70 (map
+ t
->layer_stride
*z
+ t
->stride
*y
);
71 uint64_t *ptr_cpu
= (uint64_t*)
72 (cpu
->ptr
+ cpu
->layer_stride
*z
+ cpu
->stride
*y
);
73 unsigned size
= cpu
->stride
/ RAND_NUM_SIZE
;
75 assert(t
->stride
% RAND_NUM_SIZE
== 0);
76 assert(cpu
->stride
% RAND_NUM_SIZE
== 0);
78 for (x
= 0; x
< size
; x
++) {
80 rand_xorshift128plus(seed_xorshift128plus
);
85 pipe_transfer_unmap(ctx
, t
);
88 static bool compare_textures(struct pipe_context
*ctx
,
89 struct pipe_resource
*tex
,
90 struct cpu_texture
*cpu
, int bpp
)
92 struct pipe_transfer
*t
;
97 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_READ
,
98 0, 0, 0, tex
->width0
, tex
->height0
,
102 for (z
= 0; z
< tex
->array_size
; z
++) {
103 for (y
= 0; y
< tex
->height0
; y
++) {
104 uint8_t *ptr
= map
+ t
->layer_stride
*z
+ t
->stride
*y
;
105 uint8_t *cpu_ptr
= cpu
->ptr
+
106 cpu
->layer_stride
*z
+ cpu
->stride
*y
;
108 if (memcmp(ptr
, cpu_ptr
, tex
->width0
* bpp
)) {
115 pipe_transfer_unmap(ctx
, t
);
119 static enum pipe_format
get_format_from_bpp(int bpp
)
123 return PIPE_FORMAT_R8_UINT
;
125 return PIPE_FORMAT_R16_UINT
;
127 return PIPE_FORMAT_R32_UINT
;
129 return PIPE_FORMAT_R32G32_UINT
;
131 return PIPE_FORMAT_R32G32B32A32_UINT
;
134 return PIPE_FORMAT_NONE
;
138 static const char *array_mode_to_string(struct r600_common_screen
*rscreen
,
139 struct radeon_surf
*surf
)
141 if (rscreen
->chip_class
>= GFX9
) {
145 switch (surf
->u
.legacy
.level
[0].mode
) {
146 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
147 return "LINEAR_ALIGNED";
148 case RADEON_SURF_MODE_1D
:
149 return "1D_TILED_THIN1";
150 case RADEON_SURF_MODE_2D
:
151 return "2D_TILED_THIN1";
159 static unsigned generate_max_tex_side(unsigned max_tex_side
)
161 switch (rand() % 4) {
163 /* Try to hit large sizes in 1/4 of the cases. */
166 /* Try to hit 1D tiling in 1/4 of the cases. */
169 /* Try to hit common sizes in 2/4 of the cases. */
174 void r600_test_dma(struct r600_common_screen
*rscreen
)
176 struct pipe_screen
*screen
= &rscreen
->b
;
177 struct pipe_context
*ctx
= screen
->context_create(screen
, NULL
, 0);
178 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
179 uint64_t max_alloc_size
;
180 unsigned i
, iterations
, num_partial_copies
, max_tex_side
;
181 unsigned num_pass
= 0, num_fail
= 0;
183 max_tex_side
= screen
->get_param(screen
, PIPE_CAP_MAX_TEXTURE_2D_SIZE
);
185 /* Max 128 MB allowed for both textures. */
186 max_alloc_size
= 128 * 1024 * 1024;
188 /* the seed for random test parameters */
190 /* the seed for random pixel data */
191 s_rand_xorshift128plus(seed_xorshift128plus
, false);
193 iterations
= 1000000000; /* just kill it when you are bored */
194 num_partial_copies
= 30;
196 /* These parameters are randomly generated per test:
197 * - whether to do one whole-surface copy or N partial copies per test
198 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
199 * - which texture dimensions to use
200 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
202 * - random initial pixels in src
203 * - generate random subrectangle copies for partial blits
205 for (i
= 0; i
< iterations
; i
++) {
206 struct pipe_resource tsrc
= {}, tdst
= {}, *src
, *dst
;
207 struct r600_texture
*rdst
;
208 struct r600_texture
*rsrc
;
209 struct cpu_texture src_cpu
, dst_cpu
;
210 unsigned bpp
, max_width
, max_height
, max_depth
, j
, num
;
211 unsigned gfx_blits
= 0, dma_blits
= 0, max_tex_side_gen
;
212 unsigned max_tex_layers
;
214 bool do_partial_copies
= rand() & 1;
216 /* generate a random test case */
217 tsrc
.target
= tdst
.target
= PIPE_TEXTURE_2D_ARRAY
;
218 tsrc
.depth0
= tdst
.depth0
= 1;
220 bpp
= 1 << (rand() % 5);
221 tsrc
.format
= tdst
.format
= get_format_from_bpp(bpp
);
223 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
224 max_tex_layers
= rand() % 4 ? 1 : 5;
226 tsrc
.width0
= (rand() % max_tex_side_gen
) + 1;
227 tsrc
.height0
= (rand() % max_tex_side_gen
) + 1;
228 tsrc
.array_size
= (rand() % max_tex_layers
) + 1;
230 /* Have a 1/4 chance of getting power-of-two dimensions. */
231 if (rand() % 4 == 0) {
232 tsrc
.width0
= util_next_power_of_two(tsrc
.width0
);
233 tsrc
.height0
= util_next_power_of_two(tsrc
.height0
);
236 if (!do_partial_copies
) {
237 /* whole-surface copies only, same dimensions */
240 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
241 max_tex_layers
= rand() % 4 ? 1 : 5;
243 /* many partial copies, dimensions can be different */
244 tdst
.width0
= (rand() % max_tex_side_gen
) + 1;
245 tdst
.height0
= (rand() % max_tex_side_gen
) + 1;
246 tdst
.array_size
= (rand() % max_tex_layers
) + 1;
248 /* Have a 1/4 chance of getting power-of-two dimensions. */
249 if (rand() % 4 == 0) {
250 tdst
.width0
= util_next_power_of_two(tdst
.width0
);
251 tdst
.height0
= util_next_power_of_two(tdst
.height0
);
255 /* check texture sizes */
256 if ((uint64_t)tsrc
.width0
* tsrc
.height0
* tsrc
.array_size
* bpp
+
257 (uint64_t)tdst
.width0
* tdst
.height0
* tdst
.array_size
* bpp
>
259 /* too large, try again */
264 /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
265 * or GTT + linear only (1/4 of cases)
267 tsrc
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
268 tdst
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
270 /* Allocate textures (both the GPU and CPU copies).
271 * The CPU will emulate what the GPU should be doing.
273 src
= screen
->resource_create(screen
, &tsrc
);
274 dst
= screen
->resource_create(screen
, &tdst
);
277 rdst
= (struct r600_texture
*)dst
;
278 rsrc
= (struct r600_texture
*)src
;
279 alloc_cpu_texture(&src_cpu
, &tsrc
, bpp
);
280 alloc_cpu_texture(&dst_cpu
, &tdst
, bpp
);
282 printf("%4u: dst = (%5u x %5u x %u, %s), "
283 " src = (%5u x %5u x %u, %s), bpp = %2u, ",
284 i
, tdst
.width0
, tdst
.height0
, tdst
.array_size
,
285 array_mode_to_string(rscreen
, &rdst
->surface
),
286 tsrc
.width0
, tsrc
.height0
, tsrc
.array_size
,
287 array_mode_to_string(rscreen
, &rsrc
->surface
), bpp
);
291 set_random_pixels(ctx
, src
, &src_cpu
);
293 /* clear dst pixels */
294 rctx
->clear_buffer(ctx
, dst
, 0, rdst
->surface
.surf_size
, 0, true);
295 memset(dst_cpu
.ptr
, 0, dst_cpu
.layer_stride
* tdst
.array_size
);
298 max_width
= MIN2(tsrc
.width0
, tdst
.width0
);
299 max_height
= MIN2(tsrc
.height0
, tdst
.height0
);
300 max_depth
= MIN2(tsrc
.array_size
, tdst
.array_size
);
302 num
= do_partial_copies
? num_partial_copies
: 1;
303 for (j
= 0; j
< num
; j
++) {
304 int width
, height
, depth
;
305 int srcx
, srcy
, srcz
, dstx
, dsty
, dstz
;
307 unsigned old_num_draw_calls
= rctx
->num_draw_calls
;
308 unsigned old_num_dma_calls
= rctx
->num_dma_calls
;
310 if (!do_partial_copies
) {
311 /* copy whole src to dst */
316 srcx
= srcy
= srcz
= dstx
= dsty
= dstz
= 0;
318 /* random sub-rectangle copies from src to dst */
319 depth
= (rand() % max_depth
) + 1;
320 srcz
= rand() % (tsrc
.array_size
- depth
+ 1);
321 dstz
= rand() % (tdst
.array_size
- depth
+ 1);
323 /* special code path to hit the tiled partial copies */
324 if (!rsrc
->surface
.is_linear
&&
325 !rdst
->surface
.is_linear
&&
327 if (max_width
< 8 || max_height
< 8)
329 width
= ((rand() % (max_width
/ 8)) + 1) * 8;
330 height
= ((rand() % (max_height
/ 8)) + 1) * 8;
332 srcx
= rand() % (tsrc
.width0
- width
+ 1) & ~0x7;
333 srcy
= rand() % (tsrc
.height0
- height
+ 1) & ~0x7;
335 dstx
= rand() % (tdst
.width0
- width
+ 1) & ~0x7;
336 dsty
= rand() % (tdst
.height0
- height
+ 1) & ~0x7;
338 /* just make sure that it doesn't divide by zero */
339 assert(max_width
> 0 && max_height
> 0);
341 width
= (rand() % max_width
) + 1;
342 height
= (rand() % max_height
) + 1;
344 srcx
= rand() % (tsrc
.width0
- width
+ 1);
345 srcy
= rand() % (tsrc
.height0
- height
+ 1);
347 dstx
= rand() % (tdst
.width0
- width
+ 1);
348 dsty
= rand() % (tdst
.height0
- height
+ 1);
351 /* special code path to hit out-of-bounds reads in L2T */
352 if (rsrc
->surface
.is_linear
&&
353 !rdst
->surface
.is_linear
&&
362 u_box_3d(srcx
, srcy
, srcz
, width
, height
, depth
, &box
);
363 rctx
->dma_copy(ctx
, dst
, 0, dstx
, dsty
, dstz
, src
, 0, &box
);
365 /* See which engine was used. */
366 gfx_blits
+= rctx
->num_draw_calls
> old_num_draw_calls
;
367 dma_blits
+= rctx
->num_dma_calls
> old_num_dma_calls
;
370 util_copy_box(dst_cpu
.ptr
, tdst
.format
, dst_cpu
.stride
,
371 dst_cpu
.layer_stride
,
372 dstx
, dsty
, dstz
, width
, height
, depth
,
373 src_cpu
.ptr
, src_cpu
.stride
,
374 src_cpu
.layer_stride
,
378 pass
= compare_textures(ctx
, dst
, &dst_cpu
, bpp
);
384 printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
385 gfx_blits
, dma_blits
, pass
? "pass" : "fail",
386 num_pass
, num_pass
+num_fail
);
389 pipe_resource_reference(&src
, NULL
);
390 pipe_resource_reference(&dst
, NULL
);