2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 /* This file implements randomized SDMA texture blit tests. */
27 #include "r600_pipe_common.h"
28 #include "util/u_surface.h"
/* 128 bits of PRNG state, reseeded by the test driver before each run. */
static uint64_t seed_xorshift128plus[2];

/* Super fast random number generator.
 *
 * This rand_xorshift128plus function by Sebastiano Vigna belongs
 * to the public domain.
 */
static uint64_t rand_xorshift128plus(void)
{
	uint64_t *s = seed_xorshift128plus;

	uint64_t s1 = s[0];
	const uint64_t s0 = s[1];
	s[0] = s0;
	s1 ^= s1 << 23;
	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5);
	return s[1] + s0;
}
/* Pixel rows are padded to a multiple of this so they can be filled
 * with whole 64-bit random numbers (sizeof(uint64_t)). */
#define RAND_NUM_SIZE 8

/* The GPU blits are emulated on the CPU using these CPU textures. */
struct cpu_texture {
	uint8_t *ptr;          /* malloc'd backing store, tex->size bytes */
	uint64_t size;         /* total allocation size in bytes */
	uint64_t layer_stride; /* bytes between consecutive array layers */
	unsigned stride;       /* bytes per row, RAND_NUM_SIZE-aligned */
};
60 static void alloc_cpu_texture(struct cpu_texture
*tex
,
61 struct pipe_resource
*templ
, int bpp
)
63 tex
->stride
= align(templ
->width0
* bpp
, RAND_NUM_SIZE
);
64 tex
->layer_stride
= (uint64_t)tex
->stride
* templ
->height0
;
65 tex
->size
= tex
->layer_stride
* templ
->array_size
;
66 tex
->ptr
= malloc(tex
->size
);
70 static void set_random_pixels(struct pipe_context
*ctx
,
71 struct pipe_resource
*tex
,
72 struct cpu_texture
*cpu
)
74 struct pipe_transfer
*t
;
78 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_WRITE
,
79 0, 0, 0, tex
->width0
, tex
->height0
,
83 for (z
= 0; z
< tex
->array_size
; z
++) {
84 for (y
= 0; y
< tex
->height0
; y
++) {
85 uint64_t *ptr
= (uint64_t*)
86 (map
+ t
->layer_stride
*z
+ t
->stride
*y
);
87 uint64_t *ptr_cpu
= (uint64_t*)
88 (cpu
->ptr
+ cpu
->layer_stride
*z
+ cpu
->stride
*y
);
89 unsigned size
= cpu
->stride
/ RAND_NUM_SIZE
;
91 assert(t
->stride
% RAND_NUM_SIZE
== 0);
92 assert(cpu
->stride
% RAND_NUM_SIZE
== 0);
94 for (x
= 0; x
< size
; x
++)
95 *ptr
++ = *ptr_cpu
++ = rand_xorshift128plus();
99 pipe_transfer_unmap(ctx
, t
);
102 static bool compare_textures(struct pipe_context
*ctx
,
103 struct pipe_resource
*tex
,
104 struct cpu_texture
*cpu
, int bpp
)
106 struct pipe_transfer
*t
;
111 map
= pipe_transfer_map_3d(ctx
, tex
, 0, PIPE_TRANSFER_READ
,
112 0, 0, 0, tex
->width0
, tex
->height0
,
113 tex
->array_size
, &t
);
116 for (z
= 0; z
< tex
->array_size
; z
++) {
117 for (y
= 0; y
< tex
->height0
; y
++) {
118 uint8_t *ptr
= map
+ t
->layer_stride
*z
+ t
->stride
*y
;
119 uint8_t *cpu_ptr
= cpu
->ptr
+
120 cpu
->layer_stride
*z
+ cpu
->stride
*y
;
122 if (memcmp(ptr
, cpu_ptr
, tex
->width0
* bpp
)) {
129 pipe_transfer_unmap(ctx
, t
);
133 static enum pipe_format
get_format_from_bpp(int bpp
)
137 return PIPE_FORMAT_R8_UINT
;
139 return PIPE_FORMAT_R16_UINT
;
141 return PIPE_FORMAT_R32_UINT
;
143 return PIPE_FORMAT_R32G32_UINT
;
145 return PIPE_FORMAT_R32G32B32A32_UINT
;
148 return PIPE_FORMAT_NONE
;
152 static const char *array_mode_to_string(unsigned mode
)
155 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
156 return "LINEAR_ALIGNED";
157 case RADEON_SURF_MODE_1D
:
158 return "1D_TILED_THIN1";
159 case RADEON_SURF_MODE_2D
:
160 return "2D_TILED_THIN1";
/* Pick an upper bound for randomly generated texture dimensions.
 *
 * Biases the distribution so that interesting cases come up often:
 * huge textures 1/4 of the time, small (1D-tiled) textures 1/4 of the
 * time, and a common mid size the remaining half.
 */
static unsigned generate_max_tex_side(unsigned max_tex_side)
{
	switch (rand() % 4) {
	case 0:
		/* Try to hit large sizes in 1/4 of the cases. */
		return max_tex_side;
	case 1:
		/* Try to hit 1D tiling in 1/4 of the cases. */
		return 128;
	default:
		/* Try to hit common sizes in 2/4 of the cases. */
		return 512;
	}
}
182 void r600_test_dma(struct r600_common_screen
*rscreen
)
184 struct pipe_screen
*screen
= &rscreen
->b
;
185 struct pipe_context
*ctx
= screen
->context_create(screen
, NULL
, 0);
186 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
187 uint64_t max_alloc_size
;
188 unsigned i
, iterations
, num_partial_copies
, max_levels
, max_tex_side
;
189 unsigned num_pass
= 0, num_fail
= 0;
191 max_levels
= screen
->get_param(screen
, PIPE_CAP_MAX_TEXTURE_2D_LEVELS
);
192 max_tex_side
= 1 << (max_levels
- 1);
194 /* Max 128 MB allowed for both textures. */
195 max_alloc_size
= 128 * 1024 * 1024;
197 /* the seed for random test parameters */
199 /* the seed for random pixel data */
200 seed_xorshift128plus
[0] = 0x3bffb83978e24f88;
201 seed_xorshift128plus
[1] = 0x9238d5d56c71cd35;
203 iterations
= 1000000000; /* just kill it when you are bored */
204 num_partial_copies
= 30;
206 /* These parameters are randomly generated per test:
207 * - whether to do one whole-surface copy or N partial copies per test
208 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
209 * - which texture dimensions to use
210 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
212 * - random initial pixels in src
213 * - generate random subrectangle copies for partial blits
215 for (i
= 0; i
< iterations
; i
++) {
216 struct pipe_resource tsrc
= {}, tdst
= {}, *src
, *dst
;
217 struct r600_texture
*rdst
;
218 struct r600_texture
*rsrc
;
219 struct cpu_texture src_cpu
, dst_cpu
;
220 unsigned bpp
, max_width
, max_height
, max_depth
, j
, num
;
221 unsigned gfx_blits
= 0, dma_blits
= 0, max_tex_side_gen
;
222 unsigned max_tex_layers
;
224 bool do_partial_copies
= rand() & 1;
226 /* generate a random test case */
227 tsrc
.target
= tdst
.target
= PIPE_TEXTURE_2D_ARRAY
;
228 tsrc
.depth0
= tdst
.depth0
= 1;
230 bpp
= 1 << (rand() % 5);
231 tsrc
.format
= tdst
.format
= get_format_from_bpp(bpp
);
233 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
234 max_tex_layers
= rand() % 4 ? 1 : 5;
236 tsrc
.width0
= (rand() % max_tex_side_gen
) + 1;
237 tsrc
.height0
= (rand() % max_tex_side_gen
) + 1;
238 tsrc
.array_size
= (rand() % max_tex_layers
) + 1;
240 /* Have a 1/4 chance of getting power-of-two dimensions. */
241 if (rand() % 4 == 0) {
242 tsrc
.width0
= util_next_power_of_two(tsrc
.width0
);
243 tsrc
.height0
= util_next_power_of_two(tsrc
.height0
);
246 if (!do_partial_copies
) {
247 /* whole-surface copies only, same dimensions */
250 max_tex_side_gen
= generate_max_tex_side(max_tex_side
);
251 max_tex_layers
= rand() % 4 ? 1 : 5;
253 /* many partial copies, dimensions can be different */
254 tdst
.width0
= (rand() % max_tex_side_gen
) + 1;
255 tdst
.height0
= (rand() % max_tex_side_gen
) + 1;
256 tdst
.array_size
= (rand() % max_tex_layers
) + 1;
258 /* Have a 1/4 chance of getting power-of-two dimensions. */
259 if (rand() % 4 == 0) {
260 tdst
.width0
= util_next_power_of_two(tdst
.width0
);
261 tdst
.height0
= util_next_power_of_two(tdst
.height0
);
265 /* check texture sizes */
266 if ((uint64_t)tsrc
.width0
* tsrc
.height0
* tsrc
.array_size
* bpp
+
267 (uint64_t)tdst
.width0
* tdst
.height0
* tdst
.array_size
* bpp
>
269 /* too large, try again */
274 /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
275 * or GTT + linear only (1/4 of cases)
277 tsrc
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
278 tdst
.usage
= rand() % 4 ? PIPE_USAGE_DEFAULT
: PIPE_USAGE_STAGING
;
280 /* Allocate textures (both the GPU and CPU copies).
281 * The CPU will emulate what the GPU should be doing.
283 src
= screen
->resource_create(screen
, &tsrc
);
284 dst
= screen
->resource_create(screen
, &tdst
);
287 rdst
= (struct r600_texture
*)dst
;
288 rsrc
= (struct r600_texture
*)src
;
289 alloc_cpu_texture(&src_cpu
, &tsrc
, bpp
);
290 alloc_cpu_texture(&dst_cpu
, &tdst
, bpp
);
292 printf("%4u: dst = (%5u x %5u x %u, %s), "
293 " src = (%5u x %5u x %u, %s), bpp = %2u, ",
294 i
, tdst
.width0
, tdst
.height0
, tdst
.array_size
,
295 array_mode_to_string(rdst
->surface
.level
[0].mode
),
296 tsrc
.width0
, tsrc
.height0
, tsrc
.array_size
,
297 array_mode_to_string(rsrc
->surface
.level
[0].mode
), bpp
);
301 set_random_pixels(ctx
, src
, &src_cpu
);
303 /* clear dst pixels */
304 rctx
->clear_buffer(ctx
, dst
, 0, rdst
->surface
.surf_size
, 0, true);
305 memset(dst_cpu
.ptr
, 0, dst_cpu
.layer_stride
* tdst
.array_size
);
308 max_width
= MIN2(tsrc
.width0
, tdst
.width0
);
309 max_height
= MIN2(tsrc
.height0
, tdst
.height0
);
310 max_depth
= MIN2(tsrc
.array_size
, tdst
.array_size
);
312 num
= do_partial_copies
? num_partial_copies
: 1;
313 for (j
= 0; j
< num
; j
++) {
314 int width
, height
, depth
;
315 int srcx
, srcy
, srcz
, dstx
, dsty
, dstz
;
317 unsigned old_num_draw_calls
= rctx
->num_draw_calls
;
318 unsigned old_num_dma_calls
= rctx
->num_dma_calls
;
320 if (!do_partial_copies
) {
321 /* copy whole src to dst */
326 srcx
= srcy
= srcz
= dstx
= dsty
= dstz
= 0;
328 /* random sub-rectangle copies from src to dst */
329 depth
= (rand() % max_depth
) + 1;
330 srcz
= rand() % (tsrc
.array_size
- depth
+ 1);
331 dstz
= rand() % (tdst
.array_size
- depth
+ 1);
333 /* special code path to hit the tiled partial copies */
334 if (rsrc
->surface
.level
[0].mode
>= RADEON_SURF_MODE_1D
&&
335 rdst
->surface
.level
[0].mode
>= RADEON_SURF_MODE_1D
&&
337 if (max_width
< 8 || max_height
< 8)
339 width
= ((rand() % (max_width
/ 8)) + 1) * 8;
340 height
= ((rand() % (max_height
/ 8)) + 1) * 8;
342 srcx
= rand() % (tsrc
.width0
- width
+ 1) & ~0x7;
343 srcy
= rand() % (tsrc
.height0
- height
+ 1) & ~0x7;
345 dstx
= rand() % (tdst
.width0
- width
+ 1) & ~0x7;
346 dsty
= rand() % (tdst
.height0
- height
+ 1) & ~0x7;
348 /* just make sure that it doesn't divide by zero */
349 assert(max_width
> 0 && max_height
> 0);
351 width
= (rand() % max_width
) + 1;
352 height
= (rand() % max_height
) + 1;
354 srcx
= rand() % (tsrc
.width0
- width
+ 1);
355 srcy
= rand() % (tsrc
.height0
- height
+ 1);
357 dstx
= rand() % (tdst
.width0
- width
+ 1);
358 dsty
= rand() % (tdst
.height0
- height
+ 1);
361 /* special code path to hit out-of-bounds reads in L2T */
362 if (rsrc
->surface
.level
[0].mode
== RADEON_SURF_MODE_LINEAR_ALIGNED
&&
363 rdst
->surface
.level
[0].mode
>= RADEON_SURF_MODE_1D
&&
372 u_box_3d(srcx
, srcy
, srcz
, width
, height
, depth
, &box
);
373 rctx
->dma_copy(ctx
, dst
, 0, dstx
, dsty
, dstz
, src
, 0, &box
);
375 /* See which engine was used. */
376 gfx_blits
+= rctx
->num_draw_calls
> old_num_draw_calls
;
377 dma_blits
+= rctx
->num_dma_calls
> old_num_dma_calls
;
380 util_copy_box(dst_cpu
.ptr
, tdst
.format
, dst_cpu
.stride
,
381 dst_cpu
.layer_stride
,
382 dstx
, dsty
, dstz
, width
, height
, depth
,
383 src_cpu
.ptr
, src_cpu
.stride
,
384 src_cpu
.layer_stride
,
388 pass
= compare_textures(ctx
, dst
, &dst_cpu
, bpp
);
394 printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
395 gfx_blits
, dma_blits
, pass
? "pass" : "fail",
396 num_pass
, num_pass
+num_fail
);
399 pipe_resource_reference(&src
, NULL
);
400 pipe_resource_reference(&dst
, NULL
);