/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "util/u_memory.h"
#include "r600_query.h"
#include "radeonsi/si_pipe.h"
#include "amd/common/sid.h"

/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16
33 static struct r600_perfcounter_block
*
34 lookup_counter(struct r600_perfcounters
*pc
, unsigned index
,
35 unsigned *base_gid
, unsigned *sub_index
)
37 struct r600_perfcounter_block
*block
= pc
->blocks
;
41 for (bid
= 0; bid
< pc
->num_blocks
; ++bid
, ++block
) {
42 unsigned total
= block
->num_groups
* block
->num_selectors
;
50 *base_gid
+= block
->num_groups
;
56 static struct r600_perfcounter_block
*
57 lookup_group(struct r600_perfcounters
*pc
, unsigned *index
)
60 struct r600_perfcounter_block
*block
= pc
->blocks
;
62 for (bid
= 0; bid
< pc
->num_blocks
; ++bid
, ++block
) {
63 if (*index
< block
->num_groups
)
65 *index
-= block
->num_groups
;
71 struct r600_pc_group
{
72 struct r600_pc_group
*next
;
73 struct r600_perfcounter_block
*block
;
74 unsigned sub_gid
; /* only used during init */
75 unsigned result_base
; /* only used during init */
78 unsigned num_counters
;
79 unsigned selectors
[R600_QUERY_MAX_COUNTERS
];
/* Per user-visible counter: where its qwords live in the result buffer.
 * Consumed by r600_pc_query_add_result, which sums `qwords` values starting
 * at `base`, stepping by `stride`. */
struct r600_pc_counter {
	unsigned base;   /* first result slot, in uint64s */
	unsigned qwords; /* number of result slots to accumulate */
	unsigned stride; /* in uint64s */
};
#define R600_PC_SHADERS_WINDOWING (1 << 31)
90 struct r600_query_pc
{
91 struct r600_query_hw b
;
94 unsigned num_counters
;
95 struct r600_pc_counter
*counters
;
96 struct r600_pc_group
*groups
;
99 static void r600_pc_query_destroy(struct si_screen
*sscreen
,
100 struct r600_query
*rquery
)
102 struct r600_query_pc
*query
= (struct r600_query_pc
*)rquery
;
104 while (query
->groups
) {
105 struct r600_pc_group
*group
= query
->groups
;
106 query
->groups
= group
->next
;
110 FREE(query
->counters
);
112 si_query_hw_destroy(sscreen
, rquery
);
115 static bool r600_pc_query_prepare_buffer(struct si_screen
*screen
,
116 struct r600_query_hw
*hwquery
,
117 struct r600_resource
*buffer
)
123 static void r600_pc_query_emit_start(struct si_context
*sctx
,
124 struct r600_query_hw
*hwquery
,
125 struct r600_resource
*buffer
, uint64_t va
)
127 struct r600_perfcounters
*pc
= sctx
->screen
->perfcounters
;
128 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
129 struct r600_pc_group
*group
;
131 int current_instance
= -1;
134 pc
->emit_shaders(sctx
, query
->shaders
);
136 for (group
= query
->groups
; group
; group
= group
->next
) {
137 struct r600_perfcounter_block
*block
= group
->block
;
139 if (group
->se
!= current_se
|| group
->instance
!= current_instance
) {
140 current_se
= group
->se
;
141 current_instance
= group
->instance
;
142 pc
->emit_instance(sctx
, group
->se
, group
->instance
);
145 pc
->emit_select(sctx
, block
, group
->num_counters
, group
->selectors
);
148 if (current_se
!= -1 || current_instance
!= -1)
149 pc
->emit_instance(sctx
, -1, -1);
151 pc
->emit_start(sctx
, buffer
, va
);
154 static void r600_pc_query_emit_stop(struct si_context
*sctx
,
155 struct r600_query_hw
*hwquery
,
156 struct r600_resource
*buffer
, uint64_t va
)
158 struct r600_perfcounters
*pc
= sctx
->screen
->perfcounters
;
159 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
160 struct r600_pc_group
*group
;
162 pc
->emit_stop(sctx
, buffer
, va
);
164 for (group
= query
->groups
; group
; group
= group
->next
) {
165 struct r600_perfcounter_block
*block
= group
->block
;
166 unsigned se
= group
->se
>= 0 ? group
->se
: 0;
167 unsigned se_end
= se
+ 1;
169 if ((block
->flags
& R600_PC_BLOCK_SE
) && (group
->se
< 0))
170 se_end
= sctx
->screen
->info
.max_se
;
173 unsigned instance
= group
->instance
>= 0 ? group
->instance
: 0;
176 pc
->emit_instance(sctx
, se
, instance
);
177 pc
->emit_read(sctx
, block
,
178 group
->num_counters
, group
->selectors
,
180 va
+= sizeof(uint64_t) * group
->num_counters
;
181 } while (group
->instance
< 0 && ++instance
< block
->num_instances
);
182 } while (++se
< se_end
);
185 pc
->emit_instance(sctx
, -1, -1);
188 static void r600_pc_query_clear_result(struct r600_query_hw
*hwquery
,
189 union pipe_query_result
*result
)
191 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
193 memset(result
, 0, sizeof(result
->batch
[0]) * query
->num_counters
);
196 static void r600_pc_query_add_result(struct si_screen
*sscreen
,
197 struct r600_query_hw
*hwquery
,
199 union pipe_query_result
*result
)
201 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
202 uint64_t *results
= buffer
;
205 for (i
= 0; i
< query
->num_counters
; ++i
) {
206 struct r600_pc_counter
*counter
= &query
->counters
[i
];
208 for (j
= 0; j
< counter
->qwords
; ++j
) {
209 uint32_t value
= results
[counter
->base
+ j
* counter
->stride
];
210 result
->batch
[i
].u64
+= value
;
215 static struct r600_query_ops batch_query_ops
= {
216 .destroy
= r600_pc_query_destroy
,
217 .begin
= si_query_hw_begin
,
218 .end
= si_query_hw_end
,
219 .get_result
= si_query_hw_get_result
222 static struct r600_query_hw_ops batch_query_hw_ops
= {
223 .prepare_buffer
= r600_pc_query_prepare_buffer
,
224 .emit_start
= r600_pc_query_emit_start
,
225 .emit_stop
= r600_pc_query_emit_stop
,
226 .clear_result
= r600_pc_query_clear_result
,
227 .add_result
= r600_pc_query_add_result
,
230 static struct r600_pc_group
*get_group_state(struct si_screen
*screen
,
231 struct r600_query_pc
*query
,
232 struct r600_perfcounter_block
*block
,
235 struct r600_pc_group
*group
= query
->groups
;
238 if (group
->block
== block
&& group
->sub_gid
== sub_gid
)
243 group
= CALLOC_STRUCT(r600_pc_group
);
247 group
->block
= block
;
248 group
->sub_gid
= sub_gid
;
250 if (block
->flags
& R600_PC_BLOCK_SHADER
) {
251 unsigned sub_gids
= block
->num_instances
;
254 unsigned query_shaders
;
256 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
)
257 sub_gids
= sub_gids
* screen
->info
.max_se
;
258 shader_id
= sub_gid
/ sub_gids
;
259 sub_gid
= sub_gid
% sub_gids
;
261 shaders
= screen
->perfcounters
->shader_type_bits
[shader_id
];
263 query_shaders
= query
->shaders
& ~R600_PC_SHADERS_WINDOWING
;
264 if (query_shaders
&& query_shaders
!= shaders
) {
265 fprintf(stderr
, "r600_perfcounter: incompatible shader groups\n");
269 query
->shaders
= shaders
;
272 if (block
->flags
& R600_PC_BLOCK_SHADER_WINDOWED
&& !query
->shaders
) {
273 // A non-zero value in query->shaders ensures that the shader
274 // masking is reset unless the user explicitly requests one.
275 query
->shaders
= R600_PC_SHADERS_WINDOWING
;
278 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
279 group
->se
= sub_gid
/ block
->num_instances
;
280 sub_gid
= sub_gid
% block
->num_instances
;
285 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
) {
286 group
->instance
= sub_gid
;
288 group
->instance
= -1;
291 group
->next
= query
->groups
;
292 query
->groups
= group
;
297 struct pipe_query
*si_create_batch_query(struct pipe_context
*ctx
,
298 unsigned num_queries
,
299 unsigned *query_types
)
301 struct si_screen
*screen
=
302 (struct si_screen
*)ctx
->screen
;
303 struct r600_perfcounters
*pc
= screen
->perfcounters
;
304 struct r600_perfcounter_block
*block
;
305 struct r600_pc_group
*group
;
306 struct r600_query_pc
*query
;
307 unsigned base_gid
, sub_gid
, sub_index
;
313 query
= CALLOC_STRUCT(r600_query_pc
);
317 query
->b
.b
.ops
= &batch_query_ops
;
318 query
->b
.ops
= &batch_query_hw_ops
;
320 query
->num_counters
= num_queries
;
322 /* Collect selectors per group */
323 for (i
= 0; i
< num_queries
; ++i
) {
326 if (query_types
[i
] < R600_QUERY_FIRST_PERFCOUNTER
)
329 block
= lookup_counter(pc
, query_types
[i
] - R600_QUERY_FIRST_PERFCOUNTER
,
330 &base_gid
, &sub_index
);
334 sub_gid
= sub_index
/ block
->num_selectors
;
335 sub_index
= sub_index
% block
->num_selectors
;
337 group
= get_group_state(screen
, query
, block
, sub_gid
);
341 if (group
->num_counters
>= block
->num_counters
) {
343 "perfcounter group %s: too many selected\n",
347 group
->selectors
[group
->num_counters
] = sub_index
;
348 ++group
->num_counters
;
351 /* Compute result bases and CS size per group */
352 query
->b
.num_cs_dw_end
= pc
->num_stop_cs_dwords
;
353 query
->b
.num_cs_dw_end
+= pc
->num_instance_cs_dwords
;
356 for (group
= query
->groups
; group
; group
= group
->next
) {
357 struct r600_perfcounter_block
*block
= group
->block
;
359 unsigned instances
= 1;
361 if ((block
->flags
& R600_PC_BLOCK_SE
) && group
->se
< 0)
362 instances
= screen
->info
.max_se
;
363 if (group
->instance
< 0)
364 instances
*= block
->num_instances
;
366 group
->result_base
= i
;
367 query
->b
.result_size
+= sizeof(uint64_t) * instances
* group
->num_counters
;
368 i
+= instances
* group
->num_counters
;
370 read_dw
= 6 * group
->num_counters
;
371 query
->b
.num_cs_dw_end
+= instances
* read_dw
;
372 query
->b
.num_cs_dw_end
+= instances
* pc
->num_instance_cs_dwords
;
375 if (query
->shaders
) {
376 if (query
->shaders
== R600_PC_SHADERS_WINDOWING
)
377 query
->shaders
= 0xffffffff;
380 /* Map user-supplied query array to result indices */
381 query
->counters
= CALLOC(num_queries
, sizeof(*query
->counters
));
382 for (i
= 0; i
< num_queries
; ++i
) {
383 struct r600_pc_counter
*counter
= &query
->counters
[i
];
384 struct r600_perfcounter_block
*block
;
386 block
= lookup_counter(pc
, query_types
[i
] - R600_QUERY_FIRST_PERFCOUNTER
,
387 &base_gid
, &sub_index
);
389 sub_gid
= sub_index
/ block
->num_selectors
;
390 sub_index
= sub_index
% block
->num_selectors
;
392 group
= get_group_state(screen
, query
, block
, sub_gid
);
393 assert(group
!= NULL
);
395 for (j
= 0; j
< group
->num_counters
; ++j
) {
396 if (group
->selectors
[j
] == sub_index
)
400 counter
->base
= group
->result_base
+ j
;
401 counter
->stride
= group
->num_counters
;
404 if ((block
->flags
& R600_PC_BLOCK_SE
) && group
->se
< 0)
405 counter
->qwords
= screen
->info
.max_se
;
406 if (group
->instance
< 0)
407 counter
->qwords
*= block
->num_instances
;
410 if (!si_query_hw_init(screen
, &query
->b
))
413 return (struct pipe_query
*)query
;
416 r600_pc_query_destroy(screen
, &query
->b
.b
);
420 static bool r600_init_block_names(struct si_screen
*screen
,
421 struct r600_perfcounter_block
*block
)
424 unsigned groups_shader
= 1, groups_se
= 1, groups_instance
= 1;
429 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
430 groups_instance
= block
->num_instances
;
431 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
)
432 groups_se
= screen
->info
.max_se
;
433 if (block
->flags
& R600_PC_BLOCK_SHADER
)
434 groups_shader
= screen
->perfcounters
->num_shader_types
;
436 namelen
= strlen(block
->basename
);
437 block
->group_name_stride
= namelen
+ 1;
438 if (block
->flags
& R600_PC_BLOCK_SHADER
)
439 block
->group_name_stride
+= 3;
440 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
441 assert(groups_se
<= 10);
442 block
->group_name_stride
+= 1;
444 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
445 block
->group_name_stride
+= 1;
447 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
) {
448 assert(groups_instance
<= 100);
449 block
->group_name_stride
+= 2;
452 block
->group_names
= MALLOC(block
->num_groups
* block
->group_name_stride
);
453 if (!block
->group_names
)
456 groupname
= block
->group_names
;
457 for (i
= 0; i
< groups_shader
; ++i
) {
458 const char *shader_suffix
= screen
->perfcounters
->shader_type_suffixes
[i
];
459 unsigned shaderlen
= strlen(shader_suffix
);
460 for (j
= 0; j
< groups_se
; ++j
) {
461 for (k
= 0; k
< groups_instance
; ++k
) {
462 strcpy(groupname
, block
->basename
);
463 p
= groupname
+ namelen
;
465 if (block
->flags
& R600_PC_BLOCK_SHADER
) {
466 strcpy(p
, shader_suffix
);
470 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
471 p
+= sprintf(p
, "%d", j
);
472 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
476 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
477 p
+= sprintf(p
, "%d", k
);
479 groupname
+= block
->group_name_stride
;
484 assert(block
->num_selectors
<= 1000);
485 block
->selector_name_stride
= block
->group_name_stride
+ 4;
486 block
->selector_names
= MALLOC(block
->num_groups
* block
->num_selectors
*
487 block
->selector_name_stride
);
488 if (!block
->selector_names
)
491 groupname
= block
->group_names
;
492 p
= block
->selector_names
;
493 for (i
= 0; i
< block
->num_groups
; ++i
) {
494 for (j
= 0; j
< block
->num_selectors
; ++j
) {
495 sprintf(p
, "%s_%03d", groupname
, j
);
496 p
+= block
->selector_name_stride
;
498 groupname
+= block
->group_name_stride
;
504 int si_get_perfcounter_info(struct si_screen
*screen
,
506 struct pipe_driver_query_info
*info
)
508 struct r600_perfcounters
*pc
= screen
->perfcounters
;
509 struct r600_perfcounter_block
*block
;
510 unsigned base_gid
, sub
;
516 unsigned bid
, num_queries
= 0;
518 for (bid
= 0; bid
< pc
->num_blocks
; ++bid
) {
519 num_queries
+= pc
->blocks
[bid
].num_selectors
*
520 pc
->blocks
[bid
].num_groups
;
526 block
= lookup_counter(pc
, index
, &base_gid
, &sub
);
530 if (!block
->selector_names
) {
531 if (!r600_init_block_names(screen
, block
))
534 info
->name
= block
->selector_names
+ sub
* block
->selector_name_stride
;
535 info
->query_type
= R600_QUERY_FIRST_PERFCOUNTER
+ index
;
536 info
->max_value
.u64
= 0;
537 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT64
;
538 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE
;
539 info
->group_id
= base_gid
+ sub
/ block
->num_selectors
;
540 info
->flags
= PIPE_DRIVER_QUERY_FLAG_BATCH
;
541 if (sub
> 0 && sub
+ 1 < block
->num_selectors
* block
->num_groups
)
542 info
->flags
|= PIPE_DRIVER_QUERY_FLAG_DONT_LIST
;
546 int si_get_perfcounter_group_info(struct si_screen
*screen
,
548 struct pipe_driver_query_group_info
*info
)
550 struct r600_perfcounters
*pc
= screen
->perfcounters
;
551 struct r600_perfcounter_block
*block
;
557 return pc
->num_groups
;
559 block
= lookup_group(pc
, &index
);
563 if (!block
->group_names
) {
564 if (!r600_init_block_names(screen
, block
))
567 info
->name
= block
->group_names
+ index
* block
->group_name_stride
;
568 info
->num_queries
= block
->num_selectors
;
569 info
->max_active_queries
= block
->num_counters
;
573 void si_perfcounters_destroy(struct si_screen
*sscreen
)
575 if (sscreen
->perfcounters
)
576 sscreen
->perfcounters
->cleanup(sscreen
);
579 bool si_perfcounters_init(struct r600_perfcounters
*pc
,
582 pc
->blocks
= CALLOC(num_blocks
, sizeof(struct r600_perfcounter_block
));
586 pc
->separate_se
= debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
587 pc
->separate_instance
= debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
592 void si_perfcounters_add_block(struct si_screen
*sscreen
,
593 struct r600_perfcounters
*pc
,
594 const char *name
, unsigned flags
,
595 unsigned counters
, unsigned selectors
,
596 unsigned instances
, void *data
)
598 struct r600_perfcounter_block
*block
= &pc
->blocks
[pc
->num_blocks
];
600 assert(counters
<= R600_QUERY_MAX_COUNTERS
);
602 block
->basename
= name
;
603 block
->flags
= flags
;
604 block
->num_counters
= counters
;
605 block
->num_selectors
= selectors
;
606 block
->num_instances
= MAX2(instances
, 1);
609 if (pc
->separate_se
&& (block
->flags
& R600_PC_BLOCK_SE
))
610 block
->flags
|= R600_PC_BLOCK_SE_GROUPS
;
611 if (pc
->separate_instance
&& block
->num_instances
> 1)
612 block
->flags
|= R600_PC_BLOCK_INSTANCE_GROUPS
;
614 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
) {
615 block
->num_groups
= block
->num_instances
;
617 block
->num_groups
= 1;
620 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
)
621 block
->num_groups
*= sscreen
->info
.max_se
;
622 if (block
->flags
& R600_PC_BLOCK_SHADER
)
623 block
->num_groups
*= pc
->num_shader_types
;
626 pc
->num_groups
+= block
->num_groups
;
629 void si_perfcounters_do_destroy(struct r600_perfcounters
*pc
)
633 for (i
= 0; i
< pc
->num_blocks
; ++i
) {
634 FREE(pc
->blocks
[i
].group_names
);
635 FREE(pc
->blocks
[i
].selector_names
);