/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *  Nicolai Hähnle <nicolai.haehnle@amd.com>
 */
28 #include "util/u_memory.h"
29 #include "r600_query.h"
30 #include "r600_pipe_common.h"
31 #include "r600d_common.h"
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16

/* Group-name suffix per shader type; index 0 ("") selects all shaders. */
static const char * const r600_pc_shader_suffix[] = {
	"", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
};
40 static struct r600_perfcounter_block
*
41 lookup_counter(struct r600_perfcounters
*pc
, unsigned index
,
42 unsigned *base_gid
, unsigned *sub_index
)
44 struct r600_perfcounter_block
*block
= pc
->blocks
;
48 for (bid
= 0; bid
< pc
->num_blocks
; ++bid
, ++block
) {
49 unsigned total
= block
->num_groups
* block
->num_selectors
;
57 *base_gid
+= block
->num_groups
;
63 static struct r600_perfcounter_block
*
64 lookup_group(struct r600_perfcounters
*pc
, unsigned *index
)
67 struct r600_perfcounter_block
*block
= pc
->blocks
;
69 for (bid
= 0; bid
< pc
->num_blocks
; ++bid
, ++block
) {
70 if (*index
< block
->num_groups
)
72 *index
-= block
->num_groups
;
78 struct r600_pc_group
{
79 struct r600_pc_group
*next
;
80 struct r600_perfcounter_block
*block
;
81 unsigned sub_gid
; /* only used during init */
82 unsigned result_base
; /* only used during init */
85 unsigned num_counters
;
86 unsigned selectors
[R600_QUERY_MAX_COUNTERS
];
/* Maps one user-requested query onto its dword(s) in the result buffer.
 * base == ~0 marks an unassigned counter (skipped when accumulating).
 */
struct r600_pc_counter {
	unsigned base;   /* index of the first result dword */
	unsigned dwords; /* number of dwords summed into the result */
	unsigned stride; /* distance between consecutive dwords */
};
95 struct r600_query_pc
{
96 struct r600_query_hw b
;
99 unsigned num_counters
;
100 struct r600_pc_counter
*counters
;
101 struct r600_pc_group
*groups
;
104 static void r600_pc_query_destroy(struct r600_common_context
*ctx
,
105 struct r600_query
*rquery
)
107 struct r600_query_pc
*query
= (struct r600_query_pc
*)rquery
;
109 while (query
->groups
) {
110 struct r600_pc_group
*group
= query
->groups
;
111 query
->groups
= group
->next
;
115 FREE(query
->counters
);
117 r600_query_hw_destroy(ctx
, rquery
);
120 static void r600_pc_query_emit_start(struct r600_common_context
*ctx
,
121 struct r600_query_hw
*hwquery
,
122 struct r600_resource
*buffer
, uint64_t va
)
124 struct r600_perfcounters
*pc
= ctx
->screen
->perfcounters
;
125 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
126 struct r600_pc_group
*group
;
128 int current_instance
= -1;
131 pc
->emit_shaders(ctx
, query
->shaders
);
133 for (group
= query
->groups
; group
; group
= group
->next
) {
134 struct r600_perfcounter_block
*block
= group
->block
;
136 if (group
->se
!= current_se
|| group
->instance
!= current_instance
) {
137 current_se
= group
->se
;
138 current_instance
= group
->instance
;
139 pc
->emit_instance(ctx
, group
->se
, group
->instance
);
142 pc
->emit_select(ctx
, block
, group
->num_counters
, group
->selectors
);
145 if (current_se
!= -1 || current_instance
!= -1)
146 pc
->emit_instance(ctx
, -1, -1);
148 pc
->emit_start(ctx
, buffer
, va
);
151 static void r600_pc_query_emit_stop(struct r600_common_context
*ctx
,
152 struct r600_query_hw
*hwquery
,
153 struct r600_resource
*buffer
, uint64_t va
)
155 struct r600_perfcounters
*pc
= ctx
->screen
->perfcounters
;
156 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
157 struct r600_pc_group
*group
;
159 pc
->emit_stop(ctx
, buffer
, va
);
161 for (group
= query
->groups
; group
; group
= group
->next
) {
162 struct r600_perfcounter_block
*block
= group
->block
;
163 unsigned se
= group
->se
>= 0 ? group
->se
: 0;
164 unsigned se_end
= se
+ 1;
166 if ((block
->flags
& R600_PC_BLOCK_SE
) && (group
->se
< 0))
167 se_end
= ctx
->screen
->info
.max_se
;
170 unsigned instance
= group
->instance
>= 0 ? group
->instance
: 0;
173 pc
->emit_instance(ctx
, se
, instance
);
174 pc
->emit_read(ctx
, block
,
175 group
->num_counters
, group
->selectors
,
177 va
+= 4 * group
->num_counters
;
178 } while (group
->instance
< 0 && ++instance
< block
->num_instances
);
179 } while (++se
< se_end
);
182 pc
->emit_instance(ctx
, -1, -1);
185 static void r600_pc_query_clear_result(struct r600_query_hw
*hwquery
,
186 union pipe_query_result
*result
)
188 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
190 memset(result
, 0, sizeof(result
->batch
[0]) * query
->num_counters
);
193 static void r600_pc_query_add_result(struct r600_common_context
*ctx
,
194 struct r600_query_hw
*hwquery
,
196 union pipe_query_result
*result
)
198 struct r600_query_pc
*query
= (struct r600_query_pc
*)hwquery
;
199 uint32_t *results
= buffer
;
202 for (i
= 0; i
< query
->num_counters
; ++i
) {
203 struct r600_pc_counter
*counter
= &query
->counters
[i
];
205 if (counter
->base
== ~0)
208 for (j
= 0; j
< counter
->dwords
; ++j
) {
209 uint32_t value
= results
[counter
->base
+ j
* counter
->stride
];
210 result
->batch
[i
].u32
+= value
;
215 static struct r600_query_ops batch_query_ops
= {
216 .destroy
= r600_pc_query_destroy
,
217 .begin
= r600_query_hw_begin
,
218 .end
= r600_query_hw_end
,
219 .get_result
= r600_query_hw_get_result
222 static struct r600_query_hw_ops batch_query_hw_ops
= {
223 .emit_start
= r600_pc_query_emit_start
,
224 .emit_stop
= r600_pc_query_emit_stop
,
225 .clear_result
= r600_pc_query_clear_result
,
226 .add_result
= r600_pc_query_add_result
,
229 static struct r600_pc_group
*get_group_state(struct r600_common_screen
*screen
,
230 struct r600_query_pc
*query
,
231 struct r600_perfcounter_block
*block
,
234 struct r600_pc_group
*group
= query
->groups
;
237 if (group
->block
== block
&& group
->sub_gid
== sub_gid
)
242 group
= CALLOC_STRUCT(r600_pc_group
);
246 group
->block
= block
;
247 group
->sub_gid
= sub_gid
;
249 if (block
->flags
& R600_PC_BLOCK_SHADER
) {
250 unsigned sub_gids
= block
->num_instances
;
252 unsigned shader_mask
;
253 unsigned query_shader_mask
;
255 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
)
256 sub_gids
= sub_gids
* screen
->info
.max_se
;
257 shader_id
= sub_gid
/ sub_gids
;
258 sub_gid
= sub_gid
% sub_gids
;
261 shader_mask
= R600_PC_SHADER_ALL
;
263 shader_mask
= 1 << (shader_id
- 1);
265 query_shader_mask
= query
->shaders
& R600_PC_SHADER_ALL
;
266 if (query_shader_mask
&& query_shader_mask
!= shader_mask
) {
267 fprintf(stderr
, "r600_perfcounter: incompatible shader groups\n");
271 query
->shaders
|= shader_mask
;
274 if (block
->flags
& R600_PC_BLOCK_SHADER_WINDOWED
) {
275 // A non-zero value in query->shaders ensures that the shader
276 // masking is reset unless the user explicitly requests one.
277 query
->shaders
|= R600_PC_SHADER_WINDOWING
;
280 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
281 group
->se
= sub_gid
/ block
->num_instances
;
282 sub_gid
= sub_gid
% block
->num_instances
;
287 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
) {
288 group
->instance
= sub_gid
;
290 group
->instance
= -1;
293 group
->next
= query
->groups
;
294 query
->groups
= group
;
299 struct pipe_query
*r600_create_batch_query(struct pipe_context
*ctx
,
300 unsigned num_queries
,
301 unsigned *query_types
)
303 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
304 struct r600_common_screen
*screen
= rctx
->screen
;
305 struct r600_perfcounters
*pc
= screen
->perfcounters
;
306 struct r600_perfcounter_block
*block
;
307 struct r600_pc_group
*group
;
308 struct r600_query_pc
*query
;
309 unsigned base_gid
, sub_gid
, sub_index
;
315 query
= CALLOC_STRUCT(r600_query_pc
);
319 query
->b
.b
.ops
= &batch_query_ops
;
320 query
->b
.ops
= &batch_query_hw_ops
;
321 query
->b
.flags
= R600_QUERY_HW_FLAG_TIMER
;
323 query
->num_counters
= num_queries
;
325 /* Collect selectors per group */
326 for (i
= 0; i
< num_queries
; ++i
) {
329 if (query_types
[i
] < R600_QUERY_FIRST_PERFCOUNTER
)
332 block
= lookup_counter(pc
, query_types
[i
] - R600_QUERY_FIRST_PERFCOUNTER
,
333 &base_gid
, &sub_index
);
337 sub_gid
= sub_index
/ block
->num_selectors
;
338 sub_index
= sub_index
% block
->num_selectors
;
340 group
= get_group_state(screen
, query
, block
, sub_gid
);
344 if (group
->num_counters
>= block
->num_counters
) {
346 "perfcounter group %s: too many selected\n",
350 group
->selectors
[group
->num_counters
] = sub_index
;
351 ++group
->num_counters
;
354 /* Compute result bases and CS size per group */
355 query
->b
.num_cs_dw_begin
= pc
->num_start_cs_dwords
;
356 query
->b
.num_cs_dw_end
= pc
->num_stop_cs_dwords
;
358 query
->b
.num_cs_dw_begin
+= pc
->num_instance_cs_dwords
; /* conservative */
359 query
->b
.num_cs_dw_end
+= pc
->num_instance_cs_dwords
;
362 for (group
= query
->groups
; group
; group
= group
->next
) {
363 struct r600_perfcounter_block
*block
= group
->block
;
364 unsigned select_dw
, read_dw
;
365 unsigned instances
= 1;
367 if ((block
->flags
& R600_PC_BLOCK_SE
) && group
->se
< 0)
368 instances
= rctx
->screen
->info
.max_se
;
369 if (group
->instance
< 0)
370 instances
*= block
->num_instances
;
372 group
->result_base
= i
;
373 query
->b
.result_size
+= 4 * instances
* group
->num_counters
;
374 i
+= instances
* group
->num_counters
;
376 pc
->get_size(block
, group
->num_counters
, group
->selectors
,
377 &select_dw
, &read_dw
);
378 query
->b
.num_cs_dw_begin
+= select_dw
;
379 query
->b
.num_cs_dw_end
+= instances
* read_dw
;
380 query
->b
.num_cs_dw_begin
+= pc
->num_instance_cs_dwords
; /* conservative */
381 query
->b
.num_cs_dw_end
+= instances
* pc
->num_instance_cs_dwords
;
384 if (query
->shaders
) {
385 if ((query
->shaders
& R600_PC_SHADER_ALL
) == 0)
386 query
->shaders
|= R600_PC_SHADER_ALL
;
387 query
->b
.num_cs_dw_begin
+= pc
->num_shaders_cs_dwords
;
390 /* Map user-supplied query array to result indices */
391 query
->counters
= CALLOC(num_queries
, sizeof(*query
->counters
));
392 for (i
= 0; i
< num_queries
; ++i
) {
393 struct r600_pc_counter
*counter
= &query
->counters
[i
];
394 struct r600_perfcounter_block
*block
;
396 block
= lookup_counter(pc
, query_types
[i
] - R600_QUERY_FIRST_PERFCOUNTER
,
397 &base_gid
, &sub_index
);
399 sub_gid
= sub_index
/ block
->num_selectors
;
400 sub_index
= sub_index
% block
->num_selectors
;
402 group
= get_group_state(screen
, query
, block
, sub_gid
);
403 assert(group
!= NULL
);
405 for (j
= 0; j
< group
->num_counters
; ++j
) {
406 if (group
->selectors
[j
] == sub_index
)
410 counter
->base
= group
->result_base
+ j
;
411 counter
->stride
= group
->num_counters
;
414 if ((block
->flags
& R600_PC_BLOCK_SE
) && group
->se
< 0)
415 counter
->dwords
= screen
->info
.max_se
;
416 if (group
->instance
< 0)
417 counter
->dwords
*= block
->num_instances
;
420 if (!r600_query_hw_init(rctx
, &query
->b
))
423 return (struct pipe_query
*)query
;
426 r600_pc_query_destroy(rctx
, &query
->b
.b
);
430 int r600_get_perfcounter_info(struct r600_common_screen
*screen
,
432 struct pipe_driver_query_info
*info
)
434 struct r600_perfcounters
*pc
= screen
->perfcounters
;
435 struct r600_perfcounter_block
*block
;
436 unsigned base_gid
, sub
;
442 unsigned bid
, num_queries
= 0;
444 for (bid
= 0; bid
< pc
->num_blocks
; ++bid
) {
445 num_queries
+= pc
->blocks
[bid
].num_selectors
*
446 pc
->blocks
[bid
].num_groups
;
452 block
= lookup_counter(pc
, index
, &base_gid
, &sub
);
456 info
->name
= block
->selector_names
+ sub
* block
->selector_name_stride
;
457 info
->query_type
= R600_QUERY_FIRST_PERFCOUNTER
+ index
;
458 info
->max_value
.u64
= 0;
459 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT
;
460 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE
;
461 info
->group_id
= base_gid
+ sub
/ block
->num_selectors
;
462 info
->flags
= PIPE_DRIVER_QUERY_FLAG_BATCH
;
466 int r600_get_perfcounter_group_info(struct r600_common_screen
*screen
,
468 struct pipe_driver_query_group_info
*info
)
470 struct r600_perfcounters
*pc
= screen
->perfcounters
;
471 struct r600_perfcounter_block
*block
;
477 return pc
->num_groups
;
479 block
= lookup_group(pc
, &index
);
482 info
->name
= block
->group_names
+ index
* block
->group_name_stride
;
483 info
->num_queries
= block
->num_selectors
;
484 info
->max_active_queries
= block
->num_counters
;
488 void r600_perfcounters_destroy(struct r600_common_screen
*rscreen
)
490 if (rscreen
->perfcounters
)
491 rscreen
->perfcounters
->cleanup(rscreen
);
494 boolean
r600_perfcounters_init(struct r600_perfcounters
*pc
,
497 pc
->blocks
= CALLOC(num_blocks
, sizeof(struct r600_perfcounter_block
));
501 pc
->separate_se
= debug_get_bool_option("RADEON_PC_SEPARATE_SE", FALSE
);
502 pc
->separate_instance
= debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", FALSE
);
507 boolean
r600_perfcounters_add_block(struct r600_common_screen
*rscreen
,
508 struct r600_perfcounters
*pc
,
509 const char *name
, unsigned flags
,
510 unsigned counters
, unsigned selectors
,
511 unsigned instances
, void *data
)
513 struct r600_perfcounter_block
*block
= &pc
->blocks
[pc
->num_blocks
];
515 unsigned groups_shader
= 1, groups_se
= 1, groups_instance
= 1;
520 assert(counters
<= R600_QUERY_MAX_COUNTERS
);
522 block
->basename
= name
;
523 block
->flags
= flags
;
524 block
->num_counters
= counters
;
525 block
->num_selectors
= selectors
;
526 block
->num_instances
= MAX2(instances
, 1);
529 if (pc
->separate_se
&& (block
->flags
& R600_PC_BLOCK_SE
))
530 block
->flags
|= R600_PC_BLOCK_SE_GROUPS
;
531 if (pc
->separate_instance
&& block
->num_instances
> 1)
532 block
->flags
|= R600_PC_BLOCK_INSTANCE_GROUPS
;
534 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
) {
535 groups_instance
= block
->num_instances
;
536 block
->num_groups
= groups_instance
;
538 block
->num_groups
= 1;
541 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
542 groups_se
= rscreen
->info
.max_se
;
543 block
->num_groups
*= groups_se
;
546 if (block
->flags
& R600_PC_BLOCK_SHADER
) {
547 groups_shader
= ARRAY_SIZE(r600_pc_shader_suffix
);
548 block
->num_groups
*= groups_shader
;
551 namelen
= strlen(name
);
552 block
->group_name_stride
= namelen
+ 1;
553 if (block
->flags
& R600_PC_BLOCK_SHADER
)
554 block
->group_name_stride
+= 3;
555 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
556 assert(groups_se
<= 10);
557 block
->group_name_stride
+= 1;
559 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
560 block
->group_name_stride
+= 1;
562 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
) {
563 assert(groups_instance
<= 100);
564 block
->group_name_stride
+= 2;
567 block
->group_names
= MALLOC(block
->num_groups
* block
->group_name_stride
);
568 if (!block
->group_names
)
571 groupname
= block
->group_names
;
572 for (i
= 0; i
< groups_shader
; ++i
) {
573 unsigned shaderlen
= strlen(r600_pc_shader_suffix
[i
]);
574 for (j
= 0; j
< groups_se
; ++j
) {
575 for (k
= 0; k
< groups_instance
; ++k
) {
576 strcpy(groupname
, name
);
577 p
= groupname
+ namelen
;
579 if (block
->flags
& R600_PC_BLOCK_SHADER
) {
580 strcpy(p
, r600_pc_shader_suffix
[i
]);
584 if (block
->flags
& R600_PC_BLOCK_SE_GROUPS
) {
585 p
+= sprintf(p
, "%d", j
);
586 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
590 if (block
->flags
& R600_PC_BLOCK_INSTANCE_GROUPS
)
591 p
+= sprintf(p
, "%d", k
);
593 groupname
+= block
->group_name_stride
;
598 assert(selectors
<= 1000);
599 block
->selector_name_stride
= block
->group_name_stride
+ 4;
600 block
->selector_names
= MALLOC(block
->num_groups
* selectors
*
601 block
->selector_name_stride
);
602 if (!block
->selector_names
)
603 goto error_groupnames
;
605 groupname
= block
->group_names
;
606 p
= block
->selector_names
;
607 for (i
= 0; i
< block
->num_groups
; ++i
) {
608 for (j
= 0; j
< selectors
; ++j
) {
609 sprintf(p
, "%s_%03d", groupname
, j
);
610 p
+= block
->selector_name_stride
;
612 groupname
+= block
->group_name_stride
;
616 pc
->num_groups
+= block
->num_groups
;
621 FREE(block
->group_names
);
626 void r600_perfcounters_do_destroy(struct r600_perfcounters
*pc
)
630 for (i
= 0; i
< pc
->num_blocks
; ++i
) {
631 FREE(pc
->blocks
[i
].group_names
);
632 FREE(pc
->blocks
[i
].selector_names
);