/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Nicolai Hähnle <nicolai.haehnle@amd.com>
 *
 */

#include "util/u_memory.h"
#include "r600_query.h"
#include "r600_pipe_common.h"
#include "r600d_common.h"

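/* Performance counter queries:
 *
 * Each hardware block exposes num_selectors counters and is split into
 * num_groups groups, depending on its flags and on the RADEON_PC_SEPARATE_*
 * debug options (per shader engine, per instance and/or per shader stage).
 * The driver-query index space enumerates every (group, selector) pair of
 * every block; batch queries gather several of them in one begin/end pair.
 */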
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16

static const char * const r600_pc_shader_suffix[] = {
        "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
};

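/* Map a flat query index onto its hardware block.  Writes the sum of the
 * group counts of all preceding blocks to *base_gid and the index within
 * the block (group-major) to *sub_index.
 */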
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
               unsigned *base_gid, unsigned *sub_index)
{
        struct r600_perfcounter_block *block = pc->blocks;
        unsigned bid;

        *base_gid = 0;
        for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
                unsigned total = block->num_groups * block->num_selectors;

                if (index < total) {
                        *sub_index = index;
                        return block;
                }

                index -= total;
                *base_gid += block->num_groups;
        }

        return NULL;
}

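/* Map a flat group index onto its hardware block; *index is rewritten to
 * the group index within that block.
 */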
static struct r600_perfcounter_block *
lookup_group(struct r600_perfcounters *pc, unsigned *index)
{
        unsigned bid;
        struct r600_perfcounter_block *block = pc->blocks;

        for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
                if (*index < block->num_groups)
                        return block;
                *index -= block->num_groups;
        }

        return NULL;
}

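/* Per-(block, group) state of a batch query.  se and instance are -1 when
 * the group covers all shader engines resp. all instances of the block.
 */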
struct r600_pc_group {
        struct r600_pc_group *next;
        struct r600_perfcounter_block *block;
        unsigned sub_gid; /* only used during init */
        unsigned result_base; /* only used during init */
        int se;
        int instance;
        unsigned num_counters;
        unsigned selectors[R600_QUERY_MAX_COUNTERS];
};

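/* Maps one user-visible counter of a batch query onto the result buffer:
 * dwords values are summed, starting at dword index base and advancing by
 * stride dwords per hardware instance.
 */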
struct r600_pc_counter {
        unsigned base;
        unsigned dwords;
        unsigned stride;
};

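/* A batch query: a set of performance counters read in one begin/end pair. */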
struct r600_query_pc {
        struct r600_query_hw b;

        unsigned shaders;
        unsigned num_counters;
        struct r600_pc_counter *counters;
        struct r600_pc_group *groups;
};

static void r600_pc_query_destroy(struct r600_common_context *ctx,
                                  struct r600_query *rquery)
{
        struct r600_query_pc *query = (struct r600_query_pc *)rquery;

        while (query->groups) {
                struct r600_pc_group *group = query->groups;
                query->groups = group->next;
                FREE(group);
        }

        FREE(query->counters);

        r600_query_hw_destroy(ctx, rquery);
}

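/* Program the counter selects of all groups, switching the SE/instance
 * selection only when it changes, then start the counters.
 */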
static void r600_pc_query_emit_start(struct r600_common_context *ctx,
                                     struct r600_query_hw *hwquery,
                                     struct r600_resource *buffer, uint64_t va)
{
        struct r600_perfcounters *pc = ctx->screen->perfcounters;
        struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
        struct r600_pc_group *group;
        int current_se = -1;
        int current_instance = -1;

        if (query->shaders)
                pc->emit_shaders(ctx, query->shaders);

        for (group = query->groups; group; group = group->next) {
                struct r600_perfcounter_block *block = group->block;

                if (group->se != current_se || group->instance != current_instance) {
                        current_se = group->se;
                        current_instance = group->instance;
                        pc->emit_instance(ctx, group->se, group->instance);
                }

                pc->emit_select(ctx, block, group->num_counters, group->selectors);
        }

        if (current_se != -1 || current_instance != -1)
                pc->emit_instance(ctx, -1, -1);

        pc->emit_start(ctx, buffer, va);
}

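/* Stop the counters, then read every selected counter once per covered
 * SE/instance combination; each read appends num_counters dwords at va.
 */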
static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
                                    struct r600_query_hw *hwquery,
                                    struct r600_resource *buffer, uint64_t va)
{
        struct r600_perfcounters *pc = ctx->screen->perfcounters;
        struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
        struct r600_pc_group *group;

        pc->emit_stop(ctx, buffer, va);

        for (group = query->groups; group; group = group->next) {
                struct r600_perfcounter_block *block = group->block;
                unsigned se = group->se >= 0 ? group->se : 0;
                unsigned se_end = se + 1;

                if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
                        se_end = ctx->screen->info.max_se;

                do {
                        unsigned instance = group->instance >= 0 ? group->instance : 0;

                        do {
                                pc->emit_instance(ctx, se, instance);
                                pc->emit_read(ctx, block,
                                              group->num_counters, group->selectors,
                                              buffer, va);
                                va += 4 * group->num_counters;
                        } while (group->instance < 0 && ++instance < block->num_instances);
                } while (++se < se_end);
        }

        pc->emit_instance(ctx, -1, -1);
}

static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
                                       union pipe_query_result *result)
{
        struct r600_query_pc *query = (struct r600_query_pc *)hwquery;

        memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}

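/* Accumulate the dwords of one readback buffer into the batch result:
 * counters that cover multiple SEs/instances are summed up.
 */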
static void r600_pc_query_add_result(struct r600_common_context *ctx,
                                     struct r600_query_hw *hwquery,
                                     void *buffer,
                                     union pipe_query_result *result)
{
        struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
        uint32_t *results = buffer;
        unsigned i, j;

        for (i = 0; i < query->num_counters; ++i) {
                struct r600_pc_counter *counter = &query->counters[i];

                for (j = 0; j < counter->dwords; ++j) {
                        uint32_t value = results[counter->base + j * counter->stride];
                        result->batch[i].u32 += value;
                }
        }
}

static struct r600_query_ops batch_query_ops = {
        .destroy = r600_pc_query_destroy,
        .begin = r600_query_hw_begin,
        .end = r600_query_hw_end,
        .get_result = r600_query_hw_get_result
};

static struct r600_query_hw_ops batch_query_hw_ops = {
        .emit_start = r600_pc_query_emit_start,
        .emit_stop = r600_pc_query_emit_stop,
        .clear_result = r600_pc_query_clear_result,
        .add_result = r600_pc_query_add_result,
};

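/* Return the group state for (block, sub_gid), creating it on first use.
 * For shader blocks, the shader stage is decoded from the top of sub_gid
 * (0 = all shaders); since a single shader mask applies to the whole query,
 * mixing counters of different shader stages in one batch is rejected.
 */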
static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
                                             struct r600_query_pc *query,
                                             struct r600_perfcounter_block *block,
                                             unsigned sub_gid)
{
        struct r600_pc_group *group = query->groups;

        while (group) {
                if (group->block == block && group->sub_gid == sub_gid)
                        return group;
                group = group->next;
        }

        group = CALLOC_STRUCT(r600_pc_group);
        if (!group)
                return NULL;

        group->block = block;
        group->sub_gid = sub_gid;

        if (block->flags & R600_PC_BLOCK_SHADER) {
                unsigned sub_gids = block->num_instances;
                unsigned shader_id;
                unsigned shader_mask;
                unsigned query_shader_mask;

                if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                        sub_gids = sub_gids * screen->info.max_se;
                shader_id = sub_gid / sub_gids;
                sub_gid = sub_gid % sub_gids;

                if (shader_id == 0)
                        shader_mask = R600_PC_SHADER_ALL;
                else
                        shader_mask = 1 << (shader_id - 1);

                query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
                if (query_shader_mask && query_shader_mask != shader_mask) {
                        fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
                        FREE(group);
                        return NULL;
                }
                query->shaders |= shader_mask;
        }

        if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
                /* A non-zero value in query->shaders ensures that the shader
                 * masking is reset unless the user explicitly requests one. */
                query->shaders |= R600_PC_SHADER_WINDOWING;
        }

        if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
                group->se = sub_gid / block->num_instances;
                sub_gid = sub_gid % block->num_instances;
        } else {
                group->se = -1;
        }

        if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
                group->instance = sub_gid;
        } else {
                group->instance = -1;
        }

        group->next = query->groups;
        query->groups = group;

        return group;
}

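/* Create a batch query from an array of performance counter query types.
 * Three passes: collect the selectors of each group, size the result
 * buffer and command streams, and map each input query onto its slice of
 * the result buffer.
 */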
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
                                           unsigned num_queries,
                                           unsigned *query_types)
{
        struct r600_common_context *rctx = (struct r600_common_context *)ctx;
        struct r600_common_screen *screen = rctx->screen;
        struct r600_perfcounters *pc = screen->perfcounters;
        struct r600_perfcounter_block *block;
        struct r600_pc_group *group;
        struct r600_query_pc *query;
        unsigned base_gid, sub_gid, sub_index;
        unsigned i, j;

        if (!pc)
                return NULL;

        query = CALLOC_STRUCT(r600_query_pc);
        if (!query)
                return NULL;

        query->b.b.ops = &batch_query_ops;
        query->b.ops = &batch_query_hw_ops;
        query->b.flags = R600_QUERY_HW_FLAG_TIMER;

        query->num_counters = num_queries;

        /* Collect selectors per group */
        for (i = 0; i < num_queries; ++i) {
                unsigned sub_gid;

                if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
                        goto error;

                block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
                                       &base_gid, &sub_index);
                if (!block)
                        goto error;

                sub_gid = sub_index / block->num_selectors;
                sub_index = sub_index % block->num_selectors;

                group = get_group_state(screen, query, block, sub_gid);
                if (!group)
                        goto error;

                if (group->num_counters >= block->num_counters) {
                        fprintf(stderr,
                                "perfcounter group %s: too many selected\n",
                                block->basename);
                        goto error;
                }
                group->selectors[group->num_counters] = sub_index;
                ++group->num_counters;
        }

        /* Compute result bases and CS size per group */
        query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
        query->b.num_cs_dw_end = pc->num_stop_cs_dwords;

        query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
        query->b.num_cs_dw_end += pc->num_instance_cs_dwords;

        i = 0;
        for (group = query->groups; group; group = group->next) {
                struct r600_perfcounter_block *block = group->block;
                unsigned select_dw, read_dw;
                unsigned instances = 1;

                if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
                        instances = rctx->screen->info.max_se;
                if (group->instance < 0)
                        instances *= block->num_instances;

                group->result_base = i;
                query->b.result_size += 4 * instances * group->num_counters;
                i += instances * group->num_counters;

                pc->get_size(block, group->num_counters, group->selectors,
                             &select_dw, &read_dw);
                query->b.num_cs_dw_begin += select_dw;
                query->b.num_cs_dw_end += instances * read_dw;
                query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
                query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
        }

        if (query->shaders) {
                if ((query->shaders & R600_PC_SHADER_ALL) == 0)
                        query->shaders |= R600_PC_SHADER_ALL;
                query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
        }

        /* Map user-supplied query array to result indices */
        query->counters = CALLOC(num_queries, sizeof(*query->counters));
        if (!query->counters)
                goto error;

        for (i = 0; i < num_queries; ++i) {
                struct r600_pc_counter *counter = &query->counters[i];
                struct r600_perfcounter_block *block;

                block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
                                       &base_gid, &sub_index);

                sub_gid = sub_index / block->num_selectors;
                sub_index = sub_index % block->num_selectors;

                group = get_group_state(screen, query, block, sub_gid);
                assert(group != NULL);

                for (j = 0; j < group->num_counters; ++j) {
                        if (group->selectors[j] == sub_index)
                                break;
                }

                counter->base = group->result_base + j;
                counter->stride = group->num_counters;

                counter->dwords = 1;
                if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
                        counter->dwords = screen->info.max_se;
                if (group->instance < 0)
                        counter->dwords *= block->num_instances;
        }

        if (!r600_query_hw_init(rctx, &query->b))
                goto error;

        return (struct pipe_query *)query;

error:
        r600_pc_query_destroy(rctx, &query->b.b);
        return NULL;
}

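/* Build the human-readable group and selector names of a block.  Names are
 * stored at a fixed stride so that a name can be found by index without
 * scanning; this is done lazily on the first info request for the block.
 */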
static boolean r600_init_block_names(struct r600_common_screen *screen,
                                     struct r600_perfcounter_block *block)
{
        unsigned i, j, k;
        unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
        unsigned namelen;
        char *groupname;
        char *p;

        if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
                groups_instance = block->num_instances;
        if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                groups_se = screen->info.max_se;
        if (block->flags & R600_PC_BLOCK_SHADER)
                groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);

        namelen = strlen(block->basename);
        block->group_name_stride = namelen + 1;
        if (block->flags & R600_PC_BLOCK_SHADER)
                block->group_name_stride += 3;
        if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
                assert(groups_se <= 10);
                block->group_name_stride += 1;

                if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
                        block->group_name_stride += 1;
        }
        if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
                assert(groups_instance <= 100);
                block->group_name_stride += 2;
        }

        block->group_names = MALLOC(block->num_groups * block->group_name_stride);
        if (!block->group_names)
                return FALSE;

        groupname = block->group_names;
        for (i = 0; i < groups_shader; ++i) {
                unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
                for (j = 0; j < groups_se; ++j) {
                        for (k = 0; k < groups_instance; ++k) {
                                strcpy(groupname, block->basename);
                                p = groupname + namelen;

                                if (block->flags & R600_PC_BLOCK_SHADER) {
                                        strcpy(p, r600_pc_shader_suffix[i]);
                                        p += shaderlen;
                                }

                                if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
                                        p += sprintf(p, "%d", j);
                                        if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
                                                *p++ = '_';
                                }

                                if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
                                        p += sprintf(p, "%d", k);

                                groupname += block->group_name_stride;
                        }
                }
        }

        assert(block->num_selectors <= 1000);
        block->selector_name_stride = block->group_name_stride + 4;
        block->selector_names = MALLOC(block->num_groups * block->num_selectors *
                                       block->selector_name_stride);
        if (!block->selector_names)
                return FALSE;

        groupname = block->group_names;
        p = block->selector_names;
        for (i = 0; i < block->num_groups; ++i) {
                for (j = 0; j < block->num_selectors; ++j) {
                        sprintf(p, "%s_%03d", groupname, j);
                        p += block->selector_name_stride;
                }
                groupname += block->group_name_stride;
        }

        return TRUE;
}

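/* Driver-query interface: with info == NULL, return the total number of
 * perfcounter queries; otherwise fill in the info of the query at index.
 */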
int r600_get_perfcounter_info(struct r600_common_screen *screen,
                              unsigned index,
                              struct pipe_driver_query_info *info)
{
        struct r600_perfcounters *pc = screen->perfcounters;
        struct r600_perfcounter_block *block;
        unsigned base_gid, sub;

        if (!pc)
                return 0;

        if (!info) {
                unsigned bid, num_queries = 0;

                for (bid = 0; bid < pc->num_blocks; ++bid) {
                        num_queries += pc->blocks[bid].num_selectors *
                                       pc->blocks[bid].num_groups;
                }

                return num_queries;
        }

        block = lookup_counter(pc, index, &base_gid, &sub);
        if (!block)
                return 0;

        if (!block->selector_names) {
                if (!r600_init_block_names(screen, block))
                        return 0;
        }
        info->name = block->selector_names + sub * block->selector_name_stride;
        info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
        info->max_value.u64 = 0;
        info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
        info->group_id = base_gid + sub / block->num_selectors;
        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
        if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
                info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
        return 1;
}

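/* Same pattern for groups: with info == NULL, return the number of groups. */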
int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
                                    unsigned index,
                                    struct pipe_driver_query_group_info *info)
{
        struct r600_perfcounters *pc = screen->perfcounters;
        struct r600_perfcounter_block *block;

        if (!pc)
                return 0;

        if (!info)
                return pc->num_groups;

        block = lookup_group(pc, &index);
        if (!block)
                return 0;

        if (!block->group_names) {
                if (!r600_init_block_names(screen, block))
                        return 0;
        }
        info->name = block->group_names + index * block->group_name_stride;
        info->num_queries = block->num_selectors;
        info->max_active_queries = block->num_counters;
        return 1;
}

void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
{
        if (rscreen->perfcounters)
                rscreen->perfcounters->cleanup(rscreen);
}

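/* Allocate the block array and read the debug options that split counter
 * groups per shader engine (RADEON_PC_SEPARATE_SE) and per instance
 * (RADEON_PC_SEPARATE_INSTANCE).
 */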
boolean r600_perfcounters_init(struct r600_perfcounters *pc,
                               unsigned num_blocks)
{
        pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
        if (!pc->blocks)
                return FALSE;

        pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", FALSE);
        pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", FALSE);

        return TRUE;
}

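/* Register one hardware block.  The number of exposed groups is derived
 * from the block's flags: per-instance and/or per-SE grouping multiplies
 * by the instance and SE counts, and shader blocks get one group per
 * entry of r600_pc_shader_suffix.
 */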
void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
                                 struct r600_perfcounters *pc,
                                 const char *name, unsigned flags,
                                 unsigned counters, unsigned selectors,
                                 unsigned instances, void *data)
{
        struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];

        assert(counters <= R600_QUERY_MAX_COUNTERS);

        block->basename = name;
        block->flags = flags;
        block->num_counters = counters;
        block->num_selectors = selectors;
        block->num_instances = MAX2(instances, 1);
        block->data = data;

        if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
                block->flags |= R600_PC_BLOCK_SE_GROUPS;
        if (pc->separate_instance && block->num_instances > 1)
                block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;

        if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
                block->num_groups = block->num_instances;
        } else {
                block->num_groups = 1;
        }

        if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                block->num_groups *= rscreen->info.max_se;
        if (block->flags & R600_PC_BLOCK_SHADER)
                block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);

        ++pc->num_blocks;
        pc->num_groups += block->num_groups;
}

void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
{
        unsigned i;

        for (i = 0; i < pc->num_blocks; ++i) {
                FREE(pc->blocks[i].group_names);
                FREE(pc->blocks[i].selector_names);
        }
        FREE(pc->blocks);
        FREE(pc);
}