radeonsi: switch radeon_add_to_buffer_list parameter to si_context
mesa.git: src/gallium/drivers/radeon/r600_perfcounter.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/u_memory.h"
#include "r600_query.h"
#include "radeonsi/si_pipe.h"
#include "amd/common/sid.h"

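/*
 * Performance counter queries for radeonsi, built on top of the hardware
 * query (r600_query_hw) infrastructure: counters from several hardware
 * blocks can be combined into one batch query, and each selected counter
 * is accumulated into a single 64-bit result.
 */
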
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16

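/* Map a flat counter index to the hardware block it belongs to. On return,
 * *base_gid is the group id of the block's first group and *sub_index is
 * the counter's index relative to the block. */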
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
	       unsigned *base_gid, unsigned *sub_index)
{
	struct r600_perfcounter_block *block = pc->blocks;
	unsigned bid;

	*base_gid = 0;
	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
		unsigned total = block->num_groups * block->num_selectors;

		if (index < total) {
			*sub_index = index;
			return block;
		}

		index -= total;
		*base_gid += block->num_groups;
	}

	return NULL;
}

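/* Map a flat group index to the hardware block it belongs to; *index is
 * rewritten to the group's index within that block. */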
static struct r600_perfcounter_block *
lookup_group(struct r600_perfcounters *pc, unsigned *index)
{
	unsigned bid;
	struct r600_perfcounter_block *block = pc->blocks;

	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
		if (*index < block->num_groups)
			return block;
		*index -= block->num_groups;
	}

	return NULL;
}

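/* A group of counters selected from a single hardware block, windowed to
 * one shader engine and one instance (-1 means "all"). */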
struct r600_pc_group {
	struct r600_pc_group *next;
	struct r600_perfcounter_block *block;
	unsigned sub_gid; /* only used during init */
	unsigned result_base; /* only used during init */
	int se;
	int instance;
	unsigned num_counters;
	unsigned selectors[R600_QUERY_MAX_COUNTERS];
};

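/* Result-buffer layout of one user-visible counter: the final value is the
 * sum of qwords 64-bit slots, starting at index base and spaced stride
 * uint64s apart (one slot per covered SE/instance). */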
struct r600_pc_counter {
	unsigned base;
	unsigned qwords;
	unsigned stride; /* in uint64s */
};

#define R600_PC_SHADERS_WINDOWING (1 << 31)

struct r600_query_pc {
	struct r600_query_hw b;

	unsigned shaders;
	unsigned num_counters;
	struct r600_pc_counter *counters;
	struct r600_pc_group *groups;
};

static void r600_pc_query_destroy(struct si_screen *sscreen,
				  struct r600_query *rquery)
{
	struct r600_query_pc *query = (struct r600_query_pc *)rquery;

	while (query->groups) {
		struct r600_pc_group *group = query->groups;
		query->groups = group->next;
		FREE(group);
	}

	FREE(query->counters);

	si_query_hw_destroy(sscreen, rquery);
}

static bool r600_pc_query_prepare_buffer(struct si_screen *screen,
					 struct r600_query_hw *hwquery,
					 struct r600_resource *buffer)
{
	/* no-op */
	return true;
}

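/* Emit the commands that start counting: select the shader types to count,
 * program the SE/instance windowing and counter selectors for each group,
 * then start the counters. */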
static void r600_pc_query_emit_start(struct si_context *sctx,
				     struct r600_query_hw *hwquery,
				     struct r600_resource *buffer, uint64_t va)
{
	struct r600_perfcounters *pc = sctx->screen->perfcounters;
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	struct r600_pc_group *group;
	int current_se = -1;
	int current_instance = -1;

	if (query->shaders)
		pc->emit_shaders(sctx, query->shaders);

	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;

		if (group->se != current_se || group->instance != current_instance) {
			current_se = group->se;
			current_instance = group->instance;
			pc->emit_instance(sctx, group->se, group->instance);
		}

		pc->emit_select(sctx, block, group->num_counters, group->selectors);
	}

	if (current_se != -1 || current_instance != -1)
		pc->emit_instance(sctx, -1, -1);

	pc->emit_start(sctx, buffer, va);
}

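/* Emit the commands that stop the counters and read them back into the
 * buffer at va. Groups that cover all SEs or all instances are read once
 * per SE/instance; the loop structure must match the result layout
 * computed in si_create_batch_query. */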
static void r600_pc_query_emit_stop(struct si_context *sctx,
				    struct r600_query_hw *hwquery,
				    struct r600_resource *buffer, uint64_t va)
{
	struct r600_perfcounters *pc = sctx->screen->perfcounters;
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	struct r600_pc_group *group;

	pc->emit_stop(sctx, buffer, va);

	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;
		unsigned se = group->se >= 0 ? group->se : 0;
		unsigned se_end = se + 1;

		if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
			se_end = sctx->screen->info.max_se;

		do {
			unsigned instance = group->instance >= 0 ? group->instance : 0;

			do {
				pc->emit_instance(sctx, se, instance);
				pc->emit_read(sctx, block,
					      group->num_counters, group->selectors,
					      buffer, va);
				va += sizeof(uint64_t) * group->num_counters;
			} while (group->instance < 0 && ++instance < block->num_instances);
		} while (++se < se_end);
	}

	pc->emit_instance(sctx, -1, -1);
}

static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
				       union pipe_query_result *result)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;

	memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}

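/* Accumulate one snapshot of results. Each slot holds a 32-bit counter
 * value read back into a 64-bit slot; summing a counter's slots across
 * all covered SEs/instances yields the final 64-bit result. */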
static void r600_pc_query_add_result(struct si_screen *sscreen,
				     struct r600_query_hw *hwquery,
				     void *buffer,
				     union pipe_query_result *result)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	uint64_t *results = buffer;
	unsigned i, j;

	for (i = 0; i < query->num_counters; ++i) {
		struct r600_pc_counter *counter = &query->counters[i];

		for (j = 0; j < counter->qwords; ++j) {
			uint32_t value = results[counter->base + j * counter->stride];
			result->batch[i].u64 += value;
		}
	}
}

static struct r600_query_ops batch_query_ops = {
	.destroy = r600_pc_query_destroy,
	.begin = si_query_hw_begin,
	.end = si_query_hw_end,
	.get_result = si_query_hw_get_result
};

static struct r600_query_hw_ops batch_query_hw_ops = {
	.prepare_buffer = r600_pc_query_prepare_buffer,
	.emit_start = r600_pc_query_emit_start,
	.emit_stop = r600_pc_query_emit_stop,
	.clear_result = r600_pc_query_clear_result,
	.add_result = r600_pc_query_add_result,
};

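/* Find the group for (block, sub_gid), creating it on first use. sub_gid
 * encodes, from most to least significant, the shader type, the shader
 * engine and the instance, depending on which grouping flags the block
 * has set. */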
static struct r600_pc_group *get_group_state(struct si_screen *screen,
					     struct r600_query_pc *query,
					     struct r600_perfcounter_block *block,
					     unsigned sub_gid)
{
	struct r600_pc_group *group = query->groups;

	while (group) {
		if (group->block == block && group->sub_gid == sub_gid)
			return group;
		group = group->next;
	}

	group = CALLOC_STRUCT(r600_pc_group);
	if (!group)
		return NULL;

	group->block = block;
	group->sub_gid = sub_gid;

	if (block->flags & R600_PC_BLOCK_SHADER) {
		unsigned sub_gids = block->num_instances;
		unsigned shader_id;
		unsigned shaders;
		unsigned query_shaders;

		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
			sub_gids = sub_gids * screen->info.max_se;
		shader_id = sub_gid / sub_gids;
		sub_gid = sub_gid % sub_gids;

		shaders = screen->perfcounters->shader_type_bits[shader_id];

		query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
		if (query_shaders && query_shaders != shaders) {
			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
			FREE(group);
			return NULL;
		}
		query->shaders = shaders;
	}

	if ((block->flags & R600_PC_BLOCK_SHADER_WINDOWED) && !query->shaders) {
		/* A non-zero value in query->shaders ensures that the shader
		 * masking is reset unless the user explicitly requests one. */
		query->shaders = R600_PC_SHADERS_WINDOWING;
	}

	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
		group->se = sub_gid / block->num_instances;
		sub_gid = sub_gid % block->num_instances;
	} else {
		group->se = -1;
	}

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		group->instance = sub_gid;
	} else {
		group->instance = -1;
	}

	group->next = query->groups;
	query->groups = group;

	return group;
}

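/* Create a batch query from an array of perfcounter query types: collect
 * the selected counters into per-(block, SE, instance) groups, size the
 * result buffer and command stream, and map each user counter onto its
 * slice of the result buffer. */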
struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
					 unsigned num_queries,
					 unsigned *query_types)
{
	struct si_screen *screen =
		(struct si_screen *)ctx->screen;
	struct r600_perfcounters *pc = screen->perfcounters;
	struct r600_perfcounter_block *block;
	struct r600_pc_group *group;
	struct r600_query_pc *query;
	unsigned base_gid, sub_gid, sub_index;
	unsigned i, j;

	if (!pc)
		return NULL;

	query = CALLOC_STRUCT(r600_query_pc);
	if (!query)
		return NULL;

	query->b.b.ops = &batch_query_ops;
	query->b.ops = &batch_query_hw_ops;

	query->num_counters = num_queries;

	/* Collect selectors per group */
	for (i = 0; i < num_queries; ++i) {
		unsigned sub_gid;

		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
			goto error;

		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
				       &base_gid, &sub_index);
		if (!block)
			goto error;

		sub_gid = sub_index / block->num_selectors;
		sub_index = sub_index % block->num_selectors;

		group = get_group_state(screen, query, block, sub_gid);
		if (!group)
			goto error;

		if (group->num_counters >= block->num_counters) {
			fprintf(stderr,
				"perfcounter group %s: too many selected\n",
				block->basename);
			goto error;
		}
		group->selectors[group->num_counters] = sub_index;
		++group->num_counters;
	}

	/* Compute result bases and CS size per group */
	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;

	i = 0;
	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;
		unsigned read_dw;
		unsigned instances = 1;

		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
			instances = screen->info.max_se;
		if (group->instance < 0)
			instances *= block->num_instances;

		group->result_base = i;
		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
		i += instances * group->num_counters;

		read_dw = 6 * group->num_counters;
		query->b.num_cs_dw_end += instances * read_dw;
		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
	}

	/* If only the windowing bit is set, no explicit shader mask was
	 * requested: count for all shader types. */
	if (query->shaders == R600_PC_SHADERS_WINDOWING)
		query->shaders = 0xffffffff;

	/* Map user-supplied query array to result indices */
	query->counters = CALLOC(num_queries, sizeof(*query->counters));
	if (!query->counters)
		goto error;
	for (i = 0; i < num_queries; ++i) {
		struct r600_pc_counter *counter = &query->counters[i];
		struct r600_perfcounter_block *block;

		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
				       &base_gid, &sub_index);

		sub_gid = sub_index / block->num_selectors;
		sub_index = sub_index % block->num_selectors;

		group = get_group_state(screen, query, block, sub_gid);
		assert(group != NULL);

		for (j = 0; j < group->num_counters; ++j) {
			if (group->selectors[j] == sub_index)
				break;
		}

		counter->base = group->result_base + j;
		counter->stride = group->num_counters;

		counter->qwords = 1;
		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
			counter->qwords = screen->info.max_se;
		if (group->instance < 0)
			counter->qwords *= block->num_instances;
	}

	if (!si_query_hw_init(screen, &query->b))
		goto error;

	return (struct pipe_query *)query;

error:
	r600_pc_query_destroy(screen, &query->b.b);
	return NULL;
}

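/* Lazily build the group and selector name strings for a block. Names are
 * stored at a fixed stride so lookups can index into the arrays. Each name
 * concatenates the basename, an optional shader suffix (from
 * shader_type_suffixes), the SE index and the instance index; selectors
 * append a 3-digit number. E.g. a hypothetical "SQ" block with all
 * groupings enabled might expose "SQ_PS0_1" and "SQ_PS0_1_005". */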
static bool r600_init_block_names(struct si_screen *screen,
				  struct r600_perfcounter_block *block)
{
	unsigned i, j, k;
	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
	unsigned namelen;
	char *groupname;
	char *p;

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
		groups_instance = block->num_instances;
	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
		groups_se = screen->info.max_se;
	if (block->flags & R600_PC_BLOCK_SHADER)
		groups_shader = screen->perfcounters->num_shader_types;

	namelen = strlen(block->basename);
	block->group_name_stride = namelen + 1;
	if (block->flags & R600_PC_BLOCK_SHADER)
		block->group_name_stride += 3;
	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
		assert(groups_se <= 10);
		block->group_name_stride += 1;

		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
			block->group_name_stride += 1;
	}
	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		assert(groups_instance <= 100);
		block->group_name_stride += 2;
	}

	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
	if (!block->group_names)
		return false;

	groupname = block->group_names;
	for (i = 0; i < groups_shader; ++i) {
		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
		unsigned shaderlen = strlen(shader_suffix);
		for (j = 0; j < groups_se; ++j) {
			for (k = 0; k < groups_instance; ++k) {
				strcpy(groupname, block->basename);
				p = groupname + namelen;

				if (block->flags & R600_PC_BLOCK_SHADER) {
					strcpy(p, shader_suffix);
					p += shaderlen;
				}

				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
					p += sprintf(p, "%d", j);
					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
						*p++ = '_';
				}

				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
					p += sprintf(p, "%d", k);

				groupname += block->group_name_stride;
			}
		}
	}

	assert(block->num_selectors <= 1000);
	block->selector_name_stride = block->group_name_stride + 4;
	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
				       block->selector_name_stride);
	if (!block->selector_names)
		return false;

	groupname = block->group_names;
	p = block->selector_names;
	for (i = 0; i < block->num_groups; ++i) {
		for (j = 0; j < block->num_selectors; ++j) {
			sprintf(p, "%s_%03d", groupname, j);
			p += block->selector_name_stride;
		}
		groupname += block->group_name_stride;
	}

	return true;
}

int si_get_perfcounter_info(struct si_screen *screen,
			    unsigned index,
			    struct pipe_driver_query_info *info)
{
	struct r600_perfcounters *pc = screen->perfcounters;
	struct r600_perfcounter_block *block;
	unsigned base_gid, sub;

	if (!pc)
		return 0;

	if (!info) {
		unsigned bid, num_queries = 0;

		for (bid = 0; bid < pc->num_blocks; ++bid) {
			num_queries += pc->blocks[bid].num_selectors *
				       pc->blocks[bid].num_groups;
		}

		return num_queries;
	}

	block = lookup_counter(pc, index, &base_gid, &sub);
	if (!block)
		return 0;

	if (!block->selector_names) {
		if (!r600_init_block_names(screen, block))
			return 0;
	}
	info->name = block->selector_names + sub * block->selector_name_stride;
	info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
	info->max_value.u64 = 0;
	info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
	info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
	info->group_id = base_gid + sub / block->num_selectors;
	info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
	if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
		info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
	return 1;
}

int si_get_perfcounter_group_info(struct si_screen *screen,
				  unsigned index,
				  struct pipe_driver_query_group_info *info)
{
	struct r600_perfcounters *pc = screen->perfcounters;
	struct r600_perfcounter_block *block;

	if (!pc)
		return 0;

	if (!info)
		return pc->num_groups;

	block = lookup_group(pc, &index);
	if (!block)
		return 0;

	if (!block->group_names) {
		if (!r600_init_block_names(screen, block))
			return 0;
	}
	info->name = block->group_names + index * block->group_name_stride;
	info->num_queries = block->num_selectors;
	info->max_active_queries = block->num_counters;
	return 1;
}

void si_perfcounters_destroy(struct si_screen *sscreen)
{
	if (sscreen->perfcounters)
		sscreen->perfcounters->cleanup(sscreen);
}

bool si_perfcounters_init(struct r600_perfcounters *pc,
			  unsigned num_blocks)
{
	pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
	if (!pc->blocks)
		return false;

	pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
	pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);

	return true;
}

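/* Register one hardware block with the perfcounter framework. The number
 * of exposed groups is the product of the block's instances, the shader
 * engines and the shader types, depending on which grouping flags end up
 * set; the RADEON_PC_SEPARATE_SE and RADEON_PC_SEPARATE_INSTANCE debug
 * options (read in si_perfcounters_init) force per-SE and per-instance
 * grouping. */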
void si_perfcounters_add_block(struct si_screen *sscreen,
			       struct r600_perfcounters *pc,
			       const char *name, unsigned flags,
			       unsigned counters, unsigned selectors,
			       unsigned instances, void *data)
{
	struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];

	assert(counters <= R600_QUERY_MAX_COUNTERS);

	block->basename = name;
	block->flags = flags;
	block->num_counters = counters;
	block->num_selectors = selectors;
	block->num_instances = MAX2(instances, 1);
	block->data = data;

	if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
		block->flags |= R600_PC_BLOCK_SE_GROUPS;
	if (pc->separate_instance && block->num_instances > 1)
		block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		block->num_groups = block->num_instances;
	} else {
		block->num_groups = 1;
	}

	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
		block->num_groups *= sscreen->info.max_se;
	if (block->flags & R600_PC_BLOCK_SHADER)
		block->num_groups *= pc->num_shader_types;

	++pc->num_blocks;
	pc->num_groups += block->num_groups;
}

void si_perfcounters_do_destroy(struct r600_perfcounters *pc)
{
	unsigned i;

	for (i = 0; i < pc->num_blocks; ++i) {
		FREE(pc->blocks[i].group_names);
		FREE(pc->blocks[i].selector_names);
	}
	FREE(pc->blocks);
	FREE(pc);
}