d53828dc0b6f693b7bccb6bc8fb151ba90854d99
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <locale.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdint.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/mman.h>
39 #include <time.h>
40 #include <unistd.h>
41 #include <curses.h>
42 #include <libconfig.h>
43 #include <inttypes.h>
44 #include <xf86drm.h>
45
46 #include "drm/freedreno_drmif.h"
47 #include "drm/freedreno_ringbuffer.h"
48
49 #include "freedreno_perfcntr.h"
50
/* Capacity of the fixed-size per-counter arrays in struct counter_group: */
#define MAX_CNTR_PER_GROUP 24

/* NOTE first counter group should always be CP, since we unconditionally
 * use CP counter to measure the gpu freq.
 */

/* Runtime state for one group of performance counters: */
struct counter_group {
	/* static description of the group (its counters and countables): */
	const struct fd_perfcntr_group *group;

	/* per-counter description, current selection, and pointers to the
	 * hi/lo halves of the counter value:
	 */
	struct {
		const struct fd_perfcntr_counter *counter;
		uint16_t select_val;
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* time (in us) at which each counter was last sampled: */
	uint32_t stime[MAX_CNTR_PER_GROUP];

	/* value of each counter at the last sample.  For now only the low 32b
	 * are tracked.. at least then we don't have to really care that hi and
	 * lo regs can't be sampled at the same time:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];

	/* current rate, ie. by how much the counter increased over the last
	 * sampling period, divided by the sampling period:
	 */
	float current[MAX_CNTR_PER_GROUP];

	/* name of the currently selected countable (for UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
81
/* Global state describing the (single) GPU being monitored: */
static struct {
	/* path of the GPU's device-tree node, and the cell sizes read from
	 * its parent node:
	 */
	char *dtnode;
	int address_cells;
	int size_cells;

	/* register region (from the "reg" dt property) and its mapping: */
	uint64_t base;
	uint32_t size;
	void *io;

	uint32_t chipid;

	/* frequency range shown in the footer: */
	uint32_t min_freq;
	uint32_t max_freq;

	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;

	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;

/* defined after the UI code which calls them: */
static void config_save(void);
static void config_restore(void);
static void restore_counter_groups(void);
104
105 /*
106 * helpers
107 */
108
#define CHUNKSIZE 32

/**
 * Read the whole contents of 'path' into a heap buffer.
 *
 * On success returns a buffer that the caller must free() and stores the
 * number of bytes read in '*sz'.  The buffer is NOT NUL-terminated.  On any
 * failure (open, read, or out of memory) returns NULL with '*sz' set to 0.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int n = 0;

	/* make sure the caller never sees an uninitialized size: */
	*sz = 0;

	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* grow via a temporary, so the old buffer isn't leaked if
		 * realloc() fails:
		 */
		char *tmp = realloc(buf, n + CHUNKSIZE);
		if (!tmp)
			goto fail;
		buf = tmp;

		ssize_t ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0)
			goto fail;

		/* only a zero-length read means end-of-file.. a short read
		 * just means that less data was available this time around:
		 */
		if (ret == 0)
			break;

		n += ret;
	}

	close(fd);
	*sz = n;
	return buf;

fail:
	free(buf);
	close(fd);
	return NULL;
}
139
/* Current CLOCK_MONOTONIC time in microseconds, truncated to 32 bits.  The
 * value wraps (roughly every 71 minutes), so timestamps should only be
 * compared via delta().
 */
static uint32_t
gettime_us(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);

	/* do the math in unsigned 64b so the multiply cannot overflow a
	 * (possibly 32b, signed) time_t, then truncate explicitly:
	 */
	uint64_t us = ((uint64_t)ts.tv_sec * 1000000u) +
		((uint64_t)ts.tv_nsec / 1000u);

	return (uint32_t)us;
}
147
/* Distance from 'a' to 'b' on a free-running 32b counter (or timestamp),
 * where 'b' was sampled after 'a'.
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
	/* unsigned subtraction is modulo 2^32, so it already gives the right
	 * answer when the counter rolled over between the two samples:
	 */
	return b - a;
}
157
158 /*
159 * code to find stuff in /proc/device-tree:
160 *
161 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
162 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
163 * we would be competing with whatever else is using the GPU.
164 */
165
166 static void *
167 readdt(const char *node)
168 {
169 char *path;
170 void *buf;
171 int sz;
172
173 (void) asprintf(&path, "%s/%s", dev.dtnode, node);
174 buf = readfile(path, &sz);
175 free(path);
176
177 return buf;
178 }
179
180 static int
181 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
182 {
183 const char *fname = fpath + ftwbuf->base;
184 int sz;
185
186 if (strcmp(fname, "qcom,gpu-freq") == 0) {
187 uint32_t *buf = readfile(fpath, &sz);
188 uint32_t freq = ntohl(buf[0]);
189 free(buf);
190 dev.max_freq = MAX2(dev.max_freq, freq);
191 dev.min_freq = MIN2(dev.min_freq, freq);
192 }
193
194 return 0;
195 }
196
197 static void
198 find_freqs(void)
199 {
200 char *path;
201 int ret;
202
203 dev.min_freq = ~0;
204 dev.max_freq = 0;
205
206 (void) asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
207
208 ret = nftw(path, find_freqs_fn, 64, 0);
209 if (ret < 0)
210 err(1, "could not find power levels");
211
212 free(path);
213 }
214
/* compatible strings which identify a GPU node we can handle: */
static const char *const compatibles[] = {
	"qcom,adreno-3xx",
	"qcom,kgsl-3d0",
	"amd,imageon",
	"qcom,adreno",
};

/**
 * compatstrs is a list of compatible strings separated by null, ie.
 *
 *       compatible = "qcom,adreno-630.2", "qcom,adreno";
 *
 * would result in "qcom,adreno-630.2\0qcom,adreno\0"
 *
 * sz is the total size of the list in bytes.  Returns true if any entry
 * matches one of the known compatibles.  A NULL list never matches, and a
 * trailing entry which is not null-terminated within sz bytes is ignored
 * rather than read past the end of the buffer.
 */
static bool match_compatible(char *compatstrs, int sz)
{
	if (!compatstrs)
		return false;

	while (sz > 0) {
		const char *compatible = compatstrs;
		const char *end = memchr(compatible, '\0', sz);

		/* unterminated entry, can't safely strcmp() it: */
		if (!end)
			break;

		for (unsigned i = 0; i < sizeof(compatibles) / sizeof(compatibles[0]); i++) {
			if (strcmp(compatible, compatibles[i]) == 0) {
				return true;
			}
		}

		/* skip over this entry and its terminator: */
		int len = (int)(end - compatible) + 1;
		compatstrs += len;
		sz -= len;
	}
	return false;
}
245
246 static int
247 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
248 {
249 const char *fname = fpath + ftwbuf->base;
250 int sz;
251
252 if (strcmp(fname, "compatible") == 0) {
253 char *str = readfile(fpath, &sz);
254 if (match_compatible(str, sz)) {
255 int dlen = strlen(fpath) - strlen("/compatible");
256 dev.dtnode = malloc(dlen + 1);
257 memcpy(dev.dtnode, fpath, dlen);
258 printf("found dt node: %s\n", dev.dtnode);
259
260 char buf[dlen + sizeof("/../#address-cells") + 1];
261 int sz, *val;
262
263 sprintf(buf, "%s/../#address-cells", dev.dtnode);
264 val = readfile(buf, &sz);
265 dev.address_cells = ntohl(*val);
266 free(val);
267
268 sprintf(buf, "%s/../#size-cells", dev.dtnode);
269 val = readfile(buf, &sz);
270 dev.size_cells = ntohl(*val);
271 free(val);
272
273 printf("#address-cells=%d, #size-cells=%d\n",
274 dev.address_cells, dev.size_cells);
275 }
276 free(str);
277 }
278 if (dev.dtnode) {
279 /* we found it! */
280 return 1;
281 }
282 return 0;
283 }
284
285 static void
286 find_device(void)
287 {
288 int ret, fd;
289 uint32_t *buf, *b;
290
291 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
292 if (ret < 0)
293 err(1, "could not find adreno gpu");
294
295 if (!dev.dtnode)
296 errx(1, "could not find qcom,adreno-3xx node");
297
298 fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
299 if (fd < 0)
300 err(1, "could not open drm device");
301
302 dev.dev = fd_device_new(fd);
303 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
304
305 uint64_t val;
306 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
307 if (ret) {
308 err(1, "could not get gpu-id");
309 }
310 dev.chipid = val;
311
312 #define CHIP_FMT "d%d%d.%d"
313 #define CHIP_ARGS(chipid) \
314 ((chipid) >> 24) & 0xff, \
315 ((chipid) >> 16) & 0xff, \
316 ((chipid) >> 8) & 0xff, \
317 ((chipid) >> 0) & 0xff
318 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
319
320 b = buf = readdt("reg");
321
322 if (dev.address_cells == 2) {
323 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
324 dev.base = (((uint64_t)u[0]) << 32) | u[1];
325 buf += 2;
326 } else {
327 dev.base = ntohl(buf[0]);
328 buf += 1;
329 }
330
331 if (dev.size_cells == 2) {
332 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
333 dev.size = (((uint64_t)u[0]) << 32) | u[1];
334 buf += 2;
335 } else {
336 dev.size = ntohl(buf[0]);
337 buf += 1;
338 }
339
340 free(b);
341
342 printf("i/o region at %08"PRIx64" (size: %x)\n", dev.base, dev.size);
343
344 /* try MAX_FREQ first as that will work regardless of old dt
345 * dt bindings vs upstream bindings:
346 */
347 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
348 if (ret) {
349 printf("falling back to parsing DT bindings for freq\n");
350 find_freqs();
351 } else {
352 dev.min_freq = 0;
353 dev.max_freq = val;
354 }
355
356 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
357
358 fd = open("/dev/mem", O_RDWR | O_SYNC);
359 if (fd < 0)
360 err(1, "could not open /dev/mem");
361
362 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
363 if (dev.io == MAP_FAILED) {
364 close(fd);
365 err(1, "could not map device");
366 }
367 }
368
369 /*
370 * perf-monitor
371 */
372
373 static void
374 flush_ring(void)
375 {
376 int ret;
377
378 if (!dev.submit)
379 return;
380
381 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
382 if (ret)
383 errx(1, "submit failed: %d", ret);
384 fd_ringbuffer_del(dev.ring);
385 fd_submit_del(dev.submit);
386
387 dev.ring = NULL;
388 dev.submit = NULL;
389 }
390
391 static void
392 select_counter(struct counter_group *group, int ctr, int n)
393 {
394 assert(n < group->group->num_countables);
395 assert(ctr < group->group->num_counters);
396
397 group->label[ctr] = group->group->countables[n].name;
398 group->counter[ctr].select_val = n;
399
400 if (!dev.submit) {
401 dev.submit = fd_submit_new(dev.pipe);
402 dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
403 FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
404 }
405
406 /* bashing select register directly while gpu is active will end
407 * in tears.. so we need to write it via the ring:
408 *
409 * TODO it would help startup time, if gpu is loaded, to batch
410 * all the initial writes and do a single flush.. although that
411 * makes things more complicated for capturing inital sample value
412 */
413 struct fd_ringbuffer *ring = dev.ring;
414 switch (dev.chipid >> 24) {
415 case 2:
416 case 3:
417 case 4:
418 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
419 OUT_RING(ring, 0x00000000);
420
421 if (group->group->counters[ctr].enable) {
422 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
423 OUT_RING(ring, 0);
424 }
425
426 if (group->group->counters[ctr].clear) {
427 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
428 OUT_RING(ring, 1);
429
430 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
431 OUT_RING(ring, 0);
432 }
433
434 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
435 OUT_RING(ring, n);
436
437 if (group->group->counters[ctr].enable) {
438 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
439 OUT_RING(ring, 1);
440 }
441
442 break;
443 case 5:
444 case 6:
445 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
446
447 if (group->group->counters[ctr].enable) {
448 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
449 OUT_RING(ring, 0);
450 }
451
452 if (group->group->counters[ctr].clear) {
453 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
454 OUT_RING(ring, 1);
455
456 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
457 OUT_RING(ring, 0);
458 }
459
460 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
461 OUT_RING(ring, n);
462
463 if (group->group->counters[ctr].enable) {
464 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
465 OUT_RING(ring, 1);
466 }
467
468 break;
469 }
470
471 group->last[ctr] = *group->counter[ctr].val_lo;
472 group->stime[ctr] = gettime_us();
473 }
474
475 static void
476 resample_counter(struct counter_group *group, int ctr)
477 {
478 uint32_t val = *group->counter[ctr].val_lo;
479 uint32_t t = gettime_us();
480 uint32_t dt = delta(group->stime[ctr], t);
481 uint32_t dval = delta(group->last[ctr], val);
482 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
483 group->last[ctr] = val;
484 group->stime[ctr] = t;
485 }
486
487 #define REFRESH_MS 500
488
489 /* sample all the counters: */
490 static void
491 resample(void)
492 {
493 static uint64_t last_time;
494 uint64_t current_time = gettime_us();
495
496 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
497 return;
498
499 last_time = current_time;
500
501 for (unsigned i = 0; i < dev.ngroups; i++) {
502 struct counter_group *group = &dev.groups[i];
503 for (unsigned j = 0; j < group->group->num_counters; j++) {
504 resample_counter(group, j);
505 }
506 }
507 }
508
509 /*
510 * The UI
511 */
512
/* curses color-pair ids (set up in main_ui()): */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER 2
#define COLOR_INVERSE 3

/* size of the main window, updated on each redraw(): */
static int w;
static int h;

/* width of the counter label column: */
static int ctr_width;

/* total number of rows (group headers plus visible counters): */
static int max_rows;

/* row of the currently selected counter: */
static int current_cntr = 1;
520
521 static void
522 redraw_footer(WINDOW *win)
523 {
524 char *footer;
525 int n;
526
527 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
528 CHIP_ARGS(dev.chipid),
529 ((float)dev.min_freq) / 1000000.0,
530 ((float)dev.max_freq) / 1000000.0);
531
532 wmove(win, h - 1, 0);
533 wattron(win, COLOR_PAIR(COLOR_FOOTER));
534 waddstr(win, footer);
535 whline(win, ' ', w - n);
536 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
537
538 free(footer);
539 }
540
541 static void
542 redraw_group_header(WINDOW *win, int row, const char *name)
543 {
544 wmove(win, row, 0);
545 wattron(win, A_BOLD);
546 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
547 waddstr(win, name);
548 whline(win, ' ', w - strlen(name));
549 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
550 wattroff(win, A_BOLD);
551 }
552
553 static void
554 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
555 {
556 int n = strlen(name);
557 assert(n <= ctr_width);
558 wmove(win, row, 0);
559 whline(win, ' ', ctr_width - n);
560 wmove(win, row, ctr_width - n);
561 if (selected)
562 wattron(win, COLOR_PAIR(COLOR_INVERSE));
563 waddstr(win, name);
564 if (selected)
565 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
566 waddstr(win, ": ");
567 }
568
569 static void
570 redraw_counter_value_cycles(WINDOW *win, float val)
571 {
572 char *str;
573 int x = getcurx(win);
574 int valwidth = w - x;
575 int barwidth, n;
576
577 /* convert to fraction of max freq: */
578 val = val / (float)dev.max_freq;
579
580 /* figure out percentage-bar width: */
581 barwidth = (int)(val * valwidth);
582
583 /* sometimes things go over 100%.. idk why, could be
584 * things running faster than base clock, or counter
585 * summing up cycles in multiple cores?
586 */
587 barwidth = MIN2(barwidth, valwidth - 1);
588
589 n = asprintf(&str, "%.2f%%", 100.0 * val);
590 wattron(win, COLOR_PAIR(COLOR_INVERSE));
591 waddnstr(win, str, barwidth);
592 if (barwidth > n) {
593 whline(win, ' ', barwidth - n);
594 wmove(win, getcury(win), x + barwidth);
595 }
596 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
597 if (barwidth < n)
598 waddstr(win, str + barwidth);
599 whline(win, ' ', w - getcurx(win));
600
601 free(str);
602 }
603
604 static void
605 redraw_counter_value_raw(WINDOW *win, float val)
606 {
607 char *str;
608 (void) asprintf(&str, "%'.2f", val);
609 waddstr(win, str);
610 whline(win, ' ', w - getcurx(win));
611 free(str);
612 }
613
614 static void
615 redraw_counter(WINDOW *win, int row, struct counter_group *group,
616 int ctr, bool selected)
617 {
618 redraw_counter_label(win, row, group->label[ctr], selected);
619
620 /* quick hack, if the label has "CYCLE" in the name, it is
621 * probably a cycle counter ;-)
622 * Perhaps add more info in rnndb schema to know how to
623 * treat individual counters (ie. which are cycles, and
624 * for those we want to present as a percentage do we
625 * need to scale the result.. ie. is it running at some
626 * multiple or divisor of core clk, etc)
627 *
628 * TODO it would be much more clever to get this from xml
629 * Also.. in some cases I think we want to know how many
630 * units the counter is counting for, ie. if a320 has 2x
631 * shader as a306 we might need to scale the result..
632 */
633 if (strstr(group->label[ctr], "CYCLE") ||
634 strstr(group->label[ctr], "BUSY") ||
635 strstr(group->label[ctr], "IDLE"))
636 redraw_counter_value_cycles(win, group->current[ctr]);
637 else
638 redraw_counter_value_raw(win, group->current[ctr]);
639 }
640
641 static void
642 redraw(WINDOW *win)
643 {
644 static int scroll = 0;
645 int max, row = 0;
646
647 w = getmaxx(win);
648 h = getmaxy(win);
649
650 max = h - 3;
651
652 if ((current_cntr - scroll) > (max - 1)) {
653 scroll = current_cntr - (max - 1);
654 } else if ((current_cntr - 1) < scroll) {
655 scroll = current_cntr - 1;
656 }
657
658 for (unsigned i = 0; i < dev.ngroups; i++) {
659 struct counter_group *group = &dev.groups[i];
660 unsigned j = 0;
661
662 /* NOTE skip CP the first CP counter */
663 if (i == 0)
664 j++;
665
666 if (j < group->group->num_counters) {
667 if ((scroll <= row) && ((row - scroll) < max))
668 redraw_group_header(win, row - scroll, group->group->name);
669 row++;
670 }
671
672 for (; j < group->group->num_counters; j++) {
673 if ((scroll <= row) && ((row - scroll) < max))
674 redraw_counter(win, row - scroll, group, j, row == current_cntr);
675 row++;
676 }
677 }
678
679 /* convert back to physical (unscrolled) offset: */
680 row = max;
681
682 redraw_group_header(win, row, "Status");
683 row++;
684
685 /* Draw GPU freq row: */
686 redraw_counter_label(win, row, "Freq (MHz)", false);
687 redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
688 row++;
689
690 redraw_footer(win);
691
692 refresh();
693 }
694
695 static struct counter_group *
696 current_counter(int *ctr)
697 {
698 int n = 0;
699
700 for (unsigned i = 0; i < dev.ngroups; i++) {
701 struct counter_group *group = &dev.groups[i];
702 unsigned j = 0;
703
704 /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
705 if (i == 0)
706 j++;
707
708 /* account for group header: */
709 if (j < group->group->num_counters) {
710 /* cannot select group header.. return null to indicate this
711 * main_ui():
712 */
713 if (n == current_cntr)
714 return NULL;
715 n++;
716 }
717
718
719 for (; j < group->group->num_counters; j++) {
720 if (n == current_cntr) {
721 if (ctr)
722 *ctr = j;
723 return group;
724 }
725 n++;
726 }
727 }
728
729 assert(0);
730 return NULL;
731 }
732
733 static void
734 counter_dialog(void)
735 {
736 WINDOW *dialog;
737 struct counter_group *group;
738 int cnt, current = 0, scroll;
739
740 /* figure out dialog size: */
741 int dh = h/2;
742 int dw = ctr_width + 2;
743
744 group = current_counter(&cnt);
745
746 /* find currently selected idx (note there can be discontinuities
747 * so the selected value does not map 1:1 to current idx)
748 */
749 uint32_t selected = group->counter[cnt].select_val;
750 for (int i = 0; i < group->group->num_countables; i++) {
751 if (group->group->countables[i].selector == selected) {
752 current = i;
753 break;
754 }
755 }
756
757 /* scrolling offset, if dialog is too small for all the choices: */
758 scroll = 0;
759
760 dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
761 box(dialog, 0, 0);
762 wrefresh(dialog);
763 keypad(dialog, TRUE);
764
765 while (true) {
766 int max = MIN2(dh - 2, group->group->num_countables);
767 int selector = -1;
768
769 if ((current - scroll) >= (dh - 3)) {
770 scroll = current - (dh - 3);
771 } else if (current < scroll) {
772 scroll = current;
773 }
774
775 for (int i = 0; i < max; i++) {
776 int n = scroll + i;
777 wmove(dialog, i+1, 1);
778 if (n == current) {
779 assert (n < group->group->num_countables);
780 selector = group->group->countables[n].selector;
781 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
782 }
783 if (n < group->group->num_countables)
784 waddstr(dialog, group->group->countables[n].name);
785 whline(dialog, ' ', dw - getcurx(dialog) - 1);
786 if (n == current)
787 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
788 }
789
790 assert (selector >= 0);
791
792 switch (wgetch(dialog)) {
793 case KEY_UP:
794 current = MAX2(0, current - 1);
795 break;
796 case KEY_DOWN:
797 current = MIN2(group->group->num_countables - 1, current + 1);
798 break;
799 case KEY_LEFT:
800 case KEY_ENTER:
801 /* select new sampler */
802 select_counter(group, cnt, selector);
803 flush_ring();
804 config_save();
805 goto out;
806 case 'q':
807 goto out;
808 default:
809 /* ignore */
810 break;
811 }
812
813 resample();
814 }
815
816 out:
817 wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
818 delwin(dialog);
819 }
820
821 static void
822 scroll_cntr(int amount)
823 {
824 if (amount < 0) {
825 current_cntr = MAX2(1, current_cntr + amount);
826 if (current_counter(NULL) == NULL) {
827 current_cntr = MAX2(1, current_cntr - 1);
828 }
829 } else {
830 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
831 if (current_counter(NULL) == NULL)
832 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
833 }
834 }
835
836 static void
837 main_ui(void)
838 {
839 WINDOW *mainwin;
840 uint32_t last_time = gettime_us();
841
842 /* curses setup: */
843 mainwin = initscr();
844 if (!mainwin)
845 goto out;
846
847 cbreak();
848 wtimeout(mainwin, REFRESH_MS);
849 noecho();
850 keypad(mainwin, TRUE);
851 curs_set(0);
852 start_color();
853 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
854 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
855 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
856
857 while (true) {
858 switch (wgetch(mainwin)) {
859 case KEY_UP:
860 scroll_cntr(-1);
861 break;
862 case KEY_DOWN:
863 scroll_cntr(+1);
864 break;
865 case KEY_NPAGE: /* page-down */
866 /* TODO figure out # of rows visible? */
867 scroll_cntr(+15);
868 break;
869 case KEY_PPAGE: /* page-up */
870 /* TODO figure out # of rows visible? */
871 scroll_cntr(-15);
872 break;
873 case KEY_RIGHT:
874 counter_dialog();
875 break;
876 case 'q':
877 goto out;
878 break;
879 default:
880 /* ignore */
881 break;
882 }
883 resample();
884 redraw(mainwin);
885
886 /* restore the counters every 0.5s in case the GPU has suspended,
887 * in which case the current selected countables will have reset:
888 */
889 uint32_t t = gettime_us();
890 if (delta(last_time, t) > 500000) {
891 restore_counter_groups();
892 flush_ring();
893 last_time = t;
894 }
895 }
896
897 /* restore settings.. maybe we need an atexit()??*/
898 out:
899 delwin(mainwin);
900 endwin();
901 refresh();
902 }
903
904 static void
905 restore_counter_groups(void)
906 {
907 for (unsigned i = 0; i < dev.ngroups; i++) {
908 struct counter_group *group = &dev.groups[i];
909 unsigned j = 0;
910
911 /* NOTE skip CP the first CP counter */
912 if (i == 0)
913 j++;
914
915 for (; j < group->group->num_counters; j++) {
916 select_counter(group, j, group->counter[j].select_val);
917 }
918 }
919 }
920
921 static void
922 setup_counter_groups(const struct fd_perfcntr_group *groups)
923 {
924 for (unsigned i = 0; i < dev.ngroups; i++) {
925 struct counter_group *group = &dev.groups[i];
926
927 group->group = &groups[i];
928
929 max_rows += group->group->num_counters + 1;
930
931 /* the first CP counter is hidden: */
932 if (i == 0) {
933 max_rows--;
934 if (group->group->num_counters <= 1)
935 max_rows--;
936 }
937
938 for (unsigned j = 0; j < group->group->num_counters; j++) {
939 group->counter[j].counter = &group->group->counters[j];
940
941 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
942 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
943
944 group->counter[j].select_val = j;
945 }
946
947 for (unsigned j = 0; j < group->group->num_countables; j++) {
948 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
949 }
950 }
951 }
952
953 /*
954 * configuration / persistence
955 */
956
957 static config_t cfg;
958 static config_setting_t *setting;
959
960 static void
961 config_save(void)
962 {
963 for (unsigned i = 0; i < dev.ngroups; i++) {
964 struct counter_group *group = &dev.groups[i];
965 unsigned j = 0;
966
967 /* NOTE skip CP the first CP counter */
968 if (i == 0)
969 j++;
970
971 config_setting_t *sect =
972 config_setting_get_member(setting, group->group->name);
973
974 for (; j < group->group->num_counters; j++) {
975 char name[] = "counter0000";
976 sprintf(name, "counter%d", j);
977 config_setting_t *s =
978 config_setting_lookup(sect, name);
979 config_setting_set_int(s, group->counter[j].select_val);
980 }
981 }
982
983 config_write_file(&cfg, "fdperf.cfg");
984 }
985
986 static void
987 config_restore(void)
988 {
989 char *str;
990
991 config_init(&cfg);
992
993 /* Read the file. If there is an error, report it and exit. */
994 if(!config_read_file(&cfg, "fdperf.cfg")) {
995 warn("could not restore settings");
996 }
997
998 config_setting_t *root = config_root_setting(&cfg);
999
1000 /* per device settings: */
1001 (void) asprintf(&str, "a%dxx", dev.chipid >> 24);
1002 setting = config_setting_get_member(root, str);
1003 if (!setting)
1004 setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
1005 free(str);
1006
1007 for (unsigned i = 0; i < dev.ngroups; i++) {
1008 struct counter_group *group = &dev.groups[i];
1009 unsigned j = 0;
1010
1011 /* NOTE skip CP the first CP counter */
1012 if (i == 0)
1013 j++;
1014
1015 config_setting_t *sect =
1016 config_setting_get_member(setting, group->group->name);
1017
1018 if (!sect) {
1019 sect = config_setting_add(setting, group->group->name,
1020 CONFIG_TYPE_GROUP);
1021 }
1022
1023 for (; j < group->group->num_counters; j++) {
1024 char name[] = "counter0000";
1025 sprintf(name, "counter%d", j);
1026 config_setting_t *s = config_setting_lookup(sect, name);
1027 if (!s) {
1028 config_setting_add(sect, name, CONFIG_TYPE_INT);
1029 continue;
1030 }
1031 select_counter(group, j, config_setting_get_int(s));
1032 }
1033 }
1034 }
1035
1036 /*
1037 * main
1038 */
1039
1040 int
1041 main(int argc, char **argv)
1042 {
1043 find_device();
1044
1045 const struct fd_perfcntr_group *groups;
1046 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1047 if (!groups) {
1048 errx(1, "no perfcntr support");
1049 }
1050
1051 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1052
1053 setlocale(LC_NUMERIC, "en_US.UTF-8");
1054
1055 setup_counter_groups(groups);
1056 restore_counter_groups();
1057 config_restore();
1058 flush_ring();
1059
1060 main_ui();
1061
1062 return 0;
1063 }