freedreno/perfcntrs: add fdperf
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdint.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <curses.h>
41 #include <libconfig.h>
42
43 #include "drm/freedreno_drmif.h"
44 #include "drm/freedreno_ringbuffer.h"
45
46 #include "freedreno_perfcntr.h"
47
/* Capacity of the per-counter arrays in struct counter_group: */
#define MAX_CNTR_PER_GROUP 24

/* NOTE the first counter group should always be CP, since a CP counter is
 * unconditionally used to measure the gpu freq.
 */

/* Sampling state kept for one group of performance counters. */
struct counter_group {
    /* description of the group this state belongs to: */
    const struct fd_perfcntr_group *group;

    /* per-counter description, current selection, and pointers to the
     * hi/lo value registers:
     */
    struct {
        const struct fd_perfcntr_counter *counter;
        uint16_t select_val;
        volatile uint32_t *val_hi;
        volatile uint32_t *val_lo;
    } counter[MAX_CNTR_PER_GROUP];

    /* time of the last sample: */
    uint32_t stime[MAX_CNTR_PER_GROUP];
    /* value at the last sample.  For now only the low 32b are tracked, so
     * that we don't have to care that the hi and lo regs can't be sampled
     * at the same time:
     */
    uint32_t last[MAX_CNTR_PER_GROUP];
    /* current value, ie. the counter increase over the last sampling
     * period divided by the length of that period:
     */
    float current[MAX_CNTR_PER_GROUP];
    /* name of the currently selected countable (for UI): */
    const char *label[MAX_CNTR_PER_GROUP];
};
78
/* Global state describing the GPU being monitored. */
static struct {
    /* path of the gpu's node under /proc/device-tree: */
    char *dtnode;
    /* #address-cells / #size-cells of the parent node: */
    int address_cells;
    int size_cells;
    /* i/o region, as read from the "reg" property: */
    uint64_t base;
    uint32_t size;
    /* mapping of the i/o region: */
    void *io;
    uint32_t chipid;
    uint32_t min_freq;
    uint32_t max_freq;
    /* per-generation table of counters: */
    unsigned ngroups;
    struct counter_group *groups;
    /* drm device (for writing select regs via ring): */
    struct fd_device *dev;
    struct fd_pipe *pipe;
    struct fd_submit *submit;
    struct fd_ringbuffer *ring;
} dev;

/* defined further down, next to the rest of the config handling: */
static void config_save(void);
static void config_restore(void);
100
101 /*
102 * helpers
103 */
104
/* Granularity (in bytes) in which readfile() grows its buffer: */
#define CHUNKSIZE 32

/*
 * Read the whole file at 'path' into a freshly allocated buffer.
 *
 * On success the buffer is returned (the caller must free() it) and the
 * number of bytes read is stored in *sz.  One NUL byte, not counted in
 * *sz, is stored right after the data so that textual contents can be
 * used as a C string.
 *
 * Returns NULL, with *sz set to 0, if the file cannot be opened or read
 * or if memory runs out.
 */
static void *
readfile(const char *path, int *sz)
{
    char *buf = NULL;
    int n = 0;

    *sz = 0;

    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return NULL;

    while (1) {
        /* room for one more chunk plus the trailing NUL: */
        char *tmp = realloc(buf, n + CHUNKSIZE + 1);
        if (!tmp)
            goto fail;
        buf = tmp;

        int ret = read(fd, buf + n, CHUNKSIZE);
        if (ret < 0)
            goto fail;

        /* only a zero-length read means EOF; a short read does not */
        if (ret == 0)
            break;

        n += ret;
    }

    close(fd);

    buf[n] = '\0';
    *sz = n;
    return buf;

fail:
    free(buf);
    close(fd);
    return NULL;
}
133
134 static uint32_t
135 gettime_us(void)
136 {
137 struct timespec ts;
138 clock_gettime(CLOCK_MONOTONIC, &ts);
139 return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
140 }
141
/*
 * Distance from sample 'a' to the later sample 'b' of a free-running 32b
 * counter.
 *
 * Unsigned subtraction is modulo 2^32, so a single rollover between the
 * two samples is handled exactly (e.g. a=0xffffffff, b=0 gives 1).
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
    return b - a;
}
151
152 /*
153 * TODO de-duplicate OUT_RING() and friends
154 */
155
156 #define CP_WAIT_FOR_IDLE 38
157 #define CP_TYPE0_PKT 0x00000000
158 #define CP_TYPE3_PKT 0xc0000000
159 #define CP_TYPE4_PKT 0x40000000
160 #define CP_TYPE7_PKT 0x70000000
161
162 static inline void
163 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
164 {
165 *(ring->cur++) = data;
166 }
167
168 static inline void
169 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
170 {
171 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
172 }
173
174 static inline void
175 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
176 {
177 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
178 }
179
180
181 /*
182 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
183 */
184
/*
 * Returns the bit that gives 'val' plus that bit odd parity, ie. 1 when
 * an even number of bits is set in 'val' and 0 otherwise.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
    unsigned folded = val;

    /* fold the parity of all 32 bits down into the low nibble: */
    folded ^= folded >> 16;
    folded ^= folded >> 8;
    folded ^= folded >> 4;

    unsigned nibble = folded & 0xf;

    /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
     * 0x6996 is the parity lookup table for a nibble; we want odd parity
     * so the looked-up bit is inverted.
     */
    return ((0x6996 >> nibble) & 1) ^ 1;
}
197
198 static inline void
199 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
200 {
201 OUT_RING(ring, CP_TYPE4_PKT | cnt |
202 (_odd_parity_bit(cnt) << 7) |
203 ((regindx & 0x3ffff) << 8) |
204 ((_odd_parity_bit(regindx) << 27)));
205 }
206
207 static inline void
208 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
209 {
210 OUT_RING(ring, CP_TYPE7_PKT | cnt |
211 (_odd_parity_bit(cnt) << 15) |
212 ((opcode & 0x7f) << 16) |
213 ((_odd_parity_bit(opcode) << 23)));
214 }
215
216 /*
217 * code to find stuff in /proc/device-tree:
218 *
219 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
220 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
221 * we would be competing with whatever else is using the GPU.
222 */
223
224 static void *
225 readdt(const char *node)
226 {
227 char *path;
228 void *buf;
229 int sz;
230
231 asprintf(&path, "%s/%s", dev.dtnode, node);
232 buf = readfile(path, &sz);
233 free(path);
234
235 return buf;
236 }
237
238 static int
239 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
240 {
241 const char *fname = fpath + ftwbuf->base;
242 int sz;
243
244 if (strcmp(fname, "qcom,gpu-freq") == 0) {
245 uint32_t *buf = readfile(fpath, &sz);
246 uint32_t freq = ntohl(buf[0]);
247 free(buf);
248 dev.max_freq = MAX2(dev.max_freq, freq);
249 dev.min_freq = MIN2(dev.min_freq, freq);
250 }
251
252 return 0;
253 }
254
255 static void
256 find_freqs(void)
257 {
258 char *path;
259 int ret;
260
261 dev.min_freq = ~0;
262 dev.max_freq = 0;
263
264 asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
265
266 ret = nftw(path, find_freqs_fn, 64, 0);
267 if (ret < 0)
268 err(1, "could not find power levels");
269
270 free(path);
271 }
272
273 static int
274 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
275 {
276 const char *fname = fpath + ftwbuf->base;
277 int sz;
278
279 if (strcmp(fname, "compatible") == 0) {
280 char *str = readfile(fpath, &sz);
281 if ((strcmp(str, "qcom,adreno-3xx") == 0) ||
282 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
283 (strstr(str, "qcom,adreno") == str)) {
284 int dlen = strlen(fpath) - strlen("/compatible");
285 dev.dtnode = malloc(dlen + 1);
286 memcpy(dev.dtnode, fpath, dlen);
287 printf("found dt node: %s\n", dev.dtnode);
288
289 char buf[dlen + sizeof("/../#address-cells") + 1];
290 int sz, *val;
291
292 sprintf(buf, "%s/../#address-cells", dev.dtnode);
293 val = readfile(buf, &sz);
294 dev.address_cells = ntohl(*val);
295 free(val);
296
297 sprintf(buf, "%s/../#size-cells", dev.dtnode);
298 val = readfile(buf, &sz);
299 dev.size_cells = ntohl(*val);
300 free(val);
301
302 printf("#address-cells=%d, #size-cells=%d\n",
303 dev.address_cells, dev.size_cells);
304 }
305 free(str);
306 }
307 if (dev.dtnode) {
308 /* we found it! */
309 return 1;
310 }
311 return 0;
312 }
313
314 static void
315 find_device(void)
316 {
317 int ret, fd;
318 uint32_t *buf, *b;
319
320 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
321 if (ret < 0)
322 err(1, "could not find adreno gpu");
323
324 if (!dev.dtnode)
325 errx(1, "could not find qcom,adreno-3xx node");
326
327 fd = open("/dev/dri/card0", O_RDWR);
328 if (fd < 0)
329 err(1, "could not open drm device");
330
331 dev.dev = fd_device_new(fd);
332 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
333
334 uint64_t val;
335 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
336 if (ret) {
337 err(1, "could not get gpu-id");
338 }
339 dev.chipid = val;
340
341 #define CHIP_FMT "d%d%d.%d"
342 #define CHIP_ARGS(chipid) \
343 ((chipid) >> 24) & 0xff, \
344 ((chipid) >> 16) & 0xff, \
345 ((chipid) >> 8) & 0xff, \
346 ((chipid) >> 0) & 0xff
347 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
348
349 b = buf = readdt("reg");
350
351 if (dev.address_cells == 2) {
352 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
353 dev.base = (((uint64_t)u[0]) << 32) | u[1];
354 buf += 2;
355 } else {
356 dev.base = ntohl(buf[0]);
357 buf += 1;
358 }
359
360 if (dev.size_cells == 2) {
361 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
362 dev.size = (((uint64_t)u[0]) << 32) | u[1];
363 buf += 2;
364 } else {
365 dev.size = ntohl(buf[0]);
366 buf += 1;
367 }
368
369 free(b);
370
371 printf("i/o region at %08lx (size: %x)\n", dev.base, dev.size);
372
373 /* try MAX_FREQ first as that will work regardless of old dt
374 * dt bindings vs upstream bindings:
375 */
376 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
377 if (ret) {
378 printf("falling back to parsing DT bindings for freq\n");
379 find_freqs();
380 } else {
381 dev.min_freq = 0;
382 dev.max_freq = val;
383 }
384
385 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
386
387 fd = open("/dev/mem", O_RDWR | O_SYNC);
388 if (fd < 0)
389 err(1, "could not open /dev/mem");
390
391 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
392 if (!dev.io)
393 err(1, "could not map device");
394 }
395
396 /*
397 * perf-monitor
398 */
399
400 static void
401 flush_ring(void)
402 {
403 int ret;
404
405 if (!dev.submit)
406 return;
407
408 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
409 if (ret)
410 errx(1, "submit failed: %d", ret);
411 fd_ringbuffer_del(dev.ring);
412 fd_submit_del(dev.submit);
413
414 dev.ring = NULL;
415 dev.submit = NULL;
416 }
417
418 static void
419 select_counter(struct counter_group *group, int ctr, int n)
420 {
421 assert(n < group->group->num_countables);
422 assert(ctr < group->group->num_counters);
423
424 group->label[ctr] = group->group->countables[n].name;
425 group->counter[ctr].select_val = n;
426
427 if (!dev.submit) {
428 dev.submit = fd_submit_new(dev.pipe);
429 dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
430 FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
431 }
432
433 /* bashing select register directly while gpu is active will end
434 * in tears.. so we need to write it via the ring:
435 *
436 * TODO it would help startup time, if gpu is loaded, to batch
437 * all the initial writes and do a single flush.. although that
438 * makes things more complicated for capturing inital sample value
439 */
440 struct fd_ringbuffer *ring = dev.ring;
441 switch (dev.chipid >> 24) {
442 case 3:
443 case 4:
444 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
445 OUT_RING(ring, 0x00000000);
446
447 if (group->group->counters[ctr].enable) {
448 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
449 OUT_RING(ring, 0);
450 }
451
452 if (group->group->counters[ctr].clear) {
453 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
454 OUT_RING(ring, 1);
455
456 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
457 OUT_RING(ring, 0);
458 }
459
460 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
461 OUT_RING(ring, n);
462
463 if (group->group->counters[ctr].enable) {
464 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
465 OUT_RING(ring, 1);
466 }
467
468 break;
469 case 5:
470 case 6:
471 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
472
473 if (group->group->counters[ctr].enable) {
474 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
475 OUT_RING(ring, 0);
476 }
477
478 if (group->group->counters[ctr].clear) {
479 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
480 OUT_RING(ring, 1);
481
482 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
483 OUT_RING(ring, 0);
484 }
485
486 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
487 OUT_RING(ring, n);
488
489 if (group->group->counters[ctr].enable) {
490 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
491 OUT_RING(ring, 1);
492 }
493
494 break;
495 }
496
497 group->last[ctr] = *group->counter[ctr].val_lo;
498 group->stime[ctr] = gettime_us();
499 }
500
501 static void
502 resample_counter(struct counter_group *group, int ctr)
503 {
504 uint32_t val = *group->counter[ctr].val_lo;
505 uint32_t t = gettime_us();
506 uint32_t dt = delta(group->stime[ctr], t);
507 uint32_t dval = delta(group->last[ctr], val);
508 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
509 group->last[ctr] = val;
510 group->stime[ctr] = t;
511 }
512
513 #define REFRESH_MS 500
514
515 /* sample all the counters: */
516 static void
517 resample(void)
518 {
519 static uint64_t last_time;
520 uint64_t current_time = gettime_us();
521
522 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
523 return;
524
525 last_time = current_time;
526
527 for (unsigned i = 0; i < dev.ngroups; i++) {
528 struct counter_group *group = &dev.groups[i];
529 for (unsigned j = 0; j < group->group->num_counters; j++) {
530 resample_counter(group, j);
531 }
532 }
533 }
534
535 /*
536 * The UI
537 */
538
/* curses color-pair ids: */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER 2
#define COLOR_INVERSE 3

/* window width and height, updated on each redraw: */
static int w;
static int h;
/* width of the label column: */
static int ctr_width;
/* number of rows in the counter list: */
static int max_rows;
/* currently selected row; starts at 1 because row 0 is a group header: */
static int current_cntr = 1;
546
547 static void
548 redraw_footer(WINDOW *win)
549 {
550 char *footer;
551 int n;
552
553 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
554 CHIP_ARGS(dev.chipid),
555 ((float)dev.min_freq) / 1000000.0,
556 ((float)dev.max_freq) / 1000000.0);
557
558 wmove(win, h - 1, 0);
559 wattron(win, COLOR_PAIR(COLOR_FOOTER));
560 waddstr(win, footer);
561 whline(win, ' ', w - n);
562 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
563
564 free(footer);
565 }
566
567 static void
568 redraw_group_header(WINDOW *win, int row, const char *name)
569 {
570 wmove(win, row, 0);
571 wattron(win, A_BOLD);
572 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
573 waddstr(win, name);
574 whline(win, ' ', w - strlen(name));
575 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
576 wattroff(win, A_BOLD);
577 }
578
579 static void
580 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
581 {
582 int n = strlen(name);
583 assert(n <= ctr_width);
584 wmove(win, row, 0);
585 whline(win, ' ', ctr_width - n);
586 wmove(win, row, ctr_width - n);
587 if (selected)
588 wattron(win, COLOR_PAIR(COLOR_INVERSE));
589 waddstr(win, name);
590 if (selected)
591 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
592 waddstr(win, ": ");
593 }
594
595 static void
596 redraw_counter_value_cycles(WINDOW *win, float val)
597 {
598 char *str;
599 int x = getcurx(win);
600 int valwidth = w - x;
601 int barwidth, n;
602
603 /* convert to fraction of max freq: */
604 val = val / (float)dev.max_freq;
605
606 /* figure out percentage-bar width: */
607 barwidth = (int)(val * valwidth);
608
609 /* sometimes things go over 100%.. idk why, could be
610 * things running faster than base clock, or counter
611 * summing up cycles in multiple cores?
612 */
613 barwidth = MIN2(barwidth, valwidth - 1);
614
615 n = asprintf(&str, "%.2f%%", 100.0 * val);
616 wattron(win, COLOR_PAIR(COLOR_INVERSE));
617 waddnstr(win, str, barwidth);
618 if (barwidth > n) {
619 whline(win, ' ', barwidth - n);
620 wmove(win, getcury(win), x + barwidth);
621 }
622 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
623 if (barwidth < n)
624 waddstr(win, str + barwidth);
625 whline(win, ' ', w - getcurx(win));
626
627 free(str);
628 }
629
630 static void
631 redraw_counter_value_raw(WINDOW *win, float val)
632 {
633 char *str;
634 asprintf(&str, "%'.2f", val);
635 waddstr(win, str);
636 whline(win, ' ', w - getcurx(win));
637 free(str);
638 }
639
640 static void
641 redraw_counter(WINDOW *win, int row, struct counter_group *group,
642 int ctr, bool selected)
643 {
644 redraw_counter_label(win, row, group->label[ctr], selected);
645
646 /* quick hack, if the label has "CYCLE" in the name, it is
647 * probably a cycle counter ;-)
648 * Perhaps add more info in rnndb schema to know how to
649 * treat individual counters (ie. which are cycles, and
650 * for those we want to present as a percentage do we
651 * need to scale the result.. ie. is it running at some
652 * multiple or divisor of core clk, etc)
653 *
654 * TODO it would be much more clever to get this from xml
655 * Also.. in some cases I think we want to know how many
656 * units the counter is counting for, ie. if a320 has 2x
657 * shader as a306 we might need to scale the result..
658 */
659 if (strstr(group->label[ctr], "CYCLE") ||
660 strstr(group->label[ctr], "BUSY") ||
661 strstr(group->label[ctr], "IDLE"))
662 redraw_counter_value_cycles(win, group->current[ctr]);
663 else
664 redraw_counter_value_raw(win, group->current[ctr]);
665 }
666
667 static void
668 redraw(WINDOW *win)
669 {
670 static int scroll = 0;
671 int max, row = 0;
672
673 w = getmaxx(win);
674 h = getmaxy(win);
675
676 max = h - 3;
677
678 if ((current_cntr - scroll) > (max - 1)) {
679 scroll = current_cntr - (max - 1);
680 } else if ((current_cntr - 1) < scroll) {
681 scroll = current_cntr - 1;
682 }
683
684 for (unsigned i = 0; i < dev.ngroups; i++) {
685 struct counter_group *group = &dev.groups[i];
686 unsigned j = 0;
687
688 /* NOTE skip CP the first CP counter */
689 if (i == 0)
690 j++;
691
692 if (j < group->group->num_counters) {
693 if ((scroll <= row) && ((row - scroll) < max))
694 redraw_group_header(win, row - scroll, group->group->name);
695 row++;
696 }
697
698 for (; j < group->group->num_counters; j++) {
699 if ((scroll <= row) && ((row - scroll) < max))
700 redraw_counter(win, row - scroll, group, j, row == current_cntr);
701 row++;
702 }
703 }
704
705 /* convert back to physical (unscrolled) offset: */
706 row = max;
707
708 redraw_group_header(win, row, "Status");
709 row++;
710
711 /* Draw GPU freq row: */
712 redraw_counter_label(win, row, "Freq (MHz)", false);
713 redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
714 row++;
715
716 redraw_footer(win);
717
718 refresh();
719 }
720
721 static struct counter_group *
722 current_counter(int *ctr)
723 {
724 int n = 0;
725
726 for (unsigned i = 0; i < dev.ngroups; i++) {
727 struct counter_group *group = &dev.groups[i];
728 unsigned j = 0;
729
730 /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
731 if (i == 0)
732 j++;
733
734 /* account for group header: */
735 if (j < group->group->num_counters) {
736 /* cannot select group header.. return null to indicate this
737 * main_ui():
738 */
739 if (n == current_cntr)
740 return NULL;
741 n++;
742 }
743
744
745 for (; j < group->group->num_counters; j++) {
746 if (n == current_cntr) {
747 if (ctr)
748 *ctr = j;
749 return group;
750 }
751 n++;
752 }
753 }
754
755 assert(0);
756 return NULL;
757 }
758
759 static void
760 counter_dialog(void)
761 {
762 WINDOW *dialog;
763 struct counter_group *group;
764 int cnt, current = 0, scroll;
765
766 /* figure out dialog size: */
767 int dh = h/2;
768 int dw = ctr_width + 2;
769
770 group = current_counter(&cnt);
771
772 /* find currently selected idx (note there can be discontinuities
773 * so the selected value does not map 1:1 to current idx)
774 */
775 uint32_t selected = group->counter[cnt].select_val;
776 for (int i = 0; i < group->group->num_countables; i++) {
777 if (group->group->countables[i].selector == selected) {
778 current = i;
779 break;
780 }
781 }
782
783 /* scrolling offset, if dialog is too small for all the choices: */
784 scroll = 0;
785
786 dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
787 box(dialog, 0, 0);
788 wrefresh(dialog);
789 keypad(dialog, TRUE);
790
791 while (true) {
792 int max = MIN2(dh - 2, group->group->num_countables);
793 int selector = -1;
794
795 if ((current - scroll) >= (dh - 3)) {
796 scroll = current - (dh - 3);
797 } else if (current < scroll) {
798 scroll = current;
799 }
800
801 for (int i = 0; i < max; i++) {
802 int n = scroll + i;
803 wmove(dialog, i+1, 1);
804 if (n == current) {
805 assert (n < group->group->num_countables);
806 selector = group->group->countables[n].selector;
807 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
808 }
809 if (n < group->group->num_countables)
810 waddstr(dialog, group->group->countables[n].name);
811 whline(dialog, ' ', dw - getcurx(dialog) - 1);
812 if (n == current)
813 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
814 }
815
816 assert (selector >= 0);
817
818 switch (wgetch(dialog)) {
819 case KEY_UP:
820 current = MAX2(0, current - 1);
821 break;
822 case KEY_DOWN:
823 current = MIN2(group->group->num_countables - 1, current + 1);
824 break;
825 case KEY_LEFT:
826 case KEY_ENTER:
827 /* select new sampler */
828 select_counter(group, cnt, selector);
829 flush_ring();
830 config_save();
831 goto out;
832 case 'q':
833 goto out;
834 default:
835 /* ignore */
836 break;
837 }
838
839 resample();
840 }
841
842 out:
843 wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
844 delwin(dialog);
845 }
846
847 static void
848 scroll_cntr(int amount)
849 {
850 if (amount < 0) {
851 current_cntr = MAX2(1, current_cntr + amount);
852 if (current_counter(NULL) == NULL) {
853 current_cntr = MAX2(1, current_cntr - 1);
854 }
855 } else {
856 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
857 if (current_counter(NULL) == NULL)
858 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
859 }
860 }
861
862 static void
863 main_ui(void)
864 {
865 WINDOW *mainwin;
866
867 /* curses setup: */
868 mainwin = initscr();
869 if (!mainwin)
870 goto out;
871
872 cbreak();
873 wtimeout(mainwin, REFRESH_MS);
874 noecho();
875 keypad(mainwin, TRUE);
876 curs_set(0);
877 start_color();
878 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
879 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
880 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
881
882 while (true) {
883 switch (wgetch(mainwin)) {
884 case KEY_UP:
885 scroll_cntr(-1);
886 break;
887 case KEY_DOWN:
888 scroll_cntr(+1);
889 break;
890 case KEY_NPAGE: /* page-down */
891 /* TODO figure out # of rows visible? */
892 scroll_cntr(+15);
893 break;
894 case KEY_PPAGE: /* page-up */
895 /* TODO figure out # of rows visible? */
896 scroll_cntr(-15);
897 break;
898 case KEY_RIGHT:
899 counter_dialog();
900 break;
901 case 'q':
902 goto out;
903 break;
904 default:
905 /* ignore */
906 break;
907 }
908 resample();
909 redraw(mainwin);
910 }
911
912 /* restore settings.. maybe we need an atexit()??*/
913 out:
914 delwin(mainwin);
915 endwin();
916 refresh();
917 }
918
919 static void
920 setup_counter_groups(const struct fd_perfcntr_group *groups)
921 {
922 for (unsigned i = 0; i < dev.ngroups; i++) {
923 struct counter_group *group = &dev.groups[i];
924
925 group->group = &groups[i];
926
927 max_rows += group->group->num_counters + 1;
928
929 /* the first CP counter is hidden: */
930 if (i == 0) {
931 max_rows--;
932 if (group->group->num_counters <= 1)
933 max_rows--;
934 }
935
936 for (unsigned j = 0; j < group->group->num_counters; j++) {
937 group->counter[j].counter = &group->group->counters[j];
938
939 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
940 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
941
942 select_counter(group, j, j);
943 }
944
945 for (unsigned j = 0; j < group->group->num_countables; j++) {
946 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
947 }
948 }
949 }
950
951 /*
952 * configuration / persistence
953 */
954
955 static config_t cfg;
956 static config_setting_t *setting;
957
958 static void
959 config_save(void)
960 {
961 for (unsigned i = 0; i < dev.ngroups; i++) {
962 struct counter_group *group = &dev.groups[i];
963 unsigned j = 0;
964
965 /* NOTE skip CP the first CP counter */
966 if (i == 0)
967 j++;
968
969 config_setting_t *sect =
970 config_setting_get_member(setting, group->group->name);
971
972 for (; j < group->group->num_counters; j++) {
973 char name[] = "counter0000";
974 sprintf(name, "counter%d", j);
975 config_setting_t *s =
976 config_setting_lookup(sect, name);
977 config_setting_set_int(s, group->counter[j].select_val);
978 }
979 }
980
981 config_write_file(&cfg, "fdperf.cfg");
982 }
983
984 static void
985 config_restore(void)
986 {
987 char *str;
988
989 config_init(&cfg);
990
991 /* Read the file. If there is an error, report it and exit. */
992 if(!config_read_file(&cfg, "fdperf.cfg")) {
993 warn("could not restore settings");
994 }
995
996 config_setting_t *root = config_root_setting(&cfg);
997
998 /* per device settings: */
999 asprintf(&str, "a%dxx", dev.chipid >> 24);
1000 setting = config_setting_get_member(root, str);
1001 if (!setting)
1002 setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
1003 free(str);
1004
1005 for (unsigned i = 0; i < dev.ngroups; i++) {
1006 struct counter_group *group = &dev.groups[i];
1007 unsigned j = 0;
1008
1009 /* NOTE skip CP the first CP counter */
1010 if (i == 0)
1011 j++;
1012
1013 config_setting_t *sect =
1014 config_setting_get_member(setting, group->group->name);
1015
1016 if (!sect) {
1017 sect = config_setting_add(setting, group->group->name,
1018 CONFIG_TYPE_GROUP);
1019 }
1020
1021 for (; j < group->group->num_counters; j++) {
1022 char name[] = "counter0000";
1023 sprintf(name, "counter%d", j);
1024 config_setting_t *s = config_setting_lookup(sect, name);
1025 if (!s) {
1026 config_setting_add(sect, name, CONFIG_TYPE_INT);
1027 continue;
1028 }
1029 select_counter(group, j, config_setting_get_int(s));
1030 }
1031 }
1032 }
1033
1034 /*
1035 * main
1036 */
1037
1038 int
1039 main(int argc, char **argv)
1040 {
1041 find_device();
1042
1043 const struct fd_perfcntr_group *groups;
1044 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1045 if (!groups) {
1046 errx(1, "no perfcntr support");
1047 }
1048
1049 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1050
1051 setup_counter_groups(groups);
1052 config_restore();
1053 flush_ring();
1054
1055 main_ui();
1056
1057 return 0;
1058 }