freedreno/perfcntrs/fdperf: fix u64 print on 32-bit builds
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdint.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <curses.h>
41 #include <libconfig.h>
42 #include <inttypes.h>
43
44 #include "drm/freedreno_drmif.h"
45 #include "drm/freedreno_ringbuffer.h"
46
47 #include "freedreno_perfcntr.h"
48
/* Capacity of the per-counter arrays in struct counter_group. */
#define MAX_CNTR_PER_GROUP   24

/* NOTE: the first counter group should always be CP, since the CP counter
 * is unconditionally used to measure the gpu freq.
 */

/*
 * Sampling/UI state for one group of performance counters.  All arrays
 * are indexed by the counter's position within the group.
 */
struct counter_group {
	/* static description of the group (counters + countables): */
	const struct fd_perfcntr_group *group;

	/* per-counter description, current selection and mapped registers: */
	struct {
		const struct fd_perfcntr_counter *counter;
		uint16_t select_val;
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* timestamp (in us) of the most recent sample: */
	uint32_t stime[MAX_CNTR_PER_GROUP];

	/* raw counter value at the most recent sample.  Only the low 32b
	 * are tracked for now, which means we don't have to care that the
	 * hi and lo regs can't be sampled at the same time:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];

	/* current rate, ie. how much the counter increased over the last
	 * sampling period, divided by the length of that period:
	 */
	float current[MAX_CNTR_PER_GROUP];

	/* name of the currently selected countable (for the UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
79
/*
 * Global state describing the single GPU being monitored.
 */
static struct {
	/* device-tree node of the gpu, plus the cell sizes read from its parent: */
	char *dtnode;
	int address_cells, size_cells;

	/* register region as read from the "reg" property, and its mapping: */
	uint64_t base;
	uint32_t size;
	void *io;

	uint32_t chipid;
	uint32_t min_freq;
	uint32_t max_freq;

	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;

	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;
98
/* Forward declarations for routines that are used before their definition: */
static void restore_counter_groups(void);
static void config_restore(void);
static void config_save(void);
102
103 /*
104 * helpers
105 */
106
#define CHUNKSIZE 32

/*
 * Read the complete contents of the file at 'path' into a malloc()'d buffer.
 *
 * On success the buffer is returned and '*sz' is set to the number of bytes
 * read.  One extra NUL byte (not counted in '*sz') is appended after the
 * data so that the result can safely be handed to the str*() functions.
 * The caller owns the buffer and must free() it.
 *
 * On failure (file cannot be opened, read error, out of memory) NULL is
 * returned and '*sz' is set to 0.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int n = 0;

	*sz = 0;

	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* keep room for one more chunk plus the trailing NUL: */
		char *tmp = realloc(buf, n + CHUNKSIZE + 1);
		if (!tmp)
			goto fail;
		buf = tmp;

		ssize_t ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0)
			goto fail;

		/* only a zero-length read means EOF, a short read does not: */
		if (ret == 0)
			break;

		n += ret;
	}

	close(fd);

	buf[n] = '\0';
	*sz = n;
	return buf;

fail:
	free(buf);
	close(fd);
	return NULL;
}
135
/*
 * Return the current CLOCK_MONOTONIC time in microseconds, truncated to
 * 32 bits.  The value therefore wraps around; use delta() to compute the
 * difference between two samples.
 */
static uint32_t
gettime_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	/* do the math in 64b so the multiply can't overflow where time_t
	 * (or long) is only 32b wide, then truncate explicitly:
	 */
	uint64_t us = (uint64_t)ts.tv_sec * 1000000u + (uint64_t)(ts.tv_nsec / 1000);

	return (uint32_t)us;
}
143
/*
 * Return how far a free-running 32-bit value advanced from the earlier
 * sample 'a' to the later sample 'b', assuming it wrapped at most once.
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
	/* Unsigned subtraction is performed modulo 2^32, which already gives
	 * the right answer across a rollover (e.g. a=0xffffffff, b=0 -> 1).
	 */
	return b - a;
}
153
/*
 * TODO de-duplicate OUT_RING() and friends
 */

/* packet type bits, OR'd into the header dword by the OUT_PKTn() helpers: */
#define CP_TYPE0_PKT       0x00000000
#define CP_TYPE3_PKT       0xc0000000
#define CP_TYPE4_PKT       0x40000000
#define CP_TYPE7_PKT       0x70000000

/* opcode passed to OUT_PKT3()/OUT_PKT7() before touching counter regs: */
#define CP_WAIT_FOR_IDLE   38
163
164 static inline void
165 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
166 {
167 *(ring->cur++) = data;
168 }
169
170 static inline void
171 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
172 {
173 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
174 }
175
176 static inline void
177 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
178 {
179 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
180 }
181
182
183 /*
184 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
185 */
186
/*
 * Compute the odd-parity bit for 'val': the result is 1 when 'val' has an
 * even number of bits set, and 0 when it has an odd number.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 * Fold the word down to a single nibble, then look up that nibble's
	 * even parity in the 0x6996 table and invert it to get odd parity.
	 */
	unsigned nibble = val;

	nibble ^= nibble >> 16;
	nibble ^= nibble >> 8;
	nibble ^= nibble >> 4;
	nibble &= 0xf;

	return ((0x6996 >> nibble) & 1) ^ 1;
}
199
200 static inline void
201 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
202 {
203 OUT_RING(ring, CP_TYPE4_PKT | cnt |
204 (_odd_parity_bit(cnt) << 7) |
205 ((regindx & 0x3ffff) << 8) |
206 ((_odd_parity_bit(regindx) << 27)));
207 }
208
209 static inline void
210 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
211 {
212 OUT_RING(ring, CP_TYPE7_PKT | cnt |
213 (_odd_parity_bit(cnt) << 15) |
214 ((opcode & 0x7f) << 16) |
215 ((_odd_parity_bit(opcode) << 23)));
216 }
217
218 /*
219 * code to find stuff in /proc/device-tree:
220 *
221 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
222 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
223 * we would be competing with whatever else is using the GPU.
224 */
225
226 static void *
227 readdt(const char *node)
228 {
229 char *path;
230 void *buf;
231 int sz;
232
233 asprintf(&path, "%s/%s", dev.dtnode, node);
234 buf = readfile(path, &sz);
235 free(path);
236
237 return buf;
238 }
239
240 static int
241 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
242 {
243 const char *fname = fpath + ftwbuf->base;
244 int sz;
245
246 if (strcmp(fname, "qcom,gpu-freq") == 0) {
247 uint32_t *buf = readfile(fpath, &sz);
248 uint32_t freq = ntohl(buf[0]);
249 free(buf);
250 dev.max_freq = MAX2(dev.max_freq, freq);
251 dev.min_freq = MIN2(dev.min_freq, freq);
252 }
253
254 return 0;
255 }
256
257 static void
258 find_freqs(void)
259 {
260 char *path;
261 int ret;
262
263 dev.min_freq = ~0;
264 dev.max_freq = 0;
265
266 asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
267
268 ret = nftw(path, find_freqs_fn, 64, 0);
269 if (ret < 0)
270 err(1, "could not find power levels");
271
272 free(path);
273 }
274
275 static int
276 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
277 {
278 const char *fname = fpath + ftwbuf->base;
279 int sz;
280
281 if (strcmp(fname, "compatible") == 0) {
282 char *str = readfile(fpath, &sz);
283 if ((strcmp(str, "qcom,adreno-3xx") == 0) ||
284 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
285 (strstr(str, "qcom,adreno") == str)) {
286 int dlen = strlen(fpath) - strlen("/compatible");
287 dev.dtnode = malloc(dlen + 1);
288 memcpy(dev.dtnode, fpath, dlen);
289 printf("found dt node: %s\n", dev.dtnode);
290
291 char buf[dlen + sizeof("/../#address-cells") + 1];
292 int sz, *val;
293
294 sprintf(buf, "%s/../#address-cells", dev.dtnode);
295 val = readfile(buf, &sz);
296 dev.address_cells = ntohl(*val);
297 free(val);
298
299 sprintf(buf, "%s/../#size-cells", dev.dtnode);
300 val = readfile(buf, &sz);
301 dev.size_cells = ntohl(*val);
302 free(val);
303
304 printf("#address-cells=%d, #size-cells=%d\n",
305 dev.address_cells, dev.size_cells);
306 }
307 free(str);
308 }
309 if (dev.dtnode) {
310 /* we found it! */
311 return 1;
312 }
313 return 0;
314 }
315
316 static void
317 find_device(void)
318 {
319 int ret, fd;
320 uint32_t *buf, *b;
321
322 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
323 if (ret < 0)
324 err(1, "could not find adreno gpu");
325
326 if (!dev.dtnode)
327 errx(1, "could not find qcom,adreno-3xx node");
328
329 fd = open("/dev/dri/card0", O_RDWR);
330 if (fd < 0)
331 err(1, "could not open drm device");
332
333 dev.dev = fd_device_new(fd);
334 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
335
336 uint64_t val;
337 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
338 if (ret) {
339 err(1, "could not get gpu-id");
340 }
341 dev.chipid = val;
342
343 #define CHIP_FMT "d%d%d.%d"
344 #define CHIP_ARGS(chipid) \
345 ((chipid) >> 24) & 0xff, \
346 ((chipid) >> 16) & 0xff, \
347 ((chipid) >> 8) & 0xff, \
348 ((chipid) >> 0) & 0xff
349 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
350
351 b = buf = readdt("reg");
352
353 if (dev.address_cells == 2) {
354 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
355 dev.base = (((uint64_t)u[0]) << 32) | u[1];
356 buf += 2;
357 } else {
358 dev.base = ntohl(buf[0]);
359 buf += 1;
360 }
361
362 if (dev.size_cells == 2) {
363 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
364 dev.size = (((uint64_t)u[0]) << 32) | u[1];
365 buf += 2;
366 } else {
367 dev.size = ntohl(buf[0]);
368 buf += 1;
369 }
370
371 free(b);
372
373 printf("i/o region at %08"PRIu64" (size: %x)\n", dev.base, dev.size);
374
375 /* try MAX_FREQ first as that will work regardless of old dt
376 * dt bindings vs upstream bindings:
377 */
378 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
379 if (ret) {
380 printf("falling back to parsing DT bindings for freq\n");
381 find_freqs();
382 } else {
383 dev.min_freq = 0;
384 dev.max_freq = val;
385 }
386
387 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
388
389 fd = open("/dev/mem", O_RDWR | O_SYNC);
390 if (fd < 0)
391 err(1, "could not open /dev/mem");
392
393 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
394 if (!dev.io)
395 err(1, "could not map device");
396 }
397
398 /*
399 * perf-monitor
400 */
401
402 static void
403 flush_ring(void)
404 {
405 int ret;
406
407 if (!dev.submit)
408 return;
409
410 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
411 if (ret)
412 errx(1, "submit failed: %d", ret);
413 fd_ringbuffer_del(dev.ring);
414 fd_submit_del(dev.submit);
415
416 dev.ring = NULL;
417 dev.submit = NULL;
418 }
419
420 static void
421 select_counter(struct counter_group *group, int ctr, int n)
422 {
423 assert(n < group->group->num_countables);
424 assert(ctr < group->group->num_counters);
425
426 group->label[ctr] = group->group->countables[n].name;
427 group->counter[ctr].select_val = n;
428
429 if (!dev.submit) {
430 dev.submit = fd_submit_new(dev.pipe);
431 dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
432 FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
433 }
434
435 /* bashing select register directly while gpu is active will end
436 * in tears.. so we need to write it via the ring:
437 *
438 * TODO it would help startup time, if gpu is loaded, to batch
439 * all the initial writes and do a single flush.. although that
440 * makes things more complicated for capturing inital sample value
441 */
442 struct fd_ringbuffer *ring = dev.ring;
443 switch (dev.chipid >> 24) {
444 case 3:
445 case 4:
446 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
447 OUT_RING(ring, 0x00000000);
448
449 if (group->group->counters[ctr].enable) {
450 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
451 OUT_RING(ring, 0);
452 }
453
454 if (group->group->counters[ctr].clear) {
455 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
456 OUT_RING(ring, 1);
457
458 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
459 OUT_RING(ring, 0);
460 }
461
462 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
463 OUT_RING(ring, n);
464
465 if (group->group->counters[ctr].enable) {
466 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
467 OUT_RING(ring, 1);
468 }
469
470 break;
471 case 5:
472 case 6:
473 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
474
475 if (group->group->counters[ctr].enable) {
476 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
477 OUT_RING(ring, 0);
478 }
479
480 if (group->group->counters[ctr].clear) {
481 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
482 OUT_RING(ring, 1);
483
484 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
485 OUT_RING(ring, 0);
486 }
487
488 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
489 OUT_RING(ring, n);
490
491 if (group->group->counters[ctr].enable) {
492 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
493 OUT_RING(ring, 1);
494 }
495
496 break;
497 }
498
499 group->last[ctr] = *group->counter[ctr].val_lo;
500 group->stime[ctr] = gettime_us();
501 }
502
503 static void
504 resample_counter(struct counter_group *group, int ctr)
505 {
506 uint32_t val = *group->counter[ctr].val_lo;
507 uint32_t t = gettime_us();
508 uint32_t dt = delta(group->stime[ctr], t);
509 uint32_t dval = delta(group->last[ctr], val);
510 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
511 group->last[ctr] = val;
512 group->stime[ctr] = t;
513 }
514
515 #define REFRESH_MS 500
516
517 /* sample all the counters: */
518 static void
519 resample(void)
520 {
521 static uint64_t last_time;
522 uint64_t current_time = gettime_us();
523
524 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
525 return;
526
527 last_time = current_time;
528
529 for (unsigned i = 0; i < dev.ngroups; i++) {
530 struct counter_group *group = &dev.groups[i];
531 for (unsigned j = 0; j < group->group->num_counters; j++) {
532 resample_counter(group, j);
533 }
534 }
535 }
536
537 /*
538 * The UI
539 */
540
/* curses color-pair ids: */
#define COLOR_GROUP_HEADER   1
#define COLOR_FOOTER         2
#define COLOR_INVERSE        3

/* window width/height, refreshed on every redraw(): */
static int w, h;
/* width of the label column: */
static int ctr_width;
/* total number of rows, and the currently selected row (row 0 is a
 * group header, so the selection starts at 1):
 */
static int max_rows;
static int current_cntr = 1;
548
549 static void
550 redraw_footer(WINDOW *win)
551 {
552 char *footer;
553 int n;
554
555 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
556 CHIP_ARGS(dev.chipid),
557 ((float)dev.min_freq) / 1000000.0,
558 ((float)dev.max_freq) / 1000000.0);
559
560 wmove(win, h - 1, 0);
561 wattron(win, COLOR_PAIR(COLOR_FOOTER));
562 waddstr(win, footer);
563 whline(win, ' ', w - n);
564 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
565
566 free(footer);
567 }
568
569 static void
570 redraw_group_header(WINDOW *win, int row, const char *name)
571 {
572 wmove(win, row, 0);
573 wattron(win, A_BOLD);
574 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
575 waddstr(win, name);
576 whline(win, ' ', w - strlen(name));
577 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
578 wattroff(win, A_BOLD);
579 }
580
581 static void
582 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
583 {
584 int n = strlen(name);
585 assert(n <= ctr_width);
586 wmove(win, row, 0);
587 whline(win, ' ', ctr_width - n);
588 wmove(win, row, ctr_width - n);
589 if (selected)
590 wattron(win, COLOR_PAIR(COLOR_INVERSE));
591 waddstr(win, name);
592 if (selected)
593 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
594 waddstr(win, ": ");
595 }
596
597 static void
598 redraw_counter_value_cycles(WINDOW *win, float val)
599 {
600 char *str;
601 int x = getcurx(win);
602 int valwidth = w - x;
603 int barwidth, n;
604
605 /* convert to fraction of max freq: */
606 val = val / (float)dev.max_freq;
607
608 /* figure out percentage-bar width: */
609 barwidth = (int)(val * valwidth);
610
611 /* sometimes things go over 100%.. idk why, could be
612 * things running faster than base clock, or counter
613 * summing up cycles in multiple cores?
614 */
615 barwidth = MIN2(barwidth, valwidth - 1);
616
617 n = asprintf(&str, "%.2f%%", 100.0 * val);
618 wattron(win, COLOR_PAIR(COLOR_INVERSE));
619 waddnstr(win, str, barwidth);
620 if (barwidth > n) {
621 whline(win, ' ', barwidth - n);
622 wmove(win, getcury(win), x + barwidth);
623 }
624 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
625 if (barwidth < n)
626 waddstr(win, str + barwidth);
627 whline(win, ' ', w - getcurx(win));
628
629 free(str);
630 }
631
632 static void
633 redraw_counter_value_raw(WINDOW *win, float val)
634 {
635 char *str;
636 asprintf(&str, "%'.2f", val);
637 waddstr(win, str);
638 whline(win, ' ', w - getcurx(win));
639 free(str);
640 }
641
642 static void
643 redraw_counter(WINDOW *win, int row, struct counter_group *group,
644 int ctr, bool selected)
645 {
646 redraw_counter_label(win, row, group->label[ctr], selected);
647
648 /* quick hack, if the label has "CYCLE" in the name, it is
649 * probably a cycle counter ;-)
650 * Perhaps add more info in rnndb schema to know how to
651 * treat individual counters (ie. which are cycles, and
652 * for those we want to present as a percentage do we
653 * need to scale the result.. ie. is it running at some
654 * multiple or divisor of core clk, etc)
655 *
656 * TODO it would be much more clever to get this from xml
657 * Also.. in some cases I think we want to know how many
658 * units the counter is counting for, ie. if a320 has 2x
659 * shader as a306 we might need to scale the result..
660 */
661 if (strstr(group->label[ctr], "CYCLE") ||
662 strstr(group->label[ctr], "BUSY") ||
663 strstr(group->label[ctr], "IDLE"))
664 redraw_counter_value_cycles(win, group->current[ctr]);
665 else
666 redraw_counter_value_raw(win, group->current[ctr]);
667 }
668
669 static void
670 redraw(WINDOW *win)
671 {
672 static int scroll = 0;
673 int max, row = 0;
674
675 w = getmaxx(win);
676 h = getmaxy(win);
677
678 max = h - 3;
679
680 if ((current_cntr - scroll) > (max - 1)) {
681 scroll = current_cntr - (max - 1);
682 } else if ((current_cntr - 1) < scroll) {
683 scroll = current_cntr - 1;
684 }
685
686 for (unsigned i = 0; i < dev.ngroups; i++) {
687 struct counter_group *group = &dev.groups[i];
688 unsigned j = 0;
689
690 /* NOTE skip CP the first CP counter */
691 if (i == 0)
692 j++;
693
694 if (j < group->group->num_counters) {
695 if ((scroll <= row) && ((row - scroll) < max))
696 redraw_group_header(win, row - scroll, group->group->name);
697 row++;
698 }
699
700 for (; j < group->group->num_counters; j++) {
701 if ((scroll <= row) && ((row - scroll) < max))
702 redraw_counter(win, row - scroll, group, j, row == current_cntr);
703 row++;
704 }
705 }
706
707 /* convert back to physical (unscrolled) offset: */
708 row = max;
709
710 redraw_group_header(win, row, "Status");
711 row++;
712
713 /* Draw GPU freq row: */
714 redraw_counter_label(win, row, "Freq (MHz)", false);
715 redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
716 row++;
717
718 redraw_footer(win);
719
720 refresh();
721 }
722
723 static struct counter_group *
724 current_counter(int *ctr)
725 {
726 int n = 0;
727
728 for (unsigned i = 0; i < dev.ngroups; i++) {
729 struct counter_group *group = &dev.groups[i];
730 unsigned j = 0;
731
732 /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
733 if (i == 0)
734 j++;
735
736 /* account for group header: */
737 if (j < group->group->num_counters) {
738 /* cannot select group header.. return null to indicate this
739 * main_ui():
740 */
741 if (n == current_cntr)
742 return NULL;
743 n++;
744 }
745
746
747 for (; j < group->group->num_counters; j++) {
748 if (n == current_cntr) {
749 if (ctr)
750 *ctr = j;
751 return group;
752 }
753 n++;
754 }
755 }
756
757 assert(0);
758 return NULL;
759 }
760
761 static void
762 counter_dialog(void)
763 {
764 WINDOW *dialog;
765 struct counter_group *group;
766 int cnt, current = 0, scroll;
767
768 /* figure out dialog size: */
769 int dh = h/2;
770 int dw = ctr_width + 2;
771
772 group = current_counter(&cnt);
773
774 /* find currently selected idx (note there can be discontinuities
775 * so the selected value does not map 1:1 to current idx)
776 */
777 uint32_t selected = group->counter[cnt].select_val;
778 for (int i = 0; i < group->group->num_countables; i++) {
779 if (group->group->countables[i].selector == selected) {
780 current = i;
781 break;
782 }
783 }
784
785 /* scrolling offset, if dialog is too small for all the choices: */
786 scroll = 0;
787
788 dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
789 box(dialog, 0, 0);
790 wrefresh(dialog);
791 keypad(dialog, TRUE);
792
793 while (true) {
794 int max = MIN2(dh - 2, group->group->num_countables);
795 int selector = -1;
796
797 if ((current - scroll) >= (dh - 3)) {
798 scroll = current - (dh - 3);
799 } else if (current < scroll) {
800 scroll = current;
801 }
802
803 for (int i = 0; i < max; i++) {
804 int n = scroll + i;
805 wmove(dialog, i+1, 1);
806 if (n == current) {
807 assert (n < group->group->num_countables);
808 selector = group->group->countables[n].selector;
809 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
810 }
811 if (n < group->group->num_countables)
812 waddstr(dialog, group->group->countables[n].name);
813 whline(dialog, ' ', dw - getcurx(dialog) - 1);
814 if (n == current)
815 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
816 }
817
818 assert (selector >= 0);
819
820 switch (wgetch(dialog)) {
821 case KEY_UP:
822 current = MAX2(0, current - 1);
823 break;
824 case KEY_DOWN:
825 current = MIN2(group->group->num_countables - 1, current + 1);
826 break;
827 case KEY_LEFT:
828 case KEY_ENTER:
829 /* select new sampler */
830 select_counter(group, cnt, selector);
831 flush_ring();
832 config_save();
833 goto out;
834 case 'q':
835 goto out;
836 default:
837 /* ignore */
838 break;
839 }
840
841 resample();
842 }
843
844 out:
845 wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
846 delwin(dialog);
847 }
848
849 static void
850 scroll_cntr(int amount)
851 {
852 if (amount < 0) {
853 current_cntr = MAX2(1, current_cntr + amount);
854 if (current_counter(NULL) == NULL) {
855 current_cntr = MAX2(1, current_cntr - 1);
856 }
857 } else {
858 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
859 if (current_counter(NULL) == NULL)
860 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
861 }
862 }
863
864 static void
865 main_ui(void)
866 {
867 WINDOW *mainwin;
868 uint32_t last_time = gettime_us();
869
870 /* curses setup: */
871 mainwin = initscr();
872 if (!mainwin)
873 goto out;
874
875 cbreak();
876 wtimeout(mainwin, REFRESH_MS);
877 noecho();
878 keypad(mainwin, TRUE);
879 curs_set(0);
880 start_color();
881 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
882 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
883 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
884
885 while (true) {
886 switch (wgetch(mainwin)) {
887 case KEY_UP:
888 scroll_cntr(-1);
889 break;
890 case KEY_DOWN:
891 scroll_cntr(+1);
892 break;
893 case KEY_NPAGE: /* page-down */
894 /* TODO figure out # of rows visible? */
895 scroll_cntr(+15);
896 break;
897 case KEY_PPAGE: /* page-up */
898 /* TODO figure out # of rows visible? */
899 scroll_cntr(-15);
900 break;
901 case KEY_RIGHT:
902 counter_dialog();
903 break;
904 case 'q':
905 goto out;
906 break;
907 default:
908 /* ignore */
909 break;
910 }
911 resample();
912 redraw(mainwin);
913
914 /* restore the counters every 0.5s in case the GPU has suspended,
915 * in which case the current selected countables will have reset:
916 */
917 uint32_t t = gettime_us();
918 if (delta(last_time, t) > 500000) {
919 restore_counter_groups();
920 flush_ring();
921 last_time = t;
922 }
923 }
924
925 /* restore settings.. maybe we need an atexit()??*/
926 out:
927 delwin(mainwin);
928 endwin();
929 refresh();
930 }
931
932 static void
933 restore_counter_groups(void)
934 {
935 for (unsigned i = 0; i < dev.ngroups; i++) {
936 struct counter_group *group = &dev.groups[i];
937 unsigned j = 0;
938
939 /* NOTE skip CP the first CP counter */
940 if (i == 0)
941 j++;
942
943 for (; j < group->group->num_counters; j++) {
944 select_counter(group, j, group->counter[j].select_val);
945 }
946 }
947 }
948
949 static void
950 setup_counter_groups(const struct fd_perfcntr_group *groups)
951 {
952 for (unsigned i = 0; i < dev.ngroups; i++) {
953 struct counter_group *group = &dev.groups[i];
954
955 group->group = &groups[i];
956
957 max_rows += group->group->num_counters + 1;
958
959 /* the first CP counter is hidden: */
960 if (i == 0) {
961 max_rows--;
962 if (group->group->num_counters <= 1)
963 max_rows--;
964 }
965
966 for (unsigned j = 0; j < group->group->num_counters; j++) {
967 group->counter[j].counter = &group->group->counters[j];
968
969 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
970 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
971
972 group->counter[j].select_val = j;
973 }
974
975 for (unsigned j = 0; j < group->group->num_countables; j++) {
976 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
977 }
978 }
979 }
980
981 /*
982 * configuration / persistence
983 */
984
985 static config_t cfg;
986 static config_setting_t *setting;
987
988 static void
989 config_save(void)
990 {
991 for (unsigned i = 0; i < dev.ngroups; i++) {
992 struct counter_group *group = &dev.groups[i];
993 unsigned j = 0;
994
995 /* NOTE skip CP the first CP counter */
996 if (i == 0)
997 j++;
998
999 config_setting_t *sect =
1000 config_setting_get_member(setting, group->group->name);
1001
1002 for (; j < group->group->num_counters; j++) {
1003 char name[] = "counter0000";
1004 sprintf(name, "counter%d", j);
1005 config_setting_t *s =
1006 config_setting_lookup(sect, name);
1007 config_setting_set_int(s, group->counter[j].select_val);
1008 }
1009 }
1010
1011 config_write_file(&cfg, "fdperf.cfg");
1012 }
1013
1014 static void
1015 config_restore(void)
1016 {
1017 char *str;
1018
1019 config_init(&cfg);
1020
1021 /* Read the file. If there is an error, report it and exit. */
1022 if(!config_read_file(&cfg, "fdperf.cfg")) {
1023 warn("could not restore settings");
1024 }
1025
1026 config_setting_t *root = config_root_setting(&cfg);
1027
1028 /* per device settings: */
1029 asprintf(&str, "a%dxx", dev.chipid >> 24);
1030 setting = config_setting_get_member(root, str);
1031 if (!setting)
1032 setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
1033 free(str);
1034
1035 for (unsigned i = 0; i < dev.ngroups; i++) {
1036 struct counter_group *group = &dev.groups[i];
1037 unsigned j = 0;
1038
1039 /* NOTE skip CP the first CP counter */
1040 if (i == 0)
1041 j++;
1042
1043 config_setting_t *sect =
1044 config_setting_get_member(setting, group->group->name);
1045
1046 if (!sect) {
1047 sect = config_setting_add(setting, group->group->name,
1048 CONFIG_TYPE_GROUP);
1049 }
1050
1051 for (; j < group->group->num_counters; j++) {
1052 char name[] = "counter0000";
1053 sprintf(name, "counter%d", j);
1054 config_setting_t *s = config_setting_lookup(sect, name);
1055 if (!s) {
1056 config_setting_add(sect, name, CONFIG_TYPE_INT);
1057 continue;
1058 }
1059 select_counter(group, j, config_setting_get_int(s));
1060 }
1061 }
1062 }
1063
1064 /*
1065 * main
1066 */
1067
1068 int
1069 main(int argc, char **argv)
1070 {
1071 find_device();
1072
1073 const struct fd_perfcntr_group *groups;
1074 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1075 if (!groups) {
1076 errx(1, "no perfcntr support");
1077 }
1078
1079 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1080
1081 setup_counter_groups(groups);
1082 restore_counter_groups();
1083 config_restore();
1084 flush_ring();
1085
1086 main_ui();
1087
1088 return 0;
1089 }