freedreno/perfcntrs: fix fd leak
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdint.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <curses.h>
41 #include <libconfig.h>
42 #include <inttypes.h>
43 #include <xf86drm.h>
44
45 #include "drm/freedreno_drmif.h"
46 #include "drm/freedreno_ringbuffer.h"
47
48 #include "freedreno_perfcntr.h"
49
50 #define MAX_CNTR_PER_GROUP 24
51
52 /* NOTE first counter group should always be CP, since we unconditionally
53 * use CP counter to measure the gpu freq.
54 */
55
/* Per-group runtime state: the static group description from the perfcntr
 * tables plus, for each counter in the group, the currently-selected
 * countable and the most recent sample used to compute a rate.
 */
struct counter_group {
	const struct fd_perfcntr_group *group;

	struct {
		const struct fd_perfcntr_counter *counter;
		/* countable currently programmed into select_reg: */
		uint16_t select_val;
		/* mmap'd counter-value registers (hi/lo words); volatile since
		 * the GPU updates them behind our back:
		 */
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* last sample time: */
	uint32_t stime[MAX_CNTR_PER_GROUP];
	/* for now just care about the low 32b value.. at least then we don't
	 * have to really care that we can't sample both hi and lo regs at the
	 * same time:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];
	/* current value, ie. by how many did the counter increase in last
	 * sampling period divided by the sampling period:
	 */
	float current[MAX_CNTR_PER_GROUP];
	/* name of currently selected counters (for UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
80
/* Global device state, populated by find_device() and used everywhere: */
static struct {
	char *dtnode;                    /* device-tree node path for the GPU */
	int address_cells, size_cells;   /* DT cell counts for parsing "reg" */
	uint64_t base;                   /* physical base of GPU register region */
	uint32_t size;                   /* size of GPU register region */
	void *io;                        /* /dev/mem mapping of the region */
	uint32_t chipid;
	uint32_t min_freq;
	uint32_t max_freq;
	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;
	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;
99
100 static void config_save(void);
101 static void config_restore(void);
102 static void restore_counter_groups(void);
103
104 /*
105 * helpers
106 */
107
#define CHUNKSIZE 32

/**
 * Read an entire file into a malloc'd buffer, growing it CHUNKSIZE bytes
 * at a time (sysfs/device-tree files don't report a useful size up front).
 *
 * Returns the buffer (caller frees) with *sz set to the byte count, or
 * NULL with *sz == 0 on any failure.  NOTE the result is not guaranteed
 * to be NUL-terminated; device-tree string properties carry their own
 * terminator.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int fd, ret, n = 0;

	*sz = 0;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* don't clobber buf before checking realloc's result, else the
		 * old allocation leaks on failure:
		 */
		char *tmp = realloc(buf, n + CHUNKSIZE);
		if (!tmp) {
			free(buf);
			close(fd);
			return NULL;
		}
		buf = tmp;

		ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0) {
			free(buf);
			close(fd);
			return NULL;
		} else if (ret < CHUNKSIZE) {
			/* short read == EOF for these pseudo-files */
			n += ret;
			*sz = n;
			close(fd);
			return buf;
		} else {
			n += CHUNKSIZE;
		}
	}
}
138
139 static uint32_t
140 gettime_us(void)
141 {
142 struct timespec ts;
143 clock_gettime(CLOCK_MONOTONIC, &ts);
144 return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
145 }
146
147 static uint32_t
148 delta(uint32_t a, uint32_t b)
149 {
150 /* deal with rollover: */
151 if (a > b)
152 return 0xffffffff - a + b;
153 else
154 return b - a;
155 }
156
157 /*
158 * TODO de-duplicate OUT_RING() and friends
159 */
160
161 #define CP_WAIT_FOR_IDLE 38
162 #define CP_TYPE0_PKT 0x00000000
163 #define CP_TYPE3_PKT 0xc0000000
164 #define CP_TYPE4_PKT 0x40000000
165 #define CP_TYPE7_PKT 0x70000000
166
167 static inline void
168 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
169 {
170 *(ring->cur++) = data;
171 }
172
173 static inline void
174 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
175 {
176 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
177 }
178
179 static inline void
180 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
181 {
182 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
183 }
184
185
186 /*
187 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
188 */
189
/* Compute the odd-parity bit of 'val': fold the bits down to a 4-bit
 * index, then look the answer up in the inverted 0x6996 parity table
 * (inverted because we want *odd*, not even, parity).
 * See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	unsigned x = val ^ (val >> 16);
	x ^= x >> 8;
	x ^= x >> 4;
	x &= 0xf;
	return (~0x6996 >> x) & 1;
}
202
203 static inline void
204 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
205 {
206 OUT_RING(ring, CP_TYPE4_PKT | cnt |
207 (_odd_parity_bit(cnt) << 7) |
208 ((regindx & 0x3ffff) << 8) |
209 ((_odd_parity_bit(regindx) << 27)));
210 }
211
212 static inline void
213 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
214 {
215 OUT_RING(ring, CP_TYPE7_PKT | cnt |
216 (_odd_parity_bit(cnt) << 15) |
217 ((opcode & 0x7f) << 16) |
218 ((_odd_parity_bit(opcode) << 23)));
219 }
220
221 /*
222 * code to find stuff in /proc/device-tree:
223 *
224 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
225 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
226 * we would be competing with whatever else is using the GPU.
227 */
228
229 static void *
230 readdt(const char *node)
231 {
232 char *path;
233 void *buf;
234 int sz;
235
236 (void) asprintf(&path, "%s/%s", dev.dtnode, node);
237 buf = readfile(path, &sz);
238 free(path);
239
240 return buf;
241 }
242
243 static int
244 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
245 {
246 const char *fname = fpath + ftwbuf->base;
247 int sz;
248
249 if (strcmp(fname, "qcom,gpu-freq") == 0) {
250 uint32_t *buf = readfile(fpath, &sz);
251 uint32_t freq = ntohl(buf[0]);
252 free(buf);
253 dev.max_freq = MAX2(dev.max_freq, freq);
254 dev.min_freq = MIN2(dev.min_freq, freq);
255 }
256
257 return 0;
258 }
259
260 static void
261 find_freqs(void)
262 {
263 char *path;
264 int ret;
265
266 dev.min_freq = ~0;
267 dev.max_freq = 0;
268
269 (void) asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
270
271 ret = nftw(path, find_freqs_fn, 64, 0);
272 if (ret < 0)
273 err(1, "could not find power levels");
274
275 free(path);
276 }
277
278 static int
279 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
280 {
281 const char *fname = fpath + ftwbuf->base;
282 int sz;
283
284 if (strcmp(fname, "compatible") == 0) {
285 char *str = readfile(fpath, &sz);
286 if ((strcmp(str, "qcom,adreno-3xx") == 0) ||
287 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
288 (strstr(str, "amd,imageon") == str) ||
289 (strstr(str, "qcom,adreno") == str)) {
290 int dlen = strlen(fpath) - strlen("/compatible");
291 dev.dtnode = malloc(dlen + 1);
292 memcpy(dev.dtnode, fpath, dlen);
293 printf("found dt node: %s\n", dev.dtnode);
294
295 char buf[dlen + sizeof("/../#address-cells") + 1];
296 int sz, *val;
297
298 sprintf(buf, "%s/../#address-cells", dev.dtnode);
299 val = readfile(buf, &sz);
300 dev.address_cells = ntohl(*val);
301 free(val);
302
303 sprintf(buf, "%s/../#size-cells", dev.dtnode);
304 val = readfile(buf, &sz);
305 dev.size_cells = ntohl(*val);
306 free(val);
307
308 printf("#address-cells=%d, #size-cells=%d\n",
309 dev.address_cells, dev.size_cells);
310 }
311 free(str);
312 }
313 if (dev.dtnode) {
314 /* we found it! */
315 return 1;
316 }
317 return 0;
318 }
319
320 static void
321 find_device(void)
322 {
323 int ret, fd;
324 uint32_t *buf, *b;
325
326 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
327 if (ret < 0)
328 err(1, "could not find adreno gpu");
329
330 if (!dev.dtnode)
331 errx(1, "could not find qcom,adreno-3xx node");
332
333 fd = drmOpen("msm", NULL);
334 if (fd < 0)
335 err(1, "could not open drm device");
336
337 dev.dev = fd_device_new(fd);
338 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
339
340 uint64_t val;
341 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
342 if (ret) {
343 err(1, "could not get gpu-id");
344 }
345 dev.chipid = val;
346
347 #define CHIP_FMT "d%d%d.%d"
348 #define CHIP_ARGS(chipid) \
349 ((chipid) >> 24) & 0xff, \
350 ((chipid) >> 16) & 0xff, \
351 ((chipid) >> 8) & 0xff, \
352 ((chipid) >> 0) & 0xff
353 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
354
355 b = buf = readdt("reg");
356
357 if (dev.address_cells == 2) {
358 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
359 dev.base = (((uint64_t)u[0]) << 32) | u[1];
360 buf += 2;
361 } else {
362 dev.base = ntohl(buf[0]);
363 buf += 1;
364 }
365
366 if (dev.size_cells == 2) {
367 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
368 dev.size = (((uint64_t)u[0]) << 32) | u[1];
369 buf += 2;
370 } else {
371 dev.size = ntohl(buf[0]);
372 buf += 1;
373 }
374
375 free(b);
376
377 printf("i/o region at %08"PRIu64" (size: %x)\n", dev.base, dev.size);
378
379 /* try MAX_FREQ first as that will work regardless of old dt
380 * dt bindings vs upstream bindings:
381 */
382 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
383 if (ret) {
384 printf("falling back to parsing DT bindings for freq\n");
385 find_freqs();
386 } else {
387 dev.min_freq = 0;
388 dev.max_freq = val;
389 }
390
391 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
392
393 fd = open("/dev/mem", O_RDWR | O_SYNC);
394 if (fd < 0)
395 err(1, "could not open /dev/mem");
396
397 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
398 if (!dev.io) {
399 close(fd);
400 err(1, "could not map device");
401 }
402 }
403
404 /*
405 * perf-monitor
406 */
407
408 static void
409 flush_ring(void)
410 {
411 int ret;
412
413 if (!dev.submit)
414 return;
415
416 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
417 if (ret)
418 errx(1, "submit failed: %d", ret);
419 fd_ringbuffer_del(dev.ring);
420 fd_submit_del(dev.submit);
421
422 dev.ring = NULL;
423 dev.submit = NULL;
424 }
425
/* Program counter 'ctr' of 'group' to sample countable 'n'.  The register
 * writes are batched onto dev.ring (created lazily); the caller must
 * flush_ring() for them to reach the GPU.  Also primes last/stime so the
 * next resample_counter() has a baseline.
 */
static void
select_counter(struct counter_group *group, int ctr, int n)
{
	assert(n < group->group->num_countables);
	assert(ctr < group->group->num_counters);

	group->label[ctr] = group->group->countables[n].name;
	group->counter[ctr].select_val = n;

	/* lazily create the submit/ring used to batch select-reg writes: */
	if (!dev.submit) {
		dev.submit = fd_submit_new(dev.pipe);
		dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
				FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
	}

	/* bashing select register directly while gpu is active will end
	 * in tears.. so we need to write it via the ring:
	 *
	 * TODO it would help startup time, if gpu is loaded, to batch
	 * all the initial writes and do a single flush.. although that
	 * makes things more complicated for capturing inital sample value
	 */
	struct fd_ringbuffer *ring = dev.ring;
	/* a2xx..a4xx use type0/type3 packets, a5xx+ use type4/type7: */
	switch (dev.chipid >> 24) {
	case 2:
	case 3:
	case 4:
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);

		/* disable the counter while reprogramming it: */
		if (group->group->counters[ctr].enable) {
			OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
			OUT_RING(ring, 0);
		}

		/* pulse the clear bit (1 then 0), when the group has one: */
		if (group->group->counters[ctr].clear) {
			OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
			OUT_RING(ring, 1);

			OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
			OUT_RING(ring, 0);
		}

		/* program the new countable: */
		OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
		OUT_RING(ring, n);

		/* and re-enable: */
		if (group->group->counters[ctr].enable) {
			OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
			OUT_RING(ring, 1);
		}

		break;
	case 5:
	case 6:
		/* same sequence as above, with a5xx+ packet encodings: */
		OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);

		if (group->group->counters[ctr].enable) {
			OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
			OUT_RING(ring, 0);
		}

		if (group->group->counters[ctr].clear) {
			OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
			OUT_RING(ring, 1);

			OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
			OUT_RING(ring, 0);
		}

		OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
		OUT_RING(ring, n);

		if (group->group->counters[ctr].enable) {
			OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
			OUT_RING(ring, 1);
		}

		break;
	}

	/* initial sample so the first resample has a valid baseline: */
	group->last[ctr] = *group->counter[ctr].val_lo;
	group->stime[ctr] = gettime_us();
}
509
510 static void
511 resample_counter(struct counter_group *group, int ctr)
512 {
513 uint32_t val = *group->counter[ctr].val_lo;
514 uint32_t t = gettime_us();
515 uint32_t dt = delta(group->stime[ctr], t);
516 uint32_t dval = delta(group->last[ctr], val);
517 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
518 group->last[ctr] = val;
519 group->stime[ctr] = t;
520 }
521
522 #define REFRESH_MS 500
523
524 /* sample all the counters: */
525 static void
526 resample(void)
527 {
528 static uint64_t last_time;
529 uint64_t current_time = gettime_us();
530
531 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
532 return;
533
534 last_time = current_time;
535
536 for (unsigned i = 0; i < dev.ngroups; i++) {
537 struct counter_group *group = &dev.groups[i];
538 for (unsigned j = 0; j < group->group->num_counters; j++) {
539 resample_counter(group, j);
540 }
541 }
542 }
543
544 /*
545 * The UI
546 */
547
548 #define COLOR_GROUP_HEADER 1
549 #define COLOR_FOOTER 2
550 #define COLOR_INVERSE 3
551
552 static int w, h;
553 static int ctr_width;
554 static int max_rows, current_cntr = 1;
555
556 static void
557 redraw_footer(WINDOW *win)
558 {
559 char *footer;
560 int n;
561
562 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
563 CHIP_ARGS(dev.chipid),
564 ((float)dev.min_freq) / 1000000.0,
565 ((float)dev.max_freq) / 1000000.0);
566
567 wmove(win, h - 1, 0);
568 wattron(win, COLOR_PAIR(COLOR_FOOTER));
569 waddstr(win, footer);
570 whline(win, ' ', w - n);
571 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
572
573 free(footer);
574 }
575
576 static void
577 redraw_group_header(WINDOW *win, int row, const char *name)
578 {
579 wmove(win, row, 0);
580 wattron(win, A_BOLD);
581 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
582 waddstr(win, name);
583 whline(win, ' ', w - strlen(name));
584 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
585 wattroff(win, A_BOLD);
586 }
587
588 static void
589 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
590 {
591 int n = strlen(name);
592 assert(n <= ctr_width);
593 wmove(win, row, 0);
594 whline(win, ' ', ctr_width - n);
595 wmove(win, row, ctr_width - n);
596 if (selected)
597 wattron(win, COLOR_PAIR(COLOR_INVERSE));
598 waddstr(win, name);
599 if (selected)
600 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
601 waddstr(win, ": ");
602 }
603
604 static void
605 redraw_counter_value_cycles(WINDOW *win, float val)
606 {
607 char *str;
608 int x = getcurx(win);
609 int valwidth = w - x;
610 int barwidth, n;
611
612 /* convert to fraction of max freq: */
613 val = val / (float)dev.max_freq;
614
615 /* figure out percentage-bar width: */
616 barwidth = (int)(val * valwidth);
617
618 /* sometimes things go over 100%.. idk why, could be
619 * things running faster than base clock, or counter
620 * summing up cycles in multiple cores?
621 */
622 barwidth = MIN2(barwidth, valwidth - 1);
623
624 n = asprintf(&str, "%.2f%%", 100.0 * val);
625 wattron(win, COLOR_PAIR(COLOR_INVERSE));
626 waddnstr(win, str, barwidth);
627 if (barwidth > n) {
628 whline(win, ' ', barwidth - n);
629 wmove(win, getcury(win), x + barwidth);
630 }
631 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
632 if (barwidth < n)
633 waddstr(win, str + barwidth);
634 whline(win, ' ', w - getcurx(win));
635
636 free(str);
637 }
638
639 static void
640 redraw_counter_value_raw(WINDOW *win, float val)
641 {
642 char *str;
643 (void) asprintf(&str, "%'.2f", val);
644 waddstr(win, str);
645 whline(win, ' ', w - getcurx(win));
646 free(str);
647 }
648
649 static void
650 redraw_counter(WINDOW *win, int row, struct counter_group *group,
651 int ctr, bool selected)
652 {
653 redraw_counter_label(win, row, group->label[ctr], selected);
654
655 /* quick hack, if the label has "CYCLE" in the name, it is
656 * probably a cycle counter ;-)
657 * Perhaps add more info in rnndb schema to know how to
658 * treat individual counters (ie. which are cycles, and
659 * for those we want to present as a percentage do we
660 * need to scale the result.. ie. is it running at some
661 * multiple or divisor of core clk, etc)
662 *
663 * TODO it would be much more clever to get this from xml
664 * Also.. in some cases I think we want to know how many
665 * units the counter is counting for, ie. if a320 has 2x
666 * shader as a306 we might need to scale the result..
667 */
668 if (strstr(group->label[ctr], "CYCLE") ||
669 strstr(group->label[ctr], "BUSY") ||
670 strstr(group->label[ctr], "IDLE"))
671 redraw_counter_value_cycles(win, group->current[ctr]);
672 else
673 redraw_counter_value_raw(win, group->current[ctr]);
674 }
675
/* Repaint the whole screen: scrolled list of group headers and counter
 * rows, then a fixed status section and footer at the bottom.  'row'
 * counts logical (unscrolled) rows; only rows inside [scroll, scroll+max)
 * are actually drawn.
 */
static void
redraw(WINDOW *win)
{
	static int scroll = 0;
	int max, row = 0;

	w = getmaxx(win);
	h = getmaxy(win);

	/* rows available for the counter list (status + freq + footer
	 * occupy the bottom three):
	 */
	max = h - 3;

	/* keep the selected row visible by adjusting the scroll offset: */
	if ((current_cntr - scroll) > (max - 1)) {
		scroll = current_cntr - (max - 1);
	} else if ((current_cntr - 1) < scroll) {
		scroll = current_cntr - 1;
	}

	for (unsigned i = 0; i < dev.ngroups; i++) {
		struct counter_group *group = &dev.groups[i];
		unsigned j = 0;

		/* NOTE skip the first CP counter (reserved for gpu freq) */
		if (i == 0)
			j++;

		/* group header row (only when the group has visible counters): */
		if (j < group->group->num_counters) {
			if ((scroll <= row) && ((row - scroll) < max))
				redraw_group_header(win, row - scroll, group->group->name);
			row++;
		}

		for (; j < group->group->num_counters; j++) {
			if ((scroll <= row) && ((row - scroll) < max))
				redraw_counter(win, row - scroll, group, j, row == current_cntr);
			row++;
		}
	}

	/* convert back to physical (unscrolled) offset: */
	row = max;

	redraw_group_header(win, row, "Status");
	row++;

	/* Draw GPU freq row (CP_ALWAYS_COUNT rate == core clock): */
	redraw_counter_label(win, row, "Freq (MHz)", false);
	redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
	row++;

	redraw_footer(win);

	refresh();
}
729
/* Map the UI's selected row (current_cntr) back to the counter it refers
 * to, walking rows in exactly the same order redraw() lays them out.
 * Returns the group and (via *ctr) the counter index, or NULL when the
 * selected row is a group header (which is not selectable).
 */
static struct counter_group *
current_counter(int *ctr)
{
	int n = 0;

	for (unsigned i = 0; i < dev.ngroups; i++) {
		struct counter_group *group = &dev.groups[i];
		unsigned j = 0;

		/* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
		if (i == 0)
			j++;

		/* account for group header: */
		if (j < group->group->num_counters) {
			/* cannot select group header.. return NULL to indicate
			 * this to main_ui():
			 */
			if (n == current_cntr)
				return NULL;
			n++;
		}


		for (; j < group->group->num_counters; j++) {
			if (n == current_cntr) {
				if (ctr)
					*ctr = j;
				return group;
			}
			n++;
		}
	}

	/* current_cntr is clamped to the row count, so we should never
	 * fall out of the loop:
	 */
	assert(0);
	return NULL;
}
767
/* Pop up a modal list of all countables for the currently selected
 * counter; arrow keys move, LEFT/ENTER programs the chosen countable and
 * persists the config, 'q' cancels.  Keeps resampling while open so the
 * background numbers stay live.
 */
static void
counter_dialog(void)
{
	WINDOW *dialog;
	struct counter_group *group;
	int cnt, current = 0, scroll;

	/* figure out dialog size: */
	int dh = h/2;
	int dw = ctr_width + 2;

	/* never NULL here: the UI only opens the dialog on a counter row */
	group = current_counter(&cnt);

	/* find currently selected idx (note there can be discontinuities
	 * so the selected value does not map 1:1 to current idx)
	 */
	uint32_t selected = group->counter[cnt].select_val;
	for (int i = 0; i < group->group->num_countables; i++) {
		if (group->group->countables[i].selector == selected) {
			current = i;
			break;
		}
	}

	/* scrolling offset, if dialog is too small for all the choices: */
	scroll = 0;

	dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
	box(dialog, 0, 0);
	wrefresh(dialog);
	keypad(dialog, TRUE);

	while (true) {
		int max = MIN2(dh - 2, group->group->num_countables);
		int selector = -1;

		/* keep the highlighted entry inside the visible window: */
		if ((current - scroll) >= (dh - 3)) {
			scroll = current - (dh - 3);
		} else if (current < scroll) {
			scroll = current;
		}

		for (int i = 0; i < max; i++) {
			int n = scroll + i;
			wmove(dialog, i+1, 1);
			if (n == current) {
				assert (n < group->group->num_countables);
				/* remember the selector of the highlighted row so
				 * ENTER below knows what to program:
				 */
				selector = group->group->countables[n].selector;
				wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
			}
			if (n < group->group->num_countables)
				waddstr(dialog, group->group->countables[n].name);
			whline(dialog, ' ', dw - getcurx(dialog) - 1);
			if (n == current)
				wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
		}

		assert (selector >= 0);

		switch (wgetch(dialog)) {
		case KEY_UP:
			current = MAX2(0, current - 1);
			break;
		case KEY_DOWN:
			current = MIN2(group->group->num_countables - 1, current + 1);
			break;
		case KEY_LEFT:
		case KEY_ENTER:
			/* select new sampler */
			select_counter(group, cnt, selector);
			flush_ring();
			config_save();
			goto out;
		case 'q':
			goto out;
		default:
			/* ignore */
			break;
		}

		resample();
	}

out:
	/* blank the border before deleting so no frame is left behind: */
	wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
	delwin(dialog);
}
855
856 static void
857 scroll_cntr(int amount)
858 {
859 if (amount < 0) {
860 current_cntr = MAX2(1, current_cntr + amount);
861 if (current_counter(NULL) == NULL) {
862 current_cntr = MAX2(1, current_cntr - 1);
863 }
864 } else {
865 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
866 if (current_counter(NULL) == NULL)
867 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
868 }
869 }
870
871 static void
872 main_ui(void)
873 {
874 WINDOW *mainwin;
875 uint32_t last_time = gettime_us();
876
877 /* curses setup: */
878 mainwin = initscr();
879 if (!mainwin)
880 goto out;
881
882 cbreak();
883 wtimeout(mainwin, REFRESH_MS);
884 noecho();
885 keypad(mainwin, TRUE);
886 curs_set(0);
887 start_color();
888 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
889 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
890 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
891
892 while (true) {
893 switch (wgetch(mainwin)) {
894 case KEY_UP:
895 scroll_cntr(-1);
896 break;
897 case KEY_DOWN:
898 scroll_cntr(+1);
899 break;
900 case KEY_NPAGE: /* page-down */
901 /* TODO figure out # of rows visible? */
902 scroll_cntr(+15);
903 break;
904 case KEY_PPAGE: /* page-up */
905 /* TODO figure out # of rows visible? */
906 scroll_cntr(-15);
907 break;
908 case KEY_RIGHT:
909 counter_dialog();
910 break;
911 case 'q':
912 goto out;
913 break;
914 default:
915 /* ignore */
916 break;
917 }
918 resample();
919 redraw(mainwin);
920
921 /* restore the counters every 0.5s in case the GPU has suspended,
922 * in which case the current selected countables will have reset:
923 */
924 uint32_t t = gettime_us();
925 if (delta(last_time, t) > 500000) {
926 restore_counter_groups();
927 flush_ring();
928 last_time = t;
929 }
930 }
931
932 /* restore settings.. maybe we need an atexit()??*/
933 out:
934 delwin(mainwin);
935 endwin();
936 refresh();
937 }
938
939 static void
940 restore_counter_groups(void)
941 {
942 for (unsigned i = 0; i < dev.ngroups; i++) {
943 struct counter_group *group = &dev.groups[i];
944 unsigned j = 0;
945
946 /* NOTE skip CP the first CP counter */
947 if (i == 0)
948 j++;
949
950 for (; j < group->group->num_counters; j++) {
951 select_counter(group, j, group->counter[j].select_val);
952 }
953 }
954 }
955
956 static void
957 setup_counter_groups(const struct fd_perfcntr_group *groups)
958 {
959 for (unsigned i = 0; i < dev.ngroups; i++) {
960 struct counter_group *group = &dev.groups[i];
961
962 group->group = &groups[i];
963
964 max_rows += group->group->num_counters + 1;
965
966 /* the first CP counter is hidden: */
967 if (i == 0) {
968 max_rows--;
969 if (group->group->num_counters <= 1)
970 max_rows--;
971 }
972
973 for (unsigned j = 0; j < group->group->num_counters; j++) {
974 group->counter[j].counter = &group->group->counters[j];
975
976 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
977 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
978
979 group->counter[j].select_val = j;
980 }
981
982 for (unsigned j = 0; j < group->group->num_countables; j++) {
983 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
984 }
985 }
986 }
987
988 /*
989 * configuration / persistence
990 */
991
992 static config_t cfg;
993 static config_setting_t *setting;
994
995 static void
996 config_save(void)
997 {
998 for (unsigned i = 0; i < dev.ngroups; i++) {
999 struct counter_group *group = &dev.groups[i];
1000 unsigned j = 0;
1001
1002 /* NOTE skip CP the first CP counter */
1003 if (i == 0)
1004 j++;
1005
1006 config_setting_t *sect =
1007 config_setting_get_member(setting, group->group->name);
1008
1009 for (; j < group->group->num_counters; j++) {
1010 char name[] = "counter0000";
1011 sprintf(name, "counter%d", j);
1012 config_setting_t *s =
1013 config_setting_lookup(sect, name);
1014 config_setting_set_int(s, group->counter[j].select_val);
1015 }
1016 }
1017
1018 config_write_file(&cfg, "fdperf.cfg");
1019 }
1020
/* Load fdperf.cfg (if present) and re-select each saved countable,
 * creating the per-device section and any missing counter entries so
 * config_save() has somewhere to write.
 */
static void
config_restore(void)
{
	char *str;

	config_init(&cfg);

	/* Read the file. If there is an error, report it and exit. */
	/* NOTE(review): despite the comment, a read failure only warns and
	 * continues with an empty config — that appears intentional (first
	 * run has no config file).
	 */
	if(!config_read_file(&cfg, "fdperf.cfg")) {
		warn("could not restore settings");
	}

	config_setting_t *root = config_root_setting(&cfg);

	/* per device settings, keyed by generation (e.g. "a6xx"): */
	(void) asprintf(&str, "a%dxx", dev.chipid >> 24);
	setting = config_setting_get_member(root, str);
	if (!setting)
		setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
	free(str);

	for (unsigned i = 0; i < dev.ngroups; i++) {
		struct counter_group *group = &dev.groups[i];
		unsigned j = 0;

		/* NOTE skip the first CP counter (reserved for gpu freq) */
		if (i == 0)
			j++;

		config_setting_t *sect =
			config_setting_get_member(setting, group->group->name);

		if (!sect) {
			sect = config_setting_add(setting, group->group->name,
					CONFIG_TYPE_GROUP);
		}

		for (; j < group->group->num_counters; j++) {
			char name[] = "counter0000";
			sprintf(name, "counter%d", j);
			config_setting_t *s = config_setting_lookup(sect, name);
			if (!s) {
				/* no saved value: create the entry (left at default)
				 * and keep the counter's current selection:
				 */
				config_setting_add(sect, name, CONFIG_TYPE_INT);
				continue;
			}
			select_counter(group, j, config_setting_get_int(s));
		}
	}
}
1070
1071 /*
1072 * main
1073 */
1074
1075 int
1076 main(int argc, char **argv)
1077 {
1078 find_device();
1079
1080 const struct fd_perfcntr_group *groups;
1081 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1082 if (!groups) {
1083 errx(1, "no perfcntr support");
1084 }
1085
1086 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1087
1088 setup_counter_groups(groups);
1089 restore_counter_groups();
1090 config_restore();
1091 flush_ring();
1092
1093 main_ui();
1094
1095 return 0;
1096 }