5b4488209c5a7583e056cf65b2808ebe7995f46c
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdint.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <curses.h>
41 #include <libconfig.h>
42 #include <inttypes.h>
43 #include <xf86drm.h>
44
45 #include "drm/freedreno_drmif.h"
46 #include "drm/freedreno_ringbuffer.h"
47
48 #include "freedreno_perfcntr.h"
49
/* Capacity of the per-counter arrays in struct counter_group: */
#define MAX_CNTR_PER_GROUP 24

/* NOTE first counter group should always be CP, since we unconditionally
 * use CP counter to measure the gpu freq.
 */

/*
 * Sampling state for one group of performance counters.  'group' points
 * at the static description of the group; the arrays below are indexed
 * by counter number within that group.
 */
struct counter_group {
	const struct fd_perfcntr_group *group;

	struct {
		/* static description of this counter: */
		const struct fd_perfcntr_counter *counter;
		/* value last passed to select_counter() for this counter: */
		uint16_t select_val;
		/* addresses of the hi/lo value registers inside the dev.io
		 * mapping (filled in by setup_counter_groups()):
		 */
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* last sample time: */
	uint32_t stime[MAX_CNTR_PER_GROUP];
	/* for now just care about the low 32b value.. at least then we don't
	 * have to really care that we can't sample both hi and lo regs at the
	 * same time:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];
	/* current value, ie. by how many did the counter increase in last
	 * sampling period divided by the sampling period:
	 */
	float current[MAX_CNTR_PER_GROUP];
	/* name of currently selected counters (for UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
80
/*
 * Global state describing the GPU being monitored.  Filled in by
 * find_device() and main().
 */
static struct {
	/* path of the GPU's device-tree node (set by find_device_fn()): */
	char *dtnode;
	/* #address-cells / #size-cells read from the node's parent: */
	int address_cells, size_cells;
	/* register region decoded from the node's "reg" property: */
	uint64_t base;
	uint32_t size;
	/* mapping of that region obtained from /dev/mem: */
	void *io;
	/* value of the FD_CHIP_ID pipe param; bits 31..24 are the generation: */
	uint32_t chipid;
	uint32_t min_freq;
	uint32_t max_freq;
	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;
	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;
99
/* forward declarations for functions used before they are defined: */
static void config_save(void);
static void config_restore(void);
static void restore_counter_groups(void);
103
104 /*
105 * helpers
106 */
107
/* granularity, in bytes, of the reads done by readfile(): */
#define CHUNKSIZE 32

/*
 * Read the whole file at 'path' into a freshly allocated buffer.
 *
 * On success returns the buffer (the caller releases it with free()) and
 * stores the number of bytes read in *sz.  One extra NUL byte, which is
 * not counted in *sz, is appended so that textual contents can safely be
 * handed to the str*() functions.
 *
 * On any failure (open, read or allocation) returns NULL and stores 0
 * in *sz.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int n = 0;

	*sz = 0;

	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* keep room for one more chunk plus the trailing NUL: */
		char *tmp = realloc(buf, n + CHUNKSIZE + 1);
		if (!tmp)
			goto fail;
		buf = tmp;

		ssize_t ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0)
			goto fail;

		/* only a zero-length read means end-of-file, a short read
		 * does not:
		 */
		if (ret == 0)
			break;

		n += ret;
	}

	close(fd);

	buf[n] = '\0';
	*sz = n;
	return buf;

fail:
	close(fd);
	free(buf);
	return NULL;
}
136
/*
 * Current CLOCK_MONOTONIC time in microseconds, truncated to 32 bits.
 * The value therefore wraps around; callers compare timestamps with
 * delta().
 */
static uint32_t
gettime_us(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);

	/* do the math in unsigned 64-bit so that the multiplication cannot
	 * overflow a signed (possibly 32-bit) time_t:
	 */
	uint64_t us = ((uint64_t)ts.tv_sec * 1000000u) +
			(uint64_t)(ts.tv_nsec / 1000);

	return (uint32_t)us;
}
144
/*
 * Distance from 'a' to 'b' for 32-bit values that wrap around (counter
 * samples and gettime_us() timestamps).
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
	/* unsigned subtraction is modulo 2^32, so this is already exact
	 * across a rollover (eg. a=0xffffffff, b=0 gives 1):
	 */
	return b - a;
}
154
155 /*
156 * TODO de-duplicate OUT_RING() and friends
157 */
158
/* opcode passed to OUT_PKT3()/OUT_PKT7() by select_counter(): */
#define CP_WAIT_FOR_IDLE 38
/* packet-type bits OR'd into the header dword by the OUT_PKTn() helpers: */
#define CP_TYPE0_PKT 0x00000000
#define CP_TYPE3_PKT 0xc0000000
#define CP_TYPE4_PKT 0x40000000
#define CP_TYPE7_PKT 0x70000000
164
165 static inline void
166 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
167 {
168 *(ring->cur++) = data;
169 }
170
171 static inline void
172 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
173 {
174 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
175 }
176
177 static inline void
178 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
179 {
180 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
181 }
182
183
184 /*
185 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
186 */
187
/*
 * Return the bit that gives 'val' odd parity overall: 1 when 'val' has
 * an even number of bits set, 0 when it has an odd number.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 * Fold everything down into the low nibble, then look the nibble's
	 * parity up in the 0x6996 table.  We want odd parity, so the table
	 * bit is flipped.
	 */
	unsigned folded = val;

	folded ^= folded >> 16;
	folded ^= folded >> 8;
	folded ^= folded >> 4;
	folded &= 0xf;

	return ((0x6996u >> folded) & 1u) ^ 1u;
}
200
201 static inline void
202 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
203 {
204 OUT_RING(ring, CP_TYPE4_PKT | cnt |
205 (_odd_parity_bit(cnt) << 7) |
206 ((regindx & 0x3ffff) << 8) |
207 ((_odd_parity_bit(regindx) << 27)));
208 }
209
210 static inline void
211 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
212 {
213 OUT_RING(ring, CP_TYPE7_PKT | cnt |
214 (_odd_parity_bit(cnt) << 15) |
215 ((opcode & 0x7f) << 16) |
216 ((_odd_parity_bit(opcode) << 23)));
217 }
218
219 /*
220 * code to find stuff in /proc/device-tree:
221 *
222 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
223 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
224 * we would be competing with whatever else is using the GPU.
225 */
226
227 static void *
228 readdt(const char *node)
229 {
230 char *path;
231 void *buf;
232 int sz;
233
234 (void) asprintf(&path, "%s/%s", dev.dtnode, node);
235 buf = readfile(path, &sz);
236 free(path);
237
238 return buf;
239 }
240
241 static int
242 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
243 {
244 const char *fname = fpath + ftwbuf->base;
245 int sz;
246
247 if (strcmp(fname, "qcom,gpu-freq") == 0) {
248 uint32_t *buf = readfile(fpath, &sz);
249 uint32_t freq = ntohl(buf[0]);
250 free(buf);
251 dev.max_freq = MAX2(dev.max_freq, freq);
252 dev.min_freq = MIN2(dev.min_freq, freq);
253 }
254
255 return 0;
256 }
257
258 static void
259 find_freqs(void)
260 {
261 char *path;
262 int ret;
263
264 dev.min_freq = ~0;
265 dev.max_freq = 0;
266
267 (void) asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
268
269 ret = nftw(path, find_freqs_fn, 64, 0);
270 if (ret < 0)
271 err(1, "could not find power levels");
272
273 free(path);
274 }
275
276 static int
277 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
278 {
279 const char *fname = fpath + ftwbuf->base;
280 int sz;
281
282 if (strcmp(fname, "compatible") == 0) {
283 char *str = readfile(fpath, &sz);
284 if ((strcmp(str, "qcom,adreno-3xx") == 0) ||
285 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
286 (strstr(str, "amd,imageon") == str) ||
287 (strstr(str, "qcom,adreno") == str)) {
288 int dlen = strlen(fpath) - strlen("/compatible");
289 dev.dtnode = malloc(dlen + 1);
290 memcpy(dev.dtnode, fpath, dlen);
291 printf("found dt node: %s\n", dev.dtnode);
292
293 char buf[dlen + sizeof("/../#address-cells") + 1];
294 int sz, *val;
295
296 sprintf(buf, "%s/../#address-cells", dev.dtnode);
297 val = readfile(buf, &sz);
298 dev.address_cells = ntohl(*val);
299 free(val);
300
301 sprintf(buf, "%s/../#size-cells", dev.dtnode);
302 val = readfile(buf, &sz);
303 dev.size_cells = ntohl(*val);
304 free(val);
305
306 printf("#address-cells=%d, #size-cells=%d\n",
307 dev.address_cells, dev.size_cells);
308 }
309 free(str);
310 }
311 if (dev.dtnode) {
312 /* we found it! */
313 return 1;
314 }
315 return 0;
316 }
317
318 static void
319 find_device(void)
320 {
321 int ret, fd;
322 uint32_t *buf, *b;
323
324 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
325 if (ret < 0)
326 err(1, "could not find adreno gpu");
327
328 if (!dev.dtnode)
329 errx(1, "could not find qcom,adreno-3xx node");
330
331 fd = drmOpen("msm", NULL);
332 if (fd < 0)
333 err(1, "could not open drm device");
334
335 dev.dev = fd_device_new(fd);
336 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
337
338 uint64_t val;
339 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
340 if (ret) {
341 err(1, "could not get gpu-id");
342 }
343 dev.chipid = val;
344
345 #define CHIP_FMT "d%d%d.%d"
346 #define CHIP_ARGS(chipid) \
347 ((chipid) >> 24) & 0xff, \
348 ((chipid) >> 16) & 0xff, \
349 ((chipid) >> 8) & 0xff, \
350 ((chipid) >> 0) & 0xff
351 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
352
353 b = buf = readdt("reg");
354
355 if (dev.address_cells == 2) {
356 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
357 dev.base = (((uint64_t)u[0]) << 32) | u[1];
358 buf += 2;
359 } else {
360 dev.base = ntohl(buf[0]);
361 buf += 1;
362 }
363
364 if (dev.size_cells == 2) {
365 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
366 dev.size = (((uint64_t)u[0]) << 32) | u[1];
367 buf += 2;
368 } else {
369 dev.size = ntohl(buf[0]);
370 buf += 1;
371 }
372
373 free(b);
374
375 printf("i/o region at %08"PRIu64" (size: %x)\n", dev.base, dev.size);
376
377 /* try MAX_FREQ first as that will work regardless of old dt
378 * dt bindings vs upstream bindings:
379 */
380 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
381 if (ret) {
382 printf("falling back to parsing DT bindings for freq\n");
383 find_freqs();
384 } else {
385 dev.min_freq = 0;
386 dev.max_freq = val;
387 }
388
389 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
390
391 fd = open("/dev/mem", O_RDWR | O_SYNC);
392 if (fd < 0)
393 err(1, "could not open /dev/mem");
394
395 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
396 if (!dev.io)
397 err(1, "could not map device");
398 }
399
400 /*
401 * perf-monitor
402 */
403
404 static void
405 flush_ring(void)
406 {
407 int ret;
408
409 if (!dev.submit)
410 return;
411
412 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
413 if (ret)
414 errx(1, "submit failed: %d", ret);
415 fd_ringbuffer_del(dev.ring);
416 fd_submit_del(dev.submit);
417
418 dev.ring = NULL;
419 dev.submit = NULL;
420 }
421
422 static void
423 select_counter(struct counter_group *group, int ctr, int n)
424 {
425 assert(n < group->group->num_countables);
426 assert(ctr < group->group->num_counters);
427
428 group->label[ctr] = group->group->countables[n].name;
429 group->counter[ctr].select_val = n;
430
431 if (!dev.submit) {
432 dev.submit = fd_submit_new(dev.pipe);
433 dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
434 FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
435 }
436
437 /* bashing select register directly while gpu is active will end
438 * in tears.. so we need to write it via the ring:
439 *
440 * TODO it would help startup time, if gpu is loaded, to batch
441 * all the initial writes and do a single flush.. although that
442 * makes things more complicated for capturing inital sample value
443 */
444 struct fd_ringbuffer *ring = dev.ring;
445 switch (dev.chipid >> 24) {
446 case 2:
447 case 3:
448 case 4:
449 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
450 OUT_RING(ring, 0x00000000);
451
452 if (group->group->counters[ctr].enable) {
453 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
454 OUT_RING(ring, 0);
455 }
456
457 if (group->group->counters[ctr].clear) {
458 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
459 OUT_RING(ring, 1);
460
461 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
462 OUT_RING(ring, 0);
463 }
464
465 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
466 OUT_RING(ring, n);
467
468 if (group->group->counters[ctr].enable) {
469 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
470 OUT_RING(ring, 1);
471 }
472
473 break;
474 case 5:
475 case 6:
476 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
477
478 if (group->group->counters[ctr].enable) {
479 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
480 OUT_RING(ring, 0);
481 }
482
483 if (group->group->counters[ctr].clear) {
484 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
485 OUT_RING(ring, 1);
486
487 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
488 OUT_RING(ring, 0);
489 }
490
491 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
492 OUT_RING(ring, n);
493
494 if (group->group->counters[ctr].enable) {
495 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
496 OUT_RING(ring, 1);
497 }
498
499 break;
500 }
501
502 group->last[ctr] = *group->counter[ctr].val_lo;
503 group->stime[ctr] = gettime_us();
504 }
505
506 static void
507 resample_counter(struct counter_group *group, int ctr)
508 {
509 uint32_t val = *group->counter[ctr].val_lo;
510 uint32_t t = gettime_us();
511 uint32_t dt = delta(group->stime[ctr], t);
512 uint32_t dval = delta(group->last[ctr], val);
513 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
514 group->last[ctr] = val;
515 group->stime[ctr] = t;
516 }
517
518 #define REFRESH_MS 500
519
520 /* sample all the counters: */
521 static void
522 resample(void)
523 {
524 static uint64_t last_time;
525 uint64_t current_time = gettime_us();
526
527 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
528 return;
529
530 last_time = current_time;
531
532 for (unsigned i = 0; i < dev.ngroups; i++) {
533 struct counter_group *group = &dev.groups[i];
534 for (unsigned j = 0; j < group->group->num_counters; j++) {
535 resample_counter(group, j);
536 }
537 }
538 }
539
540 /*
541 * The UI
542 */
543
/* curses color-pair ids, set up with init_pair() in main_ui(): */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER 2
#define COLOR_INVERSE 3

/* size of the main window, refreshed on every redraw(): */
static int w, h;
/* width of the label column (longest countable name + 1): */
static int ctr_width;
/* max_rows: number of logical rows (group headers plus counters);
 * current_cntr: logical row of the currently selected counter:
 */
static int max_rows, current_cntr = 1;
551
552 static void
553 redraw_footer(WINDOW *win)
554 {
555 char *footer;
556 int n;
557
558 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
559 CHIP_ARGS(dev.chipid),
560 ((float)dev.min_freq) / 1000000.0,
561 ((float)dev.max_freq) / 1000000.0);
562
563 wmove(win, h - 1, 0);
564 wattron(win, COLOR_PAIR(COLOR_FOOTER));
565 waddstr(win, footer);
566 whline(win, ' ', w - n);
567 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
568
569 free(footer);
570 }
571
572 static void
573 redraw_group_header(WINDOW *win, int row, const char *name)
574 {
575 wmove(win, row, 0);
576 wattron(win, A_BOLD);
577 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
578 waddstr(win, name);
579 whline(win, ' ', w - strlen(name));
580 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
581 wattroff(win, A_BOLD);
582 }
583
584 static void
585 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
586 {
587 int n = strlen(name);
588 assert(n <= ctr_width);
589 wmove(win, row, 0);
590 whline(win, ' ', ctr_width - n);
591 wmove(win, row, ctr_width - n);
592 if (selected)
593 wattron(win, COLOR_PAIR(COLOR_INVERSE));
594 waddstr(win, name);
595 if (selected)
596 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
597 waddstr(win, ": ");
598 }
599
600 static void
601 redraw_counter_value_cycles(WINDOW *win, float val)
602 {
603 char *str;
604 int x = getcurx(win);
605 int valwidth = w - x;
606 int barwidth, n;
607
608 /* convert to fraction of max freq: */
609 val = val / (float)dev.max_freq;
610
611 /* figure out percentage-bar width: */
612 barwidth = (int)(val * valwidth);
613
614 /* sometimes things go over 100%.. idk why, could be
615 * things running faster than base clock, or counter
616 * summing up cycles in multiple cores?
617 */
618 barwidth = MIN2(barwidth, valwidth - 1);
619
620 n = asprintf(&str, "%.2f%%", 100.0 * val);
621 wattron(win, COLOR_PAIR(COLOR_INVERSE));
622 waddnstr(win, str, barwidth);
623 if (barwidth > n) {
624 whline(win, ' ', barwidth - n);
625 wmove(win, getcury(win), x + barwidth);
626 }
627 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
628 if (barwidth < n)
629 waddstr(win, str + barwidth);
630 whline(win, ' ', w - getcurx(win));
631
632 free(str);
633 }
634
635 static void
636 redraw_counter_value_raw(WINDOW *win, float val)
637 {
638 char *str;
639 (void) asprintf(&str, "%'.2f", val);
640 waddstr(win, str);
641 whline(win, ' ', w - getcurx(win));
642 free(str);
643 }
644
645 static void
646 redraw_counter(WINDOW *win, int row, struct counter_group *group,
647 int ctr, bool selected)
648 {
649 redraw_counter_label(win, row, group->label[ctr], selected);
650
651 /* quick hack, if the label has "CYCLE" in the name, it is
652 * probably a cycle counter ;-)
653 * Perhaps add more info in rnndb schema to know how to
654 * treat individual counters (ie. which are cycles, and
655 * for those we want to present as a percentage do we
656 * need to scale the result.. ie. is it running at some
657 * multiple or divisor of core clk, etc)
658 *
659 * TODO it would be much more clever to get this from xml
660 * Also.. in some cases I think we want to know how many
661 * units the counter is counting for, ie. if a320 has 2x
662 * shader as a306 we might need to scale the result..
663 */
664 if (strstr(group->label[ctr], "CYCLE") ||
665 strstr(group->label[ctr], "BUSY") ||
666 strstr(group->label[ctr], "IDLE"))
667 redraw_counter_value_cycles(win, group->current[ctr]);
668 else
669 redraw_counter_value_raw(win, group->current[ctr]);
670 }
671
672 static void
673 redraw(WINDOW *win)
674 {
675 static int scroll = 0;
676 int max, row = 0;
677
678 w = getmaxx(win);
679 h = getmaxy(win);
680
681 max = h - 3;
682
683 if ((current_cntr - scroll) > (max - 1)) {
684 scroll = current_cntr - (max - 1);
685 } else if ((current_cntr - 1) < scroll) {
686 scroll = current_cntr - 1;
687 }
688
689 for (unsigned i = 0; i < dev.ngroups; i++) {
690 struct counter_group *group = &dev.groups[i];
691 unsigned j = 0;
692
693 /* NOTE skip CP the first CP counter */
694 if (i == 0)
695 j++;
696
697 if (j < group->group->num_counters) {
698 if ((scroll <= row) && ((row - scroll) < max))
699 redraw_group_header(win, row - scroll, group->group->name);
700 row++;
701 }
702
703 for (; j < group->group->num_counters; j++) {
704 if ((scroll <= row) && ((row - scroll) < max))
705 redraw_counter(win, row - scroll, group, j, row == current_cntr);
706 row++;
707 }
708 }
709
710 /* convert back to physical (unscrolled) offset: */
711 row = max;
712
713 redraw_group_header(win, row, "Status");
714 row++;
715
716 /* Draw GPU freq row: */
717 redraw_counter_label(win, row, "Freq (MHz)", false);
718 redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
719 row++;
720
721 redraw_footer(win);
722
723 refresh();
724 }
725
726 static struct counter_group *
727 current_counter(int *ctr)
728 {
729 int n = 0;
730
731 for (unsigned i = 0; i < dev.ngroups; i++) {
732 struct counter_group *group = &dev.groups[i];
733 unsigned j = 0;
734
735 /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
736 if (i == 0)
737 j++;
738
739 /* account for group header: */
740 if (j < group->group->num_counters) {
741 /* cannot select group header.. return null to indicate this
742 * main_ui():
743 */
744 if (n == current_cntr)
745 return NULL;
746 n++;
747 }
748
749
750 for (; j < group->group->num_counters; j++) {
751 if (n == current_cntr) {
752 if (ctr)
753 *ctr = j;
754 return group;
755 }
756 n++;
757 }
758 }
759
760 assert(0);
761 return NULL;
762 }
763
764 static void
765 counter_dialog(void)
766 {
767 WINDOW *dialog;
768 struct counter_group *group;
769 int cnt, current = 0, scroll;
770
771 /* figure out dialog size: */
772 int dh = h/2;
773 int dw = ctr_width + 2;
774
775 group = current_counter(&cnt);
776
777 /* find currently selected idx (note there can be discontinuities
778 * so the selected value does not map 1:1 to current idx)
779 */
780 uint32_t selected = group->counter[cnt].select_val;
781 for (int i = 0; i < group->group->num_countables; i++) {
782 if (group->group->countables[i].selector == selected) {
783 current = i;
784 break;
785 }
786 }
787
788 /* scrolling offset, if dialog is too small for all the choices: */
789 scroll = 0;
790
791 dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
792 box(dialog, 0, 0);
793 wrefresh(dialog);
794 keypad(dialog, TRUE);
795
796 while (true) {
797 int max = MIN2(dh - 2, group->group->num_countables);
798 int selector = -1;
799
800 if ((current - scroll) >= (dh - 3)) {
801 scroll = current - (dh - 3);
802 } else if (current < scroll) {
803 scroll = current;
804 }
805
806 for (int i = 0; i < max; i++) {
807 int n = scroll + i;
808 wmove(dialog, i+1, 1);
809 if (n == current) {
810 assert (n < group->group->num_countables);
811 selector = group->group->countables[n].selector;
812 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
813 }
814 if (n < group->group->num_countables)
815 waddstr(dialog, group->group->countables[n].name);
816 whline(dialog, ' ', dw - getcurx(dialog) - 1);
817 if (n == current)
818 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
819 }
820
821 assert (selector >= 0);
822
823 switch (wgetch(dialog)) {
824 case KEY_UP:
825 current = MAX2(0, current - 1);
826 break;
827 case KEY_DOWN:
828 current = MIN2(group->group->num_countables - 1, current + 1);
829 break;
830 case KEY_LEFT:
831 case KEY_ENTER:
832 /* select new sampler */
833 select_counter(group, cnt, selector);
834 flush_ring();
835 config_save();
836 goto out;
837 case 'q':
838 goto out;
839 default:
840 /* ignore */
841 break;
842 }
843
844 resample();
845 }
846
847 out:
848 wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
849 delwin(dialog);
850 }
851
852 static void
853 scroll_cntr(int amount)
854 {
855 if (amount < 0) {
856 current_cntr = MAX2(1, current_cntr + amount);
857 if (current_counter(NULL) == NULL) {
858 current_cntr = MAX2(1, current_cntr - 1);
859 }
860 } else {
861 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
862 if (current_counter(NULL) == NULL)
863 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
864 }
865 }
866
867 static void
868 main_ui(void)
869 {
870 WINDOW *mainwin;
871 uint32_t last_time = gettime_us();
872
873 /* curses setup: */
874 mainwin = initscr();
875 if (!mainwin)
876 goto out;
877
878 cbreak();
879 wtimeout(mainwin, REFRESH_MS);
880 noecho();
881 keypad(mainwin, TRUE);
882 curs_set(0);
883 start_color();
884 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
885 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
886 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
887
888 while (true) {
889 switch (wgetch(mainwin)) {
890 case KEY_UP:
891 scroll_cntr(-1);
892 break;
893 case KEY_DOWN:
894 scroll_cntr(+1);
895 break;
896 case KEY_NPAGE: /* page-down */
897 /* TODO figure out # of rows visible? */
898 scroll_cntr(+15);
899 break;
900 case KEY_PPAGE: /* page-up */
901 /* TODO figure out # of rows visible? */
902 scroll_cntr(-15);
903 break;
904 case KEY_RIGHT:
905 counter_dialog();
906 break;
907 case 'q':
908 goto out;
909 break;
910 default:
911 /* ignore */
912 break;
913 }
914 resample();
915 redraw(mainwin);
916
917 /* restore the counters every 0.5s in case the GPU has suspended,
918 * in which case the current selected countables will have reset:
919 */
920 uint32_t t = gettime_us();
921 if (delta(last_time, t) > 500000) {
922 restore_counter_groups();
923 flush_ring();
924 last_time = t;
925 }
926 }
927
928 /* restore settings.. maybe we need an atexit()??*/
929 out:
930 delwin(mainwin);
931 endwin();
932 refresh();
933 }
934
935 static void
936 restore_counter_groups(void)
937 {
938 for (unsigned i = 0; i < dev.ngroups; i++) {
939 struct counter_group *group = &dev.groups[i];
940 unsigned j = 0;
941
942 /* NOTE skip CP the first CP counter */
943 if (i == 0)
944 j++;
945
946 for (; j < group->group->num_counters; j++) {
947 select_counter(group, j, group->counter[j].select_val);
948 }
949 }
950 }
951
952 static void
953 setup_counter_groups(const struct fd_perfcntr_group *groups)
954 {
955 for (unsigned i = 0; i < dev.ngroups; i++) {
956 struct counter_group *group = &dev.groups[i];
957
958 group->group = &groups[i];
959
960 max_rows += group->group->num_counters + 1;
961
962 /* the first CP counter is hidden: */
963 if (i == 0) {
964 max_rows--;
965 if (group->group->num_counters <= 1)
966 max_rows--;
967 }
968
969 for (unsigned j = 0; j < group->group->num_counters; j++) {
970 group->counter[j].counter = &group->group->counters[j];
971
972 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
973 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
974
975 group->counter[j].select_val = j;
976 }
977
978 for (unsigned j = 0; j < group->group->num_countables; j++) {
979 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
980 }
981 }
982 }
983
984 /*
985 * configuration / persistence
986 */
987
/* libconfig state backing "fdperf.cfg", initialized by config_restore(): */
static config_t cfg;
/* the per-device ("a%dxx") group inside cfg, set by config_restore(): */
static config_setting_t *setting;
990
991 static void
992 config_save(void)
993 {
994 for (unsigned i = 0; i < dev.ngroups; i++) {
995 struct counter_group *group = &dev.groups[i];
996 unsigned j = 0;
997
998 /* NOTE skip CP the first CP counter */
999 if (i == 0)
1000 j++;
1001
1002 config_setting_t *sect =
1003 config_setting_get_member(setting, group->group->name);
1004
1005 for (; j < group->group->num_counters; j++) {
1006 char name[] = "counter0000";
1007 sprintf(name, "counter%d", j);
1008 config_setting_t *s =
1009 config_setting_lookup(sect, name);
1010 config_setting_set_int(s, group->counter[j].select_val);
1011 }
1012 }
1013
1014 config_write_file(&cfg, "fdperf.cfg");
1015 }
1016
1017 static void
1018 config_restore(void)
1019 {
1020 char *str;
1021
1022 config_init(&cfg);
1023
1024 /* Read the file. If there is an error, report it and exit. */
1025 if(!config_read_file(&cfg, "fdperf.cfg")) {
1026 warn("could not restore settings");
1027 }
1028
1029 config_setting_t *root = config_root_setting(&cfg);
1030
1031 /* per device settings: */
1032 (void) asprintf(&str, "a%dxx", dev.chipid >> 24);
1033 setting = config_setting_get_member(root, str);
1034 if (!setting)
1035 setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
1036 free(str);
1037
1038 for (unsigned i = 0; i < dev.ngroups; i++) {
1039 struct counter_group *group = &dev.groups[i];
1040 unsigned j = 0;
1041
1042 /* NOTE skip CP the first CP counter */
1043 if (i == 0)
1044 j++;
1045
1046 config_setting_t *sect =
1047 config_setting_get_member(setting, group->group->name);
1048
1049 if (!sect) {
1050 sect = config_setting_add(setting, group->group->name,
1051 CONFIG_TYPE_GROUP);
1052 }
1053
1054 for (; j < group->group->num_counters; j++) {
1055 char name[] = "counter0000";
1056 sprintf(name, "counter%d", j);
1057 config_setting_t *s = config_setting_lookup(sect, name);
1058 if (!s) {
1059 config_setting_add(sect, name, CONFIG_TYPE_INT);
1060 continue;
1061 }
1062 select_counter(group, j, config_setting_get_int(s));
1063 }
1064 }
1065 }
1066
1067 /*
1068 * main
1069 */
1070
1071 int
1072 main(int argc, char **argv)
1073 {
1074 find_device();
1075
1076 const struct fd_perfcntr_group *groups;
1077 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1078 if (!groups) {
1079 errx(1, "no perfcntr support");
1080 }
1081
1082 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1083
1084 setup_counter_groups(groups);
1085 restore_counter_groups();
1086 config_restore();
1087 flush_ring();
1088
1089 main_ui();
1090
1091 return 0;
1092 }