freedreno/perfcntrs/fdperf: add missing a2xx case in select_counter
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdint.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <curses.h>
41 #include <libconfig.h>
42 #include <inttypes.h>
43
44 #include "drm/freedreno_drmif.h"
45 #include "drm/freedreno_ringbuffer.h"
46
47 #include "freedreno_perfcntr.h"
48
#define MAX_CNTR_PER_GROUP 24

/* NOTE: the first counter group must always be CP, because a CP counter
 * is unconditionally used to measure the gpu freq.
 */

/* Runtime state for one group of perf counters: the static description
 * of the group plus, per counter slot, the current selection, the
 * register pointers and the sampling history.
 */
struct counter_group {
	const struct fd_perfcntr_group *group;

	/* per-slot selection and pointers to the hi/lo value registers: */
	struct {
		const struct fd_perfcntr_counter *counter;
		uint16_t select_val;
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* time at which each counter was last sampled: */
	uint32_t stime[MAX_CNTR_PER_GROUP];

	/* low 32b of each counter at the last sample.  Only the low word is
	 * tracked, which avoids having to care that the hi and lo registers
	 * cannot be sampled at the same instant:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];

	/* current value, ie. the counter increase over the last sampling
	 * period divided by the length of that period:
	 */
	float current[MAX_CNTR_PER_GROUP];

	/* name of the currently selected countable of each slot (for UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
79
/* Global state describing the one GPU that is being monitored. */
static struct {
	/* device-tree node of the GPU and the cell sizes of its parent: */
	char *dtnode;
	int address_cells;
	int size_cells;

	/* register region (from the "reg" property) and its CPU mapping: */
	uint64_t base;
	uint32_t size;
	void *io;

	/* chip-id as reported by the kernel; the top byte is the generation: */
	uint32_t chipid;

	uint32_t min_freq;
	uint32_t max_freq;

	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;

	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;

/* forward declarations of functions defined further down: */
static void config_save(void);
static void config_restore(void);
static void restore_counter_groups(void);
102
103 /*
104 * helpers
105 */
106
#define CHUNKSIZE 32

/*
 * Read the complete contents of the file at 'path' into a malloc()ed
 * buffer.
 *
 * On success the buffer is returned (the caller must free() it) and the
 * number of bytes read is stored in *sz.  One extra NUL byte, not counted
 * in *sz, is always appended so that textual contents can safely be used
 * as a C string.
 *
 * Returns NULL, with *sz set to 0, if the file cannot be opened or read
 * or if memory cannot be allocated.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int n = 0;

	*sz = 0;

	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* keep room for one more chunk plus the terminator; use a
		 * temporary so the old buffer is not leaked on failure:
		 */
		char *tmp = realloc(buf, n + CHUNKSIZE + 1);
		if (!tmp)
			goto fail;
		buf = tmp;

		int ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0)
			goto fail;

		/* only a zero-length read means EOF, a short read does not: */
		if (ret == 0)
			break;

		n += ret;
	}

	close(fd);

	buf[n] = '\0';
	*sz = n;
	return buf;

fail:
	free(buf);
	close(fd);
	return NULL;
}
135
136 static uint32_t
137 gettime_us(void)
138 {
139 struct timespec ts;
140 clock_gettime(CLOCK_MONOTONIC, &ts);
141 return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
142 }
143
/*
 * Distance from 'a' to 'b' for 32-bit values that only count upwards and
 * may have rolled over in between (timestamps, counter registers).
 *
 * The subtraction is done modulo 2^32, which handles rollover exactly:
 * e.g. going from 0xffffffff to 0 is a distance of 1.
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
	return (uint32_t)(b - a);
}
153
154 /*
155 * TODO de-duplicate OUT_RING() and friends
156 */
157
/* opcode passed to OUT_PKT3()/OUT_PKT7() before touching the counters: */
#define CP_WAIT_FOR_IDLE  38

/* packet type tags, or'd into the top bits of a packet header dword: */
#define CP_TYPE0_PKT      0x00000000
#define CP_TYPE3_PKT      0xc0000000
#define CP_TYPE4_PKT      0x40000000
#define CP_TYPE7_PKT      0x70000000
163
164 static inline void
165 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
166 {
167 *(ring->cur++) = data;
168 }
169
170 static inline void
171 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
172 {
173 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
174 }
175
176 static inline void
177 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
178 {
179 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
180 }
181
182
183 /*
184 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
185 */
186
/* Returns the bit that gives 'val' odd parity: 1 when 'val' has an even
 * number of set bits, 0 when it has an odd number.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	unsigned folded = val;

	/* xor-fold all bits down into bit 0, which then holds the (even)
	 * parity of the input:
	 */
	folded ^= folded >> 16;
	folded ^= folded >> 8;
	folded ^= folded >> 4;
	folded ^= folded >> 2;
	folded ^= folded >> 1;

	/* invert, since the odd parity bit is wanted: */
	return (folded & 1) ^ 1;
}
199
200 static inline void
201 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
202 {
203 OUT_RING(ring, CP_TYPE4_PKT | cnt |
204 (_odd_parity_bit(cnt) << 7) |
205 ((regindx & 0x3ffff) << 8) |
206 ((_odd_parity_bit(regindx) << 27)));
207 }
208
209 static inline void
210 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
211 {
212 OUT_RING(ring, CP_TYPE7_PKT | cnt |
213 (_odd_parity_bit(cnt) << 15) |
214 ((opcode & 0x7f) << 16) |
215 ((_odd_parity_bit(opcode) << 23)));
216 }
217
218 /*
219 * code to find stuff in /proc/device-tree:
220 *
221 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
222 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
223 * we would be competing with whatever else is using the GPU.
224 */
225
226 static void *
227 readdt(const char *node)
228 {
229 char *path;
230 void *buf;
231 int sz;
232
233 asprintf(&path, "%s/%s", dev.dtnode, node);
234 buf = readfile(path, &sz);
235 free(path);
236
237 return buf;
238 }
239
240 static int
241 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
242 {
243 const char *fname = fpath + ftwbuf->base;
244 int sz;
245
246 if (strcmp(fname, "qcom,gpu-freq") == 0) {
247 uint32_t *buf = readfile(fpath, &sz);
248 uint32_t freq = ntohl(buf[0]);
249 free(buf);
250 dev.max_freq = MAX2(dev.max_freq, freq);
251 dev.min_freq = MIN2(dev.min_freq, freq);
252 }
253
254 return 0;
255 }
256
257 static void
258 find_freqs(void)
259 {
260 char *path;
261 int ret;
262
263 dev.min_freq = ~0;
264 dev.max_freq = 0;
265
266 asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
267
268 ret = nftw(path, find_freqs_fn, 64, 0);
269 if (ret < 0)
270 err(1, "could not find power levels");
271
272 free(path);
273 }
274
275 static int
276 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
277 {
278 const char *fname = fpath + ftwbuf->base;
279 int sz;
280
281 if (strcmp(fname, "compatible") == 0) {
282 char *str = readfile(fpath, &sz);
283 if ((strcmp(str, "qcom,adreno-3xx") == 0) ||
284 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
285 (strstr(str, "amd,imageon") == str) ||
286 (strstr(str, "qcom,adreno") == str)) {
287 int dlen = strlen(fpath) - strlen("/compatible");
288 dev.dtnode = malloc(dlen + 1);
289 memcpy(dev.dtnode, fpath, dlen);
290 printf("found dt node: %s\n", dev.dtnode);
291
292 char buf[dlen + sizeof("/../#address-cells") + 1];
293 int sz, *val;
294
295 sprintf(buf, "%s/../#address-cells", dev.dtnode);
296 val = readfile(buf, &sz);
297 dev.address_cells = ntohl(*val);
298 free(val);
299
300 sprintf(buf, "%s/../#size-cells", dev.dtnode);
301 val = readfile(buf, &sz);
302 dev.size_cells = ntohl(*val);
303 free(val);
304
305 printf("#address-cells=%d, #size-cells=%d\n",
306 dev.address_cells, dev.size_cells);
307 }
308 free(str);
309 }
310 if (dev.dtnode) {
311 /* we found it! */
312 return 1;
313 }
314 return 0;
315 }
316
317 static void
318 find_device(void)
319 {
320 int ret, fd;
321 uint32_t *buf, *b;
322
323 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
324 if (ret < 0)
325 err(1, "could not find adreno gpu");
326
327 if (!dev.dtnode)
328 errx(1, "could not find qcom,adreno-3xx node");
329
330 fd = open("/dev/dri/card0", O_RDWR);
331 if (fd < 0)
332 err(1, "could not open drm device");
333
334 dev.dev = fd_device_new(fd);
335 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
336
337 uint64_t val;
338 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
339 if (ret) {
340 err(1, "could not get gpu-id");
341 }
342 dev.chipid = val;
343
344 #define CHIP_FMT "d%d%d.%d"
345 #define CHIP_ARGS(chipid) \
346 ((chipid) >> 24) & 0xff, \
347 ((chipid) >> 16) & 0xff, \
348 ((chipid) >> 8) & 0xff, \
349 ((chipid) >> 0) & 0xff
350 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
351
352 b = buf = readdt("reg");
353
354 if (dev.address_cells == 2) {
355 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
356 dev.base = (((uint64_t)u[0]) << 32) | u[1];
357 buf += 2;
358 } else {
359 dev.base = ntohl(buf[0]);
360 buf += 1;
361 }
362
363 if (dev.size_cells == 2) {
364 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
365 dev.size = (((uint64_t)u[0]) << 32) | u[1];
366 buf += 2;
367 } else {
368 dev.size = ntohl(buf[0]);
369 buf += 1;
370 }
371
372 free(b);
373
374 printf("i/o region at %08"PRIu64" (size: %x)\n", dev.base, dev.size);
375
376 /* try MAX_FREQ first as that will work regardless of old dt
377 * dt bindings vs upstream bindings:
378 */
379 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
380 if (ret) {
381 printf("falling back to parsing DT bindings for freq\n");
382 find_freqs();
383 } else {
384 dev.min_freq = 0;
385 dev.max_freq = val;
386 }
387
388 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
389
390 fd = open("/dev/mem", O_RDWR | O_SYNC);
391 if (fd < 0)
392 err(1, "could not open /dev/mem");
393
394 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
395 if (!dev.io)
396 err(1, "could not map device");
397 }
398
399 /*
400 * perf-monitor
401 */
402
403 static void
404 flush_ring(void)
405 {
406 int ret;
407
408 if (!dev.submit)
409 return;
410
411 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
412 if (ret)
413 errx(1, "submit failed: %d", ret);
414 fd_ringbuffer_del(dev.ring);
415 fd_submit_del(dev.submit);
416
417 dev.ring = NULL;
418 dev.submit = NULL;
419 }
420
421 static void
422 select_counter(struct counter_group *group, int ctr, int n)
423 {
424 assert(n < group->group->num_countables);
425 assert(ctr < group->group->num_counters);
426
427 group->label[ctr] = group->group->countables[n].name;
428 group->counter[ctr].select_val = n;
429
430 if (!dev.submit) {
431 dev.submit = fd_submit_new(dev.pipe);
432 dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
433 FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
434 }
435
436 /* bashing select register directly while gpu is active will end
437 * in tears.. so we need to write it via the ring:
438 *
439 * TODO it would help startup time, if gpu is loaded, to batch
440 * all the initial writes and do a single flush.. although that
441 * makes things more complicated for capturing inital sample value
442 */
443 struct fd_ringbuffer *ring = dev.ring;
444 switch (dev.chipid >> 24) {
445 case 2:
446 case 3:
447 case 4:
448 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
449 OUT_RING(ring, 0x00000000);
450
451 if (group->group->counters[ctr].enable) {
452 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
453 OUT_RING(ring, 0);
454 }
455
456 if (group->group->counters[ctr].clear) {
457 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
458 OUT_RING(ring, 1);
459
460 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
461 OUT_RING(ring, 0);
462 }
463
464 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
465 OUT_RING(ring, n);
466
467 if (group->group->counters[ctr].enable) {
468 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
469 OUT_RING(ring, 1);
470 }
471
472 break;
473 case 5:
474 case 6:
475 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
476
477 if (group->group->counters[ctr].enable) {
478 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
479 OUT_RING(ring, 0);
480 }
481
482 if (group->group->counters[ctr].clear) {
483 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
484 OUT_RING(ring, 1);
485
486 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
487 OUT_RING(ring, 0);
488 }
489
490 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
491 OUT_RING(ring, n);
492
493 if (group->group->counters[ctr].enable) {
494 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
495 OUT_RING(ring, 1);
496 }
497
498 break;
499 }
500
501 group->last[ctr] = *group->counter[ctr].val_lo;
502 group->stime[ctr] = gettime_us();
503 }
504
505 static void
506 resample_counter(struct counter_group *group, int ctr)
507 {
508 uint32_t val = *group->counter[ctr].val_lo;
509 uint32_t t = gettime_us();
510 uint32_t dt = delta(group->stime[ctr], t);
511 uint32_t dval = delta(group->last[ctr], val);
512 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
513 group->last[ctr] = val;
514 group->stime[ctr] = t;
515 }
516
517 #define REFRESH_MS 500
518
519 /* sample all the counters: */
520 static void
521 resample(void)
522 {
523 static uint64_t last_time;
524 uint64_t current_time = gettime_us();
525
526 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
527 return;
528
529 last_time = current_time;
530
531 for (unsigned i = 0; i < dev.ngroups; i++) {
532 struct counter_group *group = &dev.groups[i];
533 for (unsigned j = 0; j < group->group->num_counters; j++) {
534 resample_counter(group, j);
535 }
536 }
537 }
538
539 /*
540 * The UI
541 */
542
/* curses color-pair ids: */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER       2
#define COLOR_INVERSE      3

/* size of the main window, refreshed on every redraw(): */
static int w;
static int h;
/* width of the label column (longest countable name + 1): */
static int ctr_width;
/* total number of rows (headers + visible counters): */
static int max_rows;
/* row of the currently selected counter: */
static int current_cntr = 1;
550
551 static void
552 redraw_footer(WINDOW *win)
553 {
554 char *footer;
555 int n;
556
557 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
558 CHIP_ARGS(dev.chipid),
559 ((float)dev.min_freq) / 1000000.0,
560 ((float)dev.max_freq) / 1000000.0);
561
562 wmove(win, h - 1, 0);
563 wattron(win, COLOR_PAIR(COLOR_FOOTER));
564 waddstr(win, footer);
565 whline(win, ' ', w - n);
566 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
567
568 free(footer);
569 }
570
571 static void
572 redraw_group_header(WINDOW *win, int row, const char *name)
573 {
574 wmove(win, row, 0);
575 wattron(win, A_BOLD);
576 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
577 waddstr(win, name);
578 whline(win, ' ', w - strlen(name));
579 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
580 wattroff(win, A_BOLD);
581 }
582
583 static void
584 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
585 {
586 int n = strlen(name);
587 assert(n <= ctr_width);
588 wmove(win, row, 0);
589 whline(win, ' ', ctr_width - n);
590 wmove(win, row, ctr_width - n);
591 if (selected)
592 wattron(win, COLOR_PAIR(COLOR_INVERSE));
593 waddstr(win, name);
594 if (selected)
595 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
596 waddstr(win, ": ");
597 }
598
599 static void
600 redraw_counter_value_cycles(WINDOW *win, float val)
601 {
602 char *str;
603 int x = getcurx(win);
604 int valwidth = w - x;
605 int barwidth, n;
606
607 /* convert to fraction of max freq: */
608 val = val / (float)dev.max_freq;
609
610 /* figure out percentage-bar width: */
611 barwidth = (int)(val * valwidth);
612
613 /* sometimes things go over 100%.. idk why, could be
614 * things running faster than base clock, or counter
615 * summing up cycles in multiple cores?
616 */
617 barwidth = MIN2(barwidth, valwidth - 1);
618
619 n = asprintf(&str, "%.2f%%", 100.0 * val);
620 wattron(win, COLOR_PAIR(COLOR_INVERSE));
621 waddnstr(win, str, barwidth);
622 if (barwidth > n) {
623 whline(win, ' ', barwidth - n);
624 wmove(win, getcury(win), x + barwidth);
625 }
626 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
627 if (barwidth < n)
628 waddstr(win, str + barwidth);
629 whline(win, ' ', w - getcurx(win));
630
631 free(str);
632 }
633
634 static void
635 redraw_counter_value_raw(WINDOW *win, float val)
636 {
637 char *str;
638 asprintf(&str, "%'.2f", val);
639 waddstr(win, str);
640 whline(win, ' ', w - getcurx(win));
641 free(str);
642 }
643
644 static void
645 redraw_counter(WINDOW *win, int row, struct counter_group *group,
646 int ctr, bool selected)
647 {
648 redraw_counter_label(win, row, group->label[ctr], selected);
649
650 /* quick hack, if the label has "CYCLE" in the name, it is
651 * probably a cycle counter ;-)
652 * Perhaps add more info in rnndb schema to know how to
653 * treat individual counters (ie. which are cycles, and
654 * for those we want to present as a percentage do we
655 * need to scale the result.. ie. is it running at some
656 * multiple or divisor of core clk, etc)
657 *
658 * TODO it would be much more clever to get this from xml
659 * Also.. in some cases I think we want to know how many
660 * units the counter is counting for, ie. if a320 has 2x
661 * shader as a306 we might need to scale the result..
662 */
663 if (strstr(group->label[ctr], "CYCLE") ||
664 strstr(group->label[ctr], "BUSY") ||
665 strstr(group->label[ctr], "IDLE"))
666 redraw_counter_value_cycles(win, group->current[ctr]);
667 else
668 redraw_counter_value_raw(win, group->current[ctr]);
669 }
670
671 static void
672 redraw(WINDOW *win)
673 {
674 static int scroll = 0;
675 int max, row = 0;
676
677 w = getmaxx(win);
678 h = getmaxy(win);
679
680 max = h - 3;
681
682 if ((current_cntr - scroll) > (max - 1)) {
683 scroll = current_cntr - (max - 1);
684 } else if ((current_cntr - 1) < scroll) {
685 scroll = current_cntr - 1;
686 }
687
688 for (unsigned i = 0; i < dev.ngroups; i++) {
689 struct counter_group *group = &dev.groups[i];
690 unsigned j = 0;
691
692 /* NOTE skip CP the first CP counter */
693 if (i == 0)
694 j++;
695
696 if (j < group->group->num_counters) {
697 if ((scroll <= row) && ((row - scroll) < max))
698 redraw_group_header(win, row - scroll, group->group->name);
699 row++;
700 }
701
702 for (; j < group->group->num_counters; j++) {
703 if ((scroll <= row) && ((row - scroll) < max))
704 redraw_counter(win, row - scroll, group, j, row == current_cntr);
705 row++;
706 }
707 }
708
709 /* convert back to physical (unscrolled) offset: */
710 row = max;
711
712 redraw_group_header(win, row, "Status");
713 row++;
714
715 /* Draw GPU freq row: */
716 redraw_counter_label(win, row, "Freq (MHz)", false);
717 redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
718 row++;
719
720 redraw_footer(win);
721
722 refresh();
723 }
724
725 static struct counter_group *
726 current_counter(int *ctr)
727 {
728 int n = 0;
729
730 for (unsigned i = 0; i < dev.ngroups; i++) {
731 struct counter_group *group = &dev.groups[i];
732 unsigned j = 0;
733
734 /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
735 if (i == 0)
736 j++;
737
738 /* account for group header: */
739 if (j < group->group->num_counters) {
740 /* cannot select group header.. return null to indicate this
741 * main_ui():
742 */
743 if (n == current_cntr)
744 return NULL;
745 n++;
746 }
747
748
749 for (; j < group->group->num_counters; j++) {
750 if (n == current_cntr) {
751 if (ctr)
752 *ctr = j;
753 return group;
754 }
755 n++;
756 }
757 }
758
759 assert(0);
760 return NULL;
761 }
762
763 static void
764 counter_dialog(void)
765 {
766 WINDOW *dialog;
767 struct counter_group *group;
768 int cnt, current = 0, scroll;
769
770 /* figure out dialog size: */
771 int dh = h/2;
772 int dw = ctr_width + 2;
773
774 group = current_counter(&cnt);
775
776 /* find currently selected idx (note there can be discontinuities
777 * so the selected value does not map 1:1 to current idx)
778 */
779 uint32_t selected = group->counter[cnt].select_val;
780 for (int i = 0; i < group->group->num_countables; i++) {
781 if (group->group->countables[i].selector == selected) {
782 current = i;
783 break;
784 }
785 }
786
787 /* scrolling offset, if dialog is too small for all the choices: */
788 scroll = 0;
789
790 dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
791 box(dialog, 0, 0);
792 wrefresh(dialog);
793 keypad(dialog, TRUE);
794
795 while (true) {
796 int max = MIN2(dh - 2, group->group->num_countables);
797 int selector = -1;
798
799 if ((current - scroll) >= (dh - 3)) {
800 scroll = current - (dh - 3);
801 } else if (current < scroll) {
802 scroll = current;
803 }
804
805 for (int i = 0; i < max; i++) {
806 int n = scroll + i;
807 wmove(dialog, i+1, 1);
808 if (n == current) {
809 assert (n < group->group->num_countables);
810 selector = group->group->countables[n].selector;
811 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
812 }
813 if (n < group->group->num_countables)
814 waddstr(dialog, group->group->countables[n].name);
815 whline(dialog, ' ', dw - getcurx(dialog) - 1);
816 if (n == current)
817 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
818 }
819
820 assert (selector >= 0);
821
822 switch (wgetch(dialog)) {
823 case KEY_UP:
824 current = MAX2(0, current - 1);
825 break;
826 case KEY_DOWN:
827 current = MIN2(group->group->num_countables - 1, current + 1);
828 break;
829 case KEY_LEFT:
830 case KEY_ENTER:
831 /* select new sampler */
832 select_counter(group, cnt, selector);
833 flush_ring();
834 config_save();
835 goto out;
836 case 'q':
837 goto out;
838 default:
839 /* ignore */
840 break;
841 }
842
843 resample();
844 }
845
846 out:
847 wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
848 delwin(dialog);
849 }
850
851 static void
852 scroll_cntr(int amount)
853 {
854 if (amount < 0) {
855 current_cntr = MAX2(1, current_cntr + amount);
856 if (current_counter(NULL) == NULL) {
857 current_cntr = MAX2(1, current_cntr - 1);
858 }
859 } else {
860 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
861 if (current_counter(NULL) == NULL)
862 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
863 }
864 }
865
866 static void
867 main_ui(void)
868 {
869 WINDOW *mainwin;
870 uint32_t last_time = gettime_us();
871
872 /* curses setup: */
873 mainwin = initscr();
874 if (!mainwin)
875 goto out;
876
877 cbreak();
878 wtimeout(mainwin, REFRESH_MS);
879 noecho();
880 keypad(mainwin, TRUE);
881 curs_set(0);
882 start_color();
883 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
884 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
885 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
886
887 while (true) {
888 switch (wgetch(mainwin)) {
889 case KEY_UP:
890 scroll_cntr(-1);
891 break;
892 case KEY_DOWN:
893 scroll_cntr(+1);
894 break;
895 case KEY_NPAGE: /* page-down */
896 /* TODO figure out # of rows visible? */
897 scroll_cntr(+15);
898 break;
899 case KEY_PPAGE: /* page-up */
900 /* TODO figure out # of rows visible? */
901 scroll_cntr(-15);
902 break;
903 case KEY_RIGHT:
904 counter_dialog();
905 break;
906 case 'q':
907 goto out;
908 break;
909 default:
910 /* ignore */
911 break;
912 }
913 resample();
914 redraw(mainwin);
915
916 /* restore the counters every 0.5s in case the GPU has suspended,
917 * in which case the current selected countables will have reset:
918 */
919 uint32_t t = gettime_us();
920 if (delta(last_time, t) > 500000) {
921 restore_counter_groups();
922 flush_ring();
923 last_time = t;
924 }
925 }
926
927 /* restore settings.. maybe we need an atexit()??*/
928 out:
929 delwin(mainwin);
930 endwin();
931 refresh();
932 }
933
934 static void
935 restore_counter_groups(void)
936 {
937 for (unsigned i = 0; i < dev.ngroups; i++) {
938 struct counter_group *group = &dev.groups[i];
939 unsigned j = 0;
940
941 /* NOTE skip CP the first CP counter */
942 if (i == 0)
943 j++;
944
945 for (; j < group->group->num_counters; j++) {
946 select_counter(group, j, group->counter[j].select_val);
947 }
948 }
949 }
950
951 static void
952 setup_counter_groups(const struct fd_perfcntr_group *groups)
953 {
954 for (unsigned i = 0; i < dev.ngroups; i++) {
955 struct counter_group *group = &dev.groups[i];
956
957 group->group = &groups[i];
958
959 max_rows += group->group->num_counters + 1;
960
961 /* the first CP counter is hidden: */
962 if (i == 0) {
963 max_rows--;
964 if (group->group->num_counters <= 1)
965 max_rows--;
966 }
967
968 for (unsigned j = 0; j < group->group->num_counters; j++) {
969 group->counter[j].counter = &group->group->counters[j];
970
971 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
972 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
973
974 group->counter[j].select_val = j;
975 }
976
977 for (unsigned j = 0; j < group->group->num_countables; j++) {
978 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
979 }
980 }
981 }
982
983 /*
984 * configuration / persistence
985 */
986
987 static config_t cfg;
988 static config_setting_t *setting;
989
990 static void
991 config_save(void)
992 {
993 for (unsigned i = 0; i < dev.ngroups; i++) {
994 struct counter_group *group = &dev.groups[i];
995 unsigned j = 0;
996
997 /* NOTE skip CP the first CP counter */
998 if (i == 0)
999 j++;
1000
1001 config_setting_t *sect =
1002 config_setting_get_member(setting, group->group->name);
1003
1004 for (; j < group->group->num_counters; j++) {
1005 char name[] = "counter0000";
1006 sprintf(name, "counter%d", j);
1007 config_setting_t *s =
1008 config_setting_lookup(sect, name);
1009 config_setting_set_int(s, group->counter[j].select_val);
1010 }
1011 }
1012
1013 config_write_file(&cfg, "fdperf.cfg");
1014 }
1015
1016 static void
1017 config_restore(void)
1018 {
1019 char *str;
1020
1021 config_init(&cfg);
1022
1023 /* Read the file. If there is an error, report it and exit. */
1024 if(!config_read_file(&cfg, "fdperf.cfg")) {
1025 warn("could not restore settings");
1026 }
1027
1028 config_setting_t *root = config_root_setting(&cfg);
1029
1030 /* per device settings: */
1031 asprintf(&str, "a%dxx", dev.chipid >> 24);
1032 setting = config_setting_get_member(root, str);
1033 if (!setting)
1034 setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
1035 free(str);
1036
1037 for (unsigned i = 0; i < dev.ngroups; i++) {
1038 struct counter_group *group = &dev.groups[i];
1039 unsigned j = 0;
1040
1041 /* NOTE skip CP the first CP counter */
1042 if (i == 0)
1043 j++;
1044
1045 config_setting_t *sect =
1046 config_setting_get_member(setting, group->group->name);
1047
1048 if (!sect) {
1049 sect = config_setting_add(setting, group->group->name,
1050 CONFIG_TYPE_GROUP);
1051 }
1052
1053 for (; j < group->group->num_counters; j++) {
1054 char name[] = "counter0000";
1055 sprintf(name, "counter%d", j);
1056 config_setting_t *s = config_setting_lookup(sect, name);
1057 if (!s) {
1058 config_setting_add(sect, name, CONFIG_TYPE_INT);
1059 continue;
1060 }
1061 select_counter(group, j, config_setting_get_int(s));
1062 }
1063 }
1064 }
1065
1066 /*
1067 * main
1068 */
1069
1070 int
1071 main(int argc, char **argv)
1072 {
1073 find_device();
1074
1075 const struct fd_perfcntr_group *groups;
1076 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1077 if (!groups) {
1078 errx(1, "no perfcntr support");
1079 }
1080
1081 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1082
1083 setup_counter_groups(groups);
1084 restore_counter_groups();
1085 config_restore();
1086 flush_ring();
1087
1088 main_ui();
1089
1090 return 0;
1091 }