freedreno: Deduplicate ringbuffer macros with computerator/fdperf
[mesa.git] / src / freedreno / perfcntrs / fdperf.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <arpa/inet.h>
26 #include <assert.h>
27 #include <ctype.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <ftw.h>
31 #include <locale.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdint.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/mman.h>
39 #include <time.h>
40 #include <unistd.h>
41 #include <curses.h>
42 #include <libconfig.h>
43 #include <inttypes.h>
44 #include <xf86drm.h>
45
46 #include "drm/freedreno_drmif.h"
47 #include "drm/freedreno_ringbuffer.h"
48
49 #include "freedreno_perfcntr.h"
50
/* Capacity of the per-counter arrays in struct counter_group. */
#define MAX_CNTR_PER_GROUP   24

/* NOTE: the first counter group must always be CP, because a CP counter
 * is used unconditionally to measure the gpu freq.
 */

/* Runtime sampling state for one group of performance counters. */
struct counter_group {
	/* static description of the group this state belongs to: */
	const struct fd_perfcntr_group *group;

	/* per-counter bookkeeping: */
	struct {
		/* description of the counter (registers, etc): */
		const struct fd_perfcntr_counter *counter;
		/* value most recently written to the counter's select register: */
		uint16_t select_val;
		/* pointers to the hi/lo halves of the counter value: */
		volatile uint32_t *val_hi;
		volatile uint32_t *val_lo;
	} counter[MAX_CNTR_PER_GROUP];

	/* time (in us) at which each counter was last sampled: */
	uint32_t stime[MAX_CNTR_PER_GROUP];

	/* Value read at the last sample.  Only the low 32b are tracked, so
	 * we do not have to worry about not being able to read the hi and
	 * lo registers atomically:
	 */
	uint32_t last[MAX_CNTR_PER_GROUP];

	/* Current rate, ie. the increase of the counter over the last
	 * sampling period divided by the length of that period:
	 */
	float current[MAX_CNTR_PER_GROUP];

	/* Name of the countable currently selected on each counter (for UI): */
	const char *label[MAX_CNTR_PER_GROUP];
};
81
/* Global state describing the GPU being monitored. */
static struct {
	/* path of the GPU's device-tree node: */
	char *dtnode;
	/* #address-cells / #size-cells read from the node's parent: */
	int address_cells;
	int size_cells;
	/* register region, as parsed from the "reg" property: */
	uint64_t base;
	uint32_t size;
	/* mapping of the register region: */
	void *io;
	uint32_t chipid;
	uint32_t min_freq;
	uint32_t max_freq;

	/* per-generation table of counters: */
	unsigned ngroups;
	struct counter_group *groups;

	/* drm device (for writing select regs via ring): */
	struct fd_device *dev;
	struct fd_pipe *pipe;
	struct fd_submit *submit;
	struct fd_ringbuffer *ring;
} dev;
100
101 static void config_save(void);
102 static void config_restore(void);
103 static void restore_counter_groups(void);
104
105 /*
106 * helpers
107 */
108
#define CHUNKSIZE 32

/*
 * Read the entire contents of the file at 'path' into a freshly allocated
 * buffer.
 *
 * On success the buffer is returned (the caller must free() it) and *sz is
 * set to the number of bytes read.  The buffer is NOT NUL-terminated.  On
 * any failure (open, read or allocation) NULL is returned and *sz is 0.
 */
static void *
readfile(const char *path, int *sz)
{
	char *buf = NULL;
	int fd, n = 0;

	*sz = 0;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return NULL;

	while (1) {
		/* grow via a temporary so the old buffer is not lost on failure: */
		char *tmp = realloc(buf, n + CHUNKSIZE);
		if (!tmp)
			goto fail;
		buf = tmp;

		ssize_t ret = read(fd, buf + n, CHUNKSIZE);
		if (ret < 0)
			goto fail;

		/* only a zero-length read means EOF; a short read does not: */
		if (ret == 0)
			break;

		n += ret;
	}

	close(fd);
	*sz = n;
	return buf;

fail:
	free(buf);
	close(fd);
	return NULL;
}
139
140 static uint32_t
141 gettime_us(void)
142 {
143 struct timespec ts;
144 clock_gettime(CLOCK_MONOTONIC, &ts);
145 return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
146 }
147
/*
 * Return how far a 32b wrapping counter advanced going from 'a' to 'b'.
 *
 * Unsigned subtraction is modulo 2^32, so this is correct across a single
 * rollover as well (eg. a=0xffffffff, b=0 yields 1).
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
	return b - a;
}
157
158 /*
159 * code to find stuff in /proc/device-tree:
160 *
161 * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
162 * /dev/mem and /proc/device-tree crawling. OTOH when the GPU is heavily loaded
163 * we would be competing with whatever else is using the GPU.
164 */
165
166 static void *
167 readdt(const char *node)
168 {
169 char *path;
170 void *buf;
171 int sz;
172
173 (void) asprintf(&path, "%s/%s", dev.dtnode, node);
174 buf = readfile(path, &sz);
175 free(path);
176
177 return buf;
178 }
179
180 static int
181 find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
182 {
183 const char *fname = fpath + ftwbuf->base;
184 int sz;
185
186 if (strcmp(fname, "qcom,gpu-freq") == 0) {
187 uint32_t *buf = readfile(fpath, &sz);
188 uint32_t freq = ntohl(buf[0]);
189 free(buf);
190 dev.max_freq = MAX2(dev.max_freq, freq);
191 dev.min_freq = MIN2(dev.min_freq, freq);
192 }
193
194 return 0;
195 }
196
197 static void
198 find_freqs(void)
199 {
200 char *path;
201 int ret;
202
203 dev.min_freq = ~0;
204 dev.max_freq = 0;
205
206 (void) asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels");
207
208 ret = nftw(path, find_freqs_fn, 64, 0);
209 if (ret < 0)
210 err(1, "could not find power levels");
211
212 free(path);
213 }
214
215 static int
216 find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
217 {
218 const char *fname = fpath + ftwbuf->base;
219 int sz;
220
221 if (strcmp(fname, "compatible") == 0) {
222 char *str = readfile(fpath, &sz);
223 if ((strcmp(str, "qcom,adreno-3xx") == 0) ||
224 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
225 (strstr(str, "amd,imageon") == str) ||
226 (strstr(str, "qcom,adreno") == str)) {
227 int dlen = strlen(fpath) - strlen("/compatible");
228 dev.dtnode = malloc(dlen + 1);
229 memcpy(dev.dtnode, fpath, dlen);
230 printf("found dt node: %s\n", dev.dtnode);
231
232 char buf[dlen + sizeof("/../#address-cells") + 1];
233 int sz, *val;
234
235 sprintf(buf, "%s/../#address-cells", dev.dtnode);
236 val = readfile(buf, &sz);
237 dev.address_cells = ntohl(*val);
238 free(val);
239
240 sprintf(buf, "%s/../#size-cells", dev.dtnode);
241 val = readfile(buf, &sz);
242 dev.size_cells = ntohl(*val);
243 free(val);
244
245 printf("#address-cells=%d, #size-cells=%d\n",
246 dev.address_cells, dev.size_cells);
247 }
248 free(str);
249 }
250 if (dev.dtnode) {
251 /* we found it! */
252 return 1;
253 }
254 return 0;
255 }
256
257 static void
258 find_device(void)
259 {
260 int ret, fd;
261 uint32_t *buf, *b;
262
263 ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
264 if (ret < 0)
265 err(1, "could not find adreno gpu");
266
267 if (!dev.dtnode)
268 errx(1, "could not find qcom,adreno-3xx node");
269
270 fd = drmOpen("msm", NULL);
271 if (fd < 0)
272 err(1, "could not open drm device");
273
274 dev.dev = fd_device_new(fd);
275 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
276
277 uint64_t val;
278 ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
279 if (ret) {
280 err(1, "could not get gpu-id");
281 }
282 dev.chipid = val;
283
284 #define CHIP_FMT "d%d%d.%d"
285 #define CHIP_ARGS(chipid) \
286 ((chipid) >> 24) & 0xff, \
287 ((chipid) >> 16) & 0xff, \
288 ((chipid) >> 8) & 0xff, \
289 ((chipid) >> 0) & 0xff
290 printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
291
292 b = buf = readdt("reg");
293
294 if (dev.address_cells == 2) {
295 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
296 dev.base = (((uint64_t)u[0]) << 32) | u[1];
297 buf += 2;
298 } else {
299 dev.base = ntohl(buf[0]);
300 buf += 1;
301 }
302
303 if (dev.size_cells == 2) {
304 uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
305 dev.size = (((uint64_t)u[0]) << 32) | u[1];
306 buf += 2;
307 } else {
308 dev.size = ntohl(buf[0]);
309 buf += 1;
310 }
311
312 free(b);
313
314 printf("i/o region at %08"PRIu64" (size: %x)\n", dev.base, dev.size);
315
316 /* try MAX_FREQ first as that will work regardless of old dt
317 * dt bindings vs upstream bindings:
318 */
319 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
320 if (ret) {
321 printf("falling back to parsing DT bindings for freq\n");
322 find_freqs();
323 } else {
324 dev.min_freq = 0;
325 dev.max_freq = val;
326 }
327
328 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
329
330 fd = open("/dev/mem", O_RDWR | O_SYNC);
331 if (fd < 0)
332 err(1, "could not open /dev/mem");
333
334 dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
335 if (!dev.io) {
336 close(fd);
337 err(1, "could not map device");
338 }
339 }
340
341 /*
342 * perf-monitor
343 */
344
345 static void
346 flush_ring(void)
347 {
348 int ret;
349
350 if (!dev.submit)
351 return;
352
353 ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
354 if (ret)
355 errx(1, "submit failed: %d", ret);
356 fd_ringbuffer_del(dev.ring);
357 fd_submit_del(dev.submit);
358
359 dev.ring = NULL;
360 dev.submit = NULL;
361 }
362
363 static void
364 select_counter(struct counter_group *group, int ctr, int n)
365 {
366 assert(n < group->group->num_countables);
367 assert(ctr < group->group->num_counters);
368
369 group->label[ctr] = group->group->countables[n].name;
370 group->counter[ctr].select_val = n;
371
372 if (!dev.submit) {
373 dev.submit = fd_submit_new(dev.pipe);
374 dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
375 FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
376 }
377
378 /* bashing select register directly while gpu is active will end
379 * in tears.. so we need to write it via the ring:
380 *
381 * TODO it would help startup time, if gpu is loaded, to batch
382 * all the initial writes and do a single flush.. although that
383 * makes things more complicated for capturing inital sample value
384 */
385 struct fd_ringbuffer *ring = dev.ring;
386 switch (dev.chipid >> 24) {
387 case 2:
388 case 3:
389 case 4:
390 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
391 OUT_RING(ring, 0x00000000);
392
393 if (group->group->counters[ctr].enable) {
394 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
395 OUT_RING(ring, 0);
396 }
397
398 if (group->group->counters[ctr].clear) {
399 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
400 OUT_RING(ring, 1);
401
402 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
403 OUT_RING(ring, 0);
404 }
405
406 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
407 OUT_RING(ring, n);
408
409 if (group->group->counters[ctr].enable) {
410 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
411 OUT_RING(ring, 1);
412 }
413
414 break;
415 case 5:
416 case 6:
417 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
418
419 if (group->group->counters[ctr].enable) {
420 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
421 OUT_RING(ring, 0);
422 }
423
424 if (group->group->counters[ctr].clear) {
425 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
426 OUT_RING(ring, 1);
427
428 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
429 OUT_RING(ring, 0);
430 }
431
432 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
433 OUT_RING(ring, n);
434
435 if (group->group->counters[ctr].enable) {
436 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
437 OUT_RING(ring, 1);
438 }
439
440 break;
441 }
442
443 group->last[ctr] = *group->counter[ctr].val_lo;
444 group->stime[ctr] = gettime_us();
445 }
446
447 static void
448 resample_counter(struct counter_group *group, int ctr)
449 {
450 uint32_t val = *group->counter[ctr].val_lo;
451 uint32_t t = gettime_us();
452 uint32_t dt = delta(group->stime[ctr], t);
453 uint32_t dval = delta(group->last[ctr], val);
454 group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
455 group->last[ctr] = val;
456 group->stime[ctr] = t;
457 }
458
459 #define REFRESH_MS 500
460
461 /* sample all the counters: */
462 static void
463 resample(void)
464 {
465 static uint64_t last_time;
466 uint64_t current_time = gettime_us();
467
468 if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
469 return;
470
471 last_time = current_time;
472
473 for (unsigned i = 0; i < dev.ngroups; i++) {
474 struct counter_group *group = &dev.groups[i];
475 for (unsigned j = 0; j < group->group->num_counters; j++) {
476 resample_counter(group, j);
477 }
478 }
479 }
480
481 /*
482 * The UI
483 */
484
/* curses color-pair ids: */
#define COLOR_GROUP_HEADER   1
#define COLOR_FOOTER         2
#define COLOR_INVERSE        3

/* window size, refreshed on every redraw(): */
static int w, h;
/* width of the counter-name column: */
static int ctr_width;
/* total number of rows, and the currently selected row: */
static int max_rows, current_cntr = 1;
492
493 static void
494 redraw_footer(WINDOW *win)
495 {
496 char *footer;
497 int n;
498
499 n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
500 CHIP_ARGS(dev.chipid),
501 ((float)dev.min_freq) / 1000000.0,
502 ((float)dev.max_freq) / 1000000.0);
503
504 wmove(win, h - 1, 0);
505 wattron(win, COLOR_PAIR(COLOR_FOOTER));
506 waddstr(win, footer);
507 whline(win, ' ', w - n);
508 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
509
510 free(footer);
511 }
512
513 static void
514 redraw_group_header(WINDOW *win, int row, const char *name)
515 {
516 wmove(win, row, 0);
517 wattron(win, A_BOLD);
518 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
519 waddstr(win, name);
520 whline(win, ' ', w - strlen(name));
521 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
522 wattroff(win, A_BOLD);
523 }
524
525 static void
526 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
527 {
528 int n = strlen(name);
529 assert(n <= ctr_width);
530 wmove(win, row, 0);
531 whline(win, ' ', ctr_width - n);
532 wmove(win, row, ctr_width - n);
533 if (selected)
534 wattron(win, COLOR_PAIR(COLOR_INVERSE));
535 waddstr(win, name);
536 if (selected)
537 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
538 waddstr(win, ": ");
539 }
540
541 static void
542 redraw_counter_value_cycles(WINDOW *win, float val)
543 {
544 char *str;
545 int x = getcurx(win);
546 int valwidth = w - x;
547 int barwidth, n;
548
549 /* convert to fraction of max freq: */
550 val = val / (float)dev.max_freq;
551
552 /* figure out percentage-bar width: */
553 barwidth = (int)(val * valwidth);
554
555 /* sometimes things go over 100%.. idk why, could be
556 * things running faster than base clock, or counter
557 * summing up cycles in multiple cores?
558 */
559 barwidth = MIN2(barwidth, valwidth - 1);
560
561 n = asprintf(&str, "%.2f%%", 100.0 * val);
562 wattron(win, COLOR_PAIR(COLOR_INVERSE));
563 waddnstr(win, str, barwidth);
564 if (barwidth > n) {
565 whline(win, ' ', barwidth - n);
566 wmove(win, getcury(win), x + barwidth);
567 }
568 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
569 if (barwidth < n)
570 waddstr(win, str + barwidth);
571 whline(win, ' ', w - getcurx(win));
572
573 free(str);
574 }
575
576 static void
577 redraw_counter_value_raw(WINDOW *win, float val)
578 {
579 char *str;
580 (void) asprintf(&str, "%'.2f", val);
581 waddstr(win, str);
582 whline(win, ' ', w - getcurx(win));
583 free(str);
584 }
585
586 static void
587 redraw_counter(WINDOW *win, int row, struct counter_group *group,
588 int ctr, bool selected)
589 {
590 redraw_counter_label(win, row, group->label[ctr], selected);
591
592 /* quick hack, if the label has "CYCLE" in the name, it is
593 * probably a cycle counter ;-)
594 * Perhaps add more info in rnndb schema to know how to
595 * treat individual counters (ie. which are cycles, and
596 * for those we want to present as a percentage do we
597 * need to scale the result.. ie. is it running at some
598 * multiple or divisor of core clk, etc)
599 *
600 * TODO it would be much more clever to get this from xml
601 * Also.. in some cases I think we want to know how many
602 * units the counter is counting for, ie. if a320 has 2x
603 * shader as a306 we might need to scale the result..
604 */
605 if (strstr(group->label[ctr], "CYCLE") ||
606 strstr(group->label[ctr], "BUSY") ||
607 strstr(group->label[ctr], "IDLE"))
608 redraw_counter_value_cycles(win, group->current[ctr]);
609 else
610 redraw_counter_value_raw(win, group->current[ctr]);
611 }
612
613 static void
614 redraw(WINDOW *win)
615 {
616 static int scroll = 0;
617 int max, row = 0;
618
619 w = getmaxx(win);
620 h = getmaxy(win);
621
622 max = h - 3;
623
624 if ((current_cntr - scroll) > (max - 1)) {
625 scroll = current_cntr - (max - 1);
626 } else if ((current_cntr - 1) < scroll) {
627 scroll = current_cntr - 1;
628 }
629
630 for (unsigned i = 0; i < dev.ngroups; i++) {
631 struct counter_group *group = &dev.groups[i];
632 unsigned j = 0;
633
634 /* NOTE skip CP the first CP counter */
635 if (i == 0)
636 j++;
637
638 if (j < group->group->num_counters) {
639 if ((scroll <= row) && ((row - scroll) < max))
640 redraw_group_header(win, row - scroll, group->group->name);
641 row++;
642 }
643
644 for (; j < group->group->num_counters; j++) {
645 if ((scroll <= row) && ((row - scroll) < max))
646 redraw_counter(win, row - scroll, group, j, row == current_cntr);
647 row++;
648 }
649 }
650
651 /* convert back to physical (unscrolled) offset: */
652 row = max;
653
654 redraw_group_header(win, row, "Status");
655 row++;
656
657 /* Draw GPU freq row: */
658 redraw_counter_label(win, row, "Freq (MHz)", false);
659 redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
660 row++;
661
662 redraw_footer(win);
663
664 refresh();
665 }
666
667 static struct counter_group *
668 current_counter(int *ctr)
669 {
670 int n = 0;
671
672 for (unsigned i = 0; i < dev.ngroups; i++) {
673 struct counter_group *group = &dev.groups[i];
674 unsigned j = 0;
675
676 /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
677 if (i == 0)
678 j++;
679
680 /* account for group header: */
681 if (j < group->group->num_counters) {
682 /* cannot select group header.. return null to indicate this
683 * main_ui():
684 */
685 if (n == current_cntr)
686 return NULL;
687 n++;
688 }
689
690
691 for (; j < group->group->num_counters; j++) {
692 if (n == current_cntr) {
693 if (ctr)
694 *ctr = j;
695 return group;
696 }
697 n++;
698 }
699 }
700
701 assert(0);
702 return NULL;
703 }
704
705 static void
706 counter_dialog(void)
707 {
708 WINDOW *dialog;
709 struct counter_group *group;
710 int cnt, current = 0, scroll;
711
712 /* figure out dialog size: */
713 int dh = h/2;
714 int dw = ctr_width + 2;
715
716 group = current_counter(&cnt);
717
718 /* find currently selected idx (note there can be discontinuities
719 * so the selected value does not map 1:1 to current idx)
720 */
721 uint32_t selected = group->counter[cnt].select_val;
722 for (int i = 0; i < group->group->num_countables; i++) {
723 if (group->group->countables[i].selector == selected) {
724 current = i;
725 break;
726 }
727 }
728
729 /* scrolling offset, if dialog is too small for all the choices: */
730 scroll = 0;
731
732 dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
733 box(dialog, 0, 0);
734 wrefresh(dialog);
735 keypad(dialog, TRUE);
736
737 while (true) {
738 int max = MIN2(dh - 2, group->group->num_countables);
739 int selector = -1;
740
741 if ((current - scroll) >= (dh - 3)) {
742 scroll = current - (dh - 3);
743 } else if (current < scroll) {
744 scroll = current;
745 }
746
747 for (int i = 0; i < max; i++) {
748 int n = scroll + i;
749 wmove(dialog, i+1, 1);
750 if (n == current) {
751 assert (n < group->group->num_countables);
752 selector = group->group->countables[n].selector;
753 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
754 }
755 if (n < group->group->num_countables)
756 waddstr(dialog, group->group->countables[n].name);
757 whline(dialog, ' ', dw - getcurx(dialog) - 1);
758 if (n == current)
759 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
760 }
761
762 assert (selector >= 0);
763
764 switch (wgetch(dialog)) {
765 case KEY_UP:
766 current = MAX2(0, current - 1);
767 break;
768 case KEY_DOWN:
769 current = MIN2(group->group->num_countables - 1, current + 1);
770 break;
771 case KEY_LEFT:
772 case KEY_ENTER:
773 /* select new sampler */
774 select_counter(group, cnt, selector);
775 flush_ring();
776 config_save();
777 goto out;
778 case 'q':
779 goto out;
780 default:
781 /* ignore */
782 break;
783 }
784
785 resample();
786 }
787
788 out:
789 wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
790 delwin(dialog);
791 }
792
793 static void
794 scroll_cntr(int amount)
795 {
796 if (amount < 0) {
797 current_cntr = MAX2(1, current_cntr + amount);
798 if (current_counter(NULL) == NULL) {
799 current_cntr = MAX2(1, current_cntr - 1);
800 }
801 } else {
802 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
803 if (current_counter(NULL) == NULL)
804 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
805 }
806 }
807
808 static void
809 main_ui(void)
810 {
811 WINDOW *mainwin;
812 uint32_t last_time = gettime_us();
813
814 /* curses setup: */
815 mainwin = initscr();
816 if (!mainwin)
817 goto out;
818
819 cbreak();
820 wtimeout(mainwin, REFRESH_MS);
821 noecho();
822 keypad(mainwin, TRUE);
823 curs_set(0);
824 start_color();
825 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
826 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
827 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
828
829 while (true) {
830 switch (wgetch(mainwin)) {
831 case KEY_UP:
832 scroll_cntr(-1);
833 break;
834 case KEY_DOWN:
835 scroll_cntr(+1);
836 break;
837 case KEY_NPAGE: /* page-down */
838 /* TODO figure out # of rows visible? */
839 scroll_cntr(+15);
840 break;
841 case KEY_PPAGE: /* page-up */
842 /* TODO figure out # of rows visible? */
843 scroll_cntr(-15);
844 break;
845 case KEY_RIGHT:
846 counter_dialog();
847 break;
848 case 'q':
849 goto out;
850 break;
851 default:
852 /* ignore */
853 break;
854 }
855 resample();
856 redraw(mainwin);
857
858 /* restore the counters every 0.5s in case the GPU has suspended,
859 * in which case the current selected countables will have reset:
860 */
861 uint32_t t = gettime_us();
862 if (delta(last_time, t) > 500000) {
863 restore_counter_groups();
864 flush_ring();
865 last_time = t;
866 }
867 }
868
869 /* restore settings.. maybe we need an atexit()??*/
870 out:
871 delwin(mainwin);
872 endwin();
873 refresh();
874 }
875
876 static void
877 restore_counter_groups(void)
878 {
879 for (unsigned i = 0; i < dev.ngroups; i++) {
880 struct counter_group *group = &dev.groups[i];
881 unsigned j = 0;
882
883 /* NOTE skip CP the first CP counter */
884 if (i == 0)
885 j++;
886
887 for (; j < group->group->num_counters; j++) {
888 select_counter(group, j, group->counter[j].select_val);
889 }
890 }
891 }
892
893 static void
894 setup_counter_groups(const struct fd_perfcntr_group *groups)
895 {
896 for (unsigned i = 0; i < dev.ngroups; i++) {
897 struct counter_group *group = &dev.groups[i];
898
899 group->group = &groups[i];
900
901 max_rows += group->group->num_counters + 1;
902
903 /* the first CP counter is hidden: */
904 if (i == 0) {
905 max_rows--;
906 if (group->group->num_counters <= 1)
907 max_rows--;
908 }
909
910 for (unsigned j = 0; j < group->group->num_counters; j++) {
911 group->counter[j].counter = &group->group->counters[j];
912
913 group->counter[j].val_hi = dev.io + (group->counter[j].counter->counter_reg_hi * 4);
914 group->counter[j].val_lo = dev.io + (group->counter[j].counter->counter_reg_lo * 4);
915
916 group->counter[j].select_val = j;
917 }
918
919 for (unsigned j = 0; j < group->group->num_countables; j++) {
920 ctr_width = MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
921 }
922 }
923 }
924
925 /*
926 * configuration / persistence
927 */
928
929 static config_t cfg;
930 static config_setting_t *setting;
931
932 static void
933 config_save(void)
934 {
935 for (unsigned i = 0; i < dev.ngroups; i++) {
936 struct counter_group *group = &dev.groups[i];
937 unsigned j = 0;
938
939 /* NOTE skip CP the first CP counter */
940 if (i == 0)
941 j++;
942
943 config_setting_t *sect =
944 config_setting_get_member(setting, group->group->name);
945
946 for (; j < group->group->num_counters; j++) {
947 char name[] = "counter0000";
948 sprintf(name, "counter%d", j);
949 config_setting_t *s =
950 config_setting_lookup(sect, name);
951 config_setting_set_int(s, group->counter[j].select_val);
952 }
953 }
954
955 config_write_file(&cfg, "fdperf.cfg");
956 }
957
958 static void
959 config_restore(void)
960 {
961 char *str;
962
963 config_init(&cfg);
964
965 /* Read the file. If there is an error, report it and exit. */
966 if(!config_read_file(&cfg, "fdperf.cfg")) {
967 warn("could not restore settings");
968 }
969
970 config_setting_t *root = config_root_setting(&cfg);
971
972 /* per device settings: */
973 (void) asprintf(&str, "a%dxx", dev.chipid >> 24);
974 setting = config_setting_get_member(root, str);
975 if (!setting)
976 setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
977 free(str);
978
979 for (unsigned i = 0; i < dev.ngroups; i++) {
980 struct counter_group *group = &dev.groups[i];
981 unsigned j = 0;
982
983 /* NOTE skip CP the first CP counter */
984 if (i == 0)
985 j++;
986
987 config_setting_t *sect =
988 config_setting_get_member(setting, group->group->name);
989
990 if (!sect) {
991 sect = config_setting_add(setting, group->group->name,
992 CONFIG_TYPE_GROUP);
993 }
994
995 for (; j < group->group->num_counters; j++) {
996 char name[] = "counter0000";
997 sprintf(name, "counter%d", j);
998 config_setting_t *s = config_setting_lookup(sect, name);
999 if (!s) {
1000 config_setting_add(sect, name, CONFIG_TYPE_INT);
1001 continue;
1002 }
1003 select_counter(group, j, config_setting_get_int(s));
1004 }
1005 }
1006 }
1007
1008 /*
1009 * main
1010 */
1011
1012 int
1013 main(int argc, char **argv)
1014 {
1015 find_device();
1016
1017 const struct fd_perfcntr_group *groups;
1018 groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
1019 if (!groups) {
1020 errx(1, "no perfcntr support");
1021 }
1022
1023 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
1024
1025 setlocale(LC_NUMERIC, "en_US.UTF-8");
1026
1027 setup_counter_groups(groups);
1028 restore_counter_groups();
1029 config_restore();
1030 flush_ring();
1031
1032 main_ui();
1033
1034 return 0;
1035 }