From 5a13507164a26fc796f02c57a24468b834254b4d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 19 Nov 2019 14:53:49 -0800 Subject: [PATCH] freedreno/perfcntrs: add fdperf Port from the envytools tree, but converted to use the .c tables for describing the perfcounter groups/countables, rather than using rnndec to get this at runtime from the register xml. Signed-off-by: Rob Clark Reviewed-by: Kristian H. Kristensen --- src/freedreno/perfcntrs/fdperf.c | 1058 +++++++++++++++++++++++++++ src/freedreno/perfcntrs/meson.build | 24 + 2 files changed, 1082 insertions(+) create mode 100644 src/freedreno/perfcntrs/fdperf.c diff --git a/src/freedreno/perfcntrs/fdperf.c b/src/freedreno/perfcntrs/fdperf.c new file mode 100644 index 00000000000..bf2cecabae9 --- /dev/null +++ b/src/freedreno/perfcntrs/fdperf.c @@ -0,0 +1,1058 @@ +/* + * Copyright (C) 2016 Rob Clark + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "drm/freedreno_drmif.h" +#include "drm/freedreno_ringbuffer.h" + +#include "freedreno_perfcntr.h" + +#define MAX_CNTR_PER_GROUP 24 + +/* NOTE first counter group should always be CP, since we unconditionally + * use CP counter to measure the gpu freq. + */ + +struct counter_group { + const struct fd_perfcntr_group *group; + + struct { + const struct fd_perfcntr_counter *counter; + uint16_t select_val; + volatile uint32_t *val_hi; + volatile uint32_t *val_lo; + } counter[MAX_CNTR_PER_GROUP]; + + /* last sample time: */ + uint32_t stime[MAX_CNTR_PER_GROUP]; + /* for now just care about the low 32b value.. at least then we don't + * have to really care that we can't sample both hi and lo regs at the + * same time: + */ + uint32_t last[MAX_CNTR_PER_GROUP]; + /* current value, ie. 
by how many did the counter increase in last
+	 * sampling period divided by the sampling period:
+	 */
+	float current[MAX_CNTR_PER_GROUP];
+	/* name of currently selected counters (for UI): */
+	const char *label[MAX_CNTR_PER_GROUP];
+};
+
+static struct {
+	/* path of the GPU's device-tree node (set by find_device_fn()): */
+	char *dtnode;
+	/* #address-cells / #size-cells of the node's parent: */
+	int address_cells, size_cells;
+	/* register region, parsed from the node's "reg" property: */
+	uint64_t base;
+	uint32_t size;
+	/* mapping of the register region (via /dev/mem): */
+	void *io;
+	uint32_t chipid;
+	uint32_t min_freq;
+	uint32_t max_freq;
+	/* per-generation table of counters: */
+	unsigned ngroups;
+	struct counter_group *groups;
+	/* drm device (for writing select regs via ring): */
+	struct fd_device *dev;
+	struct fd_pipe *pipe;
+	struct fd_submit *submit;
+	struct fd_ringbuffer *ring;
+} dev;
+
+static void config_save(void);
+static void config_restore(void);
+
+/*
+ * helpers
+ */
+
+#define CHUNKSIZE 32
+
+/*
+ * Read the whole file at 'path' into a freshly allocated buffer.
+ *
+ * Returns the buffer (which the caller must free()) and stores the number
+ * of bytes read in *sz.  Returns NULL, with *sz set to zero, if the file
+ * cannot be opened or read, or if memory runs out.
+ *
+ * The returned data is always followed by one NUL byte that is not counted
+ * in *sz, so textual properties can be passed to the str*() functions.
+ */
+static void *
+readfile(const char *path, int *sz)
+{
+	char *buf = NULL;
+	int fd, ret, n = 0;
+
+	*sz = 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	while (1) {
+		/* +1 leaves room for the terminating NUL.  Use a temporary so
+		 * the old buffer is not leaked if realloc() fails:
+		 */
+		char *tmp = realloc(buf, n + CHUNKSIZE + 1);
+		if (!tmp)
+			goto fail;
+		buf = tmp;
+
+		ret = read(fd, buf + n, CHUNKSIZE);
+		if (ret < 0)
+			goto fail;
+
+		n += ret;
+
+		/* a short read is treated as end of file: */
+		if (ret < CHUNKSIZE)
+			break;
+	}
+
+	close(fd);
+	buf[n] = '\0';
+	*sz = n;
+	return buf;
+
+fail:
+	close(fd);
+	free(buf);
+	return NULL;
+}
+
+/*
+ * Current CLOCK_MONOTONIC time in microseconds, truncated to 32 bits (so
+ * the value wraps; use delta() to compare two timestamps).
+ */
+static uint32_t
+gettime_us(void)
+{
+	struct timespec ts;
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	/* do the multiply in 64b so that it cannot overflow a 32b time_t
+	 * (signed overflow is undefined), then wrap to 32b on purpose:
+	 */
+	return (uint32_t)(((uint64_t)ts.tv_sec * 1000000) + (ts.tv_nsec / 1000));
+}
+
+/*
+ * Distance from 'a' to 'b' for free-running 32b values (counters or
+ * timestamps), where 'b' was sampled after 'a'.
+ */
+static uint32_t
+delta(uint32_t a, uint32_t b)
+{
+	/* unsigned subtraction is modulo 2^32, so this already gives the
+	 * right answer when the value rolled over between the two samples
+	 * (the open-coded "0xffffffff - a + b" form was off by one):
+	 */
+	return b - a;
+}
+
+/*
+ * TODO de-duplicate OUT_RING() and friends
+ */
+
+#define CP_WAIT_FOR_IDLE 38
+#define CP_TYPE0_PKT 0x00000000
+#define CP_TYPE3_PKT 0xc0000000
+#define CP_TYPE4_PKT 0x40000000
+#define CP_TYPE7_PKT 0x70000000
+
+/* Append one dword to the ring (no check for remaining space): */
+static inline void
+OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
+{
+	*(ring->cur++) = data;
+}
+
+/* Emit a type-0 packet header: (cnt - 1) in bits 16+, regindx in bits 0..14 */
+static inline void
+OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+	OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
+}
+
+/* Emit a type-3 packet header: (cnt - 1) in bits 16+, opcode in bits 8..15 */
+static inline void
+OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+	OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
+}
+
+
+/*
+ * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
+ */
+
+static inline unsigned
+_odd_parity_bit(unsigned val)
+{
+	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+	 * note that we want odd parity so 0x6996 is inverted.
+	 */
+	val ^= val >> 16;
+	val ^= val >> 8;
+	val ^= val >> 4;
+	val &= 0xf;
+	return (~0x6996 >> val) & 1;
+}
+
+/* Emit a type-4 packet header; cnt and regindx each carry a parity bit */
+static inline void
+OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+	OUT_RING(ring, CP_TYPE4_PKT | cnt |
+			(_odd_parity_bit(cnt) << 7) |
+			((regindx & 0x3ffff) << 8) |
+			((_odd_parity_bit(regindx) << 27)));
+}
+
+/* Emit a type-7 packet header; cnt and opcode each carry a parity bit */
+static inline void
+OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+	OUT_RING(ring, CP_TYPE7_PKT | cnt |
+			(_odd_parity_bit(cnt) << 15) |
+			((opcode & 0x7f) << 16) |
+			((_odd_parity_bit(opcode) << 23)));
+}
+
+/*
+ * code to find stuff in /proc/device-tree:
+ *
+ * NOTE: if we sampled the counters from the cmdstream, we could avoid needing
+ * /dev/mem and /proc/device-tree crawling.  OTOH when the GPU is heavily loaded
+ * we would be competing with whatever else is using the GPU.
+ */
+
+/*
+ * Read the property 'node' of the GPU's device-tree node (dev.dtnode).
+ * Returns a buffer the caller must free(), or NULL on failure.
+ */
+static void *
+readdt(const char *node)
+{
+	char *path;
+	void *buf;
+	int sz;
+
+	/* 'path' is undefined if asprintf() fails, so don't touch it: */
+	if (asprintf(&path, "%s/%s", dev.dtnode, node) < 0)
+		return NULL;
+	buf = readfile(path, &sz);
+	free(path);
+
+	return buf;
+}
+
+/*
+ * Read the first big-endian 32b cell of the device-tree property at
+ * 'path'.  Exits with an error message if it cannot be read.
+ */
+static uint32_t
+read_dt_cell(const char *path)
+{
+	int sz;
+	uint32_t *val = readfile(path, &sz);
+
+	if (!val || (sz < (int)sizeof(*val)))
+		errx(1, "could not read %s", path);
+
+	uint32_t cell = ntohl(*val);
+	free(val);
+
+	return cell;
+}
+
+/*
+ * nftw() callback: fold every "qcom,gpu-freq" property found into
+ * dev.min_freq / dev.max_freq.  Always returns 0 so the walk continues.
+ */
+static int
+find_freqs_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
+{
+	const char *fname = fpath + ftwbuf->base;
+	int sz;
+
+	if (strcmp(fname, "qcom,gpu-freq") == 0) {
+		uint32_t *buf = readfile(fpath, &sz);
+
+		/* skip properties that are unreadable or too short: */
+		if (!buf || (sz < (int)sizeof(*buf))) {
+			free(buf);
+			return 0;
+		}
+
+		uint32_t freq = ntohl(buf[0]);
+		free(buf);
+		dev.max_freq = MAX2(dev.max_freq, freq);
+		dev.min_freq = MIN2(dev.min_freq, freq);
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in dev.min_freq / dev.max_freq from the "qcom,gpu-pwrlevels"
+ * subtree of the GPU's device-tree node.
+ */
+static void
+find_freqs(void)
+{
+	char *path;
+	int ret;
+
+	dev.min_freq = ~0;
+	dev.max_freq = 0;
+
+	if (asprintf(&path, "%s/%s", dev.dtnode, "qcom,gpu-pwrlevels") < 0)
+		errx(1, "out of memory");
+
+	ret = nftw(path, find_freqs_fn, 64, 0);
+	if (ret < 0)
+		err(1, "could not find power levels");
+
+	free(path);
+}
+
+/*
+ * nftw() callback: look for a "compatible" property naming an adreno GPU.
+ * On a match, record the node's path in dev.dtnode and read the parent's
+ * #address-cells / #size-cells.  Returns 1 (stopping the walk) once the
+ * node has been found, otherwise 0.
+ */
+static int
+find_device_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
+{
+	const char *fname = fpath + ftwbuf->base;
+	int sz;
+
+	if (strcmp(fname, "compatible") == 0) {
+		char *str = readfile(fpath, &sz);
+
+		/* only hand the property to the str*() functions if it was
+		 * read and really contains a NUL terminated string:
+		 */
+		if (str && memchr(str, '\0', sz) &&
+				((strcmp(str, "qcom,adreno-3xx") == 0) ||
+				 (strcmp(str, "qcom,kgsl-3d0") == 0) ||
+				 (strstr(str, "qcom,adreno") == str))) {
+			int dlen = strlen(fpath) - strlen("/compatible");
+			dev.dtnode = malloc(dlen + 1);
+			if (!dev.dtnode)
+				errx(1, "out of memory");
+			memcpy(dev.dtnode, fpath, dlen);
+			/* malloc() does not zero, so terminate explicitly: */
+			dev.dtnode[dlen] = '\0';
+			printf("found dt node: %s\n", dev.dtnode);
+
+			/* sized for the longer of the two paths built below: */
+			char buf[dlen + sizeof("/../#address-cells") + 1];
+
+			snprintf(buf, sizeof(buf), "%s/../#address-cells", dev.dtnode);
+			dev.address_cells = read_dt_cell(buf);
+
+			snprintf(buf, sizeof(buf), "%s/../#size-cells", dev.dtnode);
+			dev.size_cells = read_dt_cell(buf);
+
+			printf("#address-cells=%d, #size-cells=%d\n",
+					dev.address_cells, dev.size_cells);
+		}
+		free(str);
+	}
+	if (dev.dtnode) {
+		/* we found it! */
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Locate the GPU and fill in the global 'dev': find the device-tree node,
+ * open the drm device and query the chip id, parse the register region
+ * from the "reg" property, work out the min/max frequency and finally map
+ * the registers via /dev/mem.  Exits with an error message on failure.
+ */
+static void
+find_device(void)
+{
+	int ret, fd;
+	uint32_t *buf, *b;
+
+	ret = nftw("/proc/device-tree/", find_device_fn, 64, 0);
+	if (ret < 0)
+		err(1, "could not find adreno gpu");
+
+	if (!dev.dtnode)
+		errx(1, "could not find qcom,adreno-3xx node");
+
+	fd = open("/dev/dri/card0", O_RDWR);
+	if (fd < 0)
+		err(1, "could not open drm device");
+
+	dev.dev  = fd_device_new(fd);
+	if (!dev.dev)
+		errx(1, "could not create drm device");
+	dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
+	if (!dev.pipe)
+		errx(1, "could not create pipe");
+
+	uint64_t val;
+	ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
+	if (ret) {
+		err(1, "could not get gpu-id");
+	}
+	dev.chipid = val;
+
+#define CHIP_FMT "d%d%d.%d"
+#define CHIP_ARGS(chipid) \
+		((chipid) >> 24) & 0xff, \
+		((chipid) >> 16) & 0xff, \
+		((chipid) >> 8) & 0xff, \
+		((chipid) >> 0) & 0xff
+	printf("device: a%"CHIP_FMT"\n", CHIP_ARGS(dev.chipid));
+
+	b = buf = readdt("reg");
+	if (!buf)
+		errx(1, "could not read reg property");
+
+	/* "reg" is <base size>, each 1 or 2 big-endian cells wide: */
+	if (dev.address_cells == 2) {
+		uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
+		dev.base = (((uint64_t)u[0]) << 32) | u[1];
+		buf += 2;
+	} else {
+		dev.base = ntohl(buf[0]);
+		buf += 1;
+	}
+
+	if (dev.size_cells == 2) {
+		uint32_t u[2] = { ntohl(buf[0]), ntohl(buf[1]) };
+		dev.size = (((uint64_t)u[0]) << 32) | u[1];
+		buf += 2;
+	} else {
+		dev.size = ntohl(buf[0]);
+		buf += 1;
+	}
+
+	free(b);
+
+	/* dev.base is 64b, which is not 'long' on every platform: */
+	printf("i/o region at %08llx (size: %x)\n",
+			(unsigned long long)dev.base, dev.size);
+
+	/* try MAX_FREQ first as that will work regardless of old dt
+	 * bindings vs upstream bindings:
+	 */
+	ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
+	if (ret) {
+		printf("falling back to parsing DT bindings for freq\n");
+		find_freqs();
+	} else {
+		dev.min_freq = 0;
+		dev.max_freq = val;
+	}
+
+	printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
+
+	fd = open("/dev/mem", O_RDWR | O_SYNC);
+	if (fd < 0)
+		err(1, "could not open /dev/mem");
+
+	dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
+	/* mmap() reports failure with MAP_FAILED, never with NULL: */
+	if (dev.io == MAP_FAILED)
+		err(1, "could not map device");
+}
+
+/*
+ * perf-monitor
+ */
+
+static void
+flush_ring(void)
+{
+	/* Submit whatever select_counter() queued up in dev.ring and then
+	 * drop the ring and the submit.  Does nothing if nothing is queued;
+	 * exits if the submit fails.
+	 */
+	int ret;
+
+	if (!dev.submit)
+		return;
+
+	ret = fd_submit_flush(dev.submit, -1, NULL, NULL);
+	if (ret)
+		errx(1, "submit failed: %d", ret);
+	fd_ringbuffer_del(dev.ring);
+	fd_submit_del(dev.submit);
+
+	dev.ring = NULL;
+	dev.submit = NULL;
+}
+
+/*
+ * Point counter 'ctr' of 'group' at countable 'n': update the label and
+ * select_val used by the UI, queue the register writes on the ring
+ * (creating the submit/ring on demand) and restart the sample baseline.
+ * The caller must flush_ring() for the writes to reach the GPU.
+ *
+ * NOTE(review): 'n' is used as an index into countables[] for the label
+ * but is written to the select register as-is, while counter_dialog()
+ * passes a countable's .selector value.  That only agrees when selector
+ * values equal table indices -- confirm against the perfcntr tables.
+ */
+static void
+select_counter(struct counter_group *group, int ctr, int n)
+{
+	assert(n >= 0);
+	assert(ctr >= 0);
+	assert(n < group->group->num_countables);
+	assert(ctr < group->group->num_counters);
+
+	group->label[ctr] = group->group->countables[n].name;
+	group->counter[ctr].select_val = n;
+
+	if (!dev.submit) {
+		dev.submit = fd_submit_new(dev.pipe);
+		dev.ring = fd_submit_new_ringbuffer(dev.submit, 0x1000,
+			FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
+	}
+
+	/* bashing select register directly while gpu is active will end
+	 * in tears.. so we need to write it via the ring:
+	 *
+	 * TODO it would help startup time, if gpu is loaded, to batch
+	 * all the initial writes and do a single flush.. although that
+	 * makes things more complicated for capturing initial sample value
+	 */
+	struct fd_ringbuffer *ring = dev.ring;
+	switch (dev.chipid >> 24) {
+	case 3:
+	case 4:
+		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+		OUT_RING(ring, 0x00000000);
+
+		/* disable, clear, select, re-enable: */
+		if (group->group->counters[ctr].enable) {
+			OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
+			OUT_RING(ring, 0);
+		}
+
+		if (group->group->counters[ctr].clear) {
+			OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
+			OUT_RING(ring, 1);
+
+			OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
+			OUT_RING(ring, 0);
+		}
+
+		OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
+		OUT_RING(ring, n);
+
+		if (group->group->counters[ctr].enable) {
+			OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
+			OUT_RING(ring, 1);
+		}
+
+		break;
+	case 5:
+	case 6:
+		OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+
+		/* same sequence as above, using pkt4 register writes: */
+		if (group->group->counters[ctr].enable) {
+			OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
+			OUT_RING(ring, 0);
+		}
+
+		if (group->group->counters[ctr].clear) {
+			OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
+			OUT_RING(ring, 1);
+
+			OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
+			OUT_RING(ring, 0);
+		}
+
+		OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
+		OUT_RING(ring, n);
+
+		if (group->group->counters[ctr].enable) {
+			OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
+			OUT_RING(ring, 1);
+		}
+
+		break;
+	}
+
+	group->last[ctr] = *group->counter[ctr].val_lo;
+	group->stime[ctr] = gettime_us();
+}
+
+/*
+ * Re-read counter 'ctr' of 'group' and update its rate (counts per
+ * second since the previous sample) in group->current[ctr].
+ */
+static void
+resample_counter(struct counter_group *group, int ctr)
+{
+	uint32_t val = *group->counter[ctr].val_lo;
+	uint32_t t = gettime_us();
+	uint32_t dt = delta(group->stime[ctr], t);
+	uint32_t dval = delta(group->last[ctr], val);
+
+	/* no time has passed since the last sample: keep the previous rate
+	 * (and baseline) rather than dividing by zero:
+	 */
+	if (dt == 0)
+		return;
+
+	group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
+	group->last[ctr] = val;
+	group->stime[ctr] = t;
+}
+
+#define REFRESH_MS 500
+
+/* sample all the counters (at most once per half refresh period): */
+static void
+resample(void)
+{
+	static uint64_t last_time;
+	uint64_t current_time = gettime_us();
+
+	if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))
+		return;
+
+	last_time = current_time;
+
+	for (unsigned i = 0; i < dev.ngroups; i++) {
+		struct counter_group *group = &dev.groups[i];
+		for (unsigned j = 0; j < group->group->num_counters; j++) {
+			resample_counter(group, j);
+		}
+	}
+}
+
+/*
+ * The UI
+ */
+
+#define COLOR_GROUP_HEADER 1
+#define COLOR_FOOTER       2
+#define COLOR_INVERSE      3
+
+static int w, h;
+static int ctr_width;
+static int max_rows, current_cntr = 1;
+
+/* Draw the footer (chip id and frequency range) on the last row: */
+static void
+redraw_footer(WINDOW *win)
+{
+	char *footer;
+	int n;
+
+	n = asprintf(&footer, " fdperf: a%"CHIP_FMT" (%.2fMHz..%.2fMHz)",
+			CHIP_ARGS(dev.chipid),
+			((float)dev.min_freq) / 1000000.0,
+			((float)dev.max_freq) / 1000000.0);
+	/* 'footer' is undefined if asprintf() failed: */
+	if (n < 0)
+		return;
+
+	wmove(win, h - 1, 0);
+	wattron(win, COLOR_PAIR(COLOR_FOOTER));
+	waddstr(win, footer);
+	whline(win, ' ', w - n);
+	wattroff(win, COLOR_PAIR(COLOR_FOOTER));
+
+	free(footer);
+}
+
+static void
+redraw_group_header(WINDOW *win, int row, const char *name)
+{
+	wmove(win,
row, 0);
+	wattron(win, A_BOLD);
+	wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
+	waddstr(win, name);
+	/* strlen() is unsigned; do the subtraction as int so that a name
+	 * wider than the window gives a negative count, not a huge one:
+	 */
+	whline(win, ' ', w - (int)strlen(name));
+	wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
+	wattroff(win, A_BOLD);
+}
+
+/*
+ * Draw 'name' right-aligned in the label column (ctr_width wide) of
+ * 'row', followed by ": ".  The name is highlighted when 'selected'.
+ */
+static void
+redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
+{
+	int n = strlen(name);
+	/* a label wider than the column (possible for the fixed status
+	 * labels if all countable names are short) gets no padding:
+	 */
+	int pad = MAX2(0, ctr_width - n);
+
+	wmove(win, row, 0);
+	whline(win, ' ', pad);
+	wmove(win, row, pad);
+	if (selected)
+		wattron(win, COLOR_PAIR(COLOR_INVERSE));
+	waddstr(win, name);
+	if (selected)
+		wattroff(win, COLOR_PAIR(COLOR_INVERSE));
+	waddstr(win, ": ");
+}
+
+/*
+ * Draw 'val' (cycles per second) at the cursor as a percentage of
+ * dev.max_freq, with an inverse-video bar behind it whose width is that
+ * fraction of the remaining columns.
+ */
+static void
+redraw_counter_value_cycles(WINDOW *win, float val)
+{
+	char *str;
+	int x = getcurx(win);
+	int valwidth = w - x;
+	int barwidth, n;
+
+	/* convert to fraction of max freq (an unknown, ie. zero, max freq
+	 * must not turn into a division by zero):
+	 */
+	val = dev.max_freq ? (val / (float)dev.max_freq) : 0.0f;
+
+	/* figure out percentage-bar width: */
+	barwidth = (int)(val * valwidth);
+
+	/* sometimes things go over 100%.. idk why, could be
+	 * things running faster than base clock, or counter
+	 * summing up cycles in multiple cores?
+	 */
+	barwidth = MIN2(barwidth, valwidth - 1);
+
+	/* ..and with no room left valwidth - 1 is negative, which would
+	 * make 'str + barwidth' below point before the string:
+	 */
+	barwidth = MAX2(barwidth, 0);
+
+	n = asprintf(&str, "%.2f%%", 100.0 * val);
+	if (n < 0)
+		return;
+
+	/* the part of the text (and padding) covered by the bar: */
+	wattron(win, COLOR_PAIR(COLOR_INVERSE));
+	waddnstr(win, str, barwidth);
+	if (barwidth > n) {
+		whline(win, ' ', barwidth - n);
+		wmove(win, getcury(win), x + barwidth);
+	}
+	wattroff(win, COLOR_PAIR(COLOR_INVERSE));
+	/* the rest of the text, past the end of the bar: */
+	if (barwidth < n)
+		waddstr(win, str + barwidth);
+	whline(win, ' ', w - getcurx(win));
+
+	free(str);
+}
+
+/* Draw 'val' at the cursor as a plain number and blank the rest of the row: */
+static void
+redraw_counter_value_raw(WINDOW *win, float val)
+{
+	char *str;
+	if (asprintf(&str, "%'.2f", val) < 0)
+		return;
+	waddstr(win, str);
+	whline(win, ' ', w - getcurx(win));
+	free(str);
+}
+
+/* Draw the label and current value of counter 'ctr' of 'group' on 'row': */
+static void
+redraw_counter(WINDOW *win, int row, struct counter_group *group,
+		int ctr, bool selected)
+{
+	redraw_counter_label(win, row, group->label[ctr], selected);
+
+	/* quick hack, if the label has "CYCLE" in the name, it is
+	 * probably a cycle counter ;-)
+	 * Perhaps add more info in rnndb schema to know how to
+	 * treat individual counters (ie. which are cycles, and
+	 * for those we want to present as a percentage do we
+	 * need to scale the result.. ie. is it running at some
+	 * multiple or divisor of core clk, etc)
+	 *
+	 * TODO it would be much more clever to get this from xml
+	 * Also.. in some cases I think we want to know how many
+	 * units the counter is counting for, ie. if a320 has 2x
+	 * shader as a306 we might need to scale the result..
+	 */
+	if (strstr(group->label[ctr], "CYCLE") ||
+			strstr(group->label[ctr], "BUSY") ||
+			strstr(group->label[ctr], "IDLE"))
+		redraw_counter_value_cycles(win, group->current[ctr]);
+	else
+		redraw_counter_value_raw(win, group->current[ctr]);
+}
+
+/*
+ * Redraw the whole screen: the (scrolled) list of groups and counters,
+ * the status section with the measured GPU frequency, and the footer.
+ */
+static void
+redraw(WINDOW *win)
+{
+	static int scroll = 0;
+	int max, row = 0;
+
+	w = getmaxx(win);
+	h = getmaxy(win);
+
+	/* the bottom three rows hold the status section and the footer: */
+	max = h - 3;
+
+	/* keep the selected row inside the visible area: */
+	if ((current_cntr - scroll) > (max - 1)) {
+		scroll = current_cntr - (max - 1);
+	} else if ((current_cntr - 1) < scroll) {
+		scroll = current_cntr - 1;
+	}
+
+	for (unsigned i = 0; i < dev.ngroups; i++) {
+		struct counter_group *group = &dev.groups[i];
+		unsigned j = 0;
+
+		/* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+		if (i == 0)
+			j++;
+
+		if (j < group->group->num_counters) {
+			if ((scroll <= row) && ((row - scroll) < max))
+				redraw_group_header(win, row - scroll, group->group->name);
+			row++;
+		}
+
+		for (; j < group->group->num_counters; j++) {
+			if ((scroll <= row) && ((row - scroll) < max))
+				redraw_counter(win, row - scroll, group, j, row == current_cntr);
+			row++;
+		}
+	}
+
+	/* convert back to physical (unscrolled) offset: */
+	row = max;
+
+	redraw_group_header(win, row, "Status");
+	row++;
+
+	/* Draw GPU freq row: */
+	redraw_counter_label(win, row, "Freq (MHz)", false);
+	redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
+	row++;
+
+	redraw_footer(win);
+
+	refresh();
+}
+
+static struct counter_group *
+current_counter(int *ctr)
+{
+	int n = 0;
+
+	for (unsigned i = 0; i < dev.ngroups; i++) {
+		struct counter_group *group = &dev.groups[i];
+		unsigned j = 0;
+
+		/* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
+		if (i == 0)
+			j++;
+
+		/* account for group header: */
+		if (j < group->group->num_counters) {
+			/* cannot select group header..
return null to indicate this
+			 * (callers such as scroll_cntr() check for it):
+			 */
+			if (n == current_cntr)
+				return NULL;
+			n++;
+		}
+
+
+		for (; j < group->group->num_counters; j++) {
+			if (n == current_cntr) {
+				if (ctr)
+					*ctr = j;
+				return group;
+			}
+			n++;
+		}
+	}
+
+	assert(0);
+	return NULL;
+}
+
+/*
+ * Pop up a dialog listing the countables of the currently selected
+ * counter.  Up/down move the highlight, left or enter applies the
+ * highlighted countable (and saves the config), 'q' cancels.
+ */
+static void
+counter_dialog(void)
+{
+	WINDOW *dialog;
+	struct counter_group *group;
+	int cnt, current = 0, scroll;
+
+	/* figure out dialog size: */
+	int dh = h/2;
+	int dw = ctr_width + 2;
+
+	group = current_counter(&cnt);
+
+	/* nothing to choose from if a group header is selected ('cnt' is
+	 * not even set in that case) or the group has no countables:
+	 */
+	if (!group || (group->group->num_countables == 0))
+		return;
+
+	/* find currently selected idx (note there can be discontinuities
+	 * so the selected value does not map 1:1 to current idx)
+	 */
+	uint32_t selected = group->counter[cnt].select_val;
+	for (int i = 0; i < group->group->num_countables; i++) {
+		if (group->group->countables[i].selector == selected) {
+			current = i;
+			break;
+		}
+	}
+
+	/* scrolling offset, if dialog is too small for all the choices: */
+	scroll = 0;
+
+	dialog = newwin(dh, dw, (h-dh)/2, (w-dw)/2);
+	if (!dialog)
+		return;
+	box(dialog, 0, 0);
+	wrefresh(dialog);
+	keypad(dialog, TRUE);
+
+	while (true) {
+		int max = MIN2(dh - 2, group->group->num_countables);
+
+		/* 'current' is always a valid index, so take the selector
+		 * straight from the table; this does not depend on the
+		 * highlighted row actually fitting into the dialog:
+		 */
+		int selector = group->group->countables[current].selector;
+
+		if ((current - scroll) >= (dh - 3)) {
+			scroll = current - (dh - 3);
+		} else if (current < scroll) {
+			scroll = current;
+		}
+
+		for (int i = 0; i < max; i++) {
+			int n = scroll + i;
+			wmove(dialog, i+1, 1);
+			if (n == current)
+				wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
+			if (n < group->group->num_countables)
+				waddstr(dialog, group->group->countables[n].name);
+			whline(dialog, ' ', dw - getcurx(dialog) - 1);
+			if (n == current)
+				wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
+		}
+
+		switch (wgetch(dialog)) {
+		case KEY_UP:
+			current = MAX2(0, current - 1);
+			break;
+		case KEY_DOWN:
+			current = MIN2(group->group->num_countables - 1, current + 1);
+			break;
+		case KEY_LEFT:
+		case KEY_ENTER:
+		/* the main Enter key is normally delivered as a plain
+		 * newline / carriage return rather than KEY_ENTER:
+		 */
+		case '\n':
+		case '\r':
+			/* select new sampler */
+			select_counter(group, cnt, selector);
+			flush_ring();
+			config_save();
+			goto out;
+		case 'q':
+			goto out;
+		default:
+			/* ignore */
+			break;
+		}
+
+		resample();
+	}
+
+out:
+	wborder(dialog, ' ', ' ', ' ',' ',' ',' ',' ',' ');
+	delwin(dialog);
+}
+
+/*
+ * Move the selection by 'amount' rows, clamped to the list, stepping one
+ * row further if it would otherwise land on a group header.
+ */
+static void
+scroll_cntr(int amount)
+{
+	if (amount < 0) {
+		current_cntr = MAX2(1, current_cntr + amount);
+		if (current_counter(NULL) == NULL) {
+			current_cntr = MAX2(1, current_cntr - 1);
+		}
+	} else {
+		current_cntr = MIN2(max_rows - 1, current_cntr + amount);
+		if (current_counter(NULL) == NULL)
+			current_cntr = MIN2(max_rows - 1, current_cntr + 1);
+	}
+}
+
+/* Set up curses and run the main loop until 'q' is pressed: */
+static void
+main_ui(void)
+{
+	WINDOW *mainwin;
+
+	/* curses setup: */
+	mainwin = initscr();
+	/* nothing was set up, so there is nothing to tear down: */
+	if (!mainwin)
+		return;
+
+	cbreak();
+	/* wgetch() times out so the display refreshes without input: */
+	wtimeout(mainwin, REFRESH_MS);
+	noecho();
+	keypad(mainwin, TRUE);
+	curs_set(0);
+	start_color();
+	init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
+	init_pair(COLOR_FOOTER,       COLOR_WHITE, COLOR_BLUE);
+	init_pair(COLOR_INVERSE,      COLOR_BLACK, COLOR_WHITE);
+
+	while (true) {
+		switch (wgetch(mainwin)) {
+		case KEY_UP:
+			scroll_cntr(-1);
+			break;
+		case KEY_DOWN:
+			scroll_cntr(+1);
+			break;
+		case KEY_NPAGE:  /* page-down */
+			/* TODO figure out # of rows visible? */
+			scroll_cntr(+15);
+			break;
+		case KEY_PPAGE:  /* page-up */
+			/* TODO figure out # of rows visible? */
+			scroll_cntr(-15);
+			break;
+		case KEY_RIGHT:
+			counter_dialog();
+			break;
+		case 'q':
+			goto out;
+			break;
+		default:
+			/* ignore */
+			break;
+		}
+		resample();
+		redraw(mainwin);
+	}
+
+	/* restore settings..
maybe we need an atexit()??*/
+out:
+	/* NOTE: no refresh() after endwin(); a refresh at that point would
+	 * put the terminal back into curses mode (and mainwin is gone).
+	 */
+	if (mainwin)
+		delwin(mainwin);
+	endwin();
+}
+
+/*
+ * Bind dev.groups[] to the per-generation table 'groups': count the UI
+ * rows, compute the address of each counter's value registers inside the
+ * register mapping, select an initial countable for every counter and
+ * size the label column to fit the longest countable name.
+ */
+static void
+setup_counter_groups(const struct fd_perfcntr_group *groups)
+{
+	for (unsigned i = 0; i < dev.ngroups; i++) {
+		struct counter_group *group = &dev.groups[i];
+
+		group->group = &groups[i];
+
+		/* the per-counter arrays in struct counter_group are fixed size: */
+		assert(group->group->num_counters <= MAX_CNTR_PER_GROUP);
+
+		/* one row per counter plus one for the group header: */
+		max_rows += group->group->num_counters + 1;
+
+		/* the first CP counter is hidden: */
+		if (i == 0) {
+			max_rows--;
+			if (group->group->num_counters <= 1)
+				max_rows--;
+		}
+
+		for (unsigned j = 0; j < group->group->num_counters; j++) {
+			group->counter[j].counter = &group->group->counters[j];
+
+			/* register offsets are in dwords; do the byte math on a
+			 * uint8_t pointer (arithmetic on void * is a GNU extension):
+			 */
+			group->counter[j].val_hi = (volatile uint32_t *)
+				((uint8_t *)dev.io + (group->counter[j].counter->counter_reg_hi * 4));
+			group->counter[j].val_lo = (volatile uint32_t *)
+				((uint8_t *)dev.io + (group->counter[j].counter->counter_reg_lo * 4));
+
+			select_counter(group, j, j);
+		}
+
+		for (unsigned j = 0; j < group->group->num_countables; j++) {
+			ctr_width = MAX2(ctr_width, (int)strlen(group->group->countables[j].name) + 1);
+		}
+	}
+}
+
+/*
+ * configuration / persistence
+ */
+
+static config_t cfg;
+/* the per-device (aNxx) section of cfg, set up by config_restore(): */
+static config_setting_t *setting;
+
+/*
+ * Store the current select value of every visible counter in 'cfg' and
+ * write it to fdperf.cfg.
+ */
+static void
+config_save(void)
+{
+	for (unsigned i = 0; i < dev.ngroups; i++) {
+		struct counter_group *group = &dev.groups[i];
+		unsigned j = 0;
+
+		/* NOTE skip the first CP counter */
+		if (i == 0)
+			j++;
+
+		config_setting_t *sect =
+			config_setting_get_member(setting, group->group->name);
+		if (!sect)
+			continue;
+
+		for (; j < group->group->num_counters; j++) {
+			char name[] = "counter0000";
+			snprintf(name, sizeof(name), "counter%u", j);
+			config_setting_t *s =
+				config_setting_lookup(sect, name);
+			/* create the entry if it is not there yet: */
+			if (!s)
+				s = config_setting_add(sect, name, CONFIG_TYPE_INT);
+			if (!s)
+				continue;
+			config_setting_set_int(s, group->counter[j].select_val);
+		}
+	}
+
+	if (!config_write_file(&cfg, "fdperf.cfg"))
+		warnx("could not save settings");
+}
+
+/*
+ * Load fdperf.cfg (if present) and re-apply the saved select value of
+ * every visible counter, creating missing sections and entries so that
+ * config_save() can fill them in later.
+ */
+static void
+config_restore(void)
+{
+	char *str;
+
+	config_init(&cfg);
+
+	/* Read the file.  If there is an error, report it and carry on
+	 * with an empty configuration.
+	 */
+	if(!config_read_file(&cfg, "fdperf.cfg")) {
+		warn("could not restore settings");
+	}
+
+	config_setting_t *root = config_root_setting(&cfg);
+
+	/* per device settings: */
+	if (asprintf(&str, "a%dxx", dev.chipid >> 24) < 0)
+		errx(1, "out of memory");
+	setting = config_setting_get_member(root, str);
+	if (!setting)
+		setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
+	free(str);
+
+	for (unsigned i = 0; i < dev.ngroups; i++) {
+		struct counter_group *group = &dev.groups[i];
+		unsigned j = 0;
+
+		/* NOTE skip the first CP counter */
+		if (i == 0)
+			j++;
+
+		config_setting_t *sect =
+			config_setting_get_member(setting, group->group->name);
+
+		if (!sect) {
+			sect = config_setting_add(setting, group->group->name,
+					CONFIG_TYPE_GROUP);
+		}
+
+		for (; j < group->group->num_counters; j++) {
+			char name[] = "counter0000";
+			snprintf(name, sizeof(name), "counter%u", j);
+			config_setting_t *s = config_setting_lookup(sect, name);
+			if (!s) {
+				config_setting_add(sect, name, CONFIG_TYPE_INT);
+				continue;
+			}
+
+			/* the file may be stale or hand edited; ignore values
+			 * that select_counter() could not accept:
+			 */
+			int val = config_setting_get_int(s);
+			if ((val < 0) || (val >= (int)group->group->num_countables))
+				continue;
+
+			select_counter(group, j, val);
+		}
+	}
+}
+
+/*
+ * main
+ */
+
+int
+main(int argc, char **argv)
+{
+	find_device();
+
+	const struct fd_perfcntr_group *groups;
+	groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);
+	/* the UI assumes that at least the first (CP) group exists: */
+	if (!groups || !dev.ngroups) {
+		errx(1, "no perfcntr support");
+	}
+
+	dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
+	if (!dev.groups)
+		err(1, "could not allocate counter groups");
+
+	setup_counter_groups(groups);
+	config_restore();
+	flush_ring();
+
+	main_ui();
+
+	return 0;
+}
diff --git a/src/freedreno/perfcntrs/meson.build b/src/freedreno/perfcntrs/meson.build
index 64f77b0390a..ae607eb0414 100644
--- a/src/freedreno/perfcntrs/meson.build
+++ b/src/freedreno/perfcntrs/meson.build
@@ -36,3 +36,27 @@ libfreedreno_perfcntrs = static_library(
   build_by_default : false,
 )
 
+dep_libconfig = dependency('libconfig', required : false)
+dep_ncurses = dependency('ncurses', required : false)
+
+if dep_libconfig.found() and dep_ncurses.found()
+  fdperf = executable(
+    'fdperf',
+    'fdperf.c',
+    include_directories : [
+      inc_common,
+      inc_freedreno,
+    ],
+    link_with : [
+      libfreedreno_drm,
+      libfreedreno_perfcntrs,
+    ],
+    dependencies : [
+      dep_libconfig,
+      dep_ncurses,
+      idep_mesautil,
+    ],
+    build_by_default : with_tools.contains('freedreno'),
+    install : with_tools.contains('freedreno'),
+  )
+endif
-- 
2.30.2