unsigned seqno;
struct fd_bo *control_mem;
+
+ struct fd_bo *query_mem;
+ const struct perfcntr *perfcntrs;
+ unsigned num_perfcntrs;
};
define_cast(backend, a6xx_backend);
+/*
+ * Data structures shared with GPU:
+ */
+
/* This struct defines the layout of the fd6_context::control buffer: */
struct fd6_control {
uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */
#define control_ptr(a6xx_backend, member) \
(a6xx_backend)->control_mem, offsetof(struct fd6_control, member), 0, 0
+
+struct PACKED fd6_query_sample { /* one entry per sampled counter in query_mem */
+ uint64_t start; /* counter value captured before CP_EXEC_CS */
+ uint64_t result; /* accumulated by the GPU as result += stop - start */
+ uint64_t stop; /* counter value captured after the grid has gone idle */
+};
+
+
+/*
+ * Address of a single field of one element of the fd6_query_sample array
+ * stored in query_mem.
+ *
+ * Expands to a four-element argument list (buffer, byte offset, 0, 0), which
+ * is how OUT_RELOC()/OUT_RELOCW() consume it in this file.
+ *
+ * idx is parenthesized so that an expression such as "i + 1" is scaled by
+ * the element size as a whole rather than only its last operand.
+ */
+#define query_sample_idx(a6xx_backend, idx, field) \
+	(a6xx_backend)->query_mem, \
+	((idx) * sizeof(struct fd6_query_sample)) + \
+	offsetof(struct fd6_query_sample, field), \
+	0, 0
+
+
+/*
+ * Backend implementation:
+ */
+
static struct kernel *
a6xx_assemble(struct backend *b, FILE *in)
{
static void
a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], struct fd_submit *submit)
{
+ struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
+ struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend);
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0,
FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
+ if (a6xx_backend->num_perfcntrs > 0) {
+ a6xx_backend->query_mem = fd_bo_new(a6xx_backend->dev,
+ a6xx_backend->num_perfcntrs * sizeof(struct fd6_query_sample),
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "query");
+
+ /* configure the performance counters to count the requested
+ * countables:
+ */
+ for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
+ const struct perfcntr *counter = &a6xx_backend->perfcntrs[i];
+
+ OUT_PKT4(ring, counter->select_reg, 1);
+ OUT_RING(ring, counter->selector);
+ }
+
+ OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+
+ /* and snapshot the start values: */
+ for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
+ const struct perfcntr *counter = &a6xx_backend->perfcntrs[i];
+
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+ OUT_RELOCW(ring, query_sample_idx(a6xx_backend, i, start));
+ }
+ }
+
OUT_PKT7(ring, CP_EXEC_CS, 4);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0]));
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+ if (a6xx_backend->num_perfcntrs > 0) {
+ /* snapshot the end values: */
+ for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
+ const struct perfcntr *counter = &a6xx_backend->perfcntrs[i];
+
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+ OUT_RELOCW(ring, query_sample_idx(a6xx_backend, i, stop));
+ }
+
+ /* and compute the result: */
+ for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
+ /* result += stop - start: */
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+ OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
+ CP_MEM_TO_MEM_0_NEG_C);
+ OUT_RELOCW(ring, query_sample_idx(a6xx_backend, i, result)); /* dst */
+ OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* srcA */
+ OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); /* srcB */
+ OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); /* srcC */
+ }
+ }
+
cache_flush(ring, kernel);
}
+/*
+ * Remember which performance counters the next grid emission should sample.
+ *
+ * Only the pointer and the element count are stored; the array itself is not
+ * copied here.
+ */
+static void
+a6xx_set_perfcntrs(struct backend *b, const struct perfcntr *perfcntrs,
+		unsigned num_perfcntrs)
+{
+	struct a6xx_backend *be = to_a6xx_backend(b);
+
+	be->num_perfcntrs = num_perfcntrs;
+	be->perfcntrs = perfcntrs;
+}
+
+/*
+ * Copy the accumulated counter deltas out of the query buffer.
+ *
+ * b:       backend whose counters were configured via a6xx_set_perfcntrs()
+ * results: caller-provided array with room for at least num_perfcntrs
+ *          entries; results[i] receives the 'result' field of sample i
+ *
+ * query_mem is only allocated by a6xx_emit_grid() when at least one counter
+ * is configured, so with no counters there is no buffer to prepare or map
+ * and the function returns without touching it.
+ */
+static void
+a6xx_read_perfcntrs(struct backend *b, uint64_t *results)
+{
+	struct a6xx_backend *a6xx_backend = to_a6xx_backend(b);
+
+	if (a6xx_backend->num_perfcntrs == 0)
+		return;
+
+	/* request CPU read access before mapping the buffer: */
+	fd_bo_cpu_prep(a6xx_backend->query_mem, NULL, DRM_FREEDRENO_PREP_READ);
+	struct fd6_query_sample *samples = fd_bo_map(a6xx_backend->query_mem);
+
+	for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) {
+		results[i] = samples[i].result;
+	}
+}
+
struct backend *
a6xx_init(struct fd_device *dev, uint32_t gpu_id)
{
.assemble = a6xx_assemble,
.disassemble = a6xx_disassemble,
.emit_grid = a6xx_emit_grid,
+ .set_perfcntrs = a6xx_set_perfcntrs,
+ .read_perfcntrs = a6xx_read_perfcntrs,
};
a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id);
*/
#include <getopt.h>
+#include <inttypes.h>
+#include <locale.h>
#include <xf86drm.h>
#include "util/u_math.h"
+#include "perfcntrs/freedreno_perfcntr.h"
+
#include "main.h"
}
}
-static const char *shortopts = "df:g:h";
+static const char *shortopts = "df:g:hp:"; /* a trailing ':' marks an option that takes an argument */
static const struct option longopts[] = {
{"disasm", no_argument, 0, 'd'},
{"file", required_argument, 0, 'f'},
{"groups", required_argument, 0, 'g'},
{"help", no_argument, 0, 'h'},
+ {"perfcntr", required_argument, 0, 'p'}, /* comma separated list of countable names */
{0, 0, 0, 0} /* all-zero entry terminates the table */
};
" -f, --file=FILE read shader from file (instead of stdin)\n"
" -g, --groups=X,Y,Z use specified group size\n"
" -h, --help show this message\n"
+ " -p, --perfcntr=LIST sample specified performance counters (comma\n"
+ " separated list)\n"
,
name);
}
+/* performance counter description: */
+static unsigned num_groups;
+static const struct fd_perfcntr_group *groups;
+
+/* Track enabled counters per group: */
+static unsigned *enabled_counters;
+
+/*
+ * Look up the countable called 'name' across all perfcntr groups and bind it
+ * to the next unused counter of the group it belongs to.
+ *
+ * name: countable name to search for; the pointer is stored in c->name, so
+ *       the string must outlive *c
+ * c:    filled in with the select/counter registers of the claimed counter
+ *       and the selector value of the countable
+ *
+ * Exits via errx() if the name is unknown or if the owning group has no
+ * counters left.
+ */
+static void
+setup_counter(const char *name, struct perfcntr *c)
+{
+	for (int g = 0; g < num_groups; g++) {
+		const struct fd_perfcntr_group *grp = &groups[g];
+
+		for (int k = 0; k < grp->num_countables; k++) {
+			const struct fd_perfcntr_countable *cand = &grp->countables[k];
+
+			if (strcmp(name, cand->name) == 0) {
+				/* claim the next free counter of this group: */
+				if (grp->num_counters <= enabled_counters[g]) {
+					errx(-1, "Too many counters selected in group: %s", grp->name);
+				}
+
+				const struct fd_perfcntr_counter *hw =
+					&grp->counters[enabled_counters[g]];
+				enabled_counters[g]++;
+
+				/*
+				 * Flatten counter + countable info into a single
+				 * struct so the backend needs no further lookups:
+				 */
+				c->name = name;
+				c->select_reg = hw->select_reg;
+				c->counter_reg_lo = hw->counter_reg_lo;
+				c->counter_reg_hi = hw->counter_reg_hi;
+				c->selector = cand->selector;
+
+				return;
+			}
+		}
+	}
+
+	errx(-1, "could not find countable: %s", name);
+}
+
+/*
+ * Turn a comma separated list of countable names into an array of perfcntr
+ * descriptors.
+ *
+ * gpu_id:        passed to fd_perfcntrs() to obtain the counter groups
+ * perfcntrstr:   comma separated countable names (not modified; a private
+ *                copy is split in place)
+ * num_perfcntrs: out parameter, receives the number of array elements
+ *
+ * Returns a heap allocated array owned by the caller.  Each element's name
+ * points into the private copy of perfcntrstr, which is therefore
+ * deliberately never freed.
+ *
+ * Exits via errx() on allocation failure; setup_counter() exits if a name
+ * cannot be resolved (this includes empty names such as in "a,,b").
+ */
+static struct perfcntr *
+parse_perfcntrs(uint32_t gpu_id, const char *perfcntrstr, unsigned *num_perfcntrs)
+{
+	struct perfcntr *counters = NULL;
+	unsigned cnt = 0;
+
+	groups = fd_perfcntrs(gpu_id, &num_groups);
+	enabled_counters = calloc(num_groups, sizeof(enabled_counters[0]));
+	/* calloc(0, ..) may legitimately return NULL, only fail for real sizes: */
+	if (!enabled_counters && num_groups > 0)
+		errx(-1, "out of memory");
+
+	char *cnames = strdup(perfcntrstr);
+	if (!cnames)
+		errx(-1, "out of memory");
+
+	for (char *name = cnames; name; ) {
+		/* terminate the current name and remember where the next starts: */
+		char *sep = strchr(name, ',');
+		if (sep)
+			*sep = '\0';
+
+		/* grow via a temporary so the old array is not lost on failure: */
+		struct perfcntr *grown = realloc(counters, (cnt + 1) * sizeof(counters[0]));
+		if (!grown)
+			errx(-1, "out of memory");
+		counters = grown;
+
+		setup_counter(name, &counters[cnt]);
+		cnt++;
+
+		name = sep ? sep + 1 : NULL;
+	}
+
+	*num_perfcntrs = cnt;
+
+	return counters;
+}
+
int
main(int argc, char **argv)
{
FILE *in = stdin;
+ const char *perfcntrstr = NULL;
+ struct perfcntr *perfcntrs = NULL;
+ unsigned num_perfcntrs = 0;
bool disasm = false;
uint32_t grid[3] = {0};
int opt, ret;
+ setlocale(LC_NUMERIC, "en_US.UTF-8");
+
while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
switch (opt) {
case 'd':
break;
case 'h':
goto usage;
+ case 'p':
+ perfcntrstr = optarg;
+ break;
default:
printf("unrecognized arg: %c\n", opt);
goto usage;
struct fd_submit *submit = fd_submit_new(pipe);
+ if (perfcntrstr) {
+ if (!backend->set_perfcntrs) {
+ err(1, "performance counters not supported");
+ }
+ perfcntrs = parse_perfcntrs(gpu_id, perfcntrstr, &num_perfcntrs);
+ backend->set_perfcntrs(backend, perfcntrs, num_perfcntrs);
+ }
+
backend->emit_grid(kernel, grid, submit);
fd_submit_flush(submit, -1, NULL, NULL);
dump_float(map, kernel->buf_sizes[i] * 4);
}
+ if (perfcntrstr) {
+ uint64_t results[num_perfcntrs];
+ backend->read_perfcntrs(backend, results);
+
+ for (unsigned i = 0; i < num_perfcntrs; i++) {
+ printf("%s:\t%'"PRIu64"\n", perfcntrs[i].name, results[i]);
+ }
+ }
+
return 0;
usage: