From c188235d1be7d4fc65d99d2620d0dcdec5a21c84 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Sun, 11 Oct 2015 11:23:54 +0200
Subject: [PATCH] nvc0: add support for performance monitoring metrics on Fermi

As explained in the CUDA toolkit documentation, "a metric is a
characteristic of an application that is calculated from one or more
event values."

Signed-off-by: Samuel Pitoiset
Reviewed-by: Ilia Mirkin
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 .../drivers/nouveau/nvc0/nvc0_query_hw.c      |  19 +-
 .../nouveau/nvc0/nvc0_query_hw_metric.c       | 454 ++++++++++++++++++
 .../nouveau/nvc0/nvc0_query_hw_metric.h       |  42 ++
 4 files changed, 514 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
 create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index edc6cf41885..c18e9f5b435 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -154,6 +154,8 @@ NVC0_C_SOURCES := \
 	nvc0/nvc0_query.h \
 	nvc0/nvc0_query_hw.c \
 	nvc0/nvc0_query_hw.h \
+	nvc0/nvc0_query_hw_metric.c \
+	nvc0/nvc0_query_hw_metric.h \
 	nvc0/nvc0_query_hw_sm.c \
 	nvc0/nvc0_query_hw_sm.h \
 	nvc0/nvc0_query_sw.c \
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 91254bedf1e..90ee82f21e5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -25,6 +25,7 @@
 
 #include "nvc0/nvc0_context.h"
 #include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
 #include "nvc0/nvc0_query_hw_sm.h"
 
 #define NVC0_HW_QUERY_STATE_READY   0
@@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index)
       return (struct nvc0_query *)hq;
    }
 
+   hq = nvc0_hw_metric_create_query(nvc0, type);
+   if (hq) {
+      hq->base.funcs = &hw_query_funcs;
+      return (struct nvc0_query *)hq;
+   }
+
    hq = CALLOC_STRUCT(nvc0_hw_query);
    if (!hq)
       return NULL;
@@ -435,14 +442,20 @@ int
 nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
                               struct pipe_driver_query_info *info)
 {
-   int num_hw_sm_queries = 0;
+   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
 
    num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
+   num_hw_metric_queries =
+      nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);
 
    if (!info)
-      return num_hw_sm_queries;
+      return num_hw_sm_queries + num_hw_metric_queries;
+
+   if (id < num_hw_sm_queries)
+      return nvc0_hw_sm_get_driver_query_info(screen, id, info);
 
-   return nvc0_hw_sm_get_driver_query_info(screen, id, info);
+   return nvc0_hw_metric_get_driver_query_info(screen,
+                                               id - num_hw_sm_queries, info);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
new file mode 100644
index 00000000000..25aa09be42a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
+static const char *nvc0_hw_metric_names[] =
+{
+   "metric-achieved_occupancy",
+   "metric-branch_efficiency",
+   "metric-inst_issued",
+   "metric-inst_per_wrap",
+   "metric-inst_replay_overhead",
+   "metric-issued_ipc",
+   "metric-issue_slots",
+   "metric-issue_slot_utilization",
+   "metric-ipc",
+};
+
+struct nvc0_hw_metric_query_cfg {
+   uint32_t queries[8];
+   uint32_t num_queries;
+};
+
+#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
+#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c
+
+/* ==== Compute capability 2.0 (GF100/GF110) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm20_achieved_occupancy =
+{
+   .queries[0]  = _SM(ACTIVE_WARPS),
+   .queries[1]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_branch_efficiency =
+{
+   .queries[0]  = _SM(BRANCH),
+   .queries[1]  = _SM(DIVERGENT_BRANCH),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_inst_per_wrap =
+{
+   .queries[0]  = _SM(INST_EXECUTED),
+   .queries[1]  = _SM(WARPS_LAUNCHED),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_inst_replay_overhead =
+{
+   .queries[0]  = _SM(INST_ISSUED),
+   .queries[1]  = _SM(INST_EXECUTED),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_issued_ipc =
+{
+   .queries[0]  = _SM(INST_ISSUED),
+   .queries[1]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_ipc =
+{
+   .queries[0]  = _SM(INST_EXECUTED),
+   .queries[1]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
+{
+   _M(ACHIEVED_OCCUPANCY,     &sm20_achieved_occupancy),
+   _M(BRANCH_EFFICIENCY,      &sm20_branch_efficiency),
+   _M(INST_ISSUED,            NULL),
+   _M(INST_PER_WRAP,          &sm20_inst_per_wrap),
+   _M(INST_REPLAY_OVERHEAD,   &sm20_inst_replay_overhead),
+   _M(ISSUED_IPC,             &sm20_issued_ipc),
+   _M(ISSUE_SLOTS,            NULL),
+   _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc),
+   _M(IPC,                    &sm20_ipc),
+};
+
+/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm21_inst_issued =
+{
+   .queries[0]  = _SM(INST_ISSUED1_0),
+   .queries[1]  = _SM(INST_ISSUED1_1),
+   .queries[2]  = _SM(INST_ISSUED2_0),
+   .queries[3]  = _SM(INST_ISSUED2_1),
+   .num_queries = 4,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm21_inst_replay_overhead =
+{
+   .queries[0]  = _SM(INST_ISSUED1_0),
+   .queries[1]  = _SM(INST_ISSUED1_1),
+   .queries[2]  = _SM(INST_ISSUED2_0),
+   .queries[3]  = _SM(INST_ISSUED2_1),
+   .queries[4]  = _SM(INST_EXECUTED),
+   .num_queries = 5,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm21_issued_ipc =
+{
+   .queries[0]  = _SM(INST_ISSUED1_0),
+   .queries[1]  = _SM(INST_ISSUED1_1),
+   .queries[2]  = _SM(INST_ISSUED2_0),
+   .queries[3]  = _SM(INST_ISSUED2_1),
+   .queries[4]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 5,
+};
+
+static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
+{
+   _M(ACHIEVED_OCCUPANCY,     &sm20_achieved_occupancy),
+   _M(BRANCH_EFFICIENCY,      &sm20_branch_efficiency),
+   _M(INST_ISSUED,            &sm21_inst_issued),
+   _M(INST_PER_WRAP,          &sm20_inst_per_wrap),
+   _M(INST_REPLAY_OVERHEAD,   &sm21_inst_replay_overhead),
+   _M(ISSUED_IPC,             &sm21_issued_ipc),
+   _M(ISSUE_SLOTS,            &sm21_inst_issued),
+   _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc),
+   _M(IPC,                    &sm20_ipc),
+};
+
+#undef _SM
+#undef _M
+
+static inline const struct nvc0_hw_metric_query_cfg **
+nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
+{
+   struct nouveau_device *dev = screen->base.device;
+
+   if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+      return sm20_hw_metric_queries;
+   return sm21_hw_metric_queries;
+}
+
+static const struct nvc0_hw_metric_query_cfg *
+nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0,
+                             struct nvc0_hw_query *hq)
+{
+   const struct nvc0_hw_metric_query_cfg **queries;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nvc0_query *q = &hq->base;
+
+   queries = nvc0_hw_metric_get_queries(screen);
+   return queries[q->type - NVC0_HW_METRIC_QUERY(0)];
+}
+
+static void
+nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
+                             struct nvc0_hw_query *hq)
+{
+   struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++)
+      hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
+   FREE(hmq);
+}
+
+static boolean
+nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+   boolean ret = false;
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) {
+      ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
+      if (!ret)
+         return ret;
+   }
+   return ret;
+}
+
+static void
+nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++)
+      hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
+}
+
+static uint64_t
+sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+   switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+   case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+      /* (active_warps / active_cycles) / max. number of warps on a MP */
+      if (res64[1])
+         return (res64[0] / (double)res64[1]) / 48;
+      break;
+   case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+      /* (branch / (branch + divergent_branch)) * 100 */
+      if (res64[0] + res64[1])
+         return (res64[0] / (double)(res64[0] + res64[1])) * 100;
+      break;
+   case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
+      /* inst_executed / warps_launched */
+      if (res64[1])
+         return res64[0] / (double)res64[1];
+      break;
+   case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
+      /* (inst_issued - inst_executed) / inst_executed */
+      if (res64[1])
+         return (res64[0] - res64[1]) / (double)res64[1];
+      break;
+   case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
+      /* inst_issued / active_cycles */
+      if (res64[1])
+         return res64[0] / (double)res64[1];
+      break;
+   case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
+      /* ((inst_issued / 2) / active_cycles) * 100 */
+      if (res64[1])
+         return ((res64[0] / 2) / (double)res64[1]) * 100;
+      break;
+   case NVC0_HW_METRIC_QUERY_IPC:
+      /* inst_executed / active_cycles */
+      if (res64[1])
+         return res64[0] / (double)res64[1];
+      break;
+   default:
+      debug_printf("invalid metric type: %d\n",
+                   hq->base.type - NVC0_HW_METRIC_QUERY(0));
+      break;
+   }
+   return 0;
+}
+
+static uint64_t
+sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+   switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+   case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+      return sm20_hw_metric_calc_result(hq, res64);
+   case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+      return sm20_hw_metric_calc_result(hq, res64);
+   case NVC0_HW_METRIC_QUERY_INST_ISSUED:
+      /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
+      return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
+      break;
+   case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
+      return sm20_hw_metric_calc_result(hq, res64);
+   case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
+      /* (metric-inst_issued - inst_executed) / inst_executed */
+      if (res64[4])
+         return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
+                  res64[4]) / (double)res64[4]);
+      break;
+   case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
+      /* metric-inst_issued / active_cycles */
+      if (res64[4])
+         return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
+                (double)res64[4];
+      break;
+   case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
+      /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
+      return res64[0] + res64[1] + res64[2] + res64[3];
+      break;
+   case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
+      /* ((metric-issue_slots / 2) / active_cycles) * 100 */
+      if (res64[4])
+         return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
+                 (double)res64[4]) * 100;
+      break;
+   case NVC0_HW_METRIC_QUERY_IPC:
+      return sm20_hw_metric_calc_result(hq, res64);
+   default:
+      debug_printf("invalid metric type: %d\n",
+                   hq->base.type - NVC0_HW_METRIC_QUERY(0));
+      break;
+   }
+   return 0;
+}
+
+static boolean
+nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
+                                struct nvc0_hw_query *hq, boolean wait,
+                                union pipe_query_result *result)
+{
+   struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nouveau_device *dev = screen->base.device;
+   union pipe_query_result results[8] = {};
+   uint64_t res64[8] = {};
+   uint64_t value = 0;
+   boolean ret = false;
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) {
+      ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
+                                                     wait, &results[i]);
+      if (!ret)
+         return ret;
+      res64[i] = *(uint64_t *)&results[i];
+   }
+
+   if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+      value = sm20_hw_metric_calc_result(hq, res64);
+   else
+      value = sm21_hw_metric_calc_result(hq, res64);
+
+   *(uint64_t *)result = value;
+   return ret;
+}
+
+static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
+   .destroy_query = nvc0_hw_metric_destroy_query,
+   .begin_query = nvc0_hw_metric_begin_query,
+   .end_query = nvc0_hw_metric_end_query,
+   .get_query_result = nvc0_hw_metric_get_query_result,
+};
+
+/*
+ * Create a metric query made of one nvc0_hw_sm query per configured event.
+ * Returns NULL if type is not a metric, if the metric has no configuration
+ * on this chipset, or if an allocation or sub-query creation fails.
+ */
+struct nvc0_hw_query *
+nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
+{
+   const struct nvc0_hw_metric_query_cfg *cfg;
+   struct nvc0_hw_metric_query *hmq;
+   struct nvc0_hw_query *hq;
+   unsigned i;
+
+   if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
+      return NULL;
+
+   hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
+   if (!hmq)
+      return NULL;
+
+   hq = &hmq->base;
+   hq->funcs = &hw_metric_query_funcs;
+   hq->base.type = type;
+
+   cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
+   if (!cfg) {
+      /* NULL table entry (e.g. inst_issued on SM20): nothing to create. */
+      nvc0_hw_metric_destroy_query(nvc0, hq);
+      return NULL;
+   }
+
+   for (i = 0; i < cfg->num_queries; i++) {
+      hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
+      if (!hmq->queries[i]) {
+         nvc0_hw_metric_destroy_query(nvc0, hq);
+         return NULL;
+      }
+      hmq->num_queries++;
+   }
+
+   return hq;
+}
+
+/*
+ * Map id, a position among the non-NULL entries of queries, to an index
+ * into the table itself by adding the number of NULL entries before it.
+ */
+static int
+nvc0_hw_metric_get_next_query_id(const struct nvc0_hw_metric_query_cfg **queries,
+                                 unsigned id)
+{
+   unsigned i, next = 0;
+
+   for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) {
+      if (!queries[i]) {
+         next++;
+      } else
+      if (i >= id && queries[id + next]) {
+         break;
+      }
+   }
+   return id + next;
+}
+
+int
+nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
+                                     struct pipe_driver_query_info *info)
+{
+   uint16_t class_3d = screen->base.class_3d;
+   int count = 0;
+
+   if (screen->base.device->drm_version >= 0x01000101) {
+      if (screen->compute) {
+         if (class_3d < NVE4_3D_CLASS) {
+            const struct nvc0_hw_metric_query_cfg **queries =
+               nvc0_hw_metric_get_queries(screen);
+            unsigned i;
+
+            for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) {
+               if (queries[i])
+                  count++;
+            }
+         }
+      }
+   }
+
+   if (!info)
+      return count;
+
+   if (id < count) {
+      if (screen->compute) {
+         if (class_3d < NVE4_3D_CLASS) {
+            const struct nvc0_hw_metric_query_cfg **queries =
+               nvc0_hw_metric_get_queries(screen);
+
+            id = nvc0_hw_metric_get_next_query_id(queries, id);
+            info->name = nvc0_hw_metric_names[id];
+            info->query_type = NVC0_HW_METRIC_QUERY(id);
+            info->group_id = -1;
+            return 1;
+         }
+      }
+   }
+   return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
new file mode 100644
index 00000000000..95675fd19b7
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
@@ -0,0 +1,42 @@
+#ifndef __NVC0_QUERY_HW_METRIC_H__
+#define __NVC0_QUERY_HW_METRIC_H__
+
+#include "nvc0_query_hw.h"
+
+struct nvc0_hw_metric_query {
+   struct nvc0_hw_query base;
+   struct nvc0_hw_query *queries[8];
+   unsigned num_queries;
+};
+
+static inline struct nvc0_hw_metric_query *
+nvc0_hw_metric_query(struct nvc0_hw_query *hq)
+{
+   return (struct nvc0_hw_metric_query *)hq;
+}
+
+/*
+ * Driver metrics queries:
+ */
+#define NVC0_HW_METRIC_QUERY(i)   (PIPE_QUERY_DRIVER_SPECIFIC + 3072 + (i))
+#define NVC0_HW_METRIC_QUERY_LAST  NVC0_HW_METRIC_QUERY(NVC0_HW_METRIC_QUERY_COUNT - 1)
+enum nvc0_hw_metric_queries
+{
+   NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY = 0,
+   NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
+   NVC0_HW_METRIC_QUERY_INST_ISSUED,
+   NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
+   NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
+   NVC0_HW_METRIC_QUERY_ISSUED_IPC,
+   NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
+   NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
+   NVC0_HW_METRIC_QUERY_IPC,
+   NVC0_HW_METRIC_QUERY_COUNT
+};
+
+struct nvc0_hw_query *
+nvc0_hw_metric_create_query(struct nvc0_context *, unsigned);
+int
+nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *, unsigned,
+                                     struct pipe_driver_query_info *);
+#endif
-- 
2.30.2