2 * Copyright 2015 Samuel Pitoiset
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
27 #define _Q(t,n) { NVC0_HW_METRIC_QUERY_##t, n }
31 } nvc0_hw_metric_queries
[] = {
32 _Q(ACHIEVED_OCCUPANCY
, "metric-achieved_occupancy" ),
33 _Q(BRANCH_EFFICIENCY
, "metric-branch_efficiency" ),
34 _Q(INST_ISSUED
, "metric-inst_issued" ),
35 _Q(INST_PER_WRAP
, "metric-inst_per_wrap" ),
36 _Q(INST_REPLAY_OVERHEAD
, "metric-inst_replay_overhead" ),
37 _Q(ISSUED_IPC
, "metric-issued_ipc" ),
38 _Q(ISSUE_SLOTS
, "metric-issue_slots" ),
39 _Q(ISSUE_SLOT_UTILIZATION
, "metric-issue_slot_utilization" ),
40 _Q(IPC
, "metric-ipc" ),
41 _Q(SHARED_REPLAY_OVERHEAD
, "metric-shared_replay_overhead" ),
46 static inline const char *
47 nvc0_hw_metric_query_get_name(unsigned query_type
)
51 for (i
= 0; i
< ARRAY_SIZE(nvc0_hw_metric_queries
); i
++) {
52 if (nvc0_hw_metric_queries
[i
].type
== query_type
)
53 return nvc0_hw_metric_queries
[i
].name
;
/*
 * Static description of one derived metric: which metric it is and which SM
 * counter queries must be sampled to compute it.
 *
 * NOTE(review): the member list is not visible in the listing.  Names come
 * from their uses in this file (.type, .queries[n], ->num_queries); the
 * element types and the bound of 8 (indices 0..4 are initialised, results
 * are gathered into 8-entry arrays) are assumptions to confirm.
 */
struct nvc0_hw_metric_query_cfg
{
   unsigned type;         /* NVC0_HW_METRIC_QUERY_* identifier */
   uint32_t queries[8];   /* SM counter query ids, in result-array order */
   uint32_t num_queries;  /* number of valid entries in queries[] */
};

/* Shorthand: SM counter query id built from NVC0_HW_SM_QUERY_<n>. */
#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
67 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
68 static const struct nvc0_hw_metric_query_cfg
69 sm20_achieved_occupancy
=
71 .type
= NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
,
72 .queries
[0] = _SM(ACTIVE_WARPS
),
73 .queries
[1] = _SM(ACTIVE_CYCLES
),
77 static const struct nvc0_hw_metric_query_cfg
78 sm20_branch_efficiency
=
80 .type
= NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
,
81 .queries
[0] = _SM(BRANCH
),
82 .queries
[1] = _SM(DIVERGENT_BRANCH
),
86 static const struct nvc0_hw_metric_query_cfg
89 .type
= NVC0_HW_METRIC_QUERY_INST_PER_WRAP
,
90 .queries
[0] = _SM(INST_EXECUTED
),
91 .queries
[1] = _SM(WARPS_LAUNCHED
),
95 static const struct nvc0_hw_metric_query_cfg
96 sm20_inst_replay_overhead
=
98 .type
= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
,
99 .queries
[0] = _SM(INST_ISSUED
),
100 .queries
[1] = _SM(INST_EXECUTED
),
104 static const struct nvc0_hw_metric_query_cfg
107 .type
= NVC0_HW_METRIC_QUERY_ISSUED_IPC
,
108 .queries
[0] = _SM(INST_ISSUED
),
109 .queries
[1] = _SM(ACTIVE_CYCLES
),
113 static const struct nvc0_hw_metric_query_cfg
114 sm20_issue_slot_utilization
=
116 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
,
117 .queries
[0] = _SM(INST_ISSUED
),
118 .queries
[1] = _SM(ACTIVE_CYCLES
),
122 static const struct nvc0_hw_metric_query_cfg
125 .type
= NVC0_HW_METRIC_QUERY_IPC
,
126 .queries
[0] = _SM(INST_EXECUTED
),
127 .queries
[1] = _SM(ACTIVE_CYCLES
),
131 static const struct nvc0_hw_metric_query_cfg
*sm20_hw_metric_queries
[] =
133 &sm20_achieved_occupancy
,
134 &sm20_branch_efficiency
,
136 &sm20_inst_replay_overhead
,
138 &sm20_issue_slot_utilization
,
142 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
143 static const struct nvc0_hw_metric_query_cfg
146 .type
= NVC0_HW_METRIC_QUERY_INST_ISSUED
,
147 .queries
[0] = _SM(INST_ISSUED1_0
),
148 .queries
[1] = _SM(INST_ISSUED1_1
),
149 .queries
[2] = _SM(INST_ISSUED2_0
),
150 .queries
[3] = _SM(INST_ISSUED2_1
),
154 static const struct nvc0_hw_metric_query_cfg
155 sm21_inst_replay_overhead
=
157 .type
= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
,
158 .queries
[0] = _SM(INST_ISSUED1_0
),
159 .queries
[1] = _SM(INST_ISSUED1_1
),
160 .queries
[2] = _SM(INST_ISSUED2_0
),
161 .queries
[3] = _SM(INST_ISSUED2_1
),
162 .queries
[4] = _SM(INST_EXECUTED
),
166 static const struct nvc0_hw_metric_query_cfg
169 .type
= NVC0_HW_METRIC_QUERY_ISSUED_IPC
,
170 .queries
[0] = _SM(INST_ISSUED1_0
),
171 .queries
[1] = _SM(INST_ISSUED1_1
),
172 .queries
[2] = _SM(INST_ISSUED2_0
),
173 .queries
[3] = _SM(INST_ISSUED2_1
),
174 .queries
[4] = _SM(ACTIVE_CYCLES
),
178 static const struct nvc0_hw_metric_query_cfg
179 sm21_issue_slot_utilization
=
181 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
,
182 .queries
[0] = _SM(INST_ISSUED1_0
),
183 .queries
[1] = _SM(INST_ISSUED1_1
),
184 .queries
[2] = _SM(INST_ISSUED2_0
),
185 .queries
[3] = _SM(INST_ISSUED2_1
),
186 .queries
[4] = _SM(ACTIVE_CYCLES
),
190 static const struct nvc0_hw_metric_query_cfg
*sm21_hw_metric_queries
[] =
192 &sm20_achieved_occupancy
,
193 &sm20_branch_efficiency
,
196 &sm21_inst_replay_overhead
,
199 &sm21_issue_slot_utilization
,
203 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
204 static const struct nvc0_hw_metric_query_cfg
205 sm30_achieved_occupancy
=
207 .type
= NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
,
208 .queries
[0] = _SM(ACTIVE_WARPS
),
209 .queries
[1] = _SM(ACTIVE_CYCLES
),
213 static const struct nvc0_hw_metric_query_cfg
214 sm30_branch_efficiency
=
216 .type
= NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
,
217 .queries
[0] = _SM(BRANCH
),
218 .queries
[1] = _SM(DIVERGENT_BRANCH
),
222 static const struct nvc0_hw_metric_query_cfg
225 .type
= NVC0_HW_METRIC_QUERY_INST_ISSUED
,
226 .queries
[0] = _SM(INST_ISSUED1
),
227 .queries
[1] = _SM(INST_ISSUED2
),
231 static const struct nvc0_hw_metric_query_cfg
234 .type
= NVC0_HW_METRIC_QUERY_INST_PER_WRAP
,
235 .queries
[0] = _SM(INST_EXECUTED
),
236 .queries
[1] = _SM(WARPS_LAUNCHED
),
240 static const struct nvc0_hw_metric_query_cfg
241 sm30_inst_replay_overhead
=
243 .type
= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
,
244 .queries
[0] = _SM(INST_ISSUED1
),
245 .queries
[1] = _SM(INST_ISSUED2
),
246 .queries
[2] = _SM(INST_EXECUTED
),
250 static const struct nvc0_hw_metric_query_cfg
253 .type
= NVC0_HW_METRIC_QUERY_ISSUED_IPC
,
254 .queries
[0] = _SM(INST_ISSUED1
),
255 .queries
[1] = _SM(INST_ISSUED2
),
256 .queries
[2] = _SM(ACTIVE_CYCLES
),
260 static const struct nvc0_hw_metric_query_cfg
261 sm30_issue_slot_utilization
=
263 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
,
264 .queries
[0] = _SM(INST_ISSUED1
),
265 .queries
[1] = _SM(INST_ISSUED2
),
266 .queries
[2] = _SM(ACTIVE_CYCLES
),
270 static const struct nvc0_hw_metric_query_cfg
273 .type
= NVC0_HW_METRIC_QUERY_IPC
,
274 .queries
[0] = _SM(INST_EXECUTED
),
275 .queries
[1] = _SM(ACTIVE_CYCLES
),
279 static const struct nvc0_hw_metric_query_cfg
280 sm30_shared_replay_overhead
=
282 .type
= NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD
,
283 .queries
[0] = _SM(SHARED_LD_REPLAY
),
284 .queries
[1] = _SM(SHARED_ST_REPLAY
),
285 .queries
[2] = _SM(INST_EXECUTED
),
289 static const struct nvc0_hw_metric_query_cfg
*sm30_hw_metric_queries
[] =
291 &sm30_achieved_occupancy
,
292 &sm30_branch_efficiency
,
295 &sm30_inst_replay_overhead
,
298 &sm30_issue_slot_utilization
,
300 &sm30_shared_replay_overhead
,
303 /* ==== Compute capability 3.5 (GK110) ==== */
304 static const struct nvc0_hw_metric_query_cfg
*sm35_hw_metric_queries
[] =
306 &sm30_achieved_occupancy
,
309 &sm30_inst_replay_overhead
,
312 &sm30_issue_slot_utilization
,
314 &sm30_shared_replay_overhead
,
319 static inline const struct nvc0_hw_metric_query_cfg
**
320 nvc0_hw_metric_get_queries(struct nvc0_screen
*screen
)
322 struct nouveau_device
*dev
= screen
->base
.device
;
324 switch (screen
->base
.class_3d
) {
326 return sm35_hw_metric_queries
;
328 return sm30_hw_metric_queries
;
330 if (dev
->chipset
== 0xc0 || dev
->chipset
== 0xc8)
331 return sm20_hw_metric_queries
;
332 return sm21_hw_metric_queries
;
339 nvc0_hw_metric_get_num_queries(struct nvc0_screen
*screen
)
341 struct nouveau_device
*dev
= screen
->base
.device
;
343 switch (screen
->base
.class_3d
) {
345 return ARRAY_SIZE(sm35_hw_metric_queries
);
347 return ARRAY_SIZE(sm30_hw_metric_queries
);
349 if (dev
->chipset
== 0xc0 || dev
->chipset
== 0xc8)
350 return ARRAY_SIZE(sm20_hw_metric_queries
);
351 return ARRAY_SIZE(sm21_hw_metric_queries
);
356 static const struct nvc0_hw_metric_query_cfg
*
357 nvc0_hw_metric_query_get_cfg(struct nvc0_context
*nvc0
, struct nvc0_hw_query
*hq
)
359 const struct nvc0_hw_metric_query_cfg
**queries
;
360 struct nvc0_screen
*screen
= nvc0
->screen
;
361 struct nvc0_query
*q
= &hq
->base
;
362 unsigned num_queries
;
365 num_queries
= nvc0_hw_metric_get_num_queries(screen
);
366 queries
= nvc0_hw_metric_get_queries(screen
);
368 for (i
= 0; i
< num_queries
; i
++) {
369 if (NVC0_HW_METRIC_QUERY(queries
[i
]->type
) == q
->type
)
377 nvc0_hw_metric_destroy_query(struct nvc0_context
*nvc0
,
378 struct nvc0_hw_query
*hq
)
380 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
383 for (i
= 0; i
< hmq
->num_queries
; i
++)
384 if (hmq
->queries
[i
]->funcs
->destroy_query
)
385 hmq
->queries
[i
]->funcs
->destroy_query(nvc0
, hmq
->queries
[i
]);
390 nvc0_hw_metric_begin_query(struct nvc0_context
*nvc0
, struct nvc0_hw_query
*hq
)
392 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
396 for (i
= 0; i
< hmq
->num_queries
; i
++) {
397 ret
= hmq
->queries
[i
]->funcs
->begin_query(nvc0
, hmq
->queries
[i
]);
405 nvc0_hw_metric_end_query(struct nvc0_context
*nvc0
, struct nvc0_hw_query
*hq
)
407 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
410 for (i
= 0; i
< hmq
->num_queries
; i
++)
411 hmq
->queries
[i
]->funcs
->end_query(nvc0
, hmq
->queries
[i
]);
415 sm20_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
417 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
418 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
:
419 /* (active_warps / active_cycles) / max. number of warps on a MP */
421 return (res64
[0] / (double)res64
[1]) / 48;
423 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
:
424 /* (branch / (branch + divergent_branch)) * 100 */
425 if (res64
[0] + res64
[1])
426 return (res64
[0] / (double)(res64
[0] + res64
[1])) * 100;
428 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP
:
429 /* inst_executed / warps_launched */
431 return res64
[0] / (double)res64
[1];
433 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
:
434 /* (inst_issued - inst_executed) / inst_executed */
436 return (res64
[0] - res64
[1]) / (double)res64
[1];
438 case NVC0_HW_METRIC_QUERY_ISSUED_IPC
:
439 /* inst_issued / active_cycles */
441 return res64
[0] / (double)res64
[1];
443 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
:
444 /* ((inst_issued / 2) / active_cycles) * 100 */
446 return ((res64
[0] / 2) / (double)res64
[1]) * 100;
448 case NVC0_HW_METRIC_QUERY_IPC
:
449 /* inst_executed / active_cycles */
451 return res64
[0] / (double)res64
[1];
454 debug_printf("invalid metric type: %d\n",
455 hq
->base
.type
- NVC0_HW_METRIC_QUERY(0));
462 sm21_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
464 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
465 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
:
466 return sm20_hw_metric_calc_result(hq
, res64
);
467 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
:
468 return sm20_hw_metric_calc_result(hq
, res64
);
469 case NVC0_HW_METRIC_QUERY_INST_ISSUED
:
470 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
471 return res64
[0] + res64
[1] + (res64
[2] + res64
[3]) * 2;
473 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP
:
474 return sm20_hw_metric_calc_result(hq
, res64
);
475 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
:
476 /* (metric-inst_issued - inst_executed) / inst_executed */
478 return (((res64
[0] + res64
[1] + (res64
[2] + res64
[3]) * 2) -
479 res64
[4]) / (double)res64
[4]);
481 case NVC0_HW_METRIC_QUERY_ISSUED_IPC
:
482 /* metric-inst_issued / active_cycles */
484 return (res64
[0] + res64
[1] + (res64
[2] + res64
[3]) * 2) /
487 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS
:
488 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
489 return res64
[0] + res64
[1] + res64
[2] + res64
[3];
491 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
:
492 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
494 return (((res64
[0] + res64
[1] + res64
[2] + res64
[3]) / 2) /
495 (double)res64
[4]) * 100;
497 case NVC0_HW_METRIC_QUERY_IPC
:
498 return sm20_hw_metric_calc_result(hq
, res64
);
500 debug_printf("invalid metric type: %d\n",
501 hq
->base
.type
- NVC0_HW_METRIC_QUERY(0));
508 sm30_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
510 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
511 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
:
512 /* (active_warps / active_cycles) / max. number of warps on a MP */
514 return (res64
[0] / (double)res64
[1]) / 64;
516 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
:
517 return sm20_hw_metric_calc_result(hq
, res64
);
518 case NVC0_HW_METRIC_QUERY_INST_ISSUED
:
519 /* inst_issued1 + inst_issued2 * 2 */
520 return res64
[0] + res64
[1] * 2;
521 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP
:
522 return sm20_hw_metric_calc_result(hq
, res64
);
523 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
:
524 /* (metric-inst_issued - inst_executed) / inst_executed */
526 return (((res64
[0] + res64
[1] * 2) - res64
[2]) / (double)res64
[2]);
528 case NVC0_HW_METRIC_QUERY_ISSUED_IPC
:
529 /* metric-inst_issued / active_cycles */
531 return (res64
[0] + res64
[1] * 2) / (double)res64
[2];
533 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS
:
534 /* inst_issued1 + inst_issued2 */
535 return res64
[0] + res64
[1];
536 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
:
537 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
539 return (((res64
[0] + res64
[1]) / 2) / (double)res64
[2]) * 100;
541 case NVC0_HW_METRIC_QUERY_IPC
:
542 return sm20_hw_metric_calc_result(hq
, res64
);
543 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD
:
544 /* (shared_load_replay + shared_store_replay) / inst_executed */
546 return (res64
[0] + res64
[1]) / (double)res64
[2];
549 debug_printf("invalid metric type: %d\n",
550 hq
->base
.type
- NVC0_HW_METRIC_QUERY(0));
557 nvc0_hw_metric_get_query_result(struct nvc0_context
*nvc0
,
558 struct nvc0_hw_query
*hq
, boolean wait
,
559 union pipe_query_result
*result
)
561 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
562 struct nvc0_screen
*screen
= nvc0
->screen
;
563 struct nouveau_device
*dev
= screen
->base
.device
;
564 union pipe_query_result results
[8] = {};
565 uint64_t res64
[8] = {};
570 for (i
= 0; i
< hmq
->num_queries
; i
++) {
571 ret
= hmq
->queries
[i
]->funcs
->get_query_result(nvc0
, hmq
->queries
[i
],
575 res64
[i
] = *(uint64_t *)&results
[i
];
578 switch (screen
->base
.class_3d
) {
581 value
= sm30_hw_metric_calc_result(hq
, res64
);
584 if (dev
->chipset
== 0xc0 || dev
->chipset
== 0xc8)
585 value
= sm20_hw_metric_calc_result(hq
, res64
);
587 value
= sm21_hw_metric_calc_result(hq
, res64
);
591 *(uint64_t *)result
= value
;
595 static const struct nvc0_hw_query_funcs hw_metric_query_funcs
= {
596 .destroy_query
= nvc0_hw_metric_destroy_query
,
597 .begin_query
= nvc0_hw_metric_begin_query
,
598 .end_query
= nvc0_hw_metric_end_query
,
599 .get_query_result
= nvc0_hw_metric_get_query_result
,
602 struct nvc0_hw_query
*
603 nvc0_hw_metric_create_query(struct nvc0_context
*nvc0
, unsigned type
)
605 const struct nvc0_hw_metric_query_cfg
*cfg
;
606 struct nvc0_hw_metric_query
*hmq
;
607 struct nvc0_hw_query
*hq
;
610 if (type
< NVC0_HW_METRIC_QUERY(0) || type
> NVC0_HW_METRIC_QUERY_LAST
)
613 hmq
= CALLOC_STRUCT(nvc0_hw_metric_query
);
618 hq
->funcs
= &hw_metric_query_funcs
;
619 hq
->base
.type
= type
;
621 cfg
= nvc0_hw_metric_query_get_cfg(nvc0
, hq
);
623 for (i
= 0; i
< cfg
->num_queries
; i
++) {
624 hmq
->queries
[i
] = nvc0_hw_sm_create_query(nvc0
, cfg
->queries
[i
]);
625 if (!hmq
->queries
[i
]) {
626 nvc0_hw_metric_destroy_query(nvc0
, hq
);
636 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen
*screen
, unsigned id
,
637 struct pipe_driver_query_info
*info
)
641 if (screen
->base
.drm
->version
>= 0x01000101) {
643 count
= nvc0_hw_metric_get_num_queries(screen
);
650 if (screen
->compute
) {
651 if (screen
->base
.class_3d
<= NVF0_3D_CLASS
) {
652 const struct nvc0_hw_metric_query_cfg
**queries
=
653 nvc0_hw_metric_get_queries(screen
);
655 info
->name
= nvc0_hw_metric_query_get_name(queries
[id
]->type
);
656 info
->query_type
= NVC0_HW_METRIC_QUERY(queries
[id
]->type
);
657 info
->group_id
= NVC0_HW_METRIC_QUERY_GROUP
;