2 * Copyright 2015 Samuel Pitoiset
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
/* Table-entry shorthand: expands to a braced initializer made of the metric id
 * NVC0_HW_METRIC_QUERY_<i>, the name n, the driver query type
 * PIPE_DRIVER_QUERY_TYPE_<t> and the description d, in that order. */
27 #define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d }
28 static const struct nvc0_hw_metric_cfg
{
31 enum pipe_driver_query_type type
;
33 } nvc0_hw_metric_queries
[] = {
34 _Q(ACHIEVED_OCCUPANCY
,
35 "metric-achieved_occupancy",
37 "Ratio of the average active warps per active cycle to the maximum "
38 "number of warps supported on a multiprocessor"),
41 "metric-branch_efficiency",
43 "Ratio of non-divergent branches to total branches"),
48 "The number of instructions issued"),
51 "metric-inst_per_wrap",
53 "Average number of instructions executed by each warp"),
55 _Q(INST_REPLAY_OVERHEAD
,
56 "metric-inst_replay_overhead",
58 "Average number of replays for each instruction executed"),
63 "Instructions issued per cycle"),
68 "The number of issue slots used"),
70 _Q(ISSUE_SLOT_UTILIZATION
,
71 "metric-issue_slot_utilization",
73 "Percentage of issue slots that issued at least one instruction, "
74 "averaged across all cycles"),
79 "Instructions executed per cycle"),
81 _Q(SHARED_REPLAY_OVERHEAD
,
82 "metric-shared_replay_overhead",
84 "Average number of replays due to shared memory conflicts for each "
85 "instruction executed"),
87 _Q(WARP_EXECUTION_EFFICIENCY
,
88 "metric-warp_execution_efficiency",
90 "Ratio of the average active threads per warp to the maximum number of "
91 "threads per warp supported on a multiprocessor"),
93 _Q(WARP_NONPRED_EXECUTION_EFFICIENCY
,
94 "metric-warp_nonpred_execution_efficiency",
96 "Ratio of the average active threads per warp executing non-predicated "
97 "instructions to the maximum number of threads per warp supported on a "
/*
 * Looks up the static description of a metric: returns a pointer to the entry
 * of nvc0_hw_metric_queries whose id field equals metric_id.
 */
103 static inline const struct nvc0_hw_metric_cfg
*
104 nvc0_hw_metric_get_cfg(unsigned metric_id
)
/* Linear scan over the whole description table; the first matching id wins. */
108 for (i
= 0; i
< ARRAY_SIZE(nvc0_hw_metric_queries
); i
++) {
109 if (nvc0_hw_metric_queries
[i
].id
== metric_id
)
110 return &nvc0_hw_metric_queries
[i
];
116 struct nvc0_hw_metric_query_cfg
{
119 uint32_t num_queries
;
/* Shorthand used to fill the .queries[] slots of the metric configurations
 * below: _SM(X) expands to NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_X). */
122 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
124 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
125 static const struct nvc0_hw_metric_query_cfg
126 sm20_achieved_occupancy
=
128 .type
= NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
,
129 .queries
[0] = _SM(ACTIVE_WARPS
),
130 .queries
[1] = _SM(ACTIVE_CYCLES
),
134 static const struct nvc0_hw_metric_query_cfg
135 sm20_branch_efficiency
=
137 .type
= NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
,
138 .queries
[0] = _SM(BRANCH
),
139 .queries
[1] = _SM(DIVERGENT_BRANCH
),
143 static const struct nvc0_hw_metric_query_cfg
146 .type
= NVC0_HW_METRIC_QUERY_INST_PER_WRAP
,
147 .queries
[0] = _SM(INST_EXECUTED
),
148 .queries
[1] = _SM(WARPS_LAUNCHED
),
152 static const struct nvc0_hw_metric_query_cfg
153 sm20_inst_replay_overhead
=
155 .type
= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
,
156 .queries
[0] = _SM(INST_ISSUED
),
157 .queries
[1] = _SM(INST_EXECUTED
),
161 static const struct nvc0_hw_metric_query_cfg
164 .type
= NVC0_HW_METRIC_QUERY_ISSUED_IPC
,
165 .queries
[0] = _SM(INST_ISSUED
),
166 .queries
[1] = _SM(ACTIVE_CYCLES
),
170 static const struct nvc0_hw_metric_query_cfg
171 sm20_issue_slot_utilization
=
173 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
,
174 .queries
[0] = _SM(INST_ISSUED
),
175 .queries
[1] = _SM(ACTIVE_CYCLES
),
179 static const struct nvc0_hw_metric_query_cfg
182 .type
= NVC0_HW_METRIC_QUERY_IPC
,
183 .queries
[0] = _SM(INST_EXECUTED
),
184 .queries
[1] = _SM(ACTIVE_CYCLES
),
188 static const struct nvc0_hw_metric_query_cfg
*sm20_hw_metric_queries
[] =
190 &sm20_achieved_occupancy
,
191 &sm20_branch_efficiency
,
193 &sm20_inst_replay_overhead
,
196 &sm20_issue_slot_utilization
,
199 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
200 static const struct nvc0_hw_metric_query_cfg
203 .type
= NVC0_HW_METRIC_QUERY_INST_ISSUED
,
204 .queries
[0] = _SM(INST_ISSUED1_0
),
205 .queries
[1] = _SM(INST_ISSUED1_1
),
206 .queries
[2] = _SM(INST_ISSUED2_0
),
207 .queries
[3] = _SM(INST_ISSUED2_1
),
211 static const struct nvc0_hw_metric_query_cfg
212 sm21_inst_replay_overhead
=
214 .type
= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
,
215 .queries
[0] = _SM(INST_ISSUED1_0
),
216 .queries
[1] = _SM(INST_ISSUED1_1
),
217 .queries
[2] = _SM(INST_ISSUED2_0
),
218 .queries
[3] = _SM(INST_ISSUED2_1
),
219 .queries
[4] = _SM(INST_EXECUTED
),
223 static const struct nvc0_hw_metric_query_cfg
226 .type
= NVC0_HW_METRIC_QUERY_ISSUED_IPC
,
227 .queries
[0] = _SM(INST_ISSUED1_0
),
228 .queries
[1] = _SM(INST_ISSUED1_1
),
229 .queries
[2] = _SM(INST_ISSUED2_0
),
230 .queries
[3] = _SM(INST_ISSUED2_1
),
231 .queries
[4] = _SM(ACTIVE_CYCLES
),
235 static const struct nvc0_hw_metric_query_cfg
238 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOTS
,
239 .queries
[0] = _SM(INST_ISSUED1_0
),
240 .queries
[1] = _SM(INST_ISSUED1_1
),
241 .queries
[2] = _SM(INST_ISSUED2_0
),
242 .queries
[3] = _SM(INST_ISSUED2_1
),
246 static const struct nvc0_hw_metric_query_cfg
247 sm21_issue_slot_utilization
=
249 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
,
250 .queries
[0] = _SM(INST_ISSUED1_0
),
251 .queries
[1] = _SM(INST_ISSUED1_1
),
252 .queries
[2] = _SM(INST_ISSUED2_0
),
253 .queries
[3] = _SM(INST_ISSUED2_1
),
254 .queries
[4] = _SM(ACTIVE_CYCLES
),
258 static const struct nvc0_hw_metric_query_cfg
*sm21_hw_metric_queries
[] =
260 &sm20_achieved_occupancy
,
261 &sm20_branch_efficiency
,
264 &sm21_inst_replay_overhead
,
268 &sm21_issue_slot_utilization
,
271 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
272 static const struct nvc0_hw_metric_query_cfg
275 .type
= NVC0_HW_METRIC_QUERY_INST_ISSUED
,
276 .queries
[0] = _SM(INST_ISSUED1
),
277 .queries
[1] = _SM(INST_ISSUED2
),
281 static const struct nvc0_hw_metric_query_cfg
282 sm30_inst_replay_overhead
=
284 .type
= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
,
285 .queries
[0] = _SM(INST_ISSUED1
),
286 .queries
[1] = _SM(INST_ISSUED2
),
287 .queries
[2] = _SM(INST_EXECUTED
),
291 static const struct nvc0_hw_metric_query_cfg
294 .type
= NVC0_HW_METRIC_QUERY_ISSUED_IPC
,
295 .queries
[0] = _SM(INST_ISSUED1
),
296 .queries
[1] = _SM(INST_ISSUED2
),
297 .queries
[2] = _SM(ACTIVE_CYCLES
),
301 static const struct nvc0_hw_metric_query_cfg
304 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOTS
,
305 .queries
[0] = _SM(INST_ISSUED1
),
306 .queries
[1] = _SM(INST_ISSUED2
),
310 static const struct nvc0_hw_metric_query_cfg
311 sm30_issue_slot_utilization
=
313 .type
= NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
,
314 .queries
[0] = _SM(INST_ISSUED1
),
315 .queries
[1] = _SM(INST_ISSUED2
),
316 .queries
[2] = _SM(ACTIVE_CYCLES
),
320 static const struct nvc0_hw_metric_query_cfg
321 sm30_shared_replay_overhead
=
323 .type
= NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD
,
324 .queries
[0] = _SM(SHARED_LD_REPLAY
),
325 .queries
[1] = _SM(SHARED_ST_REPLAY
),
326 .queries
[2] = _SM(INST_EXECUTED
),
330 static const struct nvc0_hw_metric_query_cfg
331 sm30_warp_execution_efficiency
=
333 .type
= NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY
,
334 .queries
[0] = _SM(INST_EXECUTED
),
335 .queries
[1] = _SM(TH_INST_EXECUTED
),
339 static const struct nvc0_hw_metric_query_cfg
*sm30_hw_metric_queries
[] =
341 &sm20_achieved_occupancy
,
342 &sm20_branch_efficiency
,
345 &sm30_inst_replay_overhead
,
349 &sm30_issue_slot_utilization
,
350 &sm30_shared_replay_overhead
,
351 &sm30_warp_execution_efficiency
,
354 /* ==== Compute capability 3.5 (GK110/GK208) ==== */
355 static const struct nvc0_hw_metric_query_cfg
356 sm35_warp_nonpred_execution_efficiency
=
358 .type
= NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY
,
359 .queries
[0] = _SM(INST_EXECUTED
),
360 .queries
[1] = _SM(NOT_PRED_OFF_INST_EXECUTED
),
364 static const struct nvc0_hw_metric_query_cfg
*sm35_hw_metric_queries
[] =
366 &sm20_achieved_occupancy
,
369 &sm30_inst_replay_overhead
,
373 &sm30_issue_slot_utilization
,
374 &sm30_shared_replay_overhead
,
375 &sm30_warp_execution_efficiency
,
376 &sm35_warp_nonpred_execution_efficiency
,
379 /* ==== Compute capability 5.0 (GM107/GM108) ==== */
380 static const struct nvc0_hw_metric_query_cfg
*sm50_hw_metric_queries
[] =
382 &sm20_achieved_occupancy
,
383 &sm20_branch_efficiency
,
386 &sm30_inst_replay_overhead
,
390 &sm30_issue_slot_utilization
,
391 &sm30_warp_execution_efficiency
,
392 &sm35_warp_nonpred_execution_efficiency
,
/*
 * Returns the per-generation table of metric configurations (one of the
 * smXX_hw_metric_queries arrays) that applies to this screen.
 *
 * NOTE(review): nvc0_hw_metric_get_num_queries() returns ARRAY_SIZE() of the
 * same tables, so both switches have to select the same table for a given
 * screen -- keep them in sync.
 */
397 static inline const struct nvc0_hw_metric_query_cfg
**
398 nvc0_hw_metric_get_queries(struct nvc0_screen
*screen
)
400 struct nouveau_device
*dev
= screen
->base
.device
;
/* The table is selected from the screen's 3D class. */
402 switch (screen
->base
.class_3d
) {
405 return sm50_hw_metric_queries
;
407 return sm35_hw_metric_queries
;
409 return sm30_hw_metric_queries
;
/* Chipsets 0xc0 and 0xc8 use the SM 2.0 table; otherwise the SM 2.1 table. */
411 if (dev
->chipset
== 0xc0 || dev
->chipset
== 0xc8)
412 return sm20_hw_metric_queries
;
413 return sm21_hw_metric_queries
;
/*
 * Returns the number of entries in the per-generation metric table that
 * applies to this screen (ARRAY_SIZE of one of the smXX_hw_metric_queries
 * arrays).
 *
 * NOTE(review): the table choice here has to mirror
 * nvc0_hw_metric_get_queries(), which returns the tables themselves.
 */
420 nvc0_hw_metric_get_num_queries(struct nvc0_screen
*screen
)
422 struct nouveau_device
*dev
= screen
->base
.device
;
/* The table is selected from the screen's 3D class. */
424 switch (screen
->base
.class_3d
) {
427 return ARRAY_SIZE(sm50_hw_metric_queries
);
429 return ARRAY_SIZE(sm35_hw_metric_queries
);
431 return ARRAY_SIZE(sm30_hw_metric_queries
);
/* Chipsets 0xc0 and 0xc8 use the SM 2.0 table; otherwise the SM 2.1 table. */
433 if (dev
->chipset
== 0xc0 || dev
->chipset
== 0xc8)
434 return ARRAY_SIZE(sm20_hw_metric_queries
);
435 return ARRAY_SIZE(sm21_hw_metric_queries
);
/*
 * Finds the metric configuration that corresponds to a hardware query.
 *
 * The screen's metric table and its length are fetched with
 * nvc0_hw_metric_get_queries() / nvc0_hw_metric_get_num_queries(), then the
 * table is scanned for the entry whose type, wrapped in
 * NVC0_HW_METRIC_QUERY(), equals the type stored in hq->base.
 */
440 static const struct nvc0_hw_metric_query_cfg
*
441 nvc0_hw_metric_query_get_cfg(struct nvc0_context
*nvc0
, struct nvc0_hw_query
*hq
)
443 const struct nvc0_hw_metric_query_cfg
**queries
;
444 struct nvc0_screen
*screen
= nvc0
->screen
;
445 struct nvc0_query
*q
= &hq
->base
;
446 unsigned num_queries
;
/* Table and count come from two separate helpers that pick the same table. */
449 num_queries
= nvc0_hw_metric_get_num_queries(screen
);
450 queries
= nvc0_hw_metric_get_queries(screen
);
/* Table entries hold the raw metric id; q->type holds the wrapped value. */
452 for (i
= 0; i
< num_queries
; i
++) {
453 if (NVC0_HW_METRIC_QUERY(queries
[i
]->type
) == q
->type
)
/*
 * Tears down the sub-queries of a metric query: for each of the
 * hmq->num_queries entries of hmq->queries, the entry's destroy_query
 * callback is invoked when it is set.
 */
461 nvc0_hw_metric_destroy_query(struct nvc0_context
*nvc0
,
462 struct nvc0_hw_query
*hq
)
464 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
/* Only the first num_queries slots are visited. */
467 for (i
= 0; i
< hmq
->num_queries
; i
++)
/* destroy_query is treated as optional: skipped when the pointer is NULL. */
468 if (hmq
->queries
[i
]->funcs
->destroy_query
)
469 hmq
->queries
[i
]->funcs
->destroy_query(nvc0
, hmq
->queries
[i
]);
/*
 * Starts a metric query by calling the begin_query callback of each of its
 * hmq->num_queries sub-queries, in index order.
 */
474 nvc0_hw_metric_begin_query(struct nvc0_context
*nvc0
, struct nvc0_hw_query
*hq
)
476 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
480 for (i
= 0; i
< hmq
->num_queries
; i
++) {
/* The callback's return value is kept in ret for each sub-query. */
481 ret
= hmq
->queries
[i
]->funcs
->begin_query(nvc0
, hmq
->queries
[i
]);
/*
 * Stops a metric query by calling the end_query callback of each of its
 * hmq->num_queries sub-queries, in index order.
 */
489 nvc0_hw_metric_end_query(struct nvc0_context
*nvc0
, struct nvc0_hw_query
*hq
)
491 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
/* Unlike begin_query, no return value is collected from end_query here. */
494 for (i
= 0; i
< hmq
->num_queries
; i
++)
495 hmq
->queries
[i
]->funcs
->end_query(nvc0
, hmq
->queries
[i
]);
/*
 * Computes the value of a metric from raw counter values (SM 2.0 formulas).
 *
 * hq    - the metric query; hq->base.type minus NVC0_HW_METRIC_QUERY(0)
 *         selects the formula.
 * res64 - counter values, indexed in the same order as the .queries[] slots
 *         of the matching sm20_* configuration (see the per-case comments).
 *
 * An unrecognised metric id is reported through debug_printf().
 */
499 sm20_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
501 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
502 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
:
503 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
/* 48 is the "max. number of warps on a MP" term of the formula above. */
505 return ((res64
[0] / (double)res64
[1]) / 48) * 100;
507 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
:
508 /* (branch / (branch + divergent_branch)) * 100 */
/* The ratio is only evaluated when the denominator is non-zero. */
509 if (res64
[0] + res64
[1])
510 return (res64
[0] / (double)(res64
[0] + res64
[1])) * 100;
512 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP
:
513 /* inst_executed / warps_launched */
515 return res64
[0] / (double)res64
[1];
517 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
:
518 /* (inst_issued - inst_executed) / inst_executed */
/* NOTE(review): the subtraction is done in uint64_t, so it wraps around if
 * inst_issued is ever smaller than inst_executed -- confirm that cannot
 * happen. */
520 return (res64
[0] - res64
[1]) / (double)res64
[1];
522 case NVC0_HW_METRIC_QUERY_ISSUED_IPC
:
523 /* inst_issued / active_cycles */
525 return res64
[0] / (double)res64
[1];
527 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
:
528 /* ((inst_issued / 2) / active_cycles) * 100 */
/* NOTE(review): res64[0] / 2 is an integer division (an odd count loses its
 * low bit) performed before the conversion to double. */
530 return ((res64
[0] / 2) / (double)res64
[1]) * 100;
532 case NVC0_HW_METRIC_QUERY_IPC
:
533 /* inst_executed / active_cycles */
535 return res64
[0] / (double)res64
[1];
/* Reached for a metric id that none of the cases above handles. */
538 debug_printf("invalid metric type: %d\n",
539 hq
->base
.type
- NVC0_HW_METRIC_QUERY(0));
/*
 * Computes the value of a metric from raw counter values (SM 2.1 formulas).
 *
 * Metrics whose formula is unchanged from SM 2.0 are forwarded to
 * sm20_hw_metric_calc_result(). The remaining ones are built from four
 * issue counters: res64[0..1] are issued1_0/issued1_1 and res64[2..3] are
 * issued2_0/issued2_1, matching the .queries[] order of the sm21_*
 * configurations. Where a fifth counter is needed it is res64[4].
 *
 * An unrecognised metric id is reported through debug_printf().
 */
546 sm21_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
548 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
549 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
:
550 return sm20_hw_metric_calc_result(hq
, res64
);
551 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
:
552 return sm20_hw_metric_calc_result(hq
, res64
);
553 case NVC0_HW_METRIC_QUERY_INST_ISSUED
:
554 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
555 return res64
[0] + res64
[1] + (res64
[2] + res64
[3]) * 2;
557 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP
:
558 return sm20_hw_metric_calc_result(hq
, res64
);
559 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
:
560 /* (metric-inst_issued - inst_executed) / inst_executed */
/* res64[4] is inst_executed; the numerator is evaluated in uint64_t. */
562 return (((res64
[0] + res64
[1] + (res64
[2] + res64
[3]) * 2) -
563 res64
[4]) / (double)res64
[4]);
565 case NVC0_HW_METRIC_QUERY_ISSUED_IPC
:
566 /* metric-inst_issued / active_cycles */
568 return (res64
[0] + res64
[1] + (res64
[2] + res64
[3]) * 2) /
571 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS
:
572 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
573 return res64
[0] + res64
[1] + res64
[2] + res64
[3];
575 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
:
576 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
/* NOTE(review): the halving is an integer division done before the
 * conversion to double; res64[4] is active_cycles. */
578 return (((res64
[0] + res64
[1] + res64
[2] + res64
[3]) / 2) /
579 (double)res64
[4]) * 100;
581 case NVC0_HW_METRIC_QUERY_IPC
:
582 return sm20_hw_metric_calc_result(hq
, res64
);
/* Reached for a metric id that none of the cases above handles. */
584 debug_printf("invalid metric type: %d\n",
585 hq
->base
.type
- NVC0_HW_METRIC_QUERY(0));
592 sm30_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
594 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
595 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY
:
596 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
598 return ((res64
[0] / (double)res64
[1]) / 64) * 100;
600 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY
:
601 return sm20_hw_metric_calc_result(hq
, res64
);
602 case NVC0_HW_METRIC_QUERY_INST_ISSUED
:
603 /* inst_issued1 + inst_issued2 * 2 */
604 return res64
[0] + res64
[1] * 2;
605 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP
:
606 return sm20_hw_metric_calc_result(hq
, res64
);
607 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD
:
608 /* (metric-inst_issued - inst_executed) / inst_executed */
610 return (((res64
[0] + res64
[1] * 2) - res64
[2]) / (double)res64
[2]);
612 case NVC0_HW_METRIC_QUERY_ISSUED_IPC
:
613 /* metric-inst_issued / active_cycles */
615 return (res64
[0] + res64
[1] * 2) / (double)res64
[2];
617 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS
:
618 /* inst_issued1 + inst_issued2 */
619 return res64
[0] + res64
[1];
620 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION
:
621 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
623 return (((res64
[0] + res64
[1]) / 2) / (double)res64
[2]) * 100;
625 case NVC0_HW_METRIC_QUERY_IPC
:
626 return sm20_hw_metric_calc_result(hq
, res64
);
627 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD
:
628 /* (shared_load_replay + shared_store_replay) / inst_executed */
630 return (res64
[0] + res64
[1]) / (double)res64
[2];
632 case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY
:
633 /* thread_inst_executed / (inst_executed * max. number of threads per warp) * 100 */
636 return (res64
[1] / ((double)res64
[0] * 32)) * 100;
639 debug_printf("invalid metric type: %d\n",
640 hq
->base
.type
- NVC0_HW_METRIC_QUERY(0));
/*
 * Computes the value of a metric from raw counter values (SM 3.5 formulas).
 *
 * Only WARP_NONPRED_EXECUTION_EFFICIENCY is computed here; the other visible
 * path forwards to sm30_hw_metric_calc_result().
 */
647 sm35_hw_metric_calc_result(struct nvc0_hw_query
*hq
, uint64_t res64
[8])
649 switch (hq
->base
.type
- NVC0_HW_METRIC_QUERY(0)) {
650 case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY
:
651 /* not_predicated_off_thread_inst_executed / (inst_executed * max. number
652 * of threads per warp) * 100 */
/* res64[0] is inst_executed, res64[1] the not-predicated-off count; 32 is
 * the "max. number of threads per warp" term. */
654 return (res64
[1] / ((double)res64
[0] * 32)) * 100;
657 return sm30_hw_metric_calc_result(hq
, res64
);
/*
 * Produces the final value of a metric query.
 *
 * The result of every sub-query is fetched through its get_query_result
 * callback, the raw values are handed to the generation-specific
 * smXX_hw_metric_calc_result() helper, and the computed value is stored in
 * *result.
 */
663 nvc0_hw_metric_get_query_result(struct nvc0_context
*nvc0
,
664 struct nvc0_hw_query
*hq
, boolean wait
,
665 union pipe_query_result
*result
)
667 struct nvc0_hw_metric_query
*hmq
= nvc0_hw_metric_query(hq
);
668 struct nvc0_screen
*screen
= nvc0
->screen
;
669 struct nouveau_device
*dev
= screen
->base
.device
;
/* Both scratch arrays have 8 slots and start zeroed.
 * NOTE(review): assumes hmq->num_queries never exceeds 8 -- TODO confirm. */
670 union pipe_query_result results
[8] = {};
671 uint64_t res64
[8] = {};
676 for (i
= 0; i
< hmq
->num_queries
; i
++) {
677 ret
= hmq
->queries
[i
]->funcs
->get_query_result(nvc0
, hmq
->queries
[i
],
/* Reinterpret the start of the result union as a 64-bit counter value. */
681 res64
[i
] = *(uint64_t *)&results
[i
];
/* Pick the formula set from the screen's 3D class. */
684 switch (screen
->base
.class_3d
) {
688 value
= sm35_hw_metric_calc_result(hq
, res64
);
691 value
= sm30_hw_metric_calc_result(hq
, res64
);
/* Chipsets 0xc0 and 0xc8 use the SM 2.0 formulas; otherwise SM 2.1. */
694 if (dev
->chipset
== 0xc0 || dev
->chipset
== 0xc8)
695 value
= sm20_hw_metric_calc_result(hq
, res64
);
697 value
= sm21_hw_metric_calc_result(hq
, res64
);
/* The computed value is written over the start of *result as a uint64_t. */
701 *(uint64_t *)result
= value
;
/* Callback table for metric queries; nvc0_hw_metric_create_query() installs
 * it as hq->funcs. */
705 static const struct nvc0_hw_query_funcs hw_metric_query_funcs
= {
706 .destroy_query
= nvc0_hw_metric_destroy_query
,
707 .begin_query
= nvc0_hw_metric_begin_query
,
708 .end_query
= nvc0_hw_metric_end_query
,
709 .get_query_result
= nvc0_hw_metric_get_query_result
,
/*
 * Creates a metric query of the given type.
 *
 * The type is range-checked against NVC0_HW_METRIC_QUERY(0) ..
 * NVC0_HW_METRIC_QUERY_LAST, a zeroed nvc0_hw_metric_query is allocated, and
 * one SM sub-query is created with nvc0_hw_sm_create_query() for each entry
 * of the matching configuration's queries[] array. If a sub-query cannot be
 * created, the partially built metric query is passed to
 * nvc0_hw_metric_destroy_query().
 */
712 struct nvc0_hw_query
*
713 nvc0_hw_metric_create_query(struct nvc0_context
*nvc0
, unsigned type
)
715 const struct nvc0_hw_metric_query_cfg
*cfg
;
716 struct nvc0_hw_metric_query
*hmq
;
717 struct nvc0_hw_query
*hq
;
/* Check whether the type lies outside the metric-query id range. */
720 if (type
< NVC0_HW_METRIC_QUERY(0) || type
> NVC0_HW_METRIC_QUERY_LAST
)
723 hmq
= CALLOC_STRUCT(nvc0_hw_metric_query
);
728 hq
->funcs
= &hw_metric_query_funcs
;
729 hq
->base
.type
= type
;
/* NOTE(review): cfg is dereferenced by the loop below with no visible NULL
 * check -- confirm the lookup cannot fail for a type that passed the range
 * check but is absent from this chip's table. */
731 cfg
= nvc0_hw_metric_query_get_cfg(nvc0
, hq
);
733 for (i
= 0; i
< cfg
->num_queries
; i
++) {
734 hmq
->queries
[i
] = nvc0_hw_sm_create_query(nvc0
, cfg
->queries
[i
]);
/* Sub-query creation failed: release what has been set up so far. */
735 if (!hmq
->queries
[i
]) {
736 nvc0_hw_metric_destroy_query(nvc0
, hq
);
746 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen
*screen
, unsigned id
,
747 struct pipe_driver_query_info
*info
)
751 if (screen
->base
.drm
->version
>= 0x01000101) {
753 count
= nvc0_hw_metric_get_num_queries(screen
);
760 if (screen
->compute
) {
761 if (screen
->base
.class_3d
<= GM200_3D_CLASS
) {
762 const struct nvc0_hw_metric_query_cfg
**queries
=
763 nvc0_hw_metric_get_queries(screen
);
764 const struct nvc0_hw_metric_cfg
*cfg
=
765 nvc0_hw_metric_get_cfg(queries
[id
]->type
);
767 info
->name
= cfg
->name
;
768 info
->query_type
= NVC0_HW_METRIC_QUERY(queries
[id
]->type
);
769 info
->type
= cfg
->type
;
770 info
->group_id
= NVC0_HW_METRIC_QUERY_GROUP
;