nvc0: add new warp_execution_efficiency metric on SM30+
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw_metric.c
1 /*
2 * Copyright 2015 Samuel Pitoiset
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
26
27 #define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d }
28 static const struct nvc0_hw_metric_cfg {
29 unsigned id;
30 const char *name;
31 enum pipe_driver_query_type type;
32 const char *desc;
33 } nvc0_hw_metric_queries[] = {
34 _Q(ACHIEVED_OCCUPANCY,
35 "metric-achieved_occupancy",
36 PERCENTAGE,
37 "Ratio of the average active warps per active cycle to the maximum "
38 "number of warps supported on a multiprocessor"),
39
40 _Q(BRANCH_EFFICIENCY,
41 "metric-branch_efficiency",
42 PERCENTAGE,
43 "Ratio of non-divergent branches to total branches"),
44
45 _Q(INST_ISSUED,
46 "metric-inst_issued",
47 UINT64,
48 "The number of instructions issued"),
49
50 _Q(INST_PER_WRAP,
51 "metric-inst_per_wrap",
52 UINT64,
53 "Average number of instructions executed by each warp"),
54
55 _Q(INST_REPLAY_OVERHEAD,
56 "metric-inst_replay_overhead",
57 UINT64,
58 "Average number of replays for each instruction executed"),
59
60 _Q(ISSUED_IPC,
61 "metric-issued_ipc",
62 UINT64,
63 "Instructions issued per cycle"),
64
65 _Q(ISSUE_SLOTS,
66 "metric-issue_slots",
67 UINT64,
68 "The number of issue slots used"),
69
70 _Q(ISSUE_SLOT_UTILIZATION,
71 "metric-issue_slot_utilization",
72 PERCENTAGE,
73 "Percentage of issue slots that issued at least one instruction, "
74 "averaged across all cycles"),
75
76 _Q(IPC,
77 "metric-ipc",
78 UINT64,
79 "Instructions executed per cycle"),
80
81 _Q(SHARED_REPLAY_OVERHEAD,
82 "metric-shared_replay_overhead",
83 UINT64,
84 "Average number of replays due to shared memory conflicts for each "
85 "instruction executed"),
86
87 _Q(WARP_EXECUTION_EFFICIENCY,
88 "metric-warp_execution_efficiency",
89 PERCENTAGE,
90 "Ratio of the average active threads per warp to the maximum number of "
91 "threads per warp supported on a multiprocessor"),
92 };
93
94 #undef _Q
95
96 static inline const struct nvc0_hw_metric_cfg *
97 nvc0_hw_metric_get_cfg(unsigned metric_id)
98 {
99 unsigned i;
100
101 for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
102 if (nvc0_hw_metric_queries[i].id == metric_id)
103 return &nvc0_hw_metric_queries[i];
104 }
105 assert(0);
106 return NULL;
107 }
108
/*
 * Recipe for one metric: which hardware SM counter queries have to be
 * created so that the metric can be computed from their results.
 */
struct nvc0_hw_metric_query_cfg {
   unsigned type;          /* NVC0_HW_METRIC_QUERY_* id of the metric */
   uint32_t queries[8];    /* SM query types, in the order the calc_result
                            * helpers index their res64[] argument */
   uint32_t num_queries;   /* number of valid entries in queries[] */
};

/* Shorthand for the full SM query type of counter NVC0_HW_SM_QUERY_<n>. */
#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
116
117 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
118 static const struct nvc0_hw_metric_query_cfg
119 sm20_achieved_occupancy =
120 {
121 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
122 .queries[0] = _SM(ACTIVE_WARPS),
123 .queries[1] = _SM(ACTIVE_CYCLES),
124 .num_queries = 2,
125 };
126
127 static const struct nvc0_hw_metric_query_cfg
128 sm20_branch_efficiency =
129 {
130 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
131 .queries[0] = _SM(BRANCH),
132 .queries[1] = _SM(DIVERGENT_BRANCH),
133 .num_queries = 2,
134 };
135
136 static const struct nvc0_hw_metric_query_cfg
137 sm20_inst_per_wrap =
138 {
139 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
140 .queries[0] = _SM(INST_EXECUTED),
141 .queries[1] = _SM(WARPS_LAUNCHED),
142 .num_queries = 2,
143 };
144
145 static const struct nvc0_hw_metric_query_cfg
146 sm20_inst_replay_overhead =
147 {
148 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
149 .queries[0] = _SM(INST_ISSUED),
150 .queries[1] = _SM(INST_EXECUTED),
151 .num_queries = 2,
152 };
153
154 static const struct nvc0_hw_metric_query_cfg
155 sm20_issued_ipc =
156 {
157 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
158 .queries[0] = _SM(INST_ISSUED),
159 .queries[1] = _SM(ACTIVE_CYCLES),
160 .num_queries = 2,
161 };
162
163 static const struct nvc0_hw_metric_query_cfg
164 sm20_issue_slot_utilization =
165 {
166 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
167 .queries[0] = _SM(INST_ISSUED),
168 .queries[1] = _SM(ACTIVE_CYCLES),
169 .num_queries = 2,
170 };
171
172 static const struct nvc0_hw_metric_query_cfg
173 sm20_ipc =
174 {
175 .type = NVC0_HW_METRIC_QUERY_IPC,
176 .queries[0] = _SM(INST_EXECUTED),
177 .queries[1] = _SM(ACTIVE_CYCLES),
178 .num_queries = 2,
179 };
180
181 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
182 {
183 &sm20_achieved_occupancy,
184 &sm20_branch_efficiency,
185 &sm20_inst_per_wrap,
186 &sm20_inst_replay_overhead,
187 &sm20_ipc,
188 &sm20_issued_ipc,
189 &sm20_issue_slot_utilization,
190 };
191
192 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
193 static const struct nvc0_hw_metric_query_cfg
194 sm21_inst_issued =
195 {
196 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
197 .queries[0] = _SM(INST_ISSUED1_0),
198 .queries[1] = _SM(INST_ISSUED1_1),
199 .queries[2] = _SM(INST_ISSUED2_0),
200 .queries[3] = _SM(INST_ISSUED2_1),
201 .num_queries = 4,
202 };
203
204 static const struct nvc0_hw_metric_query_cfg
205 sm21_inst_replay_overhead =
206 {
207 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
208 .queries[0] = _SM(INST_ISSUED1_0),
209 .queries[1] = _SM(INST_ISSUED1_1),
210 .queries[2] = _SM(INST_ISSUED2_0),
211 .queries[3] = _SM(INST_ISSUED2_1),
212 .queries[4] = _SM(INST_EXECUTED),
213 .num_queries = 5,
214 };
215
216 static const struct nvc0_hw_metric_query_cfg
217 sm21_issued_ipc =
218 {
219 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
220 .queries[0] = _SM(INST_ISSUED1_0),
221 .queries[1] = _SM(INST_ISSUED1_1),
222 .queries[2] = _SM(INST_ISSUED2_0),
223 .queries[3] = _SM(INST_ISSUED2_1),
224 .queries[4] = _SM(ACTIVE_CYCLES),
225 .num_queries = 5,
226 };
227
228 static const struct nvc0_hw_metric_query_cfg
229 sm21_issue_slots =
230 {
231 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
232 .queries[0] = _SM(INST_ISSUED1_0),
233 .queries[1] = _SM(INST_ISSUED1_1),
234 .queries[2] = _SM(INST_ISSUED2_0),
235 .queries[3] = _SM(INST_ISSUED2_1),
236 .num_queries = 4,
237 };
238
239 static const struct nvc0_hw_metric_query_cfg
240 sm21_issue_slot_utilization =
241 {
242 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
243 .queries[0] = _SM(INST_ISSUED1_0),
244 .queries[1] = _SM(INST_ISSUED1_1),
245 .queries[2] = _SM(INST_ISSUED2_0),
246 .queries[3] = _SM(INST_ISSUED2_1),
247 .queries[4] = _SM(ACTIVE_CYCLES),
248 .num_queries = 5,
249 };
250
251 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
252 {
253 &sm20_achieved_occupancy,
254 &sm20_branch_efficiency,
255 &sm21_inst_issued,
256 &sm20_inst_per_wrap,
257 &sm21_inst_replay_overhead,
258 &sm20_ipc,
259 &sm21_issued_ipc,
260 &sm21_issue_slots,
261 &sm21_issue_slot_utilization,
262 };
263
264 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
265 static const struct nvc0_hw_metric_query_cfg
266 sm30_inst_issued =
267 {
268 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
269 .queries[0] = _SM(INST_ISSUED1),
270 .queries[1] = _SM(INST_ISSUED2),
271 .num_queries = 2,
272 };
273
274 static const struct nvc0_hw_metric_query_cfg
275 sm30_inst_replay_overhead =
276 {
277 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
278 .queries[0] = _SM(INST_ISSUED1),
279 .queries[1] = _SM(INST_ISSUED2),
280 .queries[2] = _SM(INST_EXECUTED),
281 .num_queries = 3,
282 };
283
284 static const struct nvc0_hw_metric_query_cfg
285 sm30_issued_ipc =
286 {
287 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
288 .queries[0] = _SM(INST_ISSUED1),
289 .queries[1] = _SM(INST_ISSUED2),
290 .queries[2] = _SM(ACTIVE_CYCLES),
291 .num_queries = 3,
292 };
293
294 static const struct nvc0_hw_metric_query_cfg
295 sm30_issue_slots =
296 {
297 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
298 .queries[0] = _SM(INST_ISSUED1),
299 .queries[1] = _SM(INST_ISSUED2),
300 .num_queries = 2,
301 };
302
303 static const struct nvc0_hw_metric_query_cfg
304 sm30_issue_slot_utilization =
305 {
306 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
307 .queries[0] = _SM(INST_ISSUED1),
308 .queries[1] = _SM(INST_ISSUED2),
309 .queries[2] = _SM(ACTIVE_CYCLES),
310 .num_queries = 3,
311 };
312
313 static const struct nvc0_hw_metric_query_cfg
314 sm30_shared_replay_overhead =
315 {
316 .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
317 .queries[0] = _SM(SHARED_LD_REPLAY),
318 .queries[1] = _SM(SHARED_ST_REPLAY),
319 .queries[2] = _SM(INST_EXECUTED),
320 .num_queries = 3,
321 };
322
323 static const struct nvc0_hw_metric_query_cfg
324 sm30_warp_execution_efficiency =
325 {
326 .type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
327 .queries[0] = _SM(INST_EXECUTED),
328 .queries[1] = _SM(TH_INST_EXECUTED),
329 .num_queries = 2,
330 };
331
332 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
333 {
334 &sm20_achieved_occupancy,
335 &sm20_branch_efficiency,
336 &sm30_inst_issued,
337 &sm20_inst_per_wrap,
338 &sm30_inst_replay_overhead,
339 &sm20_ipc,
340 &sm30_issued_ipc,
341 &sm30_issue_slots,
342 &sm30_issue_slot_utilization,
343 &sm30_shared_replay_overhead,
344 &sm30_warp_execution_efficiency,
345 };
346
347 /* ==== Compute capability 3.5 (GK110) ==== */
348 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
349 {
350 &sm20_achieved_occupancy,
351 &sm30_inst_issued,
352 &sm20_inst_per_wrap,
353 &sm30_inst_replay_overhead,
354 &sm20_ipc,
355 &sm30_issued_ipc,
356 &sm30_inst_issued,
357 &sm30_issue_slot_utilization,
358 &sm30_shared_replay_overhead,
359 &sm30_warp_execution_efficiency,
360 };
361
362 #undef _SM
363
364 static inline const struct nvc0_hw_metric_query_cfg **
365 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
366 {
367 struct nouveau_device *dev = screen->base.device;
368
369 switch (screen->base.class_3d) {
370 case NVF0_3D_CLASS:
371 return sm35_hw_metric_queries;
372 case NVE4_3D_CLASS:
373 return sm30_hw_metric_queries;
374 default:
375 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
376 return sm20_hw_metric_queries;
377 return sm21_hw_metric_queries;
378 }
379 assert(0);
380 return NULL;
381 }
382
383 unsigned
384 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
385 {
386 struct nouveau_device *dev = screen->base.device;
387
388 switch (screen->base.class_3d) {
389 case NVF0_3D_CLASS:
390 return ARRAY_SIZE(sm35_hw_metric_queries);
391 case NVE4_3D_CLASS:
392 return ARRAY_SIZE(sm30_hw_metric_queries);
393 default:
394 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
395 return ARRAY_SIZE(sm20_hw_metric_queries);
396 return ARRAY_SIZE(sm21_hw_metric_queries);
397 }
398 return 0;
399 }
400
401 static const struct nvc0_hw_metric_query_cfg *
402 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
403 {
404 const struct nvc0_hw_metric_query_cfg **queries;
405 struct nvc0_screen *screen = nvc0->screen;
406 struct nvc0_query *q = &hq->base;
407 unsigned num_queries;
408 unsigned i;
409
410 num_queries = nvc0_hw_metric_get_num_queries(screen);
411 queries = nvc0_hw_metric_get_queries(screen);
412
413 for (i = 0; i < num_queries; i++) {
414 if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
415 return queries[i];
416 }
417 assert(0);
418 return NULL;
419 }
420
421 static void
422 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
423 struct nvc0_hw_query *hq)
424 {
425 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
426 unsigned i;
427
428 for (i = 0; i < hmq->num_queries; i++)
429 if (hmq->queries[i]->funcs->destroy_query)
430 hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
431 FREE(hmq);
432 }
433
434 static boolean
435 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
436 {
437 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
438 boolean ret = false;
439 unsigned i;
440
441 for (i = 0; i < hmq->num_queries; i++) {
442 ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
443 if (!ret)
444 return ret;
445 }
446 return ret;
447 }
448
449 static void
450 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
451 {
452 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
453 unsigned i;
454
455 for (i = 0; i < hmq->num_queries; i++)
456 hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
457 }
458
459 static uint64_t
460 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
461 {
462 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
463 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
464 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
465 if (res64[1])
466 return ((res64[0] / (double)res64[1]) / 48) * 100;
467 break;
468 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
469 /* (branch / (branch + divergent_branch)) * 100 */
470 if (res64[0] + res64[1])
471 return (res64[0] / (double)(res64[0] + res64[1])) * 100;
472 break;
473 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
474 /* inst_executed / warps_launched */
475 if (res64[1])
476 return res64[0] / (double)res64[1];
477 break;
478 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
479 /* (inst_issued - inst_executed) / inst_executed */
480 if (res64[1])
481 return (res64[0] - res64[1]) / (double)res64[1];
482 break;
483 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
484 /* inst_issued / active_cycles */
485 if (res64[1])
486 return res64[0] / (double)res64[1];
487 break;
488 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
489 /* ((inst_issued / 2) / active_cycles) * 100 */
490 if (res64[1])
491 return ((res64[0] / 2) / (double)res64[1]) * 100;
492 break;
493 case NVC0_HW_METRIC_QUERY_IPC:
494 /* inst_executed / active_cycles */
495 if (res64[1])
496 return res64[0] / (double)res64[1];
497 break;
498 default:
499 debug_printf("invalid metric type: %d\n",
500 hq->base.type - NVC0_HW_METRIC_QUERY(0));
501 break;
502 }
503 return 0;
504 }
505
506 static uint64_t
507 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
508 {
509 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
510 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
511 return sm20_hw_metric_calc_result(hq, res64);
512 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
513 return sm20_hw_metric_calc_result(hq, res64);
514 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
515 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
516 return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
517 break;
518 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
519 return sm20_hw_metric_calc_result(hq, res64);
520 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
521 /* (metric-inst_issued - inst_executed) / inst_executed */
522 if (res64[4])
523 return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
524 res64[4]) / (double)res64[4]);
525 break;
526 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
527 /* metric-inst_issued / active_cycles */
528 if (res64[4])
529 return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
530 (double)res64[4];
531 break;
532 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
533 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
534 return res64[0] + res64[1] + res64[2] + res64[3];
535 break;
536 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
537 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
538 if (res64[4])
539 return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
540 (double)res64[4]) * 100;
541 break;
542 case NVC0_HW_METRIC_QUERY_IPC:
543 return sm20_hw_metric_calc_result(hq, res64);
544 default:
545 debug_printf("invalid metric type: %d\n",
546 hq->base.type - NVC0_HW_METRIC_QUERY(0));
547 break;
548 }
549 return 0;
550 }
551
552 static uint64_t
553 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
554 {
555 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
556 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
557 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
558 if (res64[1])
559 return ((res64[0] / (double)res64[1]) / 64) * 100;
560 break;
561 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
562 return sm20_hw_metric_calc_result(hq, res64);
563 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
564 /* inst_issued1 + inst_issued2 * 2 */
565 return res64[0] + res64[1] * 2;
566 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
567 return sm20_hw_metric_calc_result(hq, res64);
568 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
569 /* (metric-inst_issued - inst_executed) / inst_executed */
570 if (res64[2])
571 return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
572 break;
573 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
574 /* metric-inst_issued / active_cycles */
575 if (res64[2])
576 return (res64[0] + res64[1] * 2) / (double)res64[2];
577 break;
578 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
579 /* inst_issued1 + inst_issued2 */
580 return res64[0] + res64[1];
581 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
582 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
583 if (res64[2])
584 return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
585 break;
586 case NVC0_HW_METRIC_QUERY_IPC:
587 return sm20_hw_metric_calc_result(hq, res64);
588 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
589 /* (shared_load_replay + shared_store_replay) / inst_executed */
590 if (res64[2])
591 return (res64[0] + res64[1]) / (double)res64[2];
592 break;
593 case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:
594 /* thread_inst_executed / (inst_executed * max. number of threads per
595 * wrap) * 100 */
596 if (res64[0])
597 return (res64[1] / ((double)res64[0] * 32)) * 100;
598 break;
599 default:
600 debug_printf("invalid metric type: %d\n",
601 hq->base.type - NVC0_HW_METRIC_QUERY(0));
602 break;
603 }
604 return 0;
605 }
606
607 static boolean
608 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
609 struct nvc0_hw_query *hq, boolean wait,
610 union pipe_query_result *result)
611 {
612 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
613 struct nvc0_screen *screen = nvc0->screen;
614 struct nouveau_device *dev = screen->base.device;
615 union pipe_query_result results[8] = {};
616 uint64_t res64[8] = {};
617 uint64_t value = 0;
618 boolean ret = false;
619 unsigned i;
620
621 for (i = 0; i < hmq->num_queries; i++) {
622 ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
623 wait, &results[i]);
624 if (!ret)
625 return ret;
626 res64[i] = *(uint64_t *)&results[i];
627 }
628
629 switch (screen->base.class_3d) {
630 case NVF0_3D_CLASS:
631 case NVE4_3D_CLASS:
632 value = sm30_hw_metric_calc_result(hq, res64);
633 break;
634 default:
635 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
636 value = sm20_hw_metric_calc_result(hq, res64);
637 else
638 value = sm21_hw_metric_calc_result(hq, res64);
639 break;
640 }
641
642 *(uint64_t *)result = value;
643 return ret;
644 }
645
/* Hooks installed on every metric query created by
 * nvc0_hw_metric_create_query(). */
static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
   .destroy_query = nvc0_hw_metric_destroy_query,
   .begin_query = nvc0_hw_metric_begin_query,
   .end_query = nvc0_hw_metric_end_query,
   .get_query_result = nvc0_hw_metric_get_query_result,
};
652
653 struct nvc0_hw_query *
654 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
655 {
656 const struct nvc0_hw_metric_query_cfg *cfg;
657 struct nvc0_hw_metric_query *hmq;
658 struct nvc0_hw_query *hq;
659 unsigned i;
660
661 if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
662 return NULL;
663
664 hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
665 if (!hmq)
666 return NULL;
667
668 hq = &hmq->base;
669 hq->funcs = &hw_metric_query_funcs;
670 hq->base.type = type;
671
672 cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
673
674 for (i = 0; i < cfg->num_queries; i++) {
675 hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
676 if (!hmq->queries[i]) {
677 nvc0_hw_metric_destroy_query(nvc0, hq);
678 return NULL;
679 }
680 hmq->num_queries++;
681 }
682
683 return hq;
684 }
685
686 int
687 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
688 struct pipe_driver_query_info *info)
689 {
690 int count = 0;
691
692 if (screen->base.drm->version >= 0x01000101) {
693 if (screen->compute)
694 count = nvc0_hw_metric_get_num_queries(screen);
695 }
696
697 if (!info)
698 return count;
699
700 if (id < count) {
701 if (screen->compute) {
702 if (screen->base.class_3d <= NVF0_3D_CLASS) {
703 const struct nvc0_hw_metric_query_cfg **queries =
704 nvc0_hw_metric_get_queries(screen);
705 const struct nvc0_hw_metric_cfg *cfg =
706 nvc0_hw_metric_get_cfg(queries[id]->type);
707
708 info->name = cfg->name;
709 info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
710 info->type = cfg->type;
711 info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
712 return 1;
713 }
714 }
715 }
716 return 0;
717 }