6f02be30306d7abc768995a6bbe70417b35cbbbe
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw_metric.c
1 /*
2 * Copyright 2015 Samuel Pitoiset
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
26
27 #define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d }
28 static const struct nvc0_hw_metric_cfg {
29 unsigned id;
30 const char *name;
31 enum pipe_driver_query_type type;
32 const char *desc;
33 } nvc0_hw_metric_queries[] = {
34 _Q(ACHIEVED_OCCUPANCY,
35 "metric-achieved_occupancy",
36 PERCENTAGE,
37 "Ratio of the average active warps per active cycle to the maximum number "
38 "of warps supported on a multiprocessor"),
39
40 _Q(BRANCH_EFFICIENCY,
41 "metric-branch_efficiency",
42 PERCENTAGE,
43 "Ratio of non-divergent branches to total branches"),
44
45 _Q(INST_ISSUED,
46 "metric-inst_issued",
47 UINT64,
48 "The number of instructions issued"),
49
50 _Q(INST_PER_WRAP,
51 "metric-inst_per_wrap",
52 UINT64,
53 "Average number of instructions executed by each warp"),
54
55 _Q(INST_REPLAY_OVERHEAD,
56 "metric-inst_replay_overhead",
57 UINT64,
58 "Average number of replays for each instruction executed"),
59
60 _Q(ISSUED_IPC,
61 "metric-issued_ipc",
62 UINT64,
63 "Instructions issued per cycle"),
64
65 _Q(ISSUE_SLOTS,
66 "metric-issue_slots",
67 UINT64,
68 "The number of issue slots used"),
69
70 _Q(ISSUE_SLOT_UTILIZATION,
71 "metric-issue_slot_utilization",
72 PERCENTAGE,
73 "Percentage of issue slots that issued at least one instruction, averaged "
74 "across all cycles"),
75
76 _Q(IPC,
77 "metric-ipc",
78 UINT64,
79 "Instructions executed per cycle"),
80
81 _Q(SHARED_REPLAY_OVERHEAD,
82 "metric-shared_replay_overhead",
83 UINT64,
84 "Average number of replays due to shared memory conflicts for each "
85 "instruction executed"),
86 };
87
88 #undef _Q
89
90 static inline const struct nvc0_hw_metric_cfg *
91 nvc0_hw_metric_get_cfg(unsigned metric_id)
92 {
93 unsigned i;
94
95 for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
96 if (nvc0_hw_metric_queries[i].id == metric_id)
97 return &nvc0_hw_metric_queries[i];
98 }
99 assert(0);
100 return NULL;
101 }
102
/*
 * Recipe of one metric for one chipset family: which metric it is and which
 * hardware SM queries have to be sampled to compute it.
 */
struct nvc0_hw_metric_query_cfg {
   /* NVC0_HW_METRIC_QUERY_* identifier of the metric. */
   unsigned type;
   /* SM queries to sample; only the first num_queries slots are meaningful. */
   uint32_t queries[8];
   /* Number of valid entries in queries[]. */
   uint32_t num_queries;
};
108
109 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
110
111 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
112 static const struct nvc0_hw_metric_query_cfg
113 sm20_achieved_occupancy =
114 {
115 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
116 .queries[0] = _SM(ACTIVE_WARPS),
117 .queries[1] = _SM(ACTIVE_CYCLES),
118 .num_queries = 2,
119 };
120
121 static const struct nvc0_hw_metric_query_cfg
122 sm20_branch_efficiency =
123 {
124 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
125 .queries[0] = _SM(BRANCH),
126 .queries[1] = _SM(DIVERGENT_BRANCH),
127 .num_queries = 2,
128 };
129
130 static const struct nvc0_hw_metric_query_cfg
131 sm20_inst_per_wrap =
132 {
133 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
134 .queries[0] = _SM(INST_EXECUTED),
135 .queries[1] = _SM(WARPS_LAUNCHED),
136 .num_queries = 2,
137 };
138
139 static const struct nvc0_hw_metric_query_cfg
140 sm20_inst_replay_overhead =
141 {
142 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
143 .queries[0] = _SM(INST_ISSUED),
144 .queries[1] = _SM(INST_EXECUTED),
145 .num_queries = 2,
146 };
147
148 static const struct nvc0_hw_metric_query_cfg
149 sm20_issued_ipc =
150 {
151 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
152 .queries[0] = _SM(INST_ISSUED),
153 .queries[1] = _SM(ACTIVE_CYCLES),
154 .num_queries = 2,
155 };
156
157 static const struct nvc0_hw_metric_query_cfg
158 sm20_issue_slot_utilization =
159 {
160 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
161 .queries[0] = _SM(INST_ISSUED),
162 .queries[1] = _SM(ACTIVE_CYCLES),
163 .num_queries = 2,
164 };
165
166 static const struct nvc0_hw_metric_query_cfg
167 sm20_ipc =
168 {
169 .type = NVC0_HW_METRIC_QUERY_IPC,
170 .queries[0] = _SM(INST_EXECUTED),
171 .queries[1] = _SM(ACTIVE_CYCLES),
172 .num_queries = 2,
173 };
174
175 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
176 {
177 &sm20_achieved_occupancy,
178 &sm20_branch_efficiency,
179 &sm20_inst_per_wrap,
180 &sm20_inst_replay_overhead,
181 &sm20_ipc,
182 &sm20_issued_ipc,
183 &sm20_issue_slot_utilization,
184 };
185
186 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
187 static const struct nvc0_hw_metric_query_cfg
188 sm21_inst_issued =
189 {
190 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
191 .queries[0] = _SM(INST_ISSUED1_0),
192 .queries[1] = _SM(INST_ISSUED1_1),
193 .queries[2] = _SM(INST_ISSUED2_0),
194 .queries[3] = _SM(INST_ISSUED2_1),
195 .num_queries = 4,
196 };
197
198 static const struct nvc0_hw_metric_query_cfg
199 sm21_inst_replay_overhead =
200 {
201 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
202 .queries[0] = _SM(INST_ISSUED1_0),
203 .queries[1] = _SM(INST_ISSUED1_1),
204 .queries[2] = _SM(INST_ISSUED2_0),
205 .queries[3] = _SM(INST_ISSUED2_1),
206 .queries[4] = _SM(INST_EXECUTED),
207 .num_queries = 5,
208 };
209
210 static const struct nvc0_hw_metric_query_cfg
211 sm21_issued_ipc =
212 {
213 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
214 .queries[0] = _SM(INST_ISSUED1_0),
215 .queries[1] = _SM(INST_ISSUED1_1),
216 .queries[2] = _SM(INST_ISSUED2_0),
217 .queries[3] = _SM(INST_ISSUED2_1),
218 .queries[4] = _SM(ACTIVE_CYCLES),
219 .num_queries = 5,
220 };
221
222 static const struct nvc0_hw_metric_query_cfg
223 sm21_issue_slots =
224 {
225 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
226 .queries[0] = _SM(INST_ISSUED1_0),
227 .queries[1] = _SM(INST_ISSUED1_1),
228 .queries[2] = _SM(INST_ISSUED2_0),
229 .queries[3] = _SM(INST_ISSUED2_1),
230 .num_queries = 4,
231 };
232
233 static const struct nvc0_hw_metric_query_cfg
234 sm21_issue_slot_utilization =
235 {
236 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
237 .queries[0] = _SM(INST_ISSUED1_0),
238 .queries[1] = _SM(INST_ISSUED1_1),
239 .queries[2] = _SM(INST_ISSUED2_0),
240 .queries[3] = _SM(INST_ISSUED2_1),
241 .queries[4] = _SM(ACTIVE_CYCLES),
242 .num_queries = 5,
243 };
244
245 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
246 {
247 &sm20_achieved_occupancy,
248 &sm20_branch_efficiency,
249 &sm21_inst_issued,
250 &sm20_inst_per_wrap,
251 &sm21_inst_replay_overhead,
252 &sm20_ipc,
253 &sm21_issued_ipc,
254 &sm21_issue_slots,
255 &sm21_issue_slot_utilization,
256 };
257
258 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
259 static const struct nvc0_hw_metric_query_cfg
260 sm30_inst_issued =
261 {
262 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
263 .queries[0] = _SM(INST_ISSUED1),
264 .queries[1] = _SM(INST_ISSUED2),
265 .num_queries = 2,
266 };
267
268 static const struct nvc0_hw_metric_query_cfg
269 sm30_inst_replay_overhead =
270 {
271 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
272 .queries[0] = _SM(INST_ISSUED1),
273 .queries[1] = _SM(INST_ISSUED2),
274 .queries[2] = _SM(INST_EXECUTED),
275 .num_queries = 3,
276 };
277
278 static const struct nvc0_hw_metric_query_cfg
279 sm30_issued_ipc =
280 {
281 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
282 .queries[0] = _SM(INST_ISSUED1),
283 .queries[1] = _SM(INST_ISSUED2),
284 .queries[2] = _SM(ACTIVE_CYCLES),
285 .num_queries = 3,
286 };
287
288 static const struct nvc0_hw_metric_query_cfg
289 sm30_issue_slots =
290 {
291 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
292 .queries[0] = _SM(INST_ISSUED1),
293 .queries[1] = _SM(INST_ISSUED2),
294 .num_queries = 2,
295 };
296
297 static const struct nvc0_hw_metric_query_cfg
298 sm30_issue_slot_utilization =
299 {
300 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
301 .queries[0] = _SM(INST_ISSUED1),
302 .queries[1] = _SM(INST_ISSUED2),
303 .queries[2] = _SM(ACTIVE_CYCLES),
304 .num_queries = 3,
305 };
306
307 static const struct nvc0_hw_metric_query_cfg
308 sm30_shared_replay_overhead =
309 {
310 .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
311 .queries[0] = _SM(SHARED_LD_REPLAY),
312 .queries[1] = _SM(SHARED_ST_REPLAY),
313 .queries[2] = _SM(INST_EXECUTED),
314 .num_queries = 3,
315 };
316
317 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
318 {
319 &sm20_achieved_occupancy,
320 &sm20_branch_efficiency,
321 &sm30_inst_issued,
322 &sm20_inst_per_wrap,
323 &sm30_inst_replay_overhead,
324 &sm20_ipc,
325 &sm30_issued_ipc,
326 &sm30_issue_slots,
327 &sm30_issue_slot_utilization,
328 &sm30_shared_replay_overhead,
329 };
330
331 /* ==== Compute capability 3.5 (GK110) ==== */
332 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
333 {
334 &sm20_achieved_occupancy,
335 &sm30_inst_issued,
336 &sm20_inst_per_wrap,
337 &sm30_inst_replay_overhead,
338 &sm20_ipc,
339 &sm30_issued_ipc,
340 &sm30_inst_issued,
341 &sm30_issue_slot_utilization,
342 &sm30_shared_replay_overhead,
343 };
344
345 #undef _SM
346
347 static inline const struct nvc0_hw_metric_query_cfg **
348 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
349 {
350 struct nouveau_device *dev = screen->base.device;
351
352 switch (screen->base.class_3d) {
353 case NVF0_3D_CLASS:
354 return sm35_hw_metric_queries;
355 case NVE4_3D_CLASS:
356 return sm30_hw_metric_queries;
357 default:
358 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
359 return sm20_hw_metric_queries;
360 return sm21_hw_metric_queries;
361 }
362 assert(0);
363 return NULL;
364 }
365
366 unsigned
367 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
368 {
369 struct nouveau_device *dev = screen->base.device;
370
371 switch (screen->base.class_3d) {
372 case NVF0_3D_CLASS:
373 return ARRAY_SIZE(sm35_hw_metric_queries);
374 case NVE4_3D_CLASS:
375 return ARRAY_SIZE(sm30_hw_metric_queries);
376 default:
377 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
378 return ARRAY_SIZE(sm20_hw_metric_queries);
379 return ARRAY_SIZE(sm21_hw_metric_queries);
380 }
381 return 0;
382 }
383
384 static const struct nvc0_hw_metric_query_cfg *
385 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
386 {
387 const struct nvc0_hw_metric_query_cfg **queries;
388 struct nvc0_screen *screen = nvc0->screen;
389 struct nvc0_query *q = &hq->base;
390 unsigned num_queries;
391 unsigned i;
392
393 num_queries = nvc0_hw_metric_get_num_queries(screen);
394 queries = nvc0_hw_metric_get_queries(screen);
395
396 for (i = 0; i < num_queries; i++) {
397 if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
398 return queries[i];
399 }
400 assert(0);
401 return NULL;
402 }
403
404 static void
405 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
406 struct nvc0_hw_query *hq)
407 {
408 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
409 unsigned i;
410
411 for (i = 0; i < hmq->num_queries; i++)
412 if (hmq->queries[i]->funcs->destroy_query)
413 hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
414 FREE(hmq);
415 }
416
417 static boolean
418 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
419 {
420 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
421 boolean ret = false;
422 unsigned i;
423
424 for (i = 0; i < hmq->num_queries; i++) {
425 ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
426 if (!ret)
427 return ret;
428 }
429 return ret;
430 }
431
432 static void
433 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
434 {
435 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
436 unsigned i;
437
438 for (i = 0; i < hmq->num_queries; i++)
439 hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
440 }
441
442 static uint64_t
443 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
444 {
445 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
446 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
447 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
448 if (res64[1])
449 return ((res64[0] / (double)res64[1]) / 48) * 100;
450 break;
451 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
452 /* (branch / (branch + divergent_branch)) * 100 */
453 if (res64[0] + res64[1])
454 return (res64[0] / (double)(res64[0] + res64[1])) * 100;
455 break;
456 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
457 /* inst_executed / warps_launched */
458 if (res64[1])
459 return res64[0] / (double)res64[1];
460 break;
461 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
462 /* (inst_issued - inst_executed) / inst_executed */
463 if (res64[1])
464 return (res64[0] - res64[1]) / (double)res64[1];
465 break;
466 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
467 /* inst_issued / active_cycles */
468 if (res64[1])
469 return res64[0] / (double)res64[1];
470 break;
471 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
472 /* ((inst_issued / 2) / active_cycles) * 100 */
473 if (res64[1])
474 return ((res64[0] / 2) / (double)res64[1]) * 100;
475 break;
476 case NVC0_HW_METRIC_QUERY_IPC:
477 /* inst_executed / active_cycles */
478 if (res64[1])
479 return res64[0] / (double)res64[1];
480 break;
481 default:
482 debug_printf("invalid metric type: %d\n",
483 hq->base.type - NVC0_HW_METRIC_QUERY(0));
484 break;
485 }
486 return 0;
487 }
488
489 static uint64_t
490 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
491 {
492 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
493 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
494 return sm20_hw_metric_calc_result(hq, res64);
495 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
496 return sm20_hw_metric_calc_result(hq, res64);
497 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
498 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
499 return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
500 break;
501 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
502 return sm20_hw_metric_calc_result(hq, res64);
503 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
504 /* (metric-inst_issued - inst_executed) / inst_executed */
505 if (res64[4])
506 return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
507 res64[4]) / (double)res64[4]);
508 break;
509 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
510 /* metric-inst_issued / active_cycles */
511 if (res64[4])
512 return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
513 (double)res64[4];
514 break;
515 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
516 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
517 return res64[0] + res64[1] + res64[2] + res64[3];
518 break;
519 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
520 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
521 if (res64[4])
522 return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
523 (double)res64[4]) * 100;
524 break;
525 case NVC0_HW_METRIC_QUERY_IPC:
526 return sm20_hw_metric_calc_result(hq, res64);
527 default:
528 debug_printf("invalid metric type: %d\n",
529 hq->base.type - NVC0_HW_METRIC_QUERY(0));
530 break;
531 }
532 return 0;
533 }
534
535 static uint64_t
536 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
537 {
538 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
539 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
540 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
541 if (res64[1])
542 return ((res64[0] / (double)res64[1]) / 64) * 100;
543 break;
544 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
545 return sm20_hw_metric_calc_result(hq, res64);
546 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
547 /* inst_issued1 + inst_issued2 * 2 */
548 return res64[0] + res64[1] * 2;
549 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
550 return sm20_hw_metric_calc_result(hq, res64);
551 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
552 /* (metric-inst_issued - inst_executed) / inst_executed */
553 if (res64[2])
554 return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
555 break;
556 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
557 /* metric-inst_issued / active_cycles */
558 if (res64[2])
559 return (res64[0] + res64[1] * 2) / (double)res64[2];
560 break;
561 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
562 /* inst_issued1 + inst_issued2 */
563 return res64[0] + res64[1];
564 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
565 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
566 if (res64[2])
567 return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
568 break;
569 case NVC0_HW_METRIC_QUERY_IPC:
570 return sm20_hw_metric_calc_result(hq, res64);
571 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
572 /* (shared_load_replay + shared_store_replay) / inst_executed */
573 if (res64[2])
574 return (res64[0] + res64[1]) / (double)res64[2];
575 break;
576 default:
577 debug_printf("invalid metric type: %d\n",
578 hq->base.type - NVC0_HW_METRIC_QUERY(0));
579 break;
580 }
581 return 0;
582 }
583
584 static boolean
585 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
586 struct nvc0_hw_query *hq, boolean wait,
587 union pipe_query_result *result)
588 {
589 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
590 struct nvc0_screen *screen = nvc0->screen;
591 struct nouveau_device *dev = screen->base.device;
592 union pipe_query_result results[8] = {};
593 uint64_t res64[8] = {};
594 uint64_t value = 0;
595 boolean ret = false;
596 unsigned i;
597
598 for (i = 0; i < hmq->num_queries; i++) {
599 ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
600 wait, &results[i]);
601 if (!ret)
602 return ret;
603 res64[i] = *(uint64_t *)&results[i];
604 }
605
606 switch (screen->base.class_3d) {
607 case NVF0_3D_CLASS:
608 case NVE4_3D_CLASS:
609 value = sm30_hw_metric_calc_result(hq, res64);
610 break;
611 default:
612 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
613 value = sm20_hw_metric_calc_result(hq, res64);
614 else
615 value = sm21_hw_metric_calc_result(hq, res64);
616 break;
617 }
618
619 *(uint64_t *)result = value;
620 return ret;
621 }
622
623 static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
624 .destroy_query = nvc0_hw_metric_destroy_query,
625 .begin_query = nvc0_hw_metric_begin_query,
626 .end_query = nvc0_hw_metric_end_query,
627 .get_query_result = nvc0_hw_metric_get_query_result,
628 };
629
630 struct nvc0_hw_query *
631 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
632 {
633 const struct nvc0_hw_metric_query_cfg *cfg;
634 struct nvc0_hw_metric_query *hmq;
635 struct nvc0_hw_query *hq;
636 unsigned i;
637
638 if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
639 return NULL;
640
641 hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
642 if (!hmq)
643 return NULL;
644
645 hq = &hmq->base;
646 hq->funcs = &hw_metric_query_funcs;
647 hq->base.type = type;
648
649 cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
650
651 for (i = 0; i < cfg->num_queries; i++) {
652 hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
653 if (!hmq->queries[i]) {
654 nvc0_hw_metric_destroy_query(nvc0, hq);
655 return NULL;
656 }
657 hmq->num_queries++;
658 }
659
660 return hq;
661 }
662
663 int
664 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
665 struct pipe_driver_query_info *info)
666 {
667 int count = 0;
668
669 if (screen->base.drm->version >= 0x01000101) {
670 if (screen->compute)
671 count = nvc0_hw_metric_get_num_queries(screen);
672 }
673
674 if (!info)
675 return count;
676
677 if (id < count) {
678 if (screen->compute) {
679 if (screen->base.class_3d <= NVF0_3D_CLASS) {
680 const struct nvc0_hw_metric_query_cfg **queries =
681 nvc0_hw_metric_get_queries(screen);
682 const struct nvc0_hw_metric_cfg *cfg =
683 nvc0_hw_metric_get_cfg(queries[id]->type);
684
685 info->name = cfg->name;
686 info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
687 info->type = cfg->type;
688 info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
689 return 1;
690 }
691 }
692 }
693 return 0;
694 }