b961cbf652ed6f9c7fa45720c9415d14a72cb4bf
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw_metric.c
1 /*
2 * Copyright 2015 Samuel Pitoiset
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
26
27 #define _Q(t,n) { NVC0_HW_METRIC_QUERY_##t, n }
28 struct {
29 unsigned type;
30 const char *name;
31 } nvc0_hw_metric_queries[] = {
32 _Q(ACHIEVED_OCCUPANCY, "metric-achieved_occupancy" ),
33 _Q(BRANCH_EFFICIENCY, "metric-branch_efficiency" ),
34 _Q(INST_ISSUED, "metric-inst_issued" ),
35 _Q(INST_PER_WRAP, "metric-inst_per_wrap" ),
36 _Q(INST_REPLAY_OVERHEAD, "metric-inst_replay_overhead" ),
37 _Q(ISSUED_IPC, "metric-issued_ipc" ),
38 _Q(ISSUE_SLOTS, "metric-issue_slots" ),
39 _Q(ISSUE_SLOT_UTILIZATION, "metric-issue_slot_utilization" ),
40 _Q(IPC, "metric-ipc" ),
41 _Q(SHARED_REPLAY_OVERHEAD, "metric-shared_replay_overhead" ),
42 };
43
44 #undef _Q
45
46 static inline const char *
47 nvc0_hw_metric_query_get_name(unsigned query_type)
48 {
49 unsigned i;
50
51 for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
52 if (nvc0_hw_metric_queries[i].type == query_type)
53 return nvc0_hw_metric_queries[i].name;
54 }
55 assert(0);
56 return NULL;
57 }
58
/*
 * Static description of one metric: which metric it is and which SM
 * performance-counter queries have to be created to compute it.
 */
struct nvc0_hw_metric_query_cfg {
   /* Metric identifier (one of the NVC0_HW_METRIC_QUERY_* values). */
   unsigned type;
   /* Query types handed to nvc0_hw_sm_create_query(), in the order in which
    * the result calculation reads them back (res64[0], res64[1], ...). */
   uint32_t queries[8];
   /* Number of valid entries at the start of 'queries'. */
   uint32_t num_queries;
};
64
65 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
66
67 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
68 static const struct nvc0_hw_metric_query_cfg
69 sm20_achieved_occupancy =
70 {
71 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
72 .queries[0] = _SM(ACTIVE_WARPS),
73 .queries[1] = _SM(ACTIVE_CYCLES),
74 .num_queries = 2,
75 };
76
77 static const struct nvc0_hw_metric_query_cfg
78 sm20_branch_efficiency =
79 {
80 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
81 .queries[0] = _SM(BRANCH),
82 .queries[1] = _SM(DIVERGENT_BRANCH),
83 .num_queries = 2,
84 };
85
86 static const struct nvc0_hw_metric_query_cfg
87 sm20_inst_per_wrap =
88 {
89 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
90 .queries[0] = _SM(INST_EXECUTED),
91 .queries[1] = _SM(WARPS_LAUNCHED),
92 .num_queries = 2,
93 };
94
95 static const struct nvc0_hw_metric_query_cfg
96 sm20_inst_replay_overhead =
97 {
98 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
99 .queries[0] = _SM(INST_ISSUED),
100 .queries[1] = _SM(INST_EXECUTED),
101 .num_queries = 2,
102 };
103
104 static const struct nvc0_hw_metric_query_cfg
105 sm20_issued_ipc =
106 {
107 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
108 .queries[0] = _SM(INST_ISSUED),
109 .queries[1] = _SM(ACTIVE_CYCLES),
110 .num_queries = 2,
111 };
112
113 static const struct nvc0_hw_metric_query_cfg
114 sm20_issue_slot_utilization =
115 {
116 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
117 .queries[0] = _SM(INST_ISSUED),
118 .queries[1] = _SM(ACTIVE_CYCLES),
119 .num_queries = 2,
120 };
121
122 static const struct nvc0_hw_metric_query_cfg
123 sm20_ipc =
124 {
125 .type = NVC0_HW_METRIC_QUERY_IPC,
126 .queries[0] = _SM(INST_EXECUTED),
127 .queries[1] = _SM(ACTIVE_CYCLES),
128 .num_queries = 2,
129 };
130
131 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
132 {
133 &sm20_achieved_occupancy,
134 &sm20_branch_efficiency,
135 &sm20_inst_per_wrap,
136 &sm20_inst_replay_overhead,
137 &sm20_issued_ipc,
138 &sm20_issue_slot_utilization,
139 &sm20_ipc,
140 };
141
142 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
143 static const struct nvc0_hw_metric_query_cfg
144 sm21_inst_issued =
145 {
146 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
147 .queries[0] = _SM(INST_ISSUED1_0),
148 .queries[1] = _SM(INST_ISSUED1_1),
149 .queries[2] = _SM(INST_ISSUED2_0),
150 .queries[3] = _SM(INST_ISSUED2_1),
151 .num_queries = 4,
152 };
153
154 static const struct nvc0_hw_metric_query_cfg
155 sm21_inst_replay_overhead =
156 {
157 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
158 .queries[0] = _SM(INST_ISSUED1_0),
159 .queries[1] = _SM(INST_ISSUED1_1),
160 .queries[2] = _SM(INST_ISSUED2_0),
161 .queries[3] = _SM(INST_ISSUED2_1),
162 .queries[4] = _SM(INST_EXECUTED),
163 .num_queries = 5,
164 };
165
166 static const struct nvc0_hw_metric_query_cfg
167 sm21_issued_ipc =
168 {
169 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
170 .queries[0] = _SM(INST_ISSUED1_0),
171 .queries[1] = _SM(INST_ISSUED1_1),
172 .queries[2] = _SM(INST_ISSUED2_0),
173 .queries[3] = _SM(INST_ISSUED2_1),
174 .queries[4] = _SM(ACTIVE_CYCLES),
175 .num_queries = 5,
176 };
177
178 static const struct nvc0_hw_metric_query_cfg
179 sm21_issue_slot_utilization =
180 {
181 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
182 .queries[0] = _SM(INST_ISSUED1_0),
183 .queries[1] = _SM(INST_ISSUED1_1),
184 .queries[2] = _SM(INST_ISSUED2_0),
185 .queries[3] = _SM(INST_ISSUED2_1),
186 .queries[4] = _SM(ACTIVE_CYCLES),
187 .num_queries = 5,
188 };
189
190 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
191 {
192 &sm20_achieved_occupancy,
193 &sm20_branch_efficiency,
194 &sm21_inst_issued,
195 &sm20_inst_per_wrap,
196 &sm21_inst_replay_overhead,
197 &sm21_issued_ipc,
198 &sm21_inst_issued,
199 &sm21_issue_slot_utilization,
200 &sm20_ipc,
201 };
202
203 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
204 static const struct nvc0_hw_metric_query_cfg
205 sm30_achieved_occupancy =
206 {
207 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
208 .queries[0] = _SM(ACTIVE_WARPS),
209 .queries[1] = _SM(ACTIVE_CYCLES),
210 .num_queries = 2,
211 };
212
213 static const struct nvc0_hw_metric_query_cfg
214 sm30_branch_efficiency =
215 {
216 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
217 .queries[0] = _SM(BRANCH),
218 .queries[1] = _SM(DIVERGENT_BRANCH),
219 .num_queries = 2,
220 };
221
222 static const struct nvc0_hw_metric_query_cfg
223 sm30_inst_issued =
224 {
225 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
226 .queries[0] = _SM(INST_ISSUED1),
227 .queries[1] = _SM(INST_ISSUED2),
228 .num_queries = 2,
229 };
230
231 static const struct nvc0_hw_metric_query_cfg
232 sm30_inst_per_wrap =
233 {
234 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
235 .queries[0] = _SM(INST_EXECUTED),
236 .queries[1] = _SM(WARPS_LAUNCHED),
237 .num_queries = 2,
238 };
239
240 static const struct nvc0_hw_metric_query_cfg
241 sm30_inst_replay_overhead =
242 {
243 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
244 .queries[0] = _SM(INST_ISSUED1),
245 .queries[1] = _SM(INST_ISSUED2),
246 .queries[2] = _SM(INST_EXECUTED),
247 .num_queries = 3,
248 };
249
250 static const struct nvc0_hw_metric_query_cfg
251 sm30_issued_ipc =
252 {
253 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
254 .queries[0] = _SM(INST_ISSUED1),
255 .queries[1] = _SM(INST_ISSUED2),
256 .queries[2] = _SM(ACTIVE_CYCLES),
257 .num_queries = 3,
258 };
259
260 static const struct nvc0_hw_metric_query_cfg
261 sm30_issue_slot_utilization =
262 {
263 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
264 .queries[0] = _SM(INST_ISSUED1),
265 .queries[1] = _SM(INST_ISSUED2),
266 .queries[2] = _SM(ACTIVE_CYCLES),
267 .num_queries = 3,
268 };
269
270 static const struct nvc0_hw_metric_query_cfg
271 sm30_ipc =
272 {
273 .type = NVC0_HW_METRIC_QUERY_IPC,
274 .queries[0] = _SM(INST_EXECUTED),
275 .queries[1] = _SM(ACTIVE_CYCLES),
276 .num_queries = 2,
277 };
278
279 static const struct nvc0_hw_metric_query_cfg
280 sm30_shared_replay_overhead =
281 {
282 .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
283 .queries[0] = _SM(SHARED_LD_REPLAY),
284 .queries[1] = _SM(SHARED_ST_REPLAY),
285 .queries[2] = _SM(INST_EXECUTED),
286 .num_queries = 3,
287 };
288
289 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
290 {
291 &sm30_achieved_occupancy,
292 &sm30_branch_efficiency,
293 &sm30_inst_issued,
294 &sm30_inst_per_wrap,
295 &sm30_inst_replay_overhead,
296 &sm30_issued_ipc,
297 &sm30_inst_issued,
298 &sm30_issue_slot_utilization,
299 &sm30_ipc,
300 &sm30_shared_replay_overhead,
301 };
302
303 /* ==== Compute capability 3.5 (GK110) ==== */
304 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
305 {
306 &sm30_achieved_occupancy,
307 &sm30_inst_issued,
308 &sm30_inst_per_wrap,
309 &sm30_inst_replay_overhead,
310 &sm30_issued_ipc,
311 &sm30_inst_issued,
312 &sm30_issue_slot_utilization,
313 &sm30_ipc,
314 &sm30_shared_replay_overhead,
315 };
316
317 #undef _SM
318
319 static inline const struct nvc0_hw_metric_query_cfg **
320 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
321 {
322 struct nouveau_device *dev = screen->base.device;
323
324 switch (screen->base.class_3d) {
325 case NVF0_3D_CLASS:
326 return sm35_hw_metric_queries;
327 case NVE4_3D_CLASS:
328 return sm30_hw_metric_queries;
329 default:
330 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
331 return sm20_hw_metric_queries;
332 return sm21_hw_metric_queries;
333 }
334 assert(0);
335 return NULL;
336 }
337
338 unsigned
339 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
340 {
341 struct nouveau_device *dev = screen->base.device;
342
343 switch (screen->base.class_3d) {
344 case NVF0_3D_CLASS:
345 return ARRAY_SIZE(sm35_hw_metric_queries);
346 case NVE4_3D_CLASS:
347 return ARRAY_SIZE(sm30_hw_metric_queries);
348 default:
349 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
350 return ARRAY_SIZE(sm20_hw_metric_queries);
351 return ARRAY_SIZE(sm21_hw_metric_queries);
352 }
353 return 0;
354 }
355
356 static const struct nvc0_hw_metric_query_cfg *
357 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
358 {
359 const struct nvc0_hw_metric_query_cfg **queries;
360 struct nvc0_screen *screen = nvc0->screen;
361 struct nvc0_query *q = &hq->base;
362 unsigned num_queries;
363 unsigned i;
364
365 num_queries = nvc0_hw_metric_get_num_queries(screen);
366 queries = nvc0_hw_metric_get_queries(screen);
367
368 for (i = 0; i < num_queries; i++) {
369 if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
370 return queries[i];
371 }
372 assert(0);
373 return NULL;
374 }
375
376 static void
377 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
378 struct nvc0_hw_query *hq)
379 {
380 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
381 unsigned i;
382
383 for (i = 0; i < hmq->num_queries; i++)
384 if (hmq->queries[i]->funcs->destroy_query)
385 hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
386 FREE(hmq);
387 }
388
389 static boolean
390 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
391 {
392 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
393 boolean ret = false;
394 unsigned i;
395
396 for (i = 0; i < hmq->num_queries; i++) {
397 ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
398 if (!ret)
399 return ret;
400 }
401 return ret;
402 }
403
404 static void
405 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
406 {
407 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
408 unsigned i;
409
410 for (i = 0; i < hmq->num_queries; i++)
411 hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
412 }
413
414 static uint64_t
415 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
416 {
417 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
418 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
419 /* (active_warps / active_cycles) / max. number of warps on a MP */
420 if (res64[1])
421 return (res64[0] / (double)res64[1]) / 48;
422 break;
423 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
424 /* (branch / (branch + divergent_branch)) * 100 */
425 if (res64[0] + res64[1])
426 return (res64[0] / (double)(res64[0] + res64[1])) * 100;
427 break;
428 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
429 /* inst_executed / warps_launched */
430 if (res64[1])
431 return res64[0] / (double)res64[1];
432 break;
433 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
434 /* (inst_issued - inst_executed) / inst_executed */
435 if (res64[1])
436 return (res64[0] - res64[1]) / (double)res64[1];
437 break;
438 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
439 /* inst_issued / active_cycles */
440 if (res64[1])
441 return res64[0] / (double)res64[1];
442 break;
443 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
444 /* ((inst_issued / 2) / active_cycles) * 100 */
445 if (res64[1])
446 return ((res64[0] / 2) / (double)res64[1]) * 100;
447 break;
448 case NVC0_HW_METRIC_QUERY_IPC:
449 /* inst_executed / active_cycles */
450 if (res64[1])
451 return res64[0] / (double)res64[1];
452 break;
453 default:
454 debug_printf("invalid metric type: %d\n",
455 hq->base.type - NVC0_HW_METRIC_QUERY(0));
456 break;
457 }
458 return 0;
459 }
460
461 static uint64_t
462 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
463 {
464 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
465 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
466 return sm20_hw_metric_calc_result(hq, res64);
467 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
468 return sm20_hw_metric_calc_result(hq, res64);
469 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
470 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
471 return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
472 break;
473 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
474 return sm20_hw_metric_calc_result(hq, res64);
475 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
476 /* (metric-inst_issued - inst_executed) / inst_executed */
477 if (res64[4])
478 return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
479 res64[4]) / (double)res64[4]);
480 break;
481 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
482 /* metric-inst_issued / active_cycles */
483 if (res64[4])
484 return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
485 (double)res64[4];
486 break;
487 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
488 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
489 return res64[0] + res64[1] + res64[2] + res64[3];
490 break;
491 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
492 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
493 if (res64[4])
494 return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
495 (double)res64[4]) * 100;
496 break;
497 case NVC0_HW_METRIC_QUERY_IPC:
498 return sm20_hw_metric_calc_result(hq, res64);
499 default:
500 debug_printf("invalid metric type: %d\n",
501 hq->base.type - NVC0_HW_METRIC_QUERY(0));
502 break;
503 }
504 return 0;
505 }
506
507 static uint64_t
508 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
509 {
510 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
511 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
512 /* (active_warps / active_cycles) / max. number of warps on a MP */
513 if (res64[1])
514 return (res64[0] / (double)res64[1]) / 64;
515 break;
516 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
517 return sm20_hw_metric_calc_result(hq, res64);
518 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
519 /* inst_issued1 + inst_issued2 * 2 */
520 return res64[0] + res64[1] * 2;
521 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
522 return sm20_hw_metric_calc_result(hq, res64);
523 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
524 /* (metric-inst_issued - inst_executed) / inst_executed */
525 if (res64[2])
526 return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
527 break;
528 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
529 /* metric-inst_issued / active_cycles */
530 if (res64[2])
531 return (res64[0] + res64[1] * 2) / (double)res64[2];
532 break;
533 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
534 /* inst_issued1 + inst_issued2 */
535 return res64[0] + res64[1];
536 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
537 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
538 if (res64[2])
539 return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
540 break;
541 case NVC0_HW_METRIC_QUERY_IPC:
542 return sm20_hw_metric_calc_result(hq, res64);
543 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
544 /* (shared_load_replay + shared_store_replay) / inst_executed */
545 if (res64[2])
546 return (res64[0] + res64[1]) / (double)res64[2];
547 break;
548 default:
549 debug_printf("invalid metric type: %d\n",
550 hq->base.type - NVC0_HW_METRIC_QUERY(0));
551 break;
552 }
553 return 0;
554 }
555
556 static boolean
557 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
558 struct nvc0_hw_query *hq, boolean wait,
559 union pipe_query_result *result)
560 {
561 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
562 struct nvc0_screen *screen = nvc0->screen;
563 struct nouveau_device *dev = screen->base.device;
564 union pipe_query_result results[8] = {};
565 uint64_t res64[8] = {};
566 uint64_t value = 0;
567 boolean ret = false;
568 unsigned i;
569
570 for (i = 0; i < hmq->num_queries; i++) {
571 ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
572 wait, &results[i]);
573 if (!ret)
574 return ret;
575 res64[i] = *(uint64_t *)&results[i];
576 }
577
578 switch (screen->base.class_3d) {
579 case NVF0_3D_CLASS:
580 case NVE4_3D_CLASS:
581 value = sm30_hw_metric_calc_result(hq, res64);
582 break;
583 default:
584 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
585 value = sm20_hw_metric_calc_result(hq, res64);
586 else
587 value = sm21_hw_metric_calc_result(hq, res64);
588 break;
589 }
590
591 *(uint64_t *)result = value;
592 return ret;
593 }
594
595 static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
596 .destroy_query = nvc0_hw_metric_destroy_query,
597 .begin_query = nvc0_hw_metric_begin_query,
598 .end_query = nvc0_hw_metric_end_query,
599 .get_query_result = nvc0_hw_metric_get_query_result,
600 };
601
602 struct nvc0_hw_query *
603 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
604 {
605 const struct nvc0_hw_metric_query_cfg *cfg;
606 struct nvc0_hw_metric_query *hmq;
607 struct nvc0_hw_query *hq;
608 unsigned i;
609
610 if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
611 return NULL;
612
613 hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
614 if (!hmq)
615 return NULL;
616
617 hq = &hmq->base;
618 hq->funcs = &hw_metric_query_funcs;
619 hq->base.type = type;
620
621 cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
622
623 for (i = 0; i < cfg->num_queries; i++) {
624 hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
625 if (!hmq->queries[i]) {
626 nvc0_hw_metric_destroy_query(nvc0, hq);
627 return NULL;
628 }
629 hmq->num_queries++;
630 }
631
632 return hq;
633 }
634
635 int
636 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
637 struct pipe_driver_query_info *info)
638 {
639 int count = 0;
640
641 if (screen->base.drm->version >= 0x01000101) {
642 if (screen->compute)
643 count = nvc0_hw_metric_get_num_queries(screen);
644 }
645
646 if (!info)
647 return count;
648
649 if (id < count) {
650 if (screen->compute) {
651 if (screen->base.class_3d <= NVF0_3D_CLASS) {
652 const struct nvc0_hw_metric_query_cfg **queries =
653 nvc0_hw_metric_get_queries(screen);
654
655 info->name = nvc0_hw_metric_query_get_name(queries[id]->type);
656 info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
657 info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
658 return 1;
659 }
660 }
661 }
662 return 0;
663 }