gm107/ir: allow indirect inputs to be loaded by frag shader
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw_metric.c
1 /*
2 * Copyright 2015 Samuel Pitoiset
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
26
27 #define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d }
28 static const struct nvc0_hw_metric_cfg {
29 unsigned id;
30 const char *name;
31 enum pipe_driver_query_type type;
32 const char *desc;
33 } nvc0_hw_metric_queries[] = {
34 _Q(ACHIEVED_OCCUPANCY,
35 "metric-achieved_occupancy",
36 PERCENTAGE,
37 "Ratio of the average active warps per active cycle to the maximum number "
38 "of warps supported on a multiprocessor"),
39
40 _Q(BRANCH_EFFICIENCY,
41 "metric-branch_efficiency",
42 PERCENTAGE,
43 "Ratio of non-divergent branches to total branches"),
44
45 _Q(INST_ISSUED,
46 "metric-inst_issued",
47 UINT64,
48 "The number of instructions issued"),
49
50 _Q(INST_PER_WRAP,
51 "metric-inst_per_wrap",
52 UINT64,
53 "Average number of instructions executed by each warp"),
54
55 _Q(INST_REPLAY_OVERHEAD,
56 "metric-inst_replay_overhead",
57 UINT64,
58 "Average number of replays for each instruction executed"),
59
60 _Q(ISSUED_IPC,
61 "metric-issued_ipc",
62 UINT64,
63 "Instructions issued per cycle"),
64
65 _Q(ISSUE_SLOTS,
66 "metric-issue_slots",
67 UINT64,
68 "The number of issue slots used"),
69
70 _Q(ISSUE_SLOT_UTILIZATION,
71 "metric-issue_slot_utilization",
72 PERCENTAGE,
73 "Percentage of issue slots that issued at least one instruction, averaged "
74 "across all cycles"),
75
76 _Q(IPC,
77 "metric-ipc",
78 UINT64,
79 "Instructions executed per cycle"),
80
81 _Q(SHARED_REPLAY_OVERHEAD,
82 "metric-shared_replay_overhead",
83 UINT64,
84 "Average number of replays due to shared memory conflicts for each "
85 "instruction executed"),
86 };
87
88 #undef _Q
89
90 static inline const struct nvc0_hw_metric_cfg *
91 nvc0_hw_metric_get_cfg(unsigned metric_id)
92 {
93 unsigned i;
94
95 for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
96 if (nvc0_hw_metric_queries[i].id == metric_id)
97 return &nvc0_hw_metric_queries[i];
98 }
99 assert(0);
100 return NULL;
101 }
102
/* Recipe describing which SM counter queries make up one metric. */
struct nvc0_hw_metric_query_cfg {
   /* Metric identifier (one of the NVC0_HW_METRIC_QUERY_* values). */
   unsigned type;
   /* SM query types to create, in the order the result formulas index them. */
   uint32_t queries[8];
   /* Number of leading entries of 'queries' that are in use. */
   uint32_t num_queries;
};
108
109 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
110
111 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
112 static const struct nvc0_hw_metric_query_cfg
113 sm20_achieved_occupancy =
114 {
115 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
116 .queries[0] = _SM(ACTIVE_WARPS),
117 .queries[1] = _SM(ACTIVE_CYCLES),
118 .num_queries = 2,
119 };
120
121 static const struct nvc0_hw_metric_query_cfg
122 sm20_branch_efficiency =
123 {
124 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
125 .queries[0] = _SM(BRANCH),
126 .queries[1] = _SM(DIVERGENT_BRANCH),
127 .num_queries = 2,
128 };
129
130 static const struct nvc0_hw_metric_query_cfg
131 sm20_inst_per_wrap =
132 {
133 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
134 .queries[0] = _SM(INST_EXECUTED),
135 .queries[1] = _SM(WARPS_LAUNCHED),
136 .num_queries = 2,
137 };
138
139 static const struct nvc0_hw_metric_query_cfg
140 sm20_inst_replay_overhead =
141 {
142 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
143 .queries[0] = _SM(INST_ISSUED),
144 .queries[1] = _SM(INST_EXECUTED),
145 .num_queries = 2,
146 };
147
148 static const struct nvc0_hw_metric_query_cfg
149 sm20_issued_ipc =
150 {
151 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
152 .queries[0] = _SM(INST_ISSUED),
153 .queries[1] = _SM(ACTIVE_CYCLES),
154 .num_queries = 2,
155 };
156
157 static const struct nvc0_hw_metric_query_cfg
158 sm20_issue_slot_utilization =
159 {
160 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
161 .queries[0] = _SM(INST_ISSUED),
162 .queries[1] = _SM(ACTIVE_CYCLES),
163 .num_queries = 2,
164 };
165
166 static const struct nvc0_hw_metric_query_cfg
167 sm20_ipc =
168 {
169 .type = NVC0_HW_METRIC_QUERY_IPC,
170 .queries[0] = _SM(INST_EXECUTED),
171 .queries[1] = _SM(ACTIVE_CYCLES),
172 .num_queries = 2,
173 };
174
175 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
176 {
177 &sm20_achieved_occupancy,
178 &sm20_branch_efficiency,
179 &sm20_inst_per_wrap,
180 &sm20_inst_replay_overhead,
181 &sm20_issued_ipc,
182 &sm20_issue_slot_utilization,
183 &sm20_ipc,
184 };
185
186 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
187 static const struct nvc0_hw_metric_query_cfg
188 sm21_inst_issued =
189 {
190 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
191 .queries[0] = _SM(INST_ISSUED1_0),
192 .queries[1] = _SM(INST_ISSUED1_1),
193 .queries[2] = _SM(INST_ISSUED2_0),
194 .queries[3] = _SM(INST_ISSUED2_1),
195 .num_queries = 4,
196 };
197
198 static const struct nvc0_hw_metric_query_cfg
199 sm21_inst_replay_overhead =
200 {
201 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
202 .queries[0] = _SM(INST_ISSUED1_0),
203 .queries[1] = _SM(INST_ISSUED1_1),
204 .queries[2] = _SM(INST_ISSUED2_0),
205 .queries[3] = _SM(INST_ISSUED2_1),
206 .queries[4] = _SM(INST_EXECUTED),
207 .num_queries = 5,
208 };
209
210 static const struct nvc0_hw_metric_query_cfg
211 sm21_issued_ipc =
212 {
213 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
214 .queries[0] = _SM(INST_ISSUED1_0),
215 .queries[1] = _SM(INST_ISSUED1_1),
216 .queries[2] = _SM(INST_ISSUED2_0),
217 .queries[3] = _SM(INST_ISSUED2_1),
218 .queries[4] = _SM(ACTIVE_CYCLES),
219 .num_queries = 5,
220 };
221
222 static const struct nvc0_hw_metric_query_cfg
223 sm21_issue_slots =
224 {
225 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
226 .queries[0] = _SM(INST_ISSUED1_0),
227 .queries[1] = _SM(INST_ISSUED1_1),
228 .queries[2] = _SM(INST_ISSUED2_0),
229 .queries[3] = _SM(INST_ISSUED2_1),
230 .num_queries = 4,
231 };
232
233 static const struct nvc0_hw_metric_query_cfg
234 sm21_issue_slot_utilization =
235 {
236 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
237 .queries[0] = _SM(INST_ISSUED1_0),
238 .queries[1] = _SM(INST_ISSUED1_1),
239 .queries[2] = _SM(INST_ISSUED2_0),
240 .queries[3] = _SM(INST_ISSUED2_1),
241 .queries[4] = _SM(ACTIVE_CYCLES),
242 .num_queries = 5,
243 };
244
245 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
246 {
247 &sm20_achieved_occupancy,
248 &sm20_branch_efficiency,
249 &sm21_inst_issued,
250 &sm20_inst_per_wrap,
251 &sm21_inst_replay_overhead,
252 &sm21_issued_ipc,
253 &sm21_issue_slots,
254 &sm21_issue_slot_utilization,
255 &sm20_ipc,
256 };
257
258 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
259 static const struct nvc0_hw_metric_query_cfg
260 sm30_achieved_occupancy =
261 {
262 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
263 .queries[0] = _SM(ACTIVE_WARPS),
264 .queries[1] = _SM(ACTIVE_CYCLES),
265 .num_queries = 2,
266 };
267
268 static const struct nvc0_hw_metric_query_cfg
269 sm30_branch_efficiency =
270 {
271 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
272 .queries[0] = _SM(BRANCH),
273 .queries[1] = _SM(DIVERGENT_BRANCH),
274 .num_queries = 2,
275 };
276
277 static const struct nvc0_hw_metric_query_cfg
278 sm30_inst_issued =
279 {
280 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
281 .queries[0] = _SM(INST_ISSUED1),
282 .queries[1] = _SM(INST_ISSUED2),
283 .num_queries = 2,
284 };
285
286 static const struct nvc0_hw_metric_query_cfg
287 sm30_inst_per_wrap =
288 {
289 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
290 .queries[0] = _SM(INST_EXECUTED),
291 .queries[1] = _SM(WARPS_LAUNCHED),
292 .num_queries = 2,
293 };
294
295 static const struct nvc0_hw_metric_query_cfg
296 sm30_inst_replay_overhead =
297 {
298 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
299 .queries[0] = _SM(INST_ISSUED1),
300 .queries[1] = _SM(INST_ISSUED2),
301 .queries[2] = _SM(INST_EXECUTED),
302 .num_queries = 3,
303 };
304
305 static const struct nvc0_hw_metric_query_cfg
306 sm30_issued_ipc =
307 {
308 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
309 .queries[0] = _SM(INST_ISSUED1),
310 .queries[1] = _SM(INST_ISSUED2),
311 .queries[2] = _SM(ACTIVE_CYCLES),
312 .num_queries = 3,
313 };
314
315 static const struct nvc0_hw_metric_query_cfg
316 sm30_issue_slots =
317 {
318 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
319 .queries[0] = _SM(INST_ISSUED1),
320 .queries[1] = _SM(INST_ISSUED2),
321 .num_queries = 2,
322 };
323
324 static const struct nvc0_hw_metric_query_cfg
325 sm30_issue_slot_utilization =
326 {
327 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
328 .queries[0] = _SM(INST_ISSUED1),
329 .queries[1] = _SM(INST_ISSUED2),
330 .queries[2] = _SM(ACTIVE_CYCLES),
331 .num_queries = 3,
332 };
333
334 static const struct nvc0_hw_metric_query_cfg
335 sm30_ipc =
336 {
337 .type = NVC0_HW_METRIC_QUERY_IPC,
338 .queries[0] = _SM(INST_EXECUTED),
339 .queries[1] = _SM(ACTIVE_CYCLES),
340 .num_queries = 2,
341 };
342
343 static const struct nvc0_hw_metric_query_cfg
344 sm30_shared_replay_overhead =
345 {
346 .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
347 .queries[0] = _SM(SHARED_LD_REPLAY),
348 .queries[1] = _SM(SHARED_ST_REPLAY),
349 .queries[2] = _SM(INST_EXECUTED),
350 .num_queries = 3,
351 };
352
353 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
354 {
355 &sm30_achieved_occupancy,
356 &sm30_branch_efficiency,
357 &sm30_inst_issued,
358 &sm30_inst_per_wrap,
359 &sm30_inst_replay_overhead,
360 &sm30_issued_ipc,
361 &sm30_issue_slots,
362 &sm30_issue_slot_utilization,
363 &sm30_ipc,
364 &sm30_shared_replay_overhead,
365 };
366
367 /* ==== Compute capability 3.5 (GK110) ==== */
368 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
369 {
370 &sm30_achieved_occupancy,
371 &sm30_inst_issued,
372 &sm30_inst_per_wrap,
373 &sm30_inst_replay_overhead,
374 &sm30_issued_ipc,
375 &sm30_inst_issued,
376 &sm30_issue_slot_utilization,
377 &sm30_ipc,
378 &sm30_shared_replay_overhead,
379 };
380
381 #undef _SM
382
383 static inline const struct nvc0_hw_metric_query_cfg **
384 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
385 {
386 struct nouveau_device *dev = screen->base.device;
387
388 switch (screen->base.class_3d) {
389 case NVF0_3D_CLASS:
390 return sm35_hw_metric_queries;
391 case NVE4_3D_CLASS:
392 return sm30_hw_metric_queries;
393 default:
394 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
395 return sm20_hw_metric_queries;
396 return sm21_hw_metric_queries;
397 }
398 assert(0);
399 return NULL;
400 }
401
402 unsigned
403 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
404 {
405 struct nouveau_device *dev = screen->base.device;
406
407 switch (screen->base.class_3d) {
408 case NVF0_3D_CLASS:
409 return ARRAY_SIZE(sm35_hw_metric_queries);
410 case NVE4_3D_CLASS:
411 return ARRAY_SIZE(sm30_hw_metric_queries);
412 default:
413 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
414 return ARRAY_SIZE(sm20_hw_metric_queries);
415 return ARRAY_SIZE(sm21_hw_metric_queries);
416 }
417 return 0;
418 }
419
420 static const struct nvc0_hw_metric_query_cfg *
421 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
422 {
423 const struct nvc0_hw_metric_query_cfg **queries;
424 struct nvc0_screen *screen = nvc0->screen;
425 struct nvc0_query *q = &hq->base;
426 unsigned num_queries;
427 unsigned i;
428
429 num_queries = nvc0_hw_metric_get_num_queries(screen);
430 queries = nvc0_hw_metric_get_queries(screen);
431
432 for (i = 0; i < num_queries; i++) {
433 if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
434 return queries[i];
435 }
436 assert(0);
437 return NULL;
438 }
439
440 static void
441 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
442 struct nvc0_hw_query *hq)
443 {
444 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
445 unsigned i;
446
447 for (i = 0; i < hmq->num_queries; i++)
448 if (hmq->queries[i]->funcs->destroy_query)
449 hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
450 FREE(hmq);
451 }
452
453 static boolean
454 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
455 {
456 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
457 boolean ret = false;
458 unsigned i;
459
460 for (i = 0; i < hmq->num_queries; i++) {
461 ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
462 if (!ret)
463 return ret;
464 }
465 return ret;
466 }
467
468 static void
469 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
470 {
471 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
472 unsigned i;
473
474 for (i = 0; i < hmq->num_queries; i++)
475 hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
476 }
477
478 static uint64_t
479 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
480 {
481 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
482 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
483 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
484 if (res64[1])
485 return ((res64[0] / (double)res64[1]) / 48) * 100;
486 break;
487 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
488 /* (branch / (branch + divergent_branch)) * 100 */
489 if (res64[0] + res64[1])
490 return (res64[0] / (double)(res64[0] + res64[1])) * 100;
491 break;
492 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
493 /* inst_executed / warps_launched */
494 if (res64[1])
495 return res64[0] / (double)res64[1];
496 break;
497 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
498 /* (inst_issued - inst_executed) / inst_executed */
499 if (res64[1])
500 return (res64[0] - res64[1]) / (double)res64[1];
501 break;
502 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
503 /* inst_issued / active_cycles */
504 if (res64[1])
505 return res64[0] / (double)res64[1];
506 break;
507 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
508 /* ((inst_issued / 2) / active_cycles) * 100 */
509 if (res64[1])
510 return ((res64[0] / 2) / (double)res64[1]) * 100;
511 break;
512 case NVC0_HW_METRIC_QUERY_IPC:
513 /* inst_executed / active_cycles */
514 if (res64[1])
515 return res64[0] / (double)res64[1];
516 break;
517 default:
518 debug_printf("invalid metric type: %d\n",
519 hq->base.type - NVC0_HW_METRIC_QUERY(0));
520 break;
521 }
522 return 0;
523 }
524
525 static uint64_t
526 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
527 {
528 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
529 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
530 return sm20_hw_metric_calc_result(hq, res64);
531 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
532 return sm20_hw_metric_calc_result(hq, res64);
533 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
534 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
535 return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
536 break;
537 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
538 return sm20_hw_metric_calc_result(hq, res64);
539 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
540 /* (metric-inst_issued - inst_executed) / inst_executed */
541 if (res64[4])
542 return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
543 res64[4]) / (double)res64[4]);
544 break;
545 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
546 /* metric-inst_issued / active_cycles */
547 if (res64[4])
548 return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
549 (double)res64[4];
550 break;
551 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
552 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
553 return res64[0] + res64[1] + res64[2] + res64[3];
554 break;
555 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
556 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
557 if (res64[4])
558 return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
559 (double)res64[4]) * 100;
560 break;
561 case NVC0_HW_METRIC_QUERY_IPC:
562 return sm20_hw_metric_calc_result(hq, res64);
563 default:
564 debug_printf("invalid metric type: %d\n",
565 hq->base.type - NVC0_HW_METRIC_QUERY(0));
566 break;
567 }
568 return 0;
569 }
570
571 static uint64_t
572 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
573 {
574 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
575 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
576 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
577 if (res64[1])
578 return ((res64[0] / (double)res64[1]) / 64) * 100;
579 break;
580 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
581 return sm20_hw_metric_calc_result(hq, res64);
582 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
583 /* inst_issued1 + inst_issued2 * 2 */
584 return res64[0] + res64[1] * 2;
585 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
586 return sm20_hw_metric_calc_result(hq, res64);
587 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
588 /* (metric-inst_issued - inst_executed) / inst_executed */
589 if (res64[2])
590 return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
591 break;
592 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
593 /* metric-inst_issued / active_cycles */
594 if (res64[2])
595 return (res64[0] + res64[1] * 2) / (double)res64[2];
596 break;
597 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
598 /* inst_issued1 + inst_issued2 */
599 return res64[0] + res64[1];
600 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
601 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
602 if (res64[2])
603 return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
604 break;
605 case NVC0_HW_METRIC_QUERY_IPC:
606 return sm20_hw_metric_calc_result(hq, res64);
607 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
608 /* (shared_load_replay + shared_store_replay) / inst_executed */
609 if (res64[2])
610 return (res64[0] + res64[1]) / (double)res64[2];
611 break;
612 default:
613 debug_printf("invalid metric type: %d\n",
614 hq->base.type - NVC0_HW_METRIC_QUERY(0));
615 break;
616 }
617 return 0;
618 }
619
620 static boolean
621 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
622 struct nvc0_hw_query *hq, boolean wait,
623 union pipe_query_result *result)
624 {
625 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
626 struct nvc0_screen *screen = nvc0->screen;
627 struct nouveau_device *dev = screen->base.device;
628 union pipe_query_result results[8] = {};
629 uint64_t res64[8] = {};
630 uint64_t value = 0;
631 boolean ret = false;
632 unsigned i;
633
634 for (i = 0; i < hmq->num_queries; i++) {
635 ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
636 wait, &results[i]);
637 if (!ret)
638 return ret;
639 res64[i] = *(uint64_t *)&results[i];
640 }
641
642 switch (screen->base.class_3d) {
643 case NVF0_3D_CLASS:
644 case NVE4_3D_CLASS:
645 value = sm30_hw_metric_calc_result(hq, res64);
646 break;
647 default:
648 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
649 value = sm20_hw_metric_calc_result(hq, res64);
650 else
651 value = sm21_hw_metric_calc_result(hq, res64);
652 break;
653 }
654
655 *(uint64_t *)result = value;
656 return ret;
657 }
658
659 static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
660 .destroy_query = nvc0_hw_metric_destroy_query,
661 .begin_query = nvc0_hw_metric_begin_query,
662 .end_query = nvc0_hw_metric_end_query,
663 .get_query_result = nvc0_hw_metric_get_query_result,
664 };
665
666 struct nvc0_hw_query *
667 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
668 {
669 const struct nvc0_hw_metric_query_cfg *cfg;
670 struct nvc0_hw_metric_query *hmq;
671 struct nvc0_hw_query *hq;
672 unsigned i;
673
674 if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
675 return NULL;
676
677 hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
678 if (!hmq)
679 return NULL;
680
681 hq = &hmq->base;
682 hq->funcs = &hw_metric_query_funcs;
683 hq->base.type = type;
684
685 cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
686
687 for (i = 0; i < cfg->num_queries; i++) {
688 hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
689 if (!hmq->queries[i]) {
690 nvc0_hw_metric_destroy_query(nvc0, hq);
691 return NULL;
692 }
693 hmq->num_queries++;
694 }
695
696 return hq;
697 }
698
699 int
700 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
701 struct pipe_driver_query_info *info)
702 {
703 int count = 0;
704
705 if (screen->base.drm->version >= 0x01000101) {
706 if (screen->compute)
707 count = nvc0_hw_metric_get_num_queries(screen);
708 }
709
710 if (!info)
711 return count;
712
713 if (id < count) {
714 if (screen->compute) {
715 if (screen->base.class_3d <= NVF0_3D_CLASS) {
716 const struct nvc0_hw_metric_query_cfg **queries =
717 nvc0_hw_metric_get_queries(screen);
718 const struct nvc0_hw_metric_cfg *cfg =
719 nvc0_hw_metric_get_cfg(queries[id]->type);
720
721 info->name = cfg->name;
722 info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
723 info->type = cfg->type;
724 info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
725 return 1;
726 }
727 }
728 }
729 return 0;
730 }