/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
27 #include "perf/gen_perf.h"
/**
 * Data format expected by MDAPI.
 */

/**
 * Result record for the gen7 raw OA query.
 *
 * Members appear in the same order in which the gen7 branch of
 * brw_perf_query_register_mdapi_oa_query() registers them as counters;
 * each registered counter's offset/size is derived from this layout, so
 * members must not be reordered or resized.
 */
struct mdapi_gen7_metrics {
   uint64_t TotalTime;

   uint64_t ACounters[45];
   uint64_t NOACounters[16];

   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;          /* registered as BOOL32 */
   uint32_t CoreFrequencyChanged;  /* registered as BOOL32 */
   uint64_t CoreFrequency;
   uint32_t ReportId;
   uint32_t ReportsCount;
};

#define GTDI_QUERY_BDW_METRICS_OA_COUNT         36
#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT     32
#define GTDI_QUERY_BDW_METRICS_NOA_COUNT        16

/**
 * Result record for the gen8 raw OA query.
 *
 * Members appear in the same order in which the gen8 branch of
 * brw_perf_query_register_mdapi_oa_query() registers them as counters;
 * each registered counter's offset/size is derived from this layout, so
 * members must not be reordered or resized.
 */
struct mdapi_gen8_metrics {
   uint64_t TotalTime;
   uint64_t GPUTicks;
   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
   uint64_t BeginTimestamp;
   uint64_t Reserved1;
   uint64_t Reserved2;
   uint32_t Reserved3;
   uint32_t OverrunOccured;        /* registered as BOOL32 */
   uint64_t MarkerUser;
   uint64_t MarkerDriver;

   uint64_t SliceFrequency;
   uint64_t UnsliceFrequency;
   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;          /* registered as BOOL32 */
   uint32_t CoreFrequencyChanged;  /* registered as BOOL32 */
   uint64_t CoreFrequency;
   uint32_t ReportId;
   uint32_t ReportsCount;
};

75 #define GTDI_MAX_READ_REGS 16
77 struct mdapi_gen9_metrics
{
80 uint64_t OaCntr
[GTDI_QUERY_BDW_METRICS_OA_COUNT
];
81 uint64_t NoaCntr
[GTDI_QUERY_BDW_METRICS_NOA_COUNT
];
82 uint64_t BeginTimestamp
;
86 uint32_t OverrunOccured
;
88 uint64_t MarkerDriver
;
90 uint64_t SliceFrequency
;
91 uint64_t UnsliceFrequency
;
92 uint64_t PerfCounter1
;
93 uint64_t PerfCounter2
;
94 uint32_t SplitOccured
;
95 uint32_t CoreFrequencyChanged
;
96 uint64_t CoreFrequency
;
98 uint32_t ReportsCount
;
100 uint64_t UserCntr
[GTDI_MAX_READ_REGS
];
101 uint32_t UserCntrCfgId
;
/**
 * Result record for the raw pipeline statistics query: one 64-bit slot per
 * statistic, in the order brw_perf_query_register_mdapi_statistic_query()
 * registers the statistic registers.
 */
struct mdapi_pipeline_metrics {
   /* NOTE(review): slot for IA_VERTICES_COUNT, the first statistic
    * registered; the member name is assumed by analogy with IAPrimitives —
    * confirm against the MDAPI definition.
    */
   uint64_t IAVertices;
   uint64_t IAPrimitives;
   uint64_t VSInvocations;
   uint64_t GSInvocations;
   uint64_t GSPrimitives;
   uint64_t CInvocations;
   uint64_t CPrimitives;
   uint64_t PSInvocations;
   uint64_t HSInvocations;
   uint64_t DSInvocations;
   uint64_t CSInvocations;
};

120 brw_perf_query_get_mdapi_oa_data(struct brw_context
*brw
,
121 struct brw_perf_query_object
*obj
,
125 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
127 switch (devinfo
->gen
) {
129 struct mdapi_gen7_metrics
*mdapi_data
= (struct mdapi_gen7_metrics
*) data
;
131 if (data_size
< sizeof(*mdapi_data
))
134 assert(devinfo
->is_haswell
);
136 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->ACounters
); i
++)
137 mdapi_data
->ACounters
[i
] = obj
->oa
.accumulator
[1 + i
];
139 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NOACounters
); i
++) {
140 mdapi_data
->NOACounters
[i
] =
141 obj
->oa
.accumulator
[1 + ARRAY_SIZE(mdapi_data
->ACounters
) + i
];
144 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
145 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
146 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
147 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
148 return sizeof(*mdapi_data
);
151 struct mdapi_gen8_metrics
*mdapi_data
= (struct mdapi_gen8_metrics
*) data
;
153 if (data_size
< sizeof(*mdapi_data
))
156 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
157 mdapi_data
->OaCntr
[i
] = obj
->oa
.accumulator
[2 + i
];
158 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
159 mdapi_data
->NoaCntr
[i
] =
160 obj
->oa
.accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
163 mdapi_data
->ReportId
= obj
->oa
.hw_id
;
164 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
165 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
166 mdapi_data
->GPUTicks
= obj
->oa
.accumulator
[1];
167 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
168 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
169 mdapi_data
->SliceFrequency
= (obj
->oa
.slice_frequency
[0] + obj
->oa
.slice_frequency
[1]) / 2ULL;
170 mdapi_data
->UnsliceFrequency
= (obj
->oa
.unslice_frequency
[0] + obj
->oa
.unslice_frequency
[1]) / 2ULL;
172 return sizeof(*mdapi_data
);
177 struct mdapi_gen9_metrics
*mdapi_data
= (struct mdapi_gen9_metrics
*) data
;
179 if (data_size
< sizeof(*mdapi_data
))
182 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
183 mdapi_data
->OaCntr
[i
] = obj
->oa
.accumulator
[2 + i
];
184 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
185 mdapi_data
->NoaCntr
[i
] =
186 obj
->oa
.accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
189 mdapi_data
->ReportId
= obj
->oa
.hw_id
;
190 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
191 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
192 mdapi_data
->GPUTicks
= obj
->oa
.accumulator
[1];
193 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
194 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
195 mdapi_data
->SliceFrequency
= (obj
->oa
.slice_frequency
[0] + obj
->oa
.slice_frequency
[1]) / 2ULL;
196 mdapi_data
->UnsliceFrequency
= (obj
->oa
.unslice_frequency
[0] + obj
->oa
.unslice_frequency
[1]) / 2ULL;
198 return sizeof(*mdapi_data
);
201 unreachable("unexpected gen");
208 fill_mdapi_perf_query_counter(struct gen_perf_query_info
*query
,
210 uint32_t data_offset
,
212 enum gen_perf_counter_data_type data_type
)
214 struct gen_perf_query_counter
*counter
= &query
->counters
[query
->n_counters
];
216 assert(query
->n_counters
<= query
->max_counters
);
218 counter
->name
= name
;
219 counter
->desc
= "Raw counter value";
220 counter
->type
= GEN_PERF_COUNTER_TYPE_RAW
;
221 counter
->data_type
= data_type
;
222 counter
->offset
= data_offset
;
223 counter
->size
= data_size
;
224 assert(counter
->offset
+ counter
->size
<= query
->data_size
);
/* Distance in bytes from the start of the object `base` to `base.member`. */
#define MDAPI_MEMBER_OFFSET(base, member) \
   ((uint8_t *) &(base).member - (uint8_t *) &(base))

/* Register the scalar member `field_name` of the object `struct_name` as a
 * counter of `query`. The counter is named after the member, and its
 * offset/size are taken from the member's position and size in the object.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter(                                          \
      query,                                                               \
      #field_name,                                                         \
      MDAPI_MEMBER_OFFSET(struct_name, field_name),                        \
      sizeof((struct_name).field_name),                                    \
      GEN_PERF_COUNTER_DATA_TYPE_##type_name)

/* Register element `idx` of the array member `field_name` as a counter of
 * `query`. The name is the member name with the index appended, built with
 * ralloc_asprintf() on `ctx`.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter(                                          \
      query,                                                               \
      ralloc_asprintf(ctx, "%s%i", #field_name, idx),                      \
      MDAPI_MEMBER_OFFSET(struct_name, field_name[idx]),                   \
      sizeof((struct_name).field_name[0]),                                 \
      GEN_PERF_COUNTER_DATA_TYPE_##type_name)

244 brw_perf_query_register_mdapi_oa_query(struct brw_context
*brw
)
246 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
247 struct gen_perf
*perf
= brw
->perfquery
.perf
;
248 struct gen_perf_query_info
*query
= NULL
;
250 /* MDAPI requires different structures for pretty much every generation
251 * (right now we have definitions for gen 7 to 11).
253 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 11))
256 switch (devinfo
->gen
) {
258 query
= gen_perf_query_append_query_info(perf
, 1 + 45 + 16 + 7);
259 query
->oa_format
= I915_OA_FORMAT_A45_B8_C8
;
261 struct mdapi_gen7_metrics metric_data
;
262 query
->data_size
= sizeof(metric_data
);
264 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
265 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.ACounters
); i
++) {
266 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
267 metric_data
, ACounters
, i
, UINT64
);
269 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NOACounters
); i
++) {
270 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
271 metric_data
, NOACounters
, i
, UINT64
);
273 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
274 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
275 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
276 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
277 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
278 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
279 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
283 query
= gen_perf_query_append_query_info(perf
, 2 + 36 + 16 + 16);
284 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
286 struct mdapi_gen8_metrics metric_data
;
287 query
->data_size
= sizeof(metric_data
);
289 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
290 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
291 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
292 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
293 metric_data
, OaCntr
, i
, UINT64
);
295 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
296 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
297 metric_data
, NoaCntr
, i
, UINT64
);
299 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
300 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
301 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
302 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
303 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
304 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
305 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
306 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
307 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
308 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
309 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
310 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
311 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
312 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
313 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
314 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
320 query
= gen_perf_query_append_query_info(perf
, 2 + 36 + 16 + 16 + 16 + 2);
321 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
323 struct mdapi_gen9_metrics metric_data
;
324 query
->data_size
= sizeof(metric_data
);
326 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
327 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
328 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
329 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
330 metric_data
, OaCntr
, i
, UINT64
);
332 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
333 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
334 metric_data
, NoaCntr
, i
, UINT64
);
336 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
337 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
338 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
339 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
340 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
341 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
342 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
343 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
344 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
345 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
346 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
347 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
348 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
349 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
350 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
351 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
352 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.UserCntr
); i
++) {
353 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
354 metric_data
, UserCntr
, i
, UINT64
);
356 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UserCntrCfgId
, UINT32
);
357 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved4
, UINT32
);
361 unreachable("Unsupported gen");
365 query
->kind
= GEN_PERF_QUERY_TYPE_RAW
;
366 query
->name
= "Intel_Raw_Hardware_Counters_Set_0_Query";
367 /* Guid has to matches with MDAPI's. */
368 query
->guid
= "2f01b241-7014-42a7-9eb6-a925cad3daba";
371 /* Accumulation buffer offsets copied from an actual query... */
372 const struct gen_perf_query_info
*copy_query
=
373 &brw
->perfquery
.perf
->queries
[0];
375 query
->gpu_time_offset
= copy_query
->gpu_time_offset
;
376 query
->gpu_clock_offset
= copy_query
->gpu_clock_offset
;
377 query
->a_offset
= copy_query
->a_offset
;
378 query
->b_offset
= copy_query
->b_offset
;
379 query
->c_offset
= copy_query
->c_offset
;
384 brw_perf_query_register_mdapi_statistic_query(struct brw_context
*brw
)
386 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
388 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 9))
391 struct gen_perf_query_info
*query
=
392 gen_perf_query_append_query_info(brw
->perfquery
.perf
, MAX_STAT_COUNTERS
);
394 query
->kind
= GEN_PERF_QUERY_TYPE_PIPELINE
;
395 query
->name
= "Intel_Raw_Pipeline_Statistics_Query";
397 /* The order has to match mdapi_pipeline_metrics. */
398 gen_perf_query_info_add_basic_stat_reg(query
, IA_VERTICES_COUNT
,
399 "N vertices submitted");
400 gen_perf_query_info_add_basic_stat_reg(query
, IA_PRIMITIVES_COUNT
,
401 "N primitives submitted");
402 gen_perf_query_info_add_basic_stat_reg(query
, VS_INVOCATION_COUNT
,
403 "N vertex shader invocations");
404 gen_perf_query_info_add_basic_stat_reg(query
, GS_INVOCATION_COUNT
,
405 "N geometry shader invocations");
406 gen_perf_query_info_add_basic_stat_reg(query
, GS_PRIMITIVES_COUNT
,
407 "N geometry shader primitives emitted");
408 gen_perf_query_info_add_basic_stat_reg(query
, CL_INVOCATION_COUNT
,
409 "N primitives entering clipping");
410 gen_perf_query_info_add_basic_stat_reg(query
, CL_PRIMITIVES_COUNT
,
411 "N primitives leaving clipping");
412 if (devinfo
->is_haswell
|| devinfo
->gen
== 8) {
413 gen_perf_query_info_add_stat_reg(query
, PS_INVOCATION_COUNT
, 1, 4,
414 "N fragment shader invocations",
415 "N fragment shader invocations");
417 gen_perf_query_info_add_basic_stat_reg(query
, PS_INVOCATION_COUNT
,
418 "N fragment shader invocations");
420 gen_perf_query_info_add_basic_stat_reg(query
, HS_INVOCATION_COUNT
,
421 "N TCS shader invocations");
422 gen_perf_query_info_add_basic_stat_reg(query
, DS_INVOCATION_COUNT
,
423 "N TES shader invocations");
424 if (devinfo
->gen
>= 7) {
425 gen_perf_query_info_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
426 "N compute shader invocations");
429 query
->data_size
= sizeof(uint64_t) * query
->n_counters
;