/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
/**
 * Data format expected by MDAPI.
 */

/*
 * Gen7 report layout.
 *
 * The byte offset and size of every field are what gets registered as a raw
 * counter by brw_perf_query_register_mdapi_oa_query(), and the field names
 * are stringified into counter names, so fields must not be reordered,
 * resized or renamed (the "Occured" spelling included).
 */
struct mdapi_gen7_metrics {
   uint64_t TotalTime;

   uint64_t ACounters[45];
   uint64_t NOACounters[16];

   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;
   uint32_t CoreFrequencyChanged;
   uint64_t CoreFrequency;
   uint32_t ReportId;
   uint32_t ReportsCount;
};
/*
 * Array sizes of the gen8+ report layout.  GTDI_QUERY_BDW_METRICS_OA_40b_COUNT
 * is not referenced by the code visible in this file; it is kept as-is.
 */
#define GTDI_QUERY_BDW_METRICS_OA_COUNT         36
#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT     32
#define GTDI_QUERY_BDW_METRICS_NOA_COUNT        16

/*
 * Gen8 report layout.
 *
 * Field order, sizes and names are consumed by
 * brw_perf_query_register_mdapi_oa_query() (offsets and stringified names),
 * so they must stay exactly as they are.
 */
struct mdapi_gen8_metrics {
   uint64_t TotalTime;
   uint64_t GPUTicks;
   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
   uint64_t BeginTimestamp;
   uint64_t Reserved1;
   uint64_t Reserved2;
   uint32_t Reserved3;
   uint32_t OverrunOccured;
   uint64_t MarkerUser;
   uint64_t MarkerDriver;

   uint64_t SliceFrequency;
   uint64_t UnsliceFrequency;
   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;
   uint32_t CoreFrequencyChanged;
   uint64_t CoreFrequency;
   uint32_t ReportId;
   uint32_t ReportsCount;
};

73 #define GTDI_MAX_READ_REGS 16
75 struct mdapi_gen9_metrics
{
78 uint64_t OaCntr
[GTDI_QUERY_BDW_METRICS_OA_COUNT
];
79 uint64_t NoaCntr
[GTDI_QUERY_BDW_METRICS_NOA_COUNT
];
80 uint64_t BeginTimestamp
;
84 uint32_t OverrunOccured
;
86 uint64_t MarkerDriver
;
88 uint64_t SliceFrequency
;
89 uint64_t UnsliceFrequency
;
90 uint64_t PerfCounter1
;
91 uint64_t PerfCounter2
;
92 uint32_t SplitOccured
;
93 uint32_t CoreFrequencyChanged
;
94 uint64_t CoreFrequency
;
96 uint32_t ReportsCount
;
98 uint64_t UserCntr
[GTDI_MAX_READ_REGS
];
99 uint32_t UserCntrCfgId
;
/*
 * Layout of the pipeline statistics query result: one 64-bit value per
 * statistic, in the same order in which
 * brw_perf_query_register_mdapi_statistic_query() adds its counters
 * (IA vertices first, compute shader invocations last).
 */
struct mdapi_pipeline_metrics {
   uint64_t IAVertices;
   uint64_t IAPrimitives;
   uint64_t VSInvocations;
   uint64_t GSInvocations;
   uint64_t GSPrimitives;
   uint64_t CInvocations;
   uint64_t CPrimitives;
   uint64_t PSInvocations;
   uint64_t HSInvocations;
   uint64_t DSInvocations;
   uint64_t CSInvocations;
};

118 brw_perf_query_get_mdapi_oa_data(struct brw_context
*brw
,
119 struct brw_perf_query_object
*obj
,
123 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
125 switch (devinfo
->gen
) {
127 struct mdapi_gen7_metrics
*mdapi_data
= (struct mdapi_gen7_metrics
*) data
;
129 if (data_size
< sizeof(*mdapi_data
))
132 assert(devinfo
->is_haswell
);
134 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->ACounters
); i
++)
135 mdapi_data
->ACounters
[i
] = obj
->oa
.accumulator
[1 + i
];
137 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NOACounters
); i
++) {
138 mdapi_data
->NOACounters
[i
] =
139 obj
->oa
.accumulator
[1 + ARRAY_SIZE(mdapi_data
->ACounters
) + i
];
142 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
143 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
144 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
145 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
146 return sizeof(*mdapi_data
);
149 struct mdapi_gen8_metrics
*mdapi_data
= (struct mdapi_gen8_metrics
*) data
;
151 if (data_size
< sizeof(*mdapi_data
))
154 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
155 mdapi_data
->OaCntr
[i
] = obj
->oa
.accumulator
[2 + i
];
156 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
157 mdapi_data
->NoaCntr
[i
] =
158 obj
->oa
.accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
161 mdapi_data
->ReportId
= obj
->oa
.hw_id
;
162 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
163 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
164 mdapi_data
->GPUTicks
= obj
->oa
.accumulator
[1];
165 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
166 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
167 mdapi_data
->SliceFrequency
= (obj
->oa
.slice_frequency
[0] + obj
->oa
.slice_frequency
[1]) / 2ULL;
168 mdapi_data
->UnsliceFrequency
= (obj
->oa
.unslice_frequency
[0] + obj
->oa
.unslice_frequency
[1]) / 2ULL;
170 return sizeof(*mdapi_data
);
175 struct mdapi_gen9_metrics
*mdapi_data
= (struct mdapi_gen9_metrics
*) data
;
177 if (data_size
< sizeof(*mdapi_data
))
180 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
181 mdapi_data
->OaCntr
[i
] = obj
->oa
.accumulator
[2 + i
];
182 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
183 mdapi_data
->NoaCntr
[i
] =
184 obj
->oa
.accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
187 mdapi_data
->ReportId
= obj
->oa
.hw_id
;
188 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
189 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
190 mdapi_data
->GPUTicks
= obj
->oa
.accumulator
[1];
191 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
192 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
193 mdapi_data
->SliceFrequency
= (obj
->oa
.slice_frequency
[0] + obj
->oa
.slice_frequency
[1]) / 2ULL;
194 mdapi_data
->UnsliceFrequency
= (obj
->oa
.unslice_frequency
[0] + obj
->oa
.unslice_frequency
[1]) / 2ULL;
196 return sizeof(*mdapi_data
);
199 unreachable("unexpected gen");
206 fill_mdapi_perf_query_counter(struct brw_perf_query_info
*query
,
208 uint32_t data_offset
,
212 struct brw_perf_query_counter
*counter
= &query
->counters
[query
->n_counters
];
214 counter
->name
= name
;
215 counter
->desc
= "Raw counter value";
216 counter
->data_type
= data_type
;
217 counter
->offset
= data_offset
;
218 counter
->size
= data_size
;
219 assert(counter
->offset
+ counter
->size
<= query
->data_size
);
/*
 * Register the scalar member `field_name` of the variable `struct_name` as
 * a counter of `query`.  The counter is named after the member, its offset
 * is the member's byte offset inside `struct_name`, and `type_name` selects
 * the GL_PERFQUERY_COUNTER_DATA_<type_name>_INTEL data type.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter(query, #field_name,                       \
                                 (uint8_t *) &struct_name.field_name -     \
                                 (uint8_t *) &struct_name,                 \
                                 sizeof(struct_name.field_name),           \
                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)

/*
 * Same as MDAPI_QUERY_ADD_COUNTER for element `idx` of an array member.
 * The counter name is the member name followed by the index, formatted
 * with ralloc_asprintf() using `ctx` as its first argument.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter(query,                                 \
                                 ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
                                 (uint8_t *) &struct_name.field_name[idx] - \
                                 (uint8_t *) &struct_name,              \
                                 sizeof(struct_name.field_name[0]),     \
                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)

239 brw_perf_query_register_mdapi_oa_query(struct brw_context
*brw
)
241 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
243 /* MDAPI requires different structures for pretty much every generation
244 * (right now we have definitions for gen 7 to 11).
246 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 11))
249 struct brw_perf_query_info
*query
= brw_perf_query_append_query_info(brw
);
251 query
->kind
= OA_COUNTERS_RAW
;
252 query
->name
= "Intel_Raw_Hardware_Counters_Set_0_Query";
253 /* Guid has to matches with MDAPI's. */
254 query
->guid
= "2f01b241-7014-42a7-9eb6-a925cad3daba";
255 query
->n_counters
= 0;
256 query
->oa_metrics_set_id
= 0; /* Set by MDAPI */
259 switch (devinfo
->gen
) {
261 query
->oa_format
= I915_OA_FORMAT_A45_B8_C8
;
263 struct mdapi_gen7_metrics metric_data
;
264 query
->data_size
= sizeof(metric_data
);
266 n_counters
= 1 + 45 + 16 + 7;
268 rzalloc_array_size(brw
->perfquery
.queries
,
269 sizeof(*query
->counters
), n_counters
);
271 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
272 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.ACounters
); i
++) {
273 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
274 query
, metric_data
, ACounters
, i
, UINT64
);
276 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NOACounters
); i
++) {
277 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
278 query
, metric_data
, NOACounters
, i
, UINT64
);
280 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
281 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
282 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
283 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
284 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
285 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
286 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
290 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
292 struct mdapi_gen8_metrics metric_data
;
293 query
->data_size
= sizeof(metric_data
);
295 n_counters
= 2 + 36 + 16 + 16;
297 rzalloc_array_size(brw
->perfquery
.queries
,
298 sizeof(*query
->counters
), n_counters
);
300 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
301 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
302 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
303 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
304 query
, metric_data
, OaCntr
, i
, UINT64
);
306 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
307 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
308 query
, metric_data
, NoaCntr
, i
, UINT64
);
310 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
311 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
312 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
313 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
314 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
315 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
316 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
317 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
318 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
319 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
320 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
321 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
322 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
323 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
324 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
325 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
331 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
333 struct mdapi_gen9_metrics metric_data
;
334 query
->data_size
= sizeof(metric_data
);
336 n_counters
= 2 + 36 + 16 + 16 + 16 + 2;
338 rzalloc_array_size(brw
->perfquery
.queries
,
339 sizeof(*query
->counters
), n_counters
);
341 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
342 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
343 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
344 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
345 query
, metric_data
, OaCntr
, i
, UINT64
);
347 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
348 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
349 query
, metric_data
, NoaCntr
, i
, UINT64
);
351 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
352 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
353 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
354 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
355 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
356 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
357 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
358 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
359 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
360 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
361 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
362 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
363 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
364 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
365 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
366 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
367 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.UserCntr
); i
++) {
368 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw
->perfquery
.queries
,
369 query
, metric_data
, UserCntr
, i
, UINT64
);
371 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UserCntrCfgId
, UINT32
);
372 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved4
, UINT32
);
376 unreachable("Unsupported gen");
380 assert(query
->n_counters
<= n_counters
);
383 /* Accumulation buffer offsets copied from an actual query... */
384 const struct brw_perf_query_info
*copy_query
=
385 &brw
->perfquery
.queries
[0];
387 query
->gpu_time_offset
= copy_query
->gpu_time_offset
;
388 query
->gpu_clock_offset
= copy_query
->gpu_clock_offset
;
389 query
->a_offset
= copy_query
->a_offset
;
390 query
->b_offset
= copy_query
->b_offset
;
391 query
->c_offset
= copy_query
->c_offset
;
396 brw_perf_query_register_mdapi_statistic_query(struct brw_context
*brw
)
398 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
400 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 9))
403 struct brw_perf_query_info
*query
= brw_perf_query_append_query_info(brw
);
405 query
->kind
= PIPELINE_STATS
;
406 query
->name
= "Intel_Raw_Pipeline_Statistics_Query";
407 query
->n_counters
= 0;
409 rzalloc_array(brw
, struct brw_perf_query_counter
, MAX_STAT_COUNTERS
);
411 /* The order has to match mdapi_pipeline_metrics. */
412 brw_perf_query_info_add_basic_stat_reg(query
, IA_VERTICES_COUNT
,
413 "N vertices submitted");
414 brw_perf_query_info_add_basic_stat_reg(query
, IA_PRIMITIVES_COUNT
,
415 "N primitives submitted");
416 brw_perf_query_info_add_basic_stat_reg(query
, VS_INVOCATION_COUNT
,
417 "N vertex shader invocations");
418 brw_perf_query_info_add_basic_stat_reg(query
, GS_INVOCATION_COUNT
,
419 "N geometry shader invocations");
420 brw_perf_query_info_add_basic_stat_reg(query
, GS_PRIMITIVES_COUNT
,
421 "N geometry shader primitives emitted");
422 brw_perf_query_info_add_basic_stat_reg(query
, CL_INVOCATION_COUNT
,
423 "N primitives entering clipping");
424 brw_perf_query_info_add_basic_stat_reg(query
, CL_PRIMITIVES_COUNT
,
425 "N primitives leaving clipping");
426 if (devinfo
->is_haswell
|| devinfo
->gen
== 8) {
427 brw_perf_query_info_add_stat_reg(query
, PS_INVOCATION_COUNT
, 1, 4,
428 "N fragment shader invocations",
429 "N fragment shader invocations");
431 brw_perf_query_info_add_basic_stat_reg(query
, PS_INVOCATION_COUNT
,
432 "N fragment shader invocations");
434 brw_perf_query_info_add_basic_stat_reg(query
, HS_INVOCATION_COUNT
,
435 "N TCS shader invocations");
436 brw_perf_query_info_add_basic_stat_reg(query
, DS_INVOCATION_COUNT
,
437 "N TES shader invocations");
438 if (devinfo
->gen
>= 7) {
439 brw_perf_query_info_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
440 "N compute shader invocations");
443 query
->data_size
= sizeof(uint64_t) * query
->n_counters
;