2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
27 #include "perf/gen_perf.h"
28 #include "perf/gen_perf_mdapi.h"
/*
 * Copy the accumulated OA results held in obj->oa into the MDAPI metrics
 * structure that matches the device generation (devinfo->gen).
 *
 * brw  - context; supplies the device info and is passed on to
 *        brw_timebase_scale().
 * obj  - query object whose oa.accumulator, oa.gt_frequency,
 *        oa.slice_frequency, oa.unslice_frequency, oa.hw_id and
 *        oa.reports_accumulated fields are read.
 * data / data_size - destination buffer and its size; both are used below
 *        but their declarations are not visible in this listing.
 *
 * Each visible branch returns sizeof(*mdapi_data) after filling the buffer.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip), so
 * the return type, the remaining parameters, the case labels and the
 * statement executed when data_size is too small are not visible here.
 * Confirm them against the original file.
 */
31 brw_perf_query_get_mdapi_oa_data(struct brw_context
*brw
,
32 struct brw_perf_query_object
*obj
,
36 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
38 switch (devinfo
->gen
) {
/*
 * gen7_mdapi_metrics layout. The code asserts devinfo->is_haswell here.
 * accumulator[0] feeds TotalTime, ACounters start at accumulator[1] and
 * NOACounters follow directly after the ACounters.
 * NOTE(review): the action taken when the size check fails is not visible;
 * presumably an early return - confirm.
 */
40 struct gen7_mdapi_metrics
*mdapi_data
= (struct gen7_mdapi_metrics
*) data
;
42 if (data_size
< sizeof(*mdapi_data
))
45 assert(devinfo
->is_haswell
);
47 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->ACounters
); i
++)
48 mdapi_data
->ACounters
[i
] = obj
->oa
.accumulator
[1 + i
];
50 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NOACounters
); i
++) {
51 mdapi_data
->NOACounters
[i
] =
52 obj
->oa
.accumulator
[1 + ARRAY_SIZE(mdapi_data
->ACounters
) + i
];
/* Summary fields: gt_frequency[1] is reported as the core frequency and
 * the changed flag is set when gt_frequency[0] and [1] differ. */
55 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
56 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
57 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
58 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
59 return sizeof(*mdapi_data
);
/*
 * gen8_mdapi_metrics layout. accumulator[0] feeds TotalTime,
 * accumulator[1] feeds GPUTicks, OaCntr starts at accumulator[2] and
 * NoaCntr follows directly after the OaCntr entries.
 */
62 struct gen8_mdapi_metrics
*mdapi_data
= (struct gen8_mdapi_metrics
*) data
;
64 if (data_size
< sizeof(*mdapi_data
))
67 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
68 mdapi_data
->OaCntr
[i
] = obj
->oa
.accumulator
[2 + i
];
69 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
70 mdapi_data
->NoaCntr
[i
] =
71 obj
->oa
.accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
74 mdapi_data
->ReportId
= obj
->oa
.hw_id
;
75 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
76 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
77 mdapi_data
->GPUTicks
= obj
->oa
.accumulator
[1];
78 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
79 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
/* Slice and unslice frequencies are reported as the mean of entries [0]
 * and [1] of the corresponding arrays. */
80 mdapi_data
->SliceFrequency
= (obj
->oa
.slice_frequency
[0] + obj
->oa
.slice_frequency
[1]) / 2ULL;
81 mdapi_data
->UnsliceFrequency
= (obj
->oa
.unslice_frequency
[0] + obj
->oa
.unslice_frequency
[1]) / 2ULL;
83 return sizeof(*mdapi_data
);
/*
 * gen9_mdapi_metrics layout. The assignments mirror the gen8 branch above,
 * only the destination structure type differs.
 * NOTE(review): which generations share this branch is not visible in this
 * listing - confirm the case labels.
 */
88 struct gen9_mdapi_metrics
*mdapi_data
= (struct gen9_mdapi_metrics
*) data
;
90 if (data_size
< sizeof(*mdapi_data
))
93 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
94 mdapi_data
->OaCntr
[i
] = obj
->oa
.accumulator
[2 + i
];
95 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
96 mdapi_data
->NoaCntr
[i
] =
97 obj
->oa
.accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
100 mdapi_data
->ReportId
= obj
->oa
.hw_id
;
101 mdapi_data
->ReportsCount
= obj
->oa
.reports_accumulated
;
102 mdapi_data
->TotalTime
= brw_timebase_scale(brw
, obj
->oa
.accumulator
[0]);
103 mdapi_data
->GPUTicks
= obj
->oa
.accumulator
[1];
104 mdapi_data
->CoreFrequency
= obj
->oa
.gt_frequency
[1];
105 mdapi_data
->CoreFrequencyChanged
= obj
->oa
.gt_frequency
[0] != obj
->oa
.gt_frequency
[1];
106 mdapi_data
->SliceFrequency
= (obj
->oa
.slice_frequency
[0] + obj
->oa
.slice_frequency
[1]) / 2ULL;
107 mdapi_data
->UnsliceFrequency
= (obj
->oa
.unslice_frequency
[0] + obj
->oa
.unslice_frequency
[1]) / 2ULL;
109 return sizeof(*mdapi_data
);
/* Any generation not handled above is treated as a programming error. */
112 unreachable("unexpected gen");
/*
 * Fill in the counter slot at index query->n_counters as a raw counter.
 *
 * query       - query description whose counters array is written.
 * name        - stored as the counter name.
 * data_offset - stored as the counter offset.
 * data_size   - stored as the counter size.
 * data_type   - stored as the counter data type.
 *
 * The description is always the fixed raw-counter string and the type is
 * always GEN_PERF_COUNTER_TYPE_RAW. The final assert checks that
 * offset + size stays within query->data_size, so the caller has to set
 * query->data_size before adding counters.
 *
 * NOTE(review): the declarations of name and data_size, the return type and
 * whatever advances query->n_counters are not visible in this listing.
 *
 * NOTE(review): the first assert accepts n_counters == max_counters, yet the
 * slot counters[n_counters] is the one being written. If the array holds
 * max_counters entries, a strict less-than comparison looks intended -
 * confirm against the allocation in gen_perf_query_append_query_info().
 */
119 fill_mdapi_perf_query_counter(struct gen_perf_query_info
*query
,
121 uint32_t data_offset
,
123 enum gen_perf_counter_data_type data_type
)
125 struct gen_perf_query_counter
*counter
= &query
->counters
[query
->n_counters
];
127 assert(query
->n_counters
<= query
->max_counters
);
129 counter
->name
= name
;
130 counter
->desc
= "Raw counter value";
131 counter
->type
= GEN_PERF_COUNTER_TYPE_RAW
;
132 counter
->data_type
= data_type
;
133 counter
->offset
= data_offset
;
134 counter
->size
= data_size
;
135 assert(counter
->offset
+ counter
->size
<= query
->data_size
);
/*
 * Register one scalar field of an MDAPI metrics structure as a raw counter.
 *
 * struct_name is an object (it is used with the member access operator), not
 * a type. The counter name is the stringified field name, the offset is the
 * byte distance between the field and the start of the object, the size is
 * sizeof the field, and type_name is pasted onto
 * GEN_PERF_COUNTER_DATA_TYPE_ to form the data type.
 */
140 #define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
141 fill_mdapi_perf_query_counter(query, #field_name, \
142 (uint8_t *) &struct_name.field_name - \
143 (uint8_t *) &struct_name, \
144 sizeof(struct_name.field_name), \
145 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
/*
 * Register element idx of an array field of an MDAPI metrics structure as a
 * raw counter.
 *
 * The counter name is built with ralloc_asprintf(ctx, ...) as the field name
 * followed by the decimal index, so ctx is the ralloc parent of that string.
 * The offset is the byte distance between element idx and the start of the
 * object, and the size is sizeof one element of the array. Note that idx is
 * expanded twice by this macro.
 */
146 #define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
147 fill_mdapi_perf_query_counter(query, \
148 ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
149 (uint8_t *) &struct_name.field_name[idx] - \
150 (uint8_t *) &struct_name, \
151 sizeof(struct_name.field_name[0]), \
152 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
/*
 * Append the raw hardware counter query used by MDAPI to the list of
 * queries held in brw->perfquery.perf.
 *
 * Only generations 7 to 11 pass the range check at the top. For each
 * supported layout the function:
 *   - appends a query info sized for that layout (the counter count is
 *     written as a sum at each gen_perf_query_append_query_info() call),
 *   - selects the OA report format (A45_B8_C8 for the gen7 structure,
 *     A32u40_A4u32_B8_C8 for the gen8 and gen9 structures),
 *   - sets query->data_size to the size of the matching metrics structure
 *     before any counter is added, since the counter helper asserts each
 *     counter against query->data_size,
 *   - registers one counter per structure field, in the order written
 *     here. Array fields use perf->queries as the ralloc context for
 *     their generated names.
 * Afterwards the query kind, name and guid are set and the accumulation
 * buffer offsets are copied from queries[0].
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip), so
 * the return type, the case labels, the action taken when the generation
 * check fails and the closing braces are not visible. The block comment
 * that begins with the words MDAPI requires different structures also has
 * no visible terminator here. Confirm all of these against the original.
 */
155 brw_perf_query_register_mdapi_oa_query(struct brw_context
*brw
)
157 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
158 struct gen_perf
*perf
= brw
->perfquery
.perf
;
159 struct gen_perf_query_info
*query
= NULL
;
161 /* MDAPI requires different structures for pretty much every generation
162 * (right now we have definitions for gen 7 to 11).
164 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 11))
167 switch (devinfo
->gen
) {
169 query
= gen_perf_query_append_query_info(perf
, 1 + 45 + 16 + 7);
170 query
->oa_format
= I915_OA_FORMAT_A45_B8_C8
;
172 struct gen7_mdapi_metrics metric_data
;
173 query
->data_size
= sizeof(metric_data
);
175 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
176 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.ACounters
); i
++) {
177 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
178 metric_data
, ACounters
, i
, UINT64
);
180 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NOACounters
); i
++) {
181 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
182 metric_data
, NOACounters
, i
, UINT64
);
184 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
185 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
186 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
187 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
188 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
189 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
190 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
194 query
= gen_perf_query_append_query_info(perf
, 2 + 36 + 16 + 16);
195 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
197 struct gen8_mdapi_metrics metric_data
;
198 query
->data_size
= sizeof(metric_data
);
200 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
201 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
202 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
203 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
204 metric_data
, OaCntr
, i
, UINT64
);
206 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
207 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
208 metric_data
, NoaCntr
, i
, UINT64
);
210 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
211 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
212 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
213 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
214 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
215 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
216 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
217 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
218 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
219 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
220 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
221 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
222 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
223 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
224 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
225 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
231 query
= gen_perf_query_append_query_info(perf
, 2 + 36 + 16 + 16 + 16 + 2);
232 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
234 struct gen9_mdapi_metrics metric_data
;
235 query
->data_size
= sizeof(metric_data
);
237 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
238 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
239 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
240 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
241 metric_data
, OaCntr
, i
, UINT64
);
243 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
244 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
245 metric_data
, NoaCntr
, i
, UINT64
);
247 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
248 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
249 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
250 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
251 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
252 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
253 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
254 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
255 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
256 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
257 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
258 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
259 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
260 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
261 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
262 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
263 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.UserCntr
); i
++) {
264 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
265 metric_data
, UserCntr
, i
, UINT64
);
267 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UserCntrCfgId
, UINT32
);
268 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved4
, UINT32
);
272 unreachable("Unsupported gen");
276 query
->kind
= GEN_PERF_QUERY_TYPE_RAW
;
277 query
->name
= "Intel_Raw_Hardware_Counters_Set_0_Query";
278 /* Guid has to match MDAPI's. */
279 query
->guid
= "2f01b241-7014-42a7-9eb6-a925cad3daba";
/*
 * NOTE(review): queries[0] is read below without a visible check that an
 * earlier query exists. If this is the first query ever appended,
 * queries[0] would presumably be the entry created above - confirm the
 * registration order with the caller.
 */
282 /* Accumulation buffer offsets copied from an actual query... */
283 const struct gen_perf_query_info
*copy_query
=
284 &brw
->perfquery
.perf
->queries
[0];
286 query
->gpu_time_offset
= copy_query
->gpu_time_offset
;
287 query
->gpu_clock_offset
= copy_query
->gpu_clock_offset
;
288 query
->a_offset
= copy_query
->a_offset
;
289 query
->b_offset
= copy_query
->b_offset
;
290 query
->c_offset
= copy_query
->c_offset
;
/*
 * Append the raw pipeline statistics query used by MDAPI to the list of
 * queries held in brw->perfquery.perf.
 *
 * Only generations 7 to 9 pass the range check at the top. The query is
 * appended with room for MAX_STAT_COUNTERS counters, marked as a pipeline
 * query and given its MDAPI name. Statistic registers are then added in a
 * fixed order, and query->data_size is finally set to one uint64_t per
 * registered counter.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip), so
 * the return type, the action taken when the generation check fails and the
 * else keyword between the two PS_INVOCATION_COUNT registrations are not
 * visible. Confirm them against the original file.
 */
295 brw_perf_query_register_mdapi_statistic_query(struct brw_context
*brw
)
297 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
299 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 9))
302 struct gen_perf_query_info
*query
=
303 gen_perf_query_append_query_info(brw
->perfquery
.perf
, MAX_STAT_COUNTERS
);
305 query
->kind
= GEN_PERF_QUERY_TYPE_PIPELINE
;
306 query
->name
= "Intel_Raw_Pipeline_Statistics_Query";
308 /* The order has to match mdapi_pipeline_metrics. */
309 gen_perf_query_info_add_basic_stat_reg(query
, IA_VERTICES_COUNT
,
310 "N vertices submitted");
311 gen_perf_query_info_add_basic_stat_reg(query
, IA_PRIMITIVES_COUNT
,
312 "N primitives submitted");
313 gen_perf_query_info_add_basic_stat_reg(query
, VS_INVOCATION_COUNT
,
314 "N vertex shader invocations");
315 gen_perf_query_info_add_basic_stat_reg(query
, GS_INVOCATION_COUNT
,
316 "N geometry shader invocations");
317 gen_perf_query_info_add_basic_stat_reg(query
, GS_PRIMITIVES_COUNT
,
318 "N geometry shader primitives emitted");
319 gen_perf_query_info_add_basic_stat_reg(query
, CL_INVOCATION_COUNT
,
320 "N primitives entering clipping");
321 gen_perf_query_info_add_basic_stat_reg(query
, CL_PRIMITIVES_COUNT
,
322 "N primitives leaving clipping");
/*
 * On Haswell and gen8 the fragment shader invocation register is added
 * through the non-basic helper with the extra arguments 1 and 4.
 * NOTE(review): presumably these scale the raw value by 1/4 - confirm
 * against gen_perf_query_info_add_stat_reg().
 */
323 if (devinfo
->is_haswell
|| devinfo
->gen
== 8) {
324 gen_perf_query_info_add_stat_reg(query
, PS_INVOCATION_COUNT
, 1, 4,
325 "N fragment shader invocations",
326 "N fragment shader invocations");
328 gen_perf_query_info_add_basic_stat_reg(query
, PS_INVOCATION_COUNT
,
329 "N fragment shader invocations");
331 gen_perf_query_info_add_basic_stat_reg(query
, HS_INVOCATION_COUNT
,
332 "N TCS shader invocations");
333 gen_perf_query_info_add_basic_stat_reg(query
, DS_INVOCATION_COUNT
,
334 "N TES shader invocations");
/*
 * NOTE(review): the range check at the top already limits gen to 7..9, so
 * this condition looks always true once that check has passed - confirm
 * whether it is intentional.
 */
335 if (devinfo
->gen
>= 7) {
336 gen_perf_query_info_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
337 "N compute shader invocations");
/* Every registered statistic occupies one 64-bit slot in the result. */
340 query
->data_size
= sizeof(uint64_t) * query
->n_counters
;