/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

25 #include "gen_perf_mdapi.h"
26 #include "gen_perf_private.h"
27 #include "gen_perf_regs.h"
29 #include "dev/gen_device_info.h"
31 #include <drm-uapi/i915_drm.h>
35 gen_perf_query_result_write_mdapi(void *data
, uint32_t data_size
,
36 const struct gen_device_info
*devinfo
,
37 const struct gen_perf_query_result
*result
,
38 uint64_t freq_start
, uint64_t freq_end
)
40 switch (devinfo
->gen
) {
42 struct gen7_mdapi_metrics
*mdapi_data
= (struct gen7_mdapi_metrics
*) data
;
44 if (data_size
< sizeof(*mdapi_data
))
47 assert(devinfo
->is_haswell
);
49 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->ACounters
); i
++)
50 mdapi_data
->ACounters
[i
] = result
->accumulator
[1 + i
];
52 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NOACounters
); i
++) {
53 mdapi_data
->NOACounters
[i
] =
54 result
->accumulator
[1 + ARRAY_SIZE(mdapi_data
->ACounters
) + i
];
57 mdapi_data
->ReportsCount
= result
->reports_accumulated
;
58 mdapi_data
->TotalTime
=
59 gen_device_info_timebase_scale(devinfo
, result
->accumulator
[0]);
60 mdapi_data
->CoreFrequency
= freq_end
;
61 mdapi_data
->CoreFrequencyChanged
= freq_end
!= freq_start
;
62 mdapi_data
->SplitOccured
= result
->query_disjoint
;
63 return sizeof(*mdapi_data
);
66 struct gen8_mdapi_metrics
*mdapi_data
= (struct gen8_mdapi_metrics
*) data
;
68 if (data_size
< sizeof(*mdapi_data
))
71 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
72 mdapi_data
->OaCntr
[i
] = result
->accumulator
[2 + i
];
73 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
74 mdapi_data
->NoaCntr
[i
] =
75 result
->accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
78 mdapi_data
->ReportId
= result
->hw_id
;
79 mdapi_data
->ReportsCount
= result
->reports_accumulated
;
80 mdapi_data
->TotalTime
=
81 gen_device_info_timebase_scale(devinfo
, result
->accumulator
[0]);
82 mdapi_data
->BeginTimestamp
=
83 gen_device_info_timebase_scale(devinfo
, result
->begin_timestamp
);
84 mdapi_data
->GPUTicks
= result
->accumulator
[1];
85 mdapi_data
->CoreFrequency
= freq_end
;
86 mdapi_data
->CoreFrequencyChanged
= freq_end
!= freq_start
;
87 mdapi_data
->SliceFrequency
=
88 (result
->slice_frequency
[0] + result
->slice_frequency
[1]) / 2ULL;
89 mdapi_data
->UnsliceFrequency
=
90 (result
->unslice_frequency
[0] + result
->unslice_frequency
[1]) / 2ULL;
91 mdapi_data
->SplitOccured
= result
->query_disjoint
;
92 return sizeof(*mdapi_data
);
97 struct gen9_mdapi_metrics
*mdapi_data
= (struct gen9_mdapi_metrics
*) data
;
99 if (data_size
< sizeof(*mdapi_data
))
102 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
103 mdapi_data
->OaCntr
[i
] = result
->accumulator
[2 + i
];
104 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
105 mdapi_data
->NoaCntr
[i
] =
106 result
->accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
109 mdapi_data
->ReportId
= result
->hw_id
;
110 mdapi_data
->ReportsCount
= result
->reports_accumulated
;
111 mdapi_data
->TotalTime
=
112 gen_device_info_timebase_scale(devinfo
, result
->accumulator
[0]);
113 mdapi_data
->BeginTimestamp
=
114 gen_device_info_timebase_scale(devinfo
, result
->begin_timestamp
);
115 mdapi_data
->GPUTicks
= result
->accumulator
[1];
116 mdapi_data
->CoreFrequency
= freq_end
;
117 mdapi_data
->CoreFrequencyChanged
= freq_end
!= freq_start
;
118 mdapi_data
->SliceFrequency
=
119 (result
->slice_frequency
[0] + result
->slice_frequency
[1]) / 2ULL;
120 mdapi_data
->UnsliceFrequency
=
121 (result
->unslice_frequency
[0] + result
->unslice_frequency
[1]) / 2ULL;
122 mdapi_data
->SplitOccured
= result
->query_disjoint
;
123 return sizeof(*mdapi_data
);
126 unreachable("unexpected gen");
131 gen_perf_register_mdapi_statistic_query(struct gen_perf_config
*perf_cfg
,
132 const struct gen_device_info
*devinfo
)
134 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 11))
137 struct gen_perf_query_info
*query
=
138 gen_perf_append_query_info(perf_cfg
, MAX_STAT_COUNTERS
);
140 query
->kind
= GEN_PERF_QUERY_TYPE_PIPELINE
;
141 query
->name
= "Intel_Raw_Pipeline_Statistics_Query";
143 /* The order has to match mdapi_pipeline_metrics. */
144 gen_perf_query_add_basic_stat_reg(query
, IA_VERTICES_COUNT
,
145 "N vertices submitted");
146 gen_perf_query_add_basic_stat_reg(query
, IA_PRIMITIVES_COUNT
,
147 "N primitives submitted");
148 gen_perf_query_add_basic_stat_reg(query
, VS_INVOCATION_COUNT
,
149 "N vertex shader invocations");
150 gen_perf_query_add_basic_stat_reg(query
, GS_INVOCATION_COUNT
,
151 "N geometry shader invocations");
152 gen_perf_query_add_basic_stat_reg(query
, GS_PRIMITIVES_COUNT
,
153 "N geometry shader primitives emitted");
154 gen_perf_query_add_basic_stat_reg(query
, CL_INVOCATION_COUNT
,
155 "N primitives entering clipping");
156 gen_perf_query_add_basic_stat_reg(query
, CL_PRIMITIVES_COUNT
,
157 "N primitives leaving clipping");
158 if (devinfo
->is_haswell
|| devinfo
->gen
== 8) {
159 gen_perf_query_add_stat_reg(query
, PS_INVOCATION_COUNT
, 1, 4,
160 "N fragment shader invocations",
161 "N fragment shader invocations");
163 gen_perf_query_add_basic_stat_reg(query
, PS_INVOCATION_COUNT
,
164 "N fragment shader invocations");
166 gen_perf_query_add_basic_stat_reg(query
, HS_INVOCATION_COUNT
,
167 "N TCS shader invocations");
168 gen_perf_query_add_basic_stat_reg(query
, DS_INVOCATION_COUNT
,
169 "N TES shader invocations");
170 if (devinfo
->gen
>= 7) {
171 gen_perf_query_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
172 "N compute shader invocations");
175 if (devinfo
->gen
>= 10) {
176 /* Reuse existing CS invocation register until we can expose this new
179 gen_perf_query_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
183 query
->data_size
= sizeof(uint64_t) * query
->n_counters
;
187 fill_mdapi_perf_query_counter(struct gen_perf_query_info
*query
,
189 uint32_t data_offset
,
191 enum gen_perf_counter_data_type data_type
)
193 struct gen_perf_query_counter
*counter
= &query
->counters
[query
->n_counters
];
195 assert(query
->n_counters
<= query
->max_counters
);
197 counter
->name
= name
;
198 counter
->desc
= "Raw counter value";
199 counter
->type
= GEN_PERF_COUNTER_TYPE_RAW
;
200 counter
->data_type
= data_type
;
201 counter
->offset
= data_offset
;
205 assert(counter
->offset
+ gen_perf_query_counter_get_size(counter
) <= query
->data_size
);
/* Register the scalar member `field_name` of the struct object
 * `struct_name` as a counter of `query`. The counter is named after the
 * field, its offset is the byte distance from the start of the object to
 * the field, and `type_name` selects the GEN_PERF_COUNTER_DATA_TYPE_* value.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter(query, #field_name,                       \
                                 (uint8_t *) &(struct_name).field_name -   \
                                 (uint8_t *) &(struct_name),               \
                                 sizeof((struct_name).field_name),         \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)

/* Same as MDAPI_QUERY_ADD_COUNTER for element `idx` of the array member
 * `field_name`. The counter name is the field name followed by the index,
 * formatted with ralloc_asprintf() using `ctx` as its first argument.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter(query,                                                    \
                                 ralloc_asprintf(ctx, "%s%i", #field_name, idx),           \
                                 (uint8_t *) &(struct_name).field_name[idx] -              \
                                 (uint8_t *) &(struct_name),                               \
                                 sizeof((struct_name).field_name[0]),                      \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
223 gen_perf_register_mdapi_oa_query(struct gen_perf_config
*perf
,
224 const struct gen_device_info
*devinfo
)
226 struct gen_perf_query_info
*query
= NULL
;
228 /* MDAPI requires different structures for pretty much every generation
229 * (right now we have definitions for gen 7 to 11).
231 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 11))
234 switch (devinfo
->gen
) {
236 query
= gen_perf_append_query_info(perf
, 1 + 45 + 16 + 7);
237 query
->oa_format
= I915_OA_FORMAT_A45_B8_C8
;
239 struct gen7_mdapi_metrics metric_data
;
240 query
->data_size
= sizeof(metric_data
);
242 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
243 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.ACounters
); i
++) {
244 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
245 metric_data
, ACounters
, i
, UINT64
);
247 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NOACounters
); i
++) {
248 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
249 metric_data
, NOACounters
, i
, UINT64
);
251 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
252 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
253 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
254 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
255 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
256 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
257 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
261 query
= gen_perf_append_query_info(perf
, 2 + 36 + 16 + 16);
262 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
264 struct gen8_mdapi_metrics metric_data
;
265 query
->data_size
= sizeof(metric_data
);
267 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
268 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
269 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
270 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
271 metric_data
, OaCntr
, i
, UINT64
);
273 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
274 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
275 metric_data
, NoaCntr
, i
, UINT64
);
277 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
278 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
279 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
280 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
281 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
282 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
283 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
284 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
285 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
286 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
287 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
288 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
289 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
290 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
291 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
292 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
298 query
= gen_perf_append_query_info(perf
, 2 + 36 + 16 + 16 + 16 + 2);
299 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
301 struct gen9_mdapi_metrics metric_data
;
302 query
->data_size
= sizeof(metric_data
);
304 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
305 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
306 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
307 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
308 metric_data
, OaCntr
, i
, UINT64
);
310 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
311 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
312 metric_data
, NoaCntr
, i
, UINT64
);
314 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
315 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
316 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
317 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
318 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
319 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
320 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
321 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
322 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
323 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
324 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
325 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
326 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
327 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
328 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
329 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
330 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.UserCntr
); i
++) {
331 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
332 metric_data
, UserCntr
, i
, UINT64
);
334 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UserCntrCfgId
, UINT32
);
335 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved4
, UINT32
);
339 unreachable("Unsupported gen");
343 query
->kind
= GEN_PERF_QUERY_TYPE_RAW
;
344 query
->name
= "Intel_Raw_Hardware_Counters_Set_0_Query";
345 query
->guid
= GEN_PERF_QUERY_GUID_MDAPI
;
348 /* Accumulation buffer offsets copied from an actual query... */
349 const struct gen_perf_query_info
*copy_query
=
352 query
->gpu_time_offset
= copy_query
->gpu_time_offset
;
353 query
->gpu_clock_offset
= copy_query
->gpu_clock_offset
;
354 query
->a_offset
= copy_query
->a_offset
;
355 query
->b_offset
= copy_query
->b_offset
;
356 query
->c_offset
= copy_query
->c_offset
;