2 * Copyright © 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "gen_perf_mdapi.h"
26 #include "gen_perf_private.h"
27 #include "gen_perf_regs.h"
29 #include "dev/gen_device_info.h"
31 #include <drm-uapi/i915_drm.h>
/*
 * Write an accumulated perf query result into the caller's buffer using the
 * MDAPI metrics struct selected by devinfo->gen.
 *
 *   data        destination buffer; it is cast to struct gen7_mdapi_metrics,
 *               struct gen8_mdapi_metrics or struct gen9_mdapi_metrics
 *               depending on the branch taken.
 *   data_size   size of that buffer; each branch compares it against
 *               sizeof(*mdapi_data).
 *   devinfo     provides the generation switched on and is handed to
 *               gen_device_info_timebase_scale().
 *   result      source of accumulator[], reports_accumulated, hw_id,
 *               begin_timestamp, slice_frequency[], unslice_frequency[] and
 *               query_disjoint.
 *   freq_start, freq_end
 *               CoreFrequency is set to freq_end; CoreFrequencyChanged
 *               records whether the two values differ.
 *
 * Each branch shown ends by returning sizeof(*mdapi_data) for the struct it
 * filled in.
 *
 * NOTE(review): this listing carries a stray numeral at the start of every
 * original line and has gaps — the return type, the braces, the case labels
 * and the statements guarded by the data_size checks are not visible here.
 * Restore them from the original file before relying on this text.
 */
35 gen_perf_query_result_write_mdapi(void *data
, uint32_t data_size
,
36 const struct gen_device_info
*devinfo
,
37 const struct gen_perf_query_result
*result
,
38 uint64_t freq_start
, uint64_t freq_end
)
40 switch (devinfo
->gen
) {
/* Branch that writes through struct gen7_mdapi_metrics. */
42 struct gen7_mdapi_metrics
*mdapi_data
= (struct gen7_mdapi_metrics
*) data
;
/* Buffer-size check; the statement it guards is not visible in this listing. */
44 if (data_size
< sizeof(*mdapi_data
))
/* This branch asserts that the device is Haswell. */
47 assert(devinfo
->is_haswell
);
/* ACounters[i] is read from accumulator[1 + i]. */
49 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->ACounters
); i
++)
50 mdapi_data
->ACounters
[i
] = result
->accumulator
[1 + i
];
/* NOACounters[] is read from the accumulator slots right after the A counters. */
52 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NOACounters
); i
++) {
53 mdapi_data
->NOACounters
[i
] =
54 result
->accumulator
[1 + ARRAY_SIZE(mdapi_data
->ACounters
) + i
];
57 mdapi_data
->ReportsCount
= result
->reports_accumulated
;
/* accumulator[0] goes through gen_device_info_timebase_scale() to form TotalTime. */
58 mdapi_data
->TotalTime
=
59 gen_device_info_timebase_scale(devinfo
, result
->accumulator
[0]);
/* The reported frequency is the end-of-query value; the start value only feeds the "changed" flag. */
60 mdapi_data
->CoreFrequency
= freq_end
;
61 mdapi_data
->CoreFrequencyChanged
= freq_end
!= freq_start
;
62 mdapi_data
->SplitOccured
= result
->query_disjoint
;
63 return sizeof(*mdapi_data
);
/* Branch that writes through struct gen8_mdapi_metrics. */
66 struct gen8_mdapi_metrics
*mdapi_data
= (struct gen8_mdapi_metrics
*) data
;
/* Buffer-size check; the statement it guards is not visible in this listing. */
68 if (data_size
< sizeof(*mdapi_data
))
/*
 * Accumulator usage in this branch: [0] feeds TotalTime, [1] feeds GPUTicks,
 * OaCntr[] starts at [2] and NoaCntr[] follows the OaCntr slots.
 */
71 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
72 mdapi_data
->OaCntr
[i
] = result
->accumulator
[2 + i
];
73 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
74 mdapi_data
->NoaCntr
[i
] =
75 result
->accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
78 mdapi_data
->ReportId
= result
->hw_id
;
79 mdapi_data
->ReportsCount
= result
->reports_accumulated
;
/* TotalTime and BeginTimestamp both go through the timebase scaling helper. */
80 mdapi_data
->TotalTime
=
81 gen_device_info_timebase_scale(devinfo
, result
->accumulator
[0]);
82 mdapi_data
->BeginTimestamp
=
83 gen_device_info_timebase_scale(devinfo
, result
->begin_timestamp
);
84 mdapi_data
->GPUTicks
= result
->accumulator
[1];
85 mdapi_data
->CoreFrequency
= freq_end
;
86 mdapi_data
->CoreFrequencyChanged
= freq_end
!= freq_start
;
/* Slice and unslice frequencies are reported as the integer mean of entries [0] and [1]. */
87 mdapi_data
->SliceFrequency
=
88 (result
->slice_frequency
[0] + result
->slice_frequency
[1]) / 2ULL;
89 mdapi_data
->UnsliceFrequency
=
90 (result
->unslice_frequency
[0] + result
->unslice_frequency
[1]) / 2ULL;
91 mdapi_data
->SplitOccured
= result
->query_disjoint
;
92 return sizeof(*mdapi_data
);
/*
 * Branch that writes through struct gen9_mdapi_metrics. The assignments
 * mirror the gen8 branch above field for field.
 */
98 struct gen9_mdapi_metrics
*mdapi_data
= (struct gen9_mdapi_metrics
*) data
;
/* Buffer-size check; the statement it guards is not visible in this listing. */
100 if (data_size
< sizeof(*mdapi_data
))
103 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->OaCntr
); i
++)
104 mdapi_data
->OaCntr
[i
] = result
->accumulator
[2 + i
];
105 for (int i
= 0; i
< ARRAY_SIZE(mdapi_data
->NoaCntr
); i
++) {
106 mdapi_data
->NoaCntr
[i
] =
107 result
->accumulator
[2 + ARRAY_SIZE(mdapi_data
->OaCntr
) + i
];
110 mdapi_data
->ReportId
= result
->hw_id
;
111 mdapi_data
->ReportsCount
= result
->reports_accumulated
;
112 mdapi_data
->TotalTime
=
113 gen_device_info_timebase_scale(devinfo
, result
->accumulator
[0]);
114 mdapi_data
->BeginTimestamp
=
115 gen_device_info_timebase_scale(devinfo
, result
->begin_timestamp
);
116 mdapi_data
->GPUTicks
= result
->accumulator
[1];
117 mdapi_data
->CoreFrequency
= freq_end
;
118 mdapi_data
->CoreFrequencyChanged
= freq_end
!= freq_start
;
119 mdapi_data
->SliceFrequency
=
120 (result
->slice_frequency
[0] + result
->slice_frequency
[1]) / 2ULL;
121 mdapi_data
->UnsliceFrequency
=
122 (result
->unslice_frequency
[0] + result
->unslice_frequency
[1]) / 2ULL;
123 mdapi_data
->SplitOccured
= result
->query_disjoint
;
124 return sizeof(*mdapi_data
);
/* Presumably the fallback for a generation none of the branches handle — its label is not visible here. */
127 unreachable("unexpected gen");
/*
 * Register the pipeline-statistics query named
 * "Intel_Raw_Pipeline_Statistics_Query" on perf_cfg.
 *
 *   perf_cfg    configuration passed to gen_perf_append_query_info() to
 *               obtain the new query.
 *   devinfo     device description; only gen and is_haswell are read.
 *
 * The query is marked GEN_PERF_QUERY_TYPE_PIPELINE, the statistics registers
 * are added in the order shown (which, per the in-body comment, has to match
 * mdapi_pipeline_metrics), and data_size is finally set to
 * sizeof(uint64_t) * query->n_counters.
 *
 * NOTE(review): this listing carries a stray numeral at the start of every
 * original line and has gaps — the return type, the braces, the statement
 * guarded by the generation check, an `else`, the end of the last block
 * comment and the final description string are not visible here.
 */
132 gen_perf_register_mdapi_statistic_query(struct gen_perf_config
*perf_cfg
,
133 const struct gen_device_info
*devinfo
)
/* Generation guard for gen 7..12; the guarded statement is not visible in this listing. */
135 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 12))
138 struct gen_perf_query_info
*query
=
139 gen_perf_append_query_info(perf_cfg
, MAX_STAT_COUNTERS
);
141 query
->kind
= GEN_PERF_QUERY_TYPE_PIPELINE
;
142 query
->name
= "Intel_Raw_Pipeline_Statistics_Query";
144 /* The order has to match mdapi_pipeline_metrics. */
145 gen_perf_query_add_basic_stat_reg(query
, IA_VERTICES_COUNT
,
146 "N vertices submitted");
147 gen_perf_query_add_basic_stat_reg(query
, IA_PRIMITIVES_COUNT
,
148 "N primitives submitted");
149 gen_perf_query_add_basic_stat_reg(query
, VS_INVOCATION_COUNT
,
150 "N vertex shader invocations");
151 gen_perf_query_add_basic_stat_reg(query
, GS_INVOCATION_COUNT
,
152 "N geometry shader invocations");
153 gen_perf_query_add_basic_stat_reg(query
, GS_PRIMITIVES_COUNT
,
154 "N geometry shader primitives emitted");
155 gen_perf_query_add_basic_stat_reg(query
, CL_INVOCATION_COUNT
,
156 "N primitives entering clipping");
157 gen_perf_query_add_basic_stat_reg(query
, CL_PRIMITIVES_COUNT
,
158 "N primitives leaving clipping");
/*
 * On Haswell and gen8, PS_INVOCATION_COUNT is registered through
 * gen_perf_query_add_stat_reg() with the extra arguments 1 and 4; their
 * meaning is defined by that helper, which is not visible here.
 */
159 if (devinfo
->is_haswell
|| devinfo
->gen
== 8) {
160 gen_perf_query_add_stat_reg(query
, PS_INVOCATION_COUNT
, 1, 4,
161 "N fragment shader invocations",
162 "N fragment shader invocations");
/* Plain registration of the same register — presumably the else arm of the check above (the `else` is not visible). */
164 gen_perf_query_add_basic_stat_reg(query
, PS_INVOCATION_COUNT
,
165 "N fragment shader invocations");
167 gen_perf_query_add_basic_stat_reg(query
, HS_INVOCATION_COUNT
,
168 "N TCS shader invocations");
169 gen_perf_query_add_basic_stat_reg(query
, DS_INVOCATION_COUNT
,
170 "N TES shader invocations");
/*
 * NOTE(review): if the generation guard at the top returns early (not
 * visible in this listing), gen >= 7 always holds here and this test is
 * redundant — confirm.
 */
171 if (devinfo
->gen
>= 7) {
172 gen_perf_query_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
173 "N compute shader invocations");
/* gen >= 10 registers CS_INVOCATION_COUNT a second time; the description string of that entry is not visible here. */
176 if (devinfo
->gen
>= 10) {
177 /* Reuse existing CS invocation register until we can expose this new
180 gen_perf_query_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
184 query
->data_size
= sizeof(uint64_t) * query
->n_counters
;
/*
 * Fill in the counter slot at index query->n_counters of query->counters as
 * a raw MDAPI counter.
 *
 *   query        query whose counters[] entry is written.
 *   data_offset  stored as the counter's offset.
 *   data_type    stored as the counter's data_type.
 *
 * The slot also gets the name `name`, the fixed description
 * "Raw counter value" and the type GEN_PERF_COUNTER_TYPE_RAW. The closing
 * assert checks that offset + gen_perf_query_counter_get_size(counter) stays
 * within query->data_size.
 *
 * NOTE(review): this listing carries a stray numeral at the start of every
 * original line and has gaps. The declaration of `name` is not visible, and
 * the MDAPI_QUERY_ADD_* macros pass five arguments (a sizeof() value in
 * fourth position) while only three parameters appear here. The statement
 * that advances query->n_counters is presumably among the missing lines —
 * confirm against the original file.
 *
 * NOTE(review): the first assert uses `<=`, which still passes when
 * n_counters == max_counters; if max_counters is the capacity of counters[],
 * `counter` would then point one past the last slot. Confirm whether `<` was
 * intended.
 */
188 fill_mdapi_perf_query_counter(struct gen_perf_query_info
*query
,
190 uint32_t data_offset
,
192 enum gen_perf_counter_data_type data_type
)
/* The slot is addressed before the bounds assert below runs. */
194 struct gen_perf_query_counter
*counter
= &query
->counters
[query
->n_counters
];
196 assert(query
->n_counters
<= query
->max_counters
);
198 counter
->name
= name
;
199 counter
->desc
= "Raw counter value";
200 counter
->type
= GEN_PERF_COUNTER_TYPE_RAW
;
201 counter
->data_type
= data_type
;
202 counter
->offset
= data_offset
;
/* The counter must lie entirely inside the query's result struct. */
206 assert(counter
->offset
+ gen_perf_query_counter_get_size(counter
) <= query
->data_size
);
/*
 * Register the scalar member `field_name` of the object `struct_name` as a
 * counter of `query`. The counter name is the stringified member name, the
 * offset is the byte distance from the start of `struct_name` to the member,
 * the size is sizeof the member, and the data type is
 * GEN_PERF_COUNTER_DATA_TYPE_<type_name>. `struct_name` must be an object
 * (not a type), because its address is taken.
 *
 * NOTE(review): the numerals at the start of these lines are residue of the
 * listing, not part of the macro text.
 */
209 #define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
210 fill_mdapi_perf_query_counter(query, #field_name, \
211 (uint8_t *) &struct_name.field_name - \
212 (uint8_t *) &struct_name, \
213 sizeof(struct_name.field_name), \
214 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
/*
 * Array variant: registers element `idx` of the array member `field_name`.
 * The counter name is built by ralloc_asprintf(ctx, "%s%i", ...) from the
 * member name followed by the index; offset and size refer to that single
 * element. `idx` is expanded twice (name and offset).
 */
215 #define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
216 fill_mdapi_perf_query_counter(query, \
217 ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
218 (uint8_t *) &struct_name.field_name[idx] - \
219 (uint8_t *) &struct_name, \
220 sizeof(struct_name.field_name[0]), \
221 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
/*
 * Register the raw OA query named "Intel_Raw_Hardware_Counters_Set_0_Query"
 * on perf, describing one counter per member of the MDAPI metrics struct
 * used for devinfo->gen.
 *
 *   perf      configuration the query is appended to; perf->queries is also
 *             the ralloc context passed for the generated array-counter
 *             names.
 *   devinfo   device description; only gen is read.
 *
 * Three layouts are described, each by a local, never-initialized
 * metric_data object that is only used for member addresses and sizeof:
 *
 *   struct gen7_mdapi_metrics   1 + 45 + 16 + 7 counters requested,
 *                               oa_format I915_OA_FORMAT_A45_B8_C8
 *   struct gen8_mdapi_metrics   2 + 36 + 16 + 16 counters requested,
 *                               oa_format I915_OA_FORMAT_A32u40_A4u32_B8_C8
 *   struct gen9_mdapi_metrics   2 + 36 + 16 + 16 + 16 + 2 counters requested,
 *                               same oa_format as the gen8 layout
 *
 * In every layout query->data_size is sizeof(metric_data). The gen7 sum is
 * TotalTime, the ACounters[] and NOACounters[] loops, then 7 scalar members.
 * The gen8 sum is TotalTime and GPUTicks, the OaCntr[] and NoaCntr[] loops,
 * then 16 scalar members. The gen9 layout adds the UserCntr[] loop plus
 * UserCntrCfgId and Reserved4. The 45/36/16 terms presumably equal the array
 * sizes — TODO confirm against the struct definitions.
 *
 * After the switch the query is marked GEN_PERF_QUERY_TYPE_RAW with guid
 * GEN_PERF_QUERY_GUID_MDAPI, and its gpu_time, gpu_clock, a, b and c offsets
 * are copied from copy_query.
 *
 * NOTE(review): this listing carries a stray numeral at the start of every
 * original line and has gaps — the return type, the braces, the case labels,
 * the statement guarded by the generation check, the end of the first block
 * comment and the initializer of copy_query are not visible here.
 */
224 gen_perf_register_mdapi_oa_query(struct gen_perf_config
*perf
,
225 const struct gen_device_info
*devinfo
)
/* Assigned in each layout branch of the switch below. */
227 struct gen_perf_query_info
*query
= NULL
;
229 /* MDAPI requires different structures for pretty much every generation
230 * (right now we have definitions for gen 7 to 12).
232 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 12))
235 switch (devinfo
->gen
) {
237 query
= gen_perf_append_query_info(perf
, 1 + 45 + 16 + 7);
238 query
->oa_format
= I915_OA_FORMAT_A45_B8_C8
;
240 struct gen7_mdapi_metrics metric_data
;
241 query
->data_size
= sizeof(metric_data
);
243 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
244 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.ACounters
); i
++) {
245 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
246 metric_data
, ACounters
, i
, UINT64
);
248 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NOACounters
); i
++) {
249 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
250 metric_data
, NOACounters
, i
, UINT64
);
252 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
253 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
254 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
255 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
256 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
257 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
258 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
262 query
= gen_perf_append_query_info(perf
, 2 + 36 + 16 + 16);
263 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
265 struct gen8_mdapi_metrics metric_data
;
266 query
->data_size
= sizeof(metric_data
);
268 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
269 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
270 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
271 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
272 metric_data
, OaCntr
, i
, UINT64
);
274 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
275 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
276 metric_data
, NoaCntr
, i
, UINT64
);
278 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
279 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
280 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
281 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
282 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
283 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
284 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
285 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
286 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
287 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
288 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
289 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
290 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
291 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
292 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
293 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
300 query
= gen_perf_append_query_info(perf
, 2 + 36 + 16 + 16 + 16 + 2);
301 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
303 struct gen9_mdapi_metrics metric_data
;
304 query
->data_size
= sizeof(metric_data
);
306 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
307 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
308 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
309 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
310 metric_data
, OaCntr
, i
, UINT64
);
312 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
313 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
314 metric_data
, NoaCntr
, i
, UINT64
);
316 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
317 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
318 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
319 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
320 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
321 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
322 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
323 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
324 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
325 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
326 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
327 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
328 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
329 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
330 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
331 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
332 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.UserCntr
); i
++) {
333 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
334 metric_data
, UserCntr
, i
, UINT64
);
336 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UserCntrCfgId
, UINT32
);
337 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved4
, UINT32
);
341 unreachable("Unsupported gen");
345 query
->kind
= GEN_PERF_QUERY_TYPE_RAW
;
346 query
->name
= "Intel_Raw_Hardware_Counters_Set_0_Query";
347 query
->guid
= GEN_PERF_QUERY_GUID_MDAPI
;
350 /* Accumulation buffer offsets copied from an actual query... */
/* NOTE(review): the initializer of copy_query is not visible in this listing — restore it from the original file. */
351 const struct gen_perf_query_info
*copy_query
=
354 query
->gpu_time_offset
= copy_query
->gpu_time_offset
;
355 query
->gpu_clock_offset
= copy_query
->gpu_clock_offset
;
356 query
->a_offset
= copy_query
->a_offset
;
357 query
->b_offset
= copy_query
->b_offset
;
358 query
->c_offset
= copy_query
->c_offset
;