0676e868b814b2056a38497dcd604c7747986a76
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query_mdapi.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
26
27 #include "perf/gen_perf.h"
28
29 /**
30 * Data format expected by MDAPI.
31 */
32
/*
 * Result record written for gen7 (Haswell) by
 * brw_perf_query_get_mdapi_oa_data().
 *
 * The field names are also used, stringified, as the names of the counters
 * registered for this layout, so they must stay exactly as spelled here.
 */
struct mdapi_gen7_metrics {
   /* accumulator[0] after brw_timebase_scale(). */
   uint64_t TotalTime;

   /* accumulator[1 .. 45], then the 16 accumulator entries after those. */
   uint64_t ACounters[45];
   uint64_t NOACounters[16];

   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;

   /* Non-zero when gt_frequency[0] and gt_frequency[1] differ. */
   uint32_t CoreFrequencyChanged;
   /* gt_frequency[1]. */
   uint64_t CoreFrequency;

   uint32_t ReportId;
   /* Number of reports accumulated into the query. */
   uint32_t ReportsCount;
};
47
/* Lengths of the counter arrays in the gen8 and later result records. */
#define GTDI_QUERY_BDW_METRICS_OA_COUNT     36
#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT 32
#define GTDI_QUERY_BDW_METRICS_NOA_COUNT    16

/*
 * Result record written for gen8 by brw_perf_query_get_mdapi_oa_data().
 *
 * The field names are also used, stringified, as the names of the counters
 * registered for this layout, so they must stay exactly as spelled here.
 */
struct mdapi_gen8_metrics {
   /* accumulator[0] after brw_timebase_scale(). */
   uint64_t TotalTime;
   /* accumulator[1]. */
   uint64_t GPUTicks;

   /* accumulator[2 ..], OA entries first and NOA entries right after. */
   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];

   uint64_t BeginTimestamp;
   uint64_t Reserved1;
   uint64_t Reserved2;
   uint32_t Reserved3;
   uint32_t OverrunOccured;
   uint64_t MarkerUser;
   uint64_t MarkerDriver;

   /* Mean of the two sampled slice / unslice frequencies. */
   uint64_t SliceFrequency;
   uint64_t UnsliceFrequency;

   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;

   /* Non-zero when gt_frequency[0] and gt_frequency[1] differ. */
   uint32_t CoreFrequencyChanged;
   /* gt_frequency[1]. */
   uint64_t CoreFrequency;

   /* oa.hw_id of the query object. */
   uint32_t ReportId;
   /* Number of reports accumulated into the query. */
   uint32_t ReportsCount;
};
74
75 #define GTDI_MAX_READ_REGS 16
76
77 struct mdapi_gen9_metrics {
78 uint64_t TotalTime;
79 uint64_t GPUTicks;
80 uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
81 uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
82 uint64_t BeginTimestamp;
83 uint64_t Reserved1;
84 uint64_t Reserved2;
85 uint32_t Reserved3;
86 uint32_t OverrunOccured;
87 uint64_t MarkerUser;
88 uint64_t MarkerDriver;
89
90 uint64_t SliceFrequency;
91 uint64_t UnsliceFrequency;
92 uint64_t PerfCounter1;
93 uint64_t PerfCounter2;
94 uint32_t SplitOccured;
95 uint32_t CoreFrequencyChanged;
96 uint64_t CoreFrequency;
97 uint32_t ReportId;
98 uint32_t ReportsCount;
99
100 uint64_t UserCntr[GTDI_MAX_READ_REGS];
101 uint32_t UserCntrCfgId;
102 uint32_t Reserved4;
103 };
104
/*
 * Pipeline statistics record: one 64-bit value per statistic.
 *
 * brw_perf_query_register_mdapi_statistic_query() registers its statistics
 * registers in the same order as the fields below.
 */
struct mdapi_pipeline_metrics {
   /* Input assembly. */
   uint64_t IAVertices;
   uint64_t IAPrimitives;

   /* Vertex and geometry shading. */
   uint64_t VSInvocations;
   uint64_t GSInvocations;
   uint64_t GSPrimitives;

   /* Clipping. */
   uint64_t CInvocations;
   uint64_t CPrimitives;

   /* Remaining shader stages. */
   uint64_t PSInvocations;
   uint64_t HSInvocations;
   uint64_t DSInvocations;
   uint64_t CSInvocations;
};
118
119 int
120 brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
121 struct brw_perf_query_object *obj,
122 size_t data_size,
123 uint8_t *data)
124 {
125 const struct gen_device_info *devinfo = &brw->screen->devinfo;
126
127 switch (devinfo->gen) {
128 case 7: {
129 struct mdapi_gen7_metrics *mdapi_data = (struct mdapi_gen7_metrics *) data;
130
131 if (data_size < sizeof(*mdapi_data))
132 return 0;
133
134 assert(devinfo->is_haswell);
135
136 for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
137 mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i];
138
139 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
140 mdapi_data->NOACounters[i] =
141 obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
142 }
143
144 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
145 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
146 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
147 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
148 return sizeof(*mdapi_data);
149 }
150 case 8: {
151 struct mdapi_gen8_metrics *mdapi_data = (struct mdapi_gen8_metrics *) data;
152
153 if (data_size < sizeof(*mdapi_data))
154 return 0;
155
156 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
157 mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
158 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
159 mdapi_data->NoaCntr[i] =
160 obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
161 }
162
163 mdapi_data->ReportId = obj->oa.hw_id;
164 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
165 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
166 mdapi_data->GPUTicks = obj->oa.accumulator[1];
167 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
168 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
169 mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
170 mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
171
172 return sizeof(*mdapi_data);
173 }
174 case 9:
175 case 10:
176 case 11: {
177 struct mdapi_gen9_metrics *mdapi_data = (struct mdapi_gen9_metrics *) data;
178
179 if (data_size < sizeof(*mdapi_data))
180 return 0;
181
182 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
183 mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
184 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
185 mdapi_data->NoaCntr[i] =
186 obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
187 }
188
189 mdapi_data->ReportId = obj->oa.hw_id;
190 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
191 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
192 mdapi_data->GPUTicks = obj->oa.accumulator[1];
193 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
194 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
195 mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
196 mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
197
198 return sizeof(*mdapi_data);
199 }
200 default:
201 unreachable("unexpected gen");
202 }
203
204 return 0;
205 }
206
207 static void
208 fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
209 const char *name,
210 uint32_t data_offset,
211 uint32_t data_size,
212 enum gen_perf_counter_data_type data_type)
213 {
214 struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
215
216 assert(query->n_counters <= query->max_counters);
217
218 counter->name = name;
219 counter->desc = "Raw counter value";
220 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
221 counter->data_type = data_type;
222 counter->offset = data_offset;
223 counter->size = data_size;
224 assert(counter->offset + counter->size <= query->data_size);
225
226 query->n_counters++;
227 }
228
/*
 * Register the scalar field struct_name.field_name as a raw counter of
 * `query`.
 *
 * The counter takes the field's name (stringified), the byte distance of the
 * field from the start of struct_name as its offset and the field's size as
 * its size. type_name is pasted onto GEN_PERF_COUNTER_DATA_TYPE_ to select
 * the data type. struct_name is only used for address and size computations,
 * so it does not need to be initialized.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name)   \
   fill_mdapi_perf_query_counter((query), #field_name,                       \
                                 (uint32_t)                                  \
                                 ((const uint8_t *) &(struct_name).field_name - \
                                  (const uint8_t *) &(struct_name)),         \
                                 sizeof((struct_name).field_name),           \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)

/*
 * Same as MDAPI_QUERY_ADD_COUNTER for element `idx` of an array field.
 *
 * The counter name is the field name followed by the index, allocated with
 * ralloc_asprintf() on `ctx`. `idx` is evaluated twice, so it must not have
 * side effects.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                                    \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint32_t)                                  \
                                 ((const uint8_t *) &(struct_name).field_name[(idx)] - \
                                  (const uint8_t *) &(struct_name)),         \
                                 sizeof((struct_name).field_name[0]),        \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
242
243 void
244 brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
245 {
246 const struct gen_device_info *devinfo = &brw->screen->devinfo;
247 struct gen_perf *perf = brw->perfquery.perf;
248 struct gen_perf_query_info *query = NULL;
249
250 /* MDAPI requires different structures for pretty much every generation
251 * (right now we have definitions for gen 7 to 11).
252 */
253 if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
254 return;
255
256 switch (devinfo->gen) {
257 case 7: {
258 query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
259 query->oa_format = I915_OA_FORMAT_A45_B8_C8;
260
261 struct mdapi_gen7_metrics metric_data;
262 query->data_size = sizeof(metric_data);
263
264 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
265 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
266 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
267 metric_data, ACounters, i, UINT64);
268 }
269 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
270 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
271 metric_data, NOACounters, i, UINT64);
272 }
273 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
274 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
275 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
276 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
277 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
278 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
279 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
280 break;
281 }
282 case 8: {
283 query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
284 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
285
286 struct mdapi_gen8_metrics metric_data;
287 query->data_size = sizeof(metric_data);
288
289 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
290 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
291 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
292 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
293 metric_data, OaCntr, i, UINT64);
294 }
295 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
296 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
297 metric_data, NoaCntr, i, UINT64);
298 }
299 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
300 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
301 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
302 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
303 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
304 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
305 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
306 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
307 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
308 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
309 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
310 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
311 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
312 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
313 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
314 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
315 break;
316 }
317 case 9:
318 case 10:
319 case 11: {
320 query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
321 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
322
323 struct mdapi_gen9_metrics metric_data;
324 query->data_size = sizeof(metric_data);
325
326 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
327 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
328 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
329 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
330 metric_data, OaCntr, i, UINT64);
331 }
332 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
333 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
334 metric_data, NoaCntr, i, UINT64);
335 }
336 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
337 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
338 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
339 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
340 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
341 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
342 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
343 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
344 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
345 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
346 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
347 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
348 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
349 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
350 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
351 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
352 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
353 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
354 metric_data, UserCntr, i, UINT64);
355 }
356 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
357 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
358 break;
359 }
360 default:
361 unreachable("Unsupported gen");
362 break;
363 }
364
365 query->kind = GEN_PERF_QUERY_TYPE_RAW;
366 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
367 /* Guid has to matches with MDAPI's. */
368 query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba";
369
370 {
371 /* Accumulation buffer offsets copied from an actual query... */
372 const struct gen_perf_query_info *copy_query =
373 &brw->perfquery.perf->queries[0];
374
375 query->gpu_time_offset = copy_query->gpu_time_offset;
376 query->gpu_clock_offset = copy_query->gpu_clock_offset;
377 query->a_offset = copy_query->a_offset;
378 query->b_offset = copy_query->b_offset;
379 query->c_offset = copy_query->c_offset;
380 }
381 }
382
383 void
384 brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw)
385 {
386 const struct gen_device_info *devinfo = &brw->screen->devinfo;
387
388 if (!(devinfo->gen >= 7 && devinfo->gen <= 9))
389 return;
390
391 struct gen_perf_query_info *query =
392 gen_perf_query_append_query_info(brw->perfquery.perf, MAX_STAT_COUNTERS);
393
394 query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
395 query->name = "Intel_Raw_Pipeline_Statistics_Query";
396
397 /* The order has to match mdapi_pipeline_metrics. */
398 gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
399 "N vertices submitted");
400 gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
401 "N primitives submitted");
402 gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
403 "N vertex shader invocations");
404 gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
405 "N geometry shader invocations");
406 gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
407 "N geometry shader primitives emitted");
408 gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
409 "N primitives entering clipping");
410 gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
411 "N primitives leaving clipping");
412 if (devinfo->is_haswell || devinfo->gen == 8) {
413 gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
414 "N fragment shader invocations",
415 "N fragment shader invocations");
416 } else {
417 gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
418 "N fragment shader invocations");
419 }
420 gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
421 "N TCS shader invocations");
422 gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
423 "N TES shader invocations");
424 if (devinfo->gen >= 7) {
425 gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
426 "N compute shader invocations");
427 }
428
429 query->data_size = sizeof(uint64_t) * query->n_counters;
430 }