i965: Enabled the OES_copy_image extension on Gen 7 GPUs
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query_mdapi.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
26
27 /**
28 * Data format expected by MDAPI.
29 */
30
/**
 * Gen 7 layout of the raw OA query result.
 *
 * brw_perf_query_get_mdapi_oa_data() reinterprets the caller's buffer as this
 * struct on gen 7, so the order and width of the fields must stay as they are.
 */
struct mdapi_gen7_metrics {
   /* brw_timebase_scale() applied to oa.accumulator[0]. */
   uint64_t TotalTime;

   /* oa.accumulator[1] .. oa.accumulator[45]. */
   uint64_t ACounters[45];
   /* oa.accumulator[46] .. oa.accumulator[61]. */
   uint64_t NOACounters[16];

   /* Not written by brw_perf_query_get_mdapi_oa_data(). */
   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;

   /* Non-zero when oa.gt_frequency[0] differs from oa.gt_frequency[1]. */
   uint32_t CoreFrequencyChanged;
   /* oa.gt_frequency[1]. */
   uint64_t CoreFrequency;

   /* Not written by brw_perf_query_get_mdapi_oa_data() on gen 7. */
   uint32_t ReportId;
   /* oa.reports_accumulated. */
   uint32_t ReportsCount;
};
45
/* Array sizes used by the gen 8+ MDAPI layouts. */
#define GTDI_QUERY_BDW_METRICS_OA_COUNT     36
#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT 32
#define GTDI_QUERY_BDW_METRICS_NOA_COUNT    16

/**
 * Gen 8 layout of the raw OA query result.
 *
 * brw_perf_query_get_mdapi_oa_data() reinterprets the caller's buffer as this
 * struct on gen 8, so the order and width of the fields must stay as they are.
 */
struct mdapi_gen8_metrics {
   /* brw_timebase_scale() applied to oa.accumulator[0]. */
   uint64_t TotalTime;
   /* oa.accumulator[1]. */
   uint64_t GPUTicks;
   /* oa.accumulator[2] .. oa.accumulator[37]. */
   uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
   /* oa.accumulator[38] .. oa.accumulator[53]. */
   uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];

   /* Not written by brw_perf_query_get_mdapi_oa_data(). */
   uint64_t BeginTimestamp;
   uint64_t Reserved1;
   uint64_t Reserved2;
   uint32_t Reserved3;
   uint32_t OverrunOccured;
   uint64_t MarkerUser;
   uint64_t MarkerDriver;

   /* Mean of oa.slice_frequency[0] and oa.slice_frequency[1]. */
   uint64_t SliceFrequency;
   /* Mean of oa.unslice_frequency[0] and oa.unslice_frequency[1]. */
   uint64_t UnsliceFrequency;

   /* Not written by brw_perf_query_get_mdapi_oa_data(). */
   uint64_t PerfCounter1;
   uint64_t PerfCounter2;
   uint32_t SplitOccured;

   /* Non-zero when oa.gt_frequency[0] differs from oa.gt_frequency[1]. */
   uint32_t CoreFrequencyChanged;
   /* oa.gt_frequency[1]. */
   uint64_t CoreFrequency;
   /* oa.hw_id. */
   uint32_t ReportId;
   /* oa.reports_accumulated. */
   uint32_t ReportsCount;
};
72
73 #define GTDI_MAX_READ_REGS 16
74
75 struct mdapi_gen9_metrics {
76 uint64_t TotalTime;
77 uint64_t GPUTicks;
78 uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
79 uint64_t NoaCntr[GTDI_QUERY_BDW_METRICS_NOA_COUNT];
80 uint64_t BeginTimestamp;
81 uint64_t Reserved1;
82 uint64_t Reserved2;
83 uint32_t Reserved3;
84 uint32_t OverrunOccured;
85 uint64_t MarkerUser;
86 uint64_t MarkerDriver;
87
88 uint64_t SliceFrequency;
89 uint64_t UnsliceFrequency;
90 uint64_t PerfCounter1;
91 uint64_t PerfCounter2;
92 uint32_t SplitOccured;
93 uint32_t CoreFrequencyChanged;
94 uint64_t CoreFrequency;
95 uint32_t ReportId;
96 uint32_t ReportsCount;
97
98 uint64_t UserCntr[GTDI_MAX_READ_REGS];
99 uint32_t UserCntrCfgId;
100 uint32_t Reserved4;
101 };
102
/**
 * Layout of the pipeline statistics result: eleven consecutive 64-bit values.
 *
 * brw_perf_query_register_mdapi_statistic_query() has to register its
 * counters in the order of the members below.
 */
struct mdapi_pipeline_metrics {
   /* Input assembler. */
   uint64_t IAVertices;
   uint64_t IAPrimitives;

   /* Vertex and geometry shader stages. */
   uint64_t VSInvocations;
   uint64_t GSInvocations;
   uint64_t GSPrimitives;

   /* Clipper. */
   uint64_t CInvocations;
   uint64_t CPrimitives;

   /* Remaining shader stages. */
   uint64_t PSInvocations;
   uint64_t HSInvocations;
   uint64_t DSInvocations;
   uint64_t CSInvocations;
};
116
117 int
118 brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
119 struct brw_perf_query_object *obj,
120 size_t data_size,
121 uint8_t *data)
122 {
123 const struct gen_device_info *devinfo = &brw->screen->devinfo;
124
125 switch (devinfo->gen) {
126 case 7: {
127 struct mdapi_gen7_metrics *mdapi_data = (struct mdapi_gen7_metrics *) data;
128
129 if (data_size < sizeof(*mdapi_data))
130 return 0;
131
132 assert(devinfo->is_haswell);
133
134 for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
135 mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i];
136
137 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
138 mdapi_data->NOACounters[i] =
139 obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
140 }
141
142 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
143 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
144 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
145 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
146 return sizeof(*mdapi_data);
147 }
148 case 8: {
149 struct mdapi_gen8_metrics *mdapi_data = (struct mdapi_gen8_metrics *) data;
150
151 if (data_size < sizeof(*mdapi_data))
152 return 0;
153
154 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
155 mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
156 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
157 mdapi_data->NoaCntr[i] =
158 obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
159 }
160
161 mdapi_data->ReportId = obj->oa.hw_id;
162 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
163 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
164 mdapi_data->GPUTicks = obj->oa.accumulator[1];
165 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
166 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
167 mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
168 mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
169
170 return sizeof(*mdapi_data);
171 }
172 case 9:
173 case 10:
174 case 11: {
175 struct mdapi_gen9_metrics *mdapi_data = (struct mdapi_gen9_metrics *) data;
176
177 if (data_size < sizeof(*mdapi_data))
178 return 0;
179
180 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
181 mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
182 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
183 mdapi_data->NoaCntr[i] =
184 obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
185 }
186
187 mdapi_data->ReportId = obj->oa.hw_id;
188 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
189 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
190 mdapi_data->GPUTicks = obj->oa.accumulator[1];
191 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
192 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
193 mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
194 mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
195
196 return sizeof(*mdapi_data);
197 }
198 default:
199 unreachable("unexpected gen");
200 }
201
202 return 0;
203 }
204
205 static void
206 fill_mdapi_perf_query_counter(struct brw_perf_query_info *query,
207 const char *name,
208 uint32_t data_offset,
209 uint32_t data_size,
210 GLenum data_type)
211 {
212 struct brw_perf_query_counter *counter = &query->counters[query->n_counters];
213
214 counter->name = name;
215 counter->desc = "Raw counter value";
216 counter->data_type = data_type;
217 counter->offset = data_offset;
218 counter->size = data_size;
219 assert(counter->offset + counter->size <= query->data_size);
220
221 query->n_counters++;
222 }
223
/* Register the scalar member `field_name` of the local MDAPI struct variable
 * `struct_name` as a counter of `query`. The counter is named after the
 * member, its offset is the member's byte distance from the start of the
 * struct and its size is sizeof the member. `type_name` selects the
 * GL_PERFQUERY_COUNTER_DATA_<type_name>_INTEL enum.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter((query), #field_name, \
                                 (uint8_t *) &(struct_name).field_name - \
                                 (uint8_t *) &(struct_name), \
                                 sizeof((struct_name).field_name), \
                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)

/* Same as MDAPI_QUERY_ADD_COUNTER for element `idx` of the array member
 * `field_name`. The counter name is the member name followed by the index,
 * built with ralloc_asprintf() on `ctx`.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query), \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint8_t *) &(struct_name).field_name[(idx)] - \
                                 (uint8_t *) &(struct_name), \
                                 sizeof((struct_name).field_name[0]), \
                                 GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL)
237
238 void
239 brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
240 {
241 const struct gen_device_info *devinfo = &brw->screen->devinfo;
242
243 /* MDAPI requires different structures for pretty much every generation
244 * (right now we have definitions for gen 7 to 11).
245 */
246 if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
247 return;
248
249 struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw);
250
251 query->kind = OA_COUNTERS_RAW;
252 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
253 /* Guid has to matches with MDAPI's. */
254 query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba";
255 query->n_counters = 0;
256 query->oa_metrics_set_id = 0; /* Set by MDAPI */
257
258 int n_counters;
259 switch (devinfo->gen) {
260 case 7: {
261 query->oa_format = I915_OA_FORMAT_A45_B8_C8;
262
263 struct mdapi_gen7_metrics metric_data;
264 query->data_size = sizeof(metric_data);
265
266 n_counters = 1 + 45 + 16 + 7;
267 query->counters =
268 rzalloc_array_size(brw->perfquery.queries,
269 sizeof(*query->counters), n_counters);
270
271 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
272 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
273 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
274 query, metric_data, ACounters, i, UINT64);
275 }
276 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
277 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
278 query, metric_data, NOACounters, i, UINT64);
279 }
280 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
281 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
282 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
283 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
284 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
285 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
286 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
287 break;
288 }
289 case 8: {
290 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
291
292 struct mdapi_gen8_metrics metric_data;
293 query->data_size = sizeof(metric_data);
294
295 n_counters = 2 + 36 + 16 + 16;
296 query->counters =
297 rzalloc_array_size(brw->perfquery.queries,
298 sizeof(*query->counters), n_counters);
299
300 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
301 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
302 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
303 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
304 query, metric_data, OaCntr, i, UINT64);
305 }
306 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
307 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
308 query, metric_data, NoaCntr, i, UINT64);
309 }
310 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
311 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
312 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
313 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
314 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
315 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
316 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
317 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
318 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
319 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
320 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
321 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
322 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
323 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
324 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
325 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
326 break;
327 }
328 case 9:
329 case 10:
330 case 11: {
331 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
332
333 struct mdapi_gen9_metrics metric_data;
334 query->data_size = sizeof(metric_data);
335
336 n_counters = 2 + 36 + 16 + 16 + 16 + 2;
337 query->counters =
338 rzalloc_array_size(brw->perfquery.queries,
339 sizeof(*query->counters), n_counters);
340
341 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
342 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
343 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
344 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
345 query, metric_data, OaCntr, i, UINT64);
346 }
347 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
348 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
349 query, metric_data, NoaCntr, i, UINT64);
350 }
351 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
352 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
353 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
354 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
355 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
356 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
357 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
358 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
359 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
360 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
361 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
362 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
363 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
364 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
365 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
366 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
367 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
368 MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries,
369 query, metric_data, UserCntr, i, UINT64);
370 }
371 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
372 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
373 break;
374 }
375 default:
376 unreachable("Unsupported gen");
377 break;
378 }
379
380 assert(query->n_counters <= n_counters);
381
382 {
383 /* Accumulation buffer offsets copied from an actual query... */
384 const struct brw_perf_query_info *copy_query =
385 &brw->perfquery.queries[0];
386
387 query->gpu_time_offset = copy_query->gpu_time_offset;
388 query->gpu_clock_offset = copy_query->gpu_clock_offset;
389 query->a_offset = copy_query->a_offset;
390 query->b_offset = copy_query->b_offset;
391 query->c_offset = copy_query->c_offset;
392 }
393 }
394
395 void
396 brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw)
397 {
398 const struct gen_device_info *devinfo = &brw->screen->devinfo;
399
400 if (!(devinfo->gen >= 7 && devinfo->gen <= 9))
401 return;
402
403 struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw);
404
405 query->kind = PIPELINE_STATS;
406 query->name = "Intel_Raw_Pipeline_Statistics_Query";
407 query->n_counters = 0;
408 query->counters =
409 rzalloc_array(brw, struct brw_perf_query_counter, MAX_STAT_COUNTERS);
410
411 /* The order has to match mdapi_pipeline_metrics. */
412 brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
413 "N vertices submitted");
414 brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
415 "N primitives submitted");
416 brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
417 "N vertex shader invocations");
418 brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
419 "N geometry shader invocations");
420 brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
421 "N geometry shader primitives emitted");
422 brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
423 "N primitives entering clipping");
424 brw_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
425 "N primitives leaving clipping");
426 if (devinfo->is_haswell || devinfo->gen == 8) {
427 brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
428 "N fragment shader invocations",
429 "N fragment shader invocations");
430 } else {
431 brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
432 "N fragment shader invocations");
433 }
434 brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
435 "N TCS shader invocations");
436 brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
437 "N TES shader invocations");
438 if (devinfo->gen >= 7) {
439 brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
440 "N compute shader invocations");
441 }
442
443 query->data_size = sizeof(uint64_t) * query->n_counters;
444 }