i965: move mdapi data structure to intel/perf
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query_mdapi.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
26
27 #include "perf/gen_perf.h"
28 #include "perf/gen_perf_mdapi.h"
29
30 int
31 brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
32 struct brw_perf_query_object *obj,
33 size_t data_size,
34 uint8_t *data)
35 {
36 const struct gen_device_info *devinfo = &brw->screen->devinfo;
37
38 switch (devinfo->gen) {
39 case 7: {
40 struct gen7_mdapi_metrics *mdapi_data = (struct gen7_mdapi_metrics *) data;
41
42 if (data_size < sizeof(*mdapi_data))
43 return 0;
44
45 assert(devinfo->is_haswell);
46
47 for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
48 mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i];
49
50 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
51 mdapi_data->NOACounters[i] =
52 obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
53 }
54
55 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
56 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
57 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
58 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
59 return sizeof(*mdapi_data);
60 }
61 case 8: {
62 struct gen8_mdapi_metrics *mdapi_data = (struct gen8_mdapi_metrics *) data;
63
64 if (data_size < sizeof(*mdapi_data))
65 return 0;
66
67 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
68 mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
69 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
70 mdapi_data->NoaCntr[i] =
71 obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
72 }
73
74 mdapi_data->ReportId = obj->oa.hw_id;
75 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
76 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
77 mdapi_data->GPUTicks = obj->oa.accumulator[1];
78 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
79 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
80 mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
81 mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
82
83 return sizeof(*mdapi_data);
84 }
85 case 9:
86 case 10:
87 case 11: {
88 struct gen9_mdapi_metrics *mdapi_data = (struct gen9_mdapi_metrics *) data;
89
90 if (data_size < sizeof(*mdapi_data))
91 return 0;
92
93 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
94 mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i];
95 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
96 mdapi_data->NoaCntr[i] =
97 obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
98 }
99
100 mdapi_data->ReportId = obj->oa.hw_id;
101 mdapi_data->ReportsCount = obj->oa.reports_accumulated;
102 mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]);
103 mdapi_data->GPUTicks = obj->oa.accumulator[1];
104 mdapi_data->CoreFrequency = obj->oa.gt_frequency[1];
105 mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1];
106 mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL;
107 mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL;
108
109 return sizeof(*mdapi_data);
110 }
111 default:
112 unreachable("unexpected gen");
113 }
114
115 return 0;
116 }
117
118 static void
119 fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
120 const char *name,
121 uint32_t data_offset,
122 uint32_t data_size,
123 enum gen_perf_counter_data_type data_type)
124 {
125 struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
126
127 assert(query->n_counters <= query->max_counters);
128
129 counter->name = name;
130 counter->desc = "Raw counter value";
131 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
132 counter->data_type = data_type;
133 counter->offset = data_offset;
134 counter->size = data_size;
135 assert(counter->offset + counter->size <= query->data_size);
136
137 query->n_counters++;
138 }
139
/* Register the scalar field `field_name` of the metrics struct variable
 * `struct_name` as a raw counter on `query`.
 *
 * The counter is named after the field (stringified), its offset is the
 * field's byte distance from the start of `struct_name`, its size is the
 * field's size, and `type_name` selects the GEN_PERF_COUNTER_DATA_TYPE_*
 * value.  Only addresses and sizes of `struct_name` are taken; its contents
 * are never read, so it does not need to be initialized.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name)   \
   fill_mdapi_perf_query_counter((query), #field_name,                       \
                                 (uint8_t *) &(struct_name).field_name -     \
                                 (uint8_t *) &(struct_name),                 \
                                 sizeof((struct_name).field_name),           \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)

/* Register element `idx` of the array field `field_name` of the metrics
 * struct variable `struct_name` as a raw counter on `query`.
 *
 * The counter name is the field name immediately followed by the index
 * (e.g. "OaCntr0"), built with ralloc_asprintf() using `ctx` as the ralloc
 * context.  `idx` is formatted with "%i", so it must be an int.  Offset and
 * size are computed as for MDAPI_QUERY_ADD_COUNTER, using the addressed
 * element and the size of one element.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                                    \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint8_t *) &(struct_name).field_name[(idx)] - \
                                 (uint8_t *) &(struct_name),                 \
                                 sizeof((struct_name).field_name[0]),        \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
153
154 void
155 brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
156 {
157 const struct gen_device_info *devinfo = &brw->screen->devinfo;
158 struct gen_perf *perf = brw->perfquery.perf;
159 struct gen_perf_query_info *query = NULL;
160
161 /* MDAPI requires different structures for pretty much every generation
162 * (right now we have definitions for gen 7 to 11).
163 */
164 if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
165 return;
166
167 switch (devinfo->gen) {
168 case 7: {
169 query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
170 query->oa_format = I915_OA_FORMAT_A45_B8_C8;
171
172 struct gen7_mdapi_metrics metric_data;
173 query->data_size = sizeof(metric_data);
174
175 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
176 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
177 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
178 metric_data, ACounters, i, UINT64);
179 }
180 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
181 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
182 metric_data, NOACounters, i, UINT64);
183 }
184 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
185 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
186 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
187 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
188 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
189 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
190 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
191 break;
192 }
193 case 8: {
194 query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
195 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
196
197 struct gen8_mdapi_metrics metric_data;
198 query->data_size = sizeof(metric_data);
199
200 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
201 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
202 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
203 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
204 metric_data, OaCntr, i, UINT64);
205 }
206 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
207 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
208 metric_data, NoaCntr, i, UINT64);
209 }
210 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
211 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
212 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
213 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
214 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
215 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
216 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
217 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
218 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
219 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
220 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
221 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
222 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
223 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
224 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
225 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
226 break;
227 }
228 case 9:
229 case 10:
230 case 11: {
231 query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
232 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
233
234 struct gen9_mdapi_metrics metric_data;
235 query->data_size = sizeof(metric_data);
236
237 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
238 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
239 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
240 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
241 metric_data, OaCntr, i, UINT64);
242 }
243 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
244 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
245 metric_data, NoaCntr, i, UINT64);
246 }
247 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
248 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
249 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
250 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
251 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
252 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
253 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
254 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
255 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
256 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
257 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
258 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
259 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
260 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
261 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
262 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
263 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
264 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
265 metric_data, UserCntr, i, UINT64);
266 }
267 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
268 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
269 break;
270 }
271 default:
272 unreachable("Unsupported gen");
273 break;
274 }
275
276 query->kind = GEN_PERF_QUERY_TYPE_RAW;
277 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
278 /* Guid has to matches with MDAPI's. */
279 query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba";
280
281 {
282 /* Accumulation buffer offsets copied from an actual query... */
283 const struct gen_perf_query_info *copy_query =
284 &brw->perfquery.perf->queries[0];
285
286 query->gpu_time_offset = copy_query->gpu_time_offset;
287 query->gpu_clock_offset = copy_query->gpu_clock_offset;
288 query->a_offset = copy_query->a_offset;
289 query->b_offset = copy_query->b_offset;
290 query->c_offset = copy_query->c_offset;
291 }
292 }
293
294 void
295 brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw)
296 {
297 const struct gen_device_info *devinfo = &brw->screen->devinfo;
298
299 if (!(devinfo->gen >= 7 && devinfo->gen <= 9))
300 return;
301
302 struct gen_perf_query_info *query =
303 gen_perf_query_append_query_info(brw->perfquery.perf, MAX_STAT_COUNTERS);
304
305 query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
306 query->name = "Intel_Raw_Pipeline_Statistics_Query";
307
308 /* The order has to match mdapi_pipeline_metrics. */
309 gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
310 "N vertices submitted");
311 gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
312 "N primitives submitted");
313 gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
314 "N vertex shader invocations");
315 gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
316 "N geometry shader invocations");
317 gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
318 "N geometry shader primitives emitted");
319 gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
320 "N primitives entering clipping");
321 gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
322 "N primitives leaving clipping");
323 if (devinfo->is_haswell || devinfo->gen == 8) {
324 gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
325 "N fragment shader invocations",
326 "N fragment shader invocations");
327 } else {
328 gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
329 "N fragment shader invocations");
330 }
331 gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
332 "N TCS shader invocations");
333 gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
334 "N TES shader invocations");
335 if (devinfo->gen >= 7) {
336 gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
337 "N compute shader invocations");
338 }
339
340 query->data_size = sizeof(uint64_t) * query->n_counters;
341 }