i965/program_cache: Cast the key to char * before adding key_size
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query_mdapi.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
26
27 #include "perf/gen_perf.h"
28 #include "perf/gen_perf_mdapi.h"
29
30 static void
31 fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
32 const char *name,
33 uint32_t data_offset,
34 uint32_t data_size,
35 enum gen_perf_counter_data_type data_type)
36 {
37 struct gen_perf_query_counter *counter = &query->counters[query->n_counters];
38
39 assert(query->n_counters <= query->max_counters);
40
41 counter->name = name;
42 counter->desc = "Raw counter value";
43 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
44 counter->data_type = data_type;
45 counter->offset = data_offset;
46
47 query->n_counters++;
48
49 assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size);
50 }
51
/* Register the field `field_name` of the object `struct_name` as a raw
 * counter of `query`. The counter name is the stringified field name, the
 * offset is the field's byte distance from the start of `struct_name`, and
 * `type_name` selects the GEN_PERF_COUNTER_DATA_TYPE_* enumerator.
 *
 * Arguments are parenthesized so that arbitrary expressions can be passed
 * safely.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name)   \
   fill_mdapi_perf_query_counter((query), #field_name,                       \
                                 (uint8_t *) &(struct_name).field_name -     \
                                 (uint8_t *) &(struct_name),                 \
                                 sizeof((struct_name).field_name),           \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)

/* Same as MDAPI_QUERY_ADD_COUNTER, for element `idx` of the array field
 * `field_name`. The counter name is the field name followed by the index,
 * formatted with ralloc_asprintf() using `ctx` as its first argument.
 *
 * Note: `idx` is expanded twice (once for the name, once for the offset), so
 * it must not have side effects.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                                    \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint8_t *) &(struct_name).field_name[(idx)] - \
                                 (uint8_t *) &(struct_name),                 \
                                 sizeof((struct_name).field_name[0]),        \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
65
66 void
67 brw_perf_query_register_mdapi_oa_query(struct brw_context *brw)
68 {
69 const struct gen_device_info *devinfo = &brw->screen->devinfo;
70 struct gen_perf *perf = brw->perfquery.perf;
71 struct gen_perf_query_info *query = NULL;
72
73 /* MDAPI requires different structures for pretty much every generation
74 * (right now we have definitions for gen 7 to 11).
75 */
76 if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
77 return;
78
79 switch (devinfo->gen) {
80 case 7: {
81 query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
82 query->oa_format = I915_OA_FORMAT_A45_B8_C8;
83
84 struct gen7_mdapi_metrics metric_data;
85 query->data_size = sizeof(metric_data);
86
87 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
88 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
89 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
90 metric_data, ACounters, i, UINT64);
91 }
92 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
93 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
94 metric_data, NOACounters, i, UINT64);
95 }
96 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
97 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
98 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
99 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
100 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
101 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
102 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
103 break;
104 }
105 case 8: {
106 query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
107 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
108
109 struct gen8_mdapi_metrics metric_data;
110 query->data_size = sizeof(metric_data);
111
112 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
113 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
114 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
115 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
116 metric_data, OaCntr, i, UINT64);
117 }
118 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
119 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
120 metric_data, NoaCntr, i, UINT64);
121 }
122 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
123 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
124 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
125 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
126 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
127 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
128 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
129 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
130 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
131 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
132 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
133 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
134 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
135 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
136 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
137 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
138 break;
139 }
140 case 9:
141 case 10:
142 case 11: {
143 query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
144 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
145
146 struct gen9_mdapi_metrics metric_data;
147 query->data_size = sizeof(metric_data);
148
149 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
150 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
151 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
152 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
153 metric_data, OaCntr, i, UINT64);
154 }
155 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
156 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
157 metric_data, NoaCntr, i, UINT64);
158 }
159 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
160 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
161 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
162 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
163 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
164 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
165 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
166 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
167 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
168 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
169 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
170 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
171 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
172 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
173 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
174 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
175 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
176 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
177 metric_data, UserCntr, i, UINT64);
178 }
179 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
180 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
181 break;
182 }
183 default:
184 unreachable("Unsupported gen");
185 break;
186 }
187
188 query->kind = GEN_PERF_QUERY_TYPE_RAW;
189 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
190 query->guid = GEN_PERF_QUERY_GUID_MDAPI;
191
192 {
193 /* Accumulation buffer offsets copied from an actual query... */
194 const struct gen_perf_query_info *copy_query =
195 &brw->perfquery.perf->queries[0];
196
197 query->gpu_time_offset = copy_query->gpu_time_offset;
198 query->gpu_clock_offset = copy_query->gpu_clock_offset;
199 query->a_offset = copy_query->a_offset;
200 query->b_offset = copy_query->b_offset;
201 query->c_offset = copy_query->c_offset;
202 }
203 }
204
205 void
206 brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw)
207 {
208 const struct gen_device_info *devinfo = &brw->screen->devinfo;
209
210 if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
211 return;
212
213 struct gen_perf_query_info *query =
214 gen_perf_query_append_query_info(brw->perfquery.perf, MAX_STAT_COUNTERS);
215
216 query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
217 query->name = "Intel_Raw_Pipeline_Statistics_Query";
218
219 /* The order has to match mdapi_pipeline_metrics. */
220 gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
221 "N vertices submitted");
222 gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
223 "N primitives submitted");
224 gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
225 "N vertex shader invocations");
226 gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
227 "N geometry shader invocations");
228 gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
229 "N geometry shader primitives emitted");
230 gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
231 "N primitives entering clipping");
232 gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
233 "N primitives leaving clipping");
234 if (devinfo->is_haswell || devinfo->gen == 8) {
235 gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
236 "N fragment shader invocations",
237 "N fragment shader invocations");
238 } else {
239 gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
240 "N fragment shader invocations");
241 }
242 gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
243 "N TCS shader invocations");
244 gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
245 "N TES shader invocations");
246 if (devinfo->gen >= 7) {
247 gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
248 "N compute shader invocations");
249 }
250
251 if (devinfo->gen >= 10) {
252 /* Reuse existing CS invocation register until we can expose this new
253 * one.
254 */
255 gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
256 "Reserved1");
257 }
258
259 query->data_size = sizeof(uint64_t) * query->n_counters;
260 }