/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "brw_defines.h"
25 #include "brw_performance_query.h"
27 #include "perf/gen_perf.h"
28 #include "perf/gen_perf_mdapi.h"
31 fill_mdapi_perf_query_counter(struct gen_perf_query_info
*query
,
35 enum gen_perf_counter_data_type data_type
)
37 struct gen_perf_query_counter
*counter
= &query
->counters
[query
->n_counters
];
39 assert(query
->n_counters
<= query
->max_counters
);
42 counter
->desc
= "Raw counter value";
43 counter
->type
= GEN_PERF_COUNTER_TYPE_RAW
;
44 counter
->data_type
= data_type
;
45 counter
->offset
= data_offset
;
46 counter
->size
= data_size
;
47 assert(counter
->offset
+ counter
->size
<= query
->data_size
);
/* Register the scalar member `field_name` of the local variable
 * `struct_name` as a counter of `query`.  The counter name is the
 * stringified field name; offset and size are computed from the member's
 * position and type within `struct_name`.  `type_name` is the suffix of a
 * GEN_PERF_COUNTER_DATA_TYPE_* enumerator (e.g. UINT64).
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter((query), #field_name,                     \
                                 (uint8_t *) &(struct_name).field_name -   \
                                 (uint8_t *) &(struct_name),               \
                                 sizeof((struct_name).field_name),         \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
/* Register element `idx` of the array member `field_name` of the local
 * variable `struct_name` as a counter of `query`.  The counter name is
 * built with ralloc_asprintf() on `ctx` as "<field_name><idx>"; offset is
 * the element's position within `struct_name` and size is that of one
 * element.  `type_name` is the suffix of a GEN_PERF_COUNTER_DATA_TYPE_*
 * enumerator.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                                  \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint8_t *) &(struct_name).field_name[(idx)] - \
                                 (uint8_t *) &(struct_name),               \
                                 sizeof((struct_name).field_name[0]),      \
                                 GEN_PERF_COUNTER_DATA_TYPE_##type_name)
67 brw_perf_query_register_mdapi_oa_query(struct brw_context
*brw
)
69 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
70 struct gen_perf
*perf
= brw
->perfquery
.perf
;
71 struct gen_perf_query_info
*query
= NULL
;
73 /* MDAPI requires different structures for pretty much every generation
74 * (right now we have definitions for gen 7 to 11).
76 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 11))
79 switch (devinfo
->gen
) {
81 query
= gen_perf_query_append_query_info(perf
, 1 + 45 + 16 + 7);
82 query
->oa_format
= I915_OA_FORMAT_A45_B8_C8
;
84 struct gen7_mdapi_metrics metric_data
;
85 query
->data_size
= sizeof(metric_data
);
87 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
88 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.ACounters
); i
++) {
89 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
90 metric_data
, ACounters
, i
, UINT64
);
92 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NOACounters
); i
++) {
93 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
94 metric_data
, NOACounters
, i
, UINT64
);
96 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
97 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
98 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
99 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
100 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
101 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
102 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
106 query
= gen_perf_query_append_query_info(perf
, 2 + 36 + 16 + 16);
107 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
109 struct gen8_mdapi_metrics metric_data
;
110 query
->data_size
= sizeof(metric_data
);
112 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
113 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
114 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
115 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
116 metric_data
, OaCntr
, i
, UINT64
);
118 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
119 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
120 metric_data
, NoaCntr
, i
, UINT64
);
122 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
123 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
124 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
125 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
126 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
127 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
128 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
129 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
130 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
131 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
132 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
133 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
134 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
135 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
136 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
137 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
143 query
= gen_perf_query_append_query_info(perf
, 2 + 36 + 16 + 16 + 16 + 2);
144 query
->oa_format
= I915_OA_FORMAT_A32u40_A4u32_B8_C8
;
146 struct gen9_mdapi_metrics metric_data
;
147 query
->data_size
= sizeof(metric_data
);
149 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, TotalTime
, UINT64
);
150 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, GPUTicks
, UINT64
);
151 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.OaCntr
); i
++) {
152 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
153 metric_data
, OaCntr
, i
, UINT64
);
155 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.NoaCntr
); i
++) {
156 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
157 metric_data
, NoaCntr
, i
, UINT64
);
159 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, BeginTimestamp
, UINT64
);
160 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved1
, UINT64
);
161 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved2
, UINT64
);
162 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved3
, UINT32
);
163 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, OverrunOccured
, BOOL32
);
164 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerUser
, UINT64
);
165 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, MarkerDriver
, UINT64
);
166 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SliceFrequency
, UINT64
);
167 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UnsliceFrequency
, UINT64
);
168 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter1
, UINT64
);
169 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, PerfCounter2
, UINT64
);
170 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, SplitOccured
, BOOL32
);
171 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequencyChanged
, BOOL32
);
172 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, CoreFrequency
, UINT64
);
173 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportId
, UINT32
);
174 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, ReportsCount
, UINT32
);
175 for (int i
= 0; i
< ARRAY_SIZE(metric_data
.UserCntr
); i
++) {
176 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf
->queries
, query
,
177 metric_data
, UserCntr
, i
, UINT64
);
179 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, UserCntrCfgId
, UINT32
);
180 MDAPI_QUERY_ADD_COUNTER(query
, metric_data
, Reserved4
, UINT32
);
184 unreachable("Unsupported gen");
188 query
->kind
= GEN_PERF_QUERY_TYPE_RAW
;
189 query
->name
= "Intel_Raw_Hardware_Counters_Set_0_Query";
190 query
->guid
= GEN_PERF_QUERY_GUID_MDAPI
;
193 /* Accumulation buffer offsets copied from an actual query... */
194 const struct gen_perf_query_info
*copy_query
=
195 &brw
->perfquery
.perf
->queries
[0];
197 query
->gpu_time_offset
= copy_query
->gpu_time_offset
;
198 query
->gpu_clock_offset
= copy_query
->gpu_clock_offset
;
199 query
->a_offset
= copy_query
->a_offset
;
200 query
->b_offset
= copy_query
->b_offset
;
201 query
->c_offset
= copy_query
->c_offset
;
206 brw_perf_query_register_mdapi_statistic_query(struct brw_context
*brw
)
208 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
210 if (!(devinfo
->gen
>= 7 && devinfo
->gen
<= 9))
213 struct gen_perf_query_info
*query
=
214 gen_perf_query_append_query_info(brw
->perfquery
.perf
, MAX_STAT_COUNTERS
);
216 query
->kind
= GEN_PERF_QUERY_TYPE_PIPELINE
;
217 query
->name
= "Intel_Raw_Pipeline_Statistics_Query";
219 /* The order has to match mdapi_pipeline_metrics. */
220 gen_perf_query_info_add_basic_stat_reg(query
, IA_VERTICES_COUNT
,
221 "N vertices submitted");
222 gen_perf_query_info_add_basic_stat_reg(query
, IA_PRIMITIVES_COUNT
,
223 "N primitives submitted");
224 gen_perf_query_info_add_basic_stat_reg(query
, VS_INVOCATION_COUNT
,
225 "N vertex shader invocations");
226 gen_perf_query_info_add_basic_stat_reg(query
, GS_INVOCATION_COUNT
,
227 "N geometry shader invocations");
228 gen_perf_query_info_add_basic_stat_reg(query
, GS_PRIMITIVES_COUNT
,
229 "N geometry shader primitives emitted");
230 gen_perf_query_info_add_basic_stat_reg(query
, CL_INVOCATION_COUNT
,
231 "N primitives entering clipping");
232 gen_perf_query_info_add_basic_stat_reg(query
, CL_PRIMITIVES_COUNT
,
233 "N primitives leaving clipping");
234 if (devinfo
->is_haswell
|| devinfo
->gen
== 8) {
235 gen_perf_query_info_add_stat_reg(query
, PS_INVOCATION_COUNT
, 1, 4,
236 "N fragment shader invocations",
237 "N fragment shader invocations");
239 gen_perf_query_info_add_basic_stat_reg(query
, PS_INVOCATION_COUNT
,
240 "N fragment shader invocations");
242 gen_perf_query_info_add_basic_stat_reg(query
, HS_INVOCATION_COUNT
,
243 "N TCS shader invocations");
244 gen_perf_query_info_add_basic_stat_reg(query
, DS_INVOCATION_COUNT
,
245 "N TES shader invocations");
246 if (devinfo
->gen
>= 7) {
247 gen_perf_query_info_add_basic_stat_reg(query
, CS_INVOCATION_COUNT
,
248 "N compute shader invocations");
251 query
->data_size
= sizeof(uint64_t) * query
->n_counters
;