2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #ifndef BRW_PERFORMANCE_QUERY_H
25 #define BRW_PERFORMANCE_QUERY_H
29 #include "brw_context.h"
30 #include "brw_performance_query_metrics.h"
33 * We currently allocate only one page for pipeline statistics queries. Here
34 * we derive the maximum number of counters for that amount.
/* Size of the statistics buffer in bytes. */
36 #define STATS_BO_SIZE 4096
/* Byte offset of the buffer's midpoint (2048). The name suggests the end
 * snapshot is written from here -- TODO confirm against the users. */
37 #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2)
/* Midpoint offset divided by 8, i.e. 256. Presumably 8 bytes (one uint64_t)
 * per counter -- TODO confirm. */
38 #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8)
41 * The largest OA formats we can use include:
43 * 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
45 * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
/* 62 matches the first layout listed above: 1 + 45 + 8 + 8. The second
 * layout sums to 1 + 1 + 36 + 8 + 8 = 54, so 62 covers both. */
47 #define MAX_OA_REPORT_COUNTERS 62
50 * i965 representation of a performance query object.
52 * NB: We want to keep this structure relatively lean considering that
53 * applications may expect to allocate enough objects to be able to
54 * query around all draw calls in a frame.
56 struct brw_perf_query_object
58 struct gl_perf_query_object base
;
60 const struct brw_perf_query_info
*query
;
62 /* See query->kind to know which state below is in use... */
67 * BO containing OA counter snapshots at query Begin/End time.
72 * Mapped address of @bo
77 * The MI_REPORT_PERF_COUNT command lets us specify a unique
78 * ID that will be reflected in the resulting OA report
79 * that's written by the GPU. This is the ID we're expecting
80 * in the begin report and the end report should be
81 * @begin_report_id + 1.
86 * Reference the head of the brw->perfquery.sample_buffers
87 * list at the time that the query started (so we only need
88 * to look at nodes after this point when looking for samples
89 * related to this query)
91 * (See struct brw_oa_sample_buf description for more details)
93 struct exec_node
*samples_head
;
96 * Storage for the final accumulated OA counters.
98 uint64_t accumulator
[MAX_OA_REPORT_COUNTERS
];
101 * Hw ID used by the context on which the query was running.
106 * false while in the unaccumulated_elements list, and set to
107 * true when the final, end MI_RPC snapshot has been accumulated.
110 bool results_accumulated
;
113 * Number of reports accumulated to produce the results.
115 uint32_t reports_accumulated
;
120 * BO containing starting and ending snapshots for the
121 * statistics counters.
/**
 * Grow brw->perfquery.queries by one element and return the new last slot.
 *
 * n_queries is pre-incremented inside the reralloc() call, so the array is
 * resized to the new count and the returned pointer addresses element
 * [n_queries - 1].
 *
 * NOTE(review): the reralloc() result is stored and indexed without a NULL
 * check, and nothing visible here initializes the new element -- confirm the
 * allocation-failure policy and that callers fill in every field.
 */
128 static inline struct brw_perf_query_info
*
129 brw_perf_query_append_query_info(struct brw_context
*brw
)
/* Resize the array to the incremented query count. */
131 brw
->perfquery
.queries
=
132 reralloc(brw
, brw
->perfquery
.queries
,
133 struct brw_perf_query_info
, ++brw
->perfquery
.n_queries
);
/* Hand back the slot that was just appended. */
135 return &brw
->perfquery
.queries
[brw
->perfquery
.n_queries
- 1];
/**
 * Fill in the counter slot at index query->n_counters as a raw 64-bit
 * pipeline-statistics counter.
 *
 * The slot receives the given name and description, type
 * GL_PERFQUERY_COUNTER_RAW_INTEL, data type
 * GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL, a size of sizeof(uint64_t) and an
 * offset of sizeof(uint64_t) * n_counters. The register and its
 * numerator/denominator are stored in counter->pipeline_stat.
 *
 * NOTE(review): `reg`, `numerator` and `name` are used below but their
 * parameter declarations are not visible in this listing, and no update of
 * query->n_counters is visible either -- confirm against the full source.
 */
139 brw_perf_query_info_add_stat_reg(struct brw_perf_query_info
*query
,
142 uint32_t denominator
,
144 const char *description
)
146 struct brw_perf_query_counter
*counter
;
/* Refuse to write past the counters that fit in the statistics buffer. */
148 assert(query
->n_counters
< MAX_STAT_COUNTERS
);
/* Next free slot. */
150 counter
= &query
->counters
[query
->n_counters
];
151 counter
->name
= name
;
152 counter
->desc
= description
;
153 counter
->type
= GL_PERFQUERY_COUNTER_RAW_INTEL
;
154 counter
->data_type
= GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL
;
155 counter
->size
= sizeof(uint64_t);
/* Counters are laid out back to back, one uint64_t each. */
156 counter
->offset
= sizeof(uint64_t) * query
->n_counters
;
157 counter
->pipeline_stat
.reg
= reg
;
158 counter
->pipeline_stat
.numerator
= numerator
;
159 counter
->pipeline_stat
.denominator
= denominator
;
/**
 * Convenience wrapper around brw_perf_query_info_add_stat_reg().
 *
 * Registers `reg` on `query`, passing 1 for both scaling arguments and
 * reusing `name` as the description as well.
 */
165 brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info
*query
,
166 uint32_t reg
, const char *name
)
168 brw_perf_query_info_add_stat_reg(query
, reg
, 1, 1, name
, name
);
/**
 * Accumulate a 32-bit OA counter.
 *
 * Adds the difference between the counter value in @report1 and the one in
 * @report0 to *@accumulator. The subtraction is done in uint32_t arithmetic
 * and truncated back to 32 bits before widening, so a counter that wrapped
 * once past 2^32 between the two reports still yields the correct delta.
 *
 * @param report0      points at the counter value in the earlier report
 * @param report1      points at the counter value in the later report
 * @param accumulator  running 64-bit total that the delta is added to
 */
static inline void
brw_perf_query_accumulate_uint32(const uint32_t *report0,
                                 const uint32_t *report1,
                                 uint64_t *accumulator)
{
   *accumulator += (uint32_t)(*report1 - *report0);
}
/**
 * Accumulate a 40-bit OA counter.
 *
 * Each 40-bit value is assembled from two places in a report: the low
 * 32 bits are the dword at index @a_index + 4, and the high 8 bits are the
 * byte at index @a_index of the byte array that starts at dword 40.
 *
 * The delta between the value in @report1 and the one in @report0 is added
 * to *@accumulator. If the earlier value is larger than the later one the
 * counter is taken to have wrapped once, and 2^40 is added to compensate.
 *
 * NOTE(review): the `delta` declaration and the wrap test were reconstructed
 * from the two delta assignments -- confirm against the upstream source.
 *
 * @param a_index      index of the A counter within the report
 * @param report0      earlier report, as an array of dwords
 * @param report1      later report, as an array of dwords
 * @param accumulator  running 64-bit total that the delta is added to
 */
static inline void
brw_perf_query_accumulate_uint40(int a_index,
                                 const uint32_t *report0,
                                 const uint32_t *report1,
                                 uint64_t *accumulator)
{
   /* High bytes of all A counters are packed together starting at dword 40. */
   const uint8_t *high_bytes0 = (const uint8_t *)(report0 + 40);
   const uint8_t *high_bytes1 = (const uint8_t *)(report1 + 40);
   uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32;
   uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32;
   uint64_t value0 = report0[a_index + 4] | high0;
   uint64_t value1 = report1[a_index + 4] | high1;
   uint64_t delta;

   if (value0 > value1)
      delta = (1ULL << 40) + value1 - value0; /* counter wrapped past 2^40 */
   else
      delta = value1 - value0;

   *accumulator += delta;
}
203 #endif /* BRW_PERFORMANCE_QUERY_H */