i965: avoid 'unused variable' warnings
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query.h
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_PERFORMANCE_QUERY_H
25 #define BRW_PERFORMANCE_QUERY_H
26
27 #include <stdint.h>
28
29 #include "brw_context.h"
30 #include "brw_performance_query_metrics.h"
31
32 /*
33 * We currently allocate only one page for pipeline statistics queries. Here
34 * we derive the maximum number of counters for that amount.
 *
 * With the values below this works out to 4096 / 2 / 8 = 256 counters.
 * NOTE(review): the divisor of 8 presumably is the size in bytes of one
 * 64-bit counter value, and the halving presumably separates the begin and
 * end snapshots inside the BO -- confirm against the code that fills it.
35 */
36 #define STATS_BO_SIZE 4096
37 #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2)
38 #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8)
39
40 /*
41 * The largest OA formats we can use include:
42 * For Haswell:
43 * 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
 * (1 + 45 + 8 + 8 = 62 values)
44 * For Gen8+
45 * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
 * (1 + 1 + 36 + 8 + 8 = 54 values)
 *
 * MAX_OA_REPORT_COUNTERS is sized for the larger of the two layouts.
46 */
47 #define MAX_OA_REPORT_COUNTERS 62
48
49 /**
50 * i965 representation of a performance query object.
51 *
52 * NB: We want to keep this structure relatively lean considering that
53 * applications may expect to allocate enough objects to be able to
54 * query around all draw calls in a frame.
55 */
56 struct brw_perf_query_object
57 {
58 struct gl_perf_query_object base; /* core GL object, embedded as the first member */
59
60 const struct brw_perf_query_info *query; /* its kind field selects the union arm below */
61
62 /* See query->kind to know which state below is in use... */
63 union {
64 struct { /* state used by OA counter queries */
65
66 /**
67 * BO containing OA counter snapshots at query Begin/End time.
68 */
69 struct brw_bo *bo;
70
71 /**
72 * Address at which @bo is mapped.
73 */
74 void *map;
75
76 /**
77 * The MI_REPORT_PERF_COUNT command lets us specify a unique
78 * ID that will be reflected in the resulting OA report
79 * that's written by the GPU. This is the ID we're expecting
80 * in the begin report and the end report should be
81 * @begin_report_id + 1.
82 */
83 int begin_report_id;
84
85 /**
86 * Reference the head of the brw->perfquery.sample_buffers
87 * list at the time that the query started (so we only need
88 * to look at nodes after this point when looking for samples
89 * related to this query)
90 *
91 * (See struct brw_oa_sample_buf description for more details)
92 */
93 struct exec_node *samples_head;
94
95 /**
96 * Storage for the final accumulated OA counters.
97 */
98 uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
99
100 /**
101 * Hw ID used by the context on which the query was running.
102 */
103 uint32_t hw_id;
104
105 /**
106 * false while in the unaccumulated_elements list, and set to
107 * true when the final, end MI_RPC snapshot has been
108 * accumulated.
109 */
110 bool results_accumulated;
111
112 /**
113 * Number of reports accumulated to produce the results.
114 */
115 uint32_t reports_accumulated;
116
117 /**
118 * Frequency of the GT at begin and end of the query.
119 */
120 uint64_t gt_frequency[2];
121
122 /**
123 * Frequency in the slices of the GT at the begin and end of the
124 * query.
125 */
126 uint64_t slice_frequency[2];
127
128 /**
129 * Frequency in the unslice of the GT at the begin and end of the
130 * query.
131 */
132 uint64_t unslice_frequency[2];
133 } oa;
134
135 struct { /* state used by pipeline statistics queries */
136 /**
137 * BO containing starting and ending snapshots for the
138 * statistics counters.
139 */
140 struct brw_bo *bo;
141 } pipeline_stats;
142 };
143 };
144
145 static inline struct brw_perf_query_info *
146 brw_perf_query_append_query_info(struct brw_context *brw)
147 {
148 brw->perfquery.queries =
149 reralloc(brw, brw->perfquery.queries,
150 struct brw_perf_query_info, ++brw->perfquery.n_queries);
151
152 return &brw->perfquery.queries[brw->perfquery.n_queries - 1];
153 }
154
155 static inline void
156 brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query,
157 uint32_t reg,
158 uint32_t numerator,
159 uint32_t denominator,
160 const char *name,
161 const char *description)
162 {
163 struct brw_perf_query_counter *counter;
164
165 assert(query->n_counters < MAX_STAT_COUNTERS);
166
167 counter = &query->counters[query->n_counters];
168 counter->name = name;
169 counter->desc = description;
170 counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
171 counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
172 counter->size = sizeof(uint64_t);
173 counter->offset = sizeof(uint64_t) * query->n_counters;
174 counter->pipeline_stat.reg = reg;
175 counter->pipeline_stat.numerator = numerator;
176 counter->pipeline_stat.denominator = denominator;
177
178 query->n_counters++;
179 }
180
/**
 * Convenience wrapper around brw_perf_query_info_add_stat_reg().
 *
 * Registers @reg with a numerator and denominator of 1, and uses @name for
 * both the counter name and its description.
 */
static inline void
brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query,
                                       uint32_t reg, const char *name)
{
   const uint32_t numerator = 1;
   const uint32_t denominator = 1;

   brw_perf_query_info_add_stat_reg(query, reg,
                                    numerator, denominator,
                                    name, name);
}
187
/**
 * Accumulate a 32-bit OA counter.
 *
 * Adds (*report1 - *report0), computed modulo 2^32, to *accumulator, so a
 * single wrap of the counter between the two reports still yields the
 * right delta.
 */
static inline void
brw_perf_query_accumulate_uint32(const uint32_t *report0,
                                 const uint32_t *report1,
                                 uint64_t *accumulator)
{
   /* Keep the difference in 32 bits before widening it. */
   const uint32_t delta = *report1 - *report0;

   *accumulator = *accumulator + delta;
}
196
/**
 * Accumulate a 40-bit OA counter.
 *
 * The counter value is assembled from two places in each report: the low
 * 32 bits are read from dword (a_index + 4), and the upper 8 bits are read
 * from byte a_index of the byte array that starts at dword 40.
 *
 * @param a_index      index of the counter within the report
 * @param report0      earlier report
 * @param report1      later report
 * @param accumulator  running total the delta is added to
 *
 * If the value in report1 is smaller than the one in report0, the counter
 * is treated as having wrapped exactly once at 2^40.
 */
static inline void
brw_perf_query_accumulate_uint40(int a_index,
                                 const uint32_t *report0,
                                 const uint32_t *report1,
                                 uint64_t *accumulator)
{
   /* Keep the const qualifier of the reports when viewing them as bytes. */
   const uint8_t *high_bytes0 = (const uint8_t *)(report0 + 40);
   const uint8_t *high_bytes1 = (const uint8_t *)(report1 + 40);
   uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32;
   uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32;
   uint64_t value0 = report0[a_index + 4] | high0;
   uint64_t value1 = report1[a_index + 4] | high1;
   uint64_t delta;

   if (value0 > value1)
      delta = (1ULL << 40) + value1 - value0;   /* wrapped past 2^40 */
   else
      delta = value1 - value0;

   *accumulator += delta;
}
219
/*
 * Entry points declared here; their definitions are not part of this header.
 * NOTE(review): "mdapi" presumably refers to Intel's Metrics Discovery API.
 * The meaning of the int return value and of data/data_size cannot be
 * determined from this header -- confirm against the implementation.
 */
220 int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
221 struct brw_perf_query_object *obj,
222 size_t data_size,
223 uint8_t *data);
224 void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw);
225 void brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw);
226
227 #endif /* BRW_PERFORMANCE_QUERY_H */