6cf9e2d614029f200a2f5003570eb1fb39cd6ce0
[mesa.git] / src / gallium / drivers / freedreno / a5xx / fd5_query.c
1 /*
2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
28
29 #include "freedreno_query_acc.h"
30 #include "freedreno_resource.h"
31
32 #include "fd5_context.h"
33 #include "fd5_format.h"
34 #include "fd5_query.h"
35
/* Per-query sample slot in the GPU-visible query buffer.  PACKED so the
 * CPU-side offsetof() calculations below match what the GPU writes; do
 * not reorder fields.
 */
struct PACKED fd5_query_sample {
	uint64_t start;    /* counter value snapshotted at resume */
	uint64_t result;   /* accumulated (stop - start), summed by CP_MEM_TO_MEM */
	uint64_t stop;     /* counter value snapshotted at pause */
};
41
/* offset of a single field of an array of fd5_query_sample:
 *
 * Expands to the "bo, offset, shift, or" argument list expected by
 * OUT_RELOC()/OUT_RELOCW().  `idx` is parenthesized so expressions
 * like `i + 1` expand correctly (macro-hygiene fix).
 */
#define query_sample_idx(aq, idx, field)             \
		fd_resource((aq)->prsc)->bo,                 \
		((idx) * sizeof(struct fd5_query_sample)) +  \
		offsetof(struct fd5_query_sample, field),    \
		0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field)                      \
		query_sample_idx(aq, 0, field)
52
53 /*
54 * Occlusion Query:
55 *
56 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
57 * interpret results
58 */
59
/* Start/restart occlusion counting for this batch: point the RB
 * sample-counter copy destination at sample->start and fire a
 * ZPASS_DONE event to snapshot the current passed-sample count there.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	/* destination address for the ZPASS_DONE sample-count write: */
	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, start));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);
	fd_reset_wfi(batch);

	/* NOTE(review): counter of currently-active samples-passed queries;
	 * presumably consulted when emitting other state — the consumer is
	 * not visible in this file.
	 */
	fd5_context(batch->ctx)->samples_passed_queries++;
}
77
/* Stop occlusion counting: snapshot the current passed-sample count
 * into sample->stop, wait until the asynchronous ZPASS_DONE write has
 * actually landed, then let the CP accumulate (stop - start) into
 * sample->result.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	/* seed stop with an all-ones marker value so we can detect, below,
	 * when the ZPASS_DONE event has overwritten it:
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, stop));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);
	fd_reset_wfi(batch);

	/* NOTE(review): appears to poll stop until it no longer holds the
	 * 0xffffffff marker, ie. until the ZPASS_DONE write landed; the
	 * first/last dwords are still undeciphered magic (XXX).
	 */
	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000014); // XXX
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010); // XXX

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */

	fd5_context(batch->ctx)->samples_passed_queries--;
}
118
119 static void
120 occlusion_counter_result(struct fd_acc_query *aq, void *buf,
121 union pipe_query_result *result)
122 {
123 struct fd5_query_sample *sp = buf;
124 result->u64 = sp->result;
125 }
126
127 static void
128 occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
129 union pipe_query_result *result)
130 {
131 struct fd5_query_sample *sp = buf;
132 result->b = !!sp->result;
133 }
134
/* PIPE_QUERY_OCCLUSION_COUNTER: raw count of passed samples. */
static const struct fd_acc_sample_provider occlusion_counter = {
		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
		.active = FD_STAGE_DRAW,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_counter_result,
};
143
/* PIPE_QUERY_OCCLUSION_PREDICATE: same sampling as the counter query,
 * result just reduced to a boolean.
 */
static const struct fd_acc_sample_provider occlusion_predicate = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
		.active = FD_STAGE_DRAW,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
152
/* Conservative predicate: implemented identically to the precise
 * predicate (a precise answer is a valid conservative one).
 */
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
		.active = FD_STAGE_DRAW,
		.size = sizeof(struct fd5_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
161
162 /*
163 * Timestamp Queries:
164 */
165
/* Snapshot the GPU timestamp into sample->start: a CACHE_FLUSH event
 * write with the TIMESTAMP bit set writes the current time (and the
 * trailing zero dword) to the reloc'd address.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, start));
	OUT_RING(ring, 0x00000000);

	fd_reset_wfi(batch);
}
179
/* Snapshot the GPU timestamp into sample->stop, then accumulate the
 * elapsed ticks (stop - start) into sample->result on the CP.
 */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	/* ensure the timestamp write has landed before the CP reads it: */
	fd_reset_wfi(batch);
	fd_wfi(batch, ring);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
}
203
/* Convert timestamp counter ticks to nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer, so each tick is
 * 1000000000 / 19200000 = 52ns (integer-truncated).
 *
 * Takes uint64_t: the sampled timestamps are 64-bit (fd5_query_sample
 * fields), and a uint32_t parameter would silently truncate them and
 * let the multiply wrap in 32-bit arithmetic.
 *
 * TODO we should probably query this value from kernel..
 */
static uint64_t
ticks_to_ns(uint64_t ts)
{
	return ts * (1000000000 / 19200000);
}
213
214 static void
215 time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
216 union pipe_query_result *result)
217 {
218 struct fd5_query_sample *sp = buf;
219 result->u64 = ticks_to_ns(sp->result);
220 }
221
222 static void
223 timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
224 union pipe_query_result *result)
225 {
226 struct fd5_query_sample *sp = buf;
227 result->u64 = ticks_to_ns(sp->result);
228 }
229
/* PIPE_QUERY_TIME_ELAPSED: active across all stages, accumulating
 * timestamp deltas across tiles/batches.
 */
static const struct fd_acc_sample_provider time_elapsed = {
		.query_type = PIPE_QUERY_TIME_ELAPSED,
		.active = FD_STAGE_ALL,
		.size = sizeof(struct fd5_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = time_elapsed_accumulate_result,
};
238
239 /* NOTE: timestamp query isn't going to give terribly sensible results
240 * on a tiler. But it is needed by qapitrace profile heatmap. If you
241 * add in a binning pass, the results get even more non-sensical. So
242 * we just return the timestamp on the first tile and hope that is
243 * kind of good enough.
244 */
245
/* PIPE_QUERY_TIMESTAMP: reuses the timestamp_resume/pause machinery
 * (see NOTE above about tiler caveats).
 */
static const struct fd_acc_sample_provider timestamp = {
		.query_type = PIPE_QUERY_TIMESTAMP,
		.active = FD_STAGE_ALL,
		.size = sizeof(struct fd5_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = timestamp_accumulate_result,
};
254
255 /*
256 * Performance Counter (batch) queries:
257 *
258 * Only one of these is active at a time, per design of the gallium
259 * batch_query API design. On perfcntr query tracks N query_types,
260 * each of which has a 'fd_batch_query_entry' that maps it back to
261 * the associated group and counter.
262 */
263
/* Maps one tracked query_type back to its perfcntr group + countable. */
struct fd_batch_query_entry {
	uint8_t gid;        /* group-id */
	uint8_t cid;        /* countable-id within the group */
};
268
/* Per-query driver data for a batch query, hung off aq->query_data.
 * Allocated with a flexible array of one entry per tracked query_type.
 */
struct fd_batch_query_data {
	struct fd_screen *screen;
	unsigned num_query_entries;
	struct fd_batch_query_entry query_entries[];
};
274
/* Program the perf-counter selects for each tracked query_type, then
 * snapshot the counters' starting values into the sample buffer.
 *
 * counters_per_group[] assigns physical counters within each group in
 * entry order; the same first-come-first-served walk is repeated in
 * perfcntr_pause() so both passes pick the same counter_idx.
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	fd_wfi(batch, ring);

	/* configure performance counters for the requested queries: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;

		/* capacity was validated in fd5_create_batch_query(): */
		debug_assert(counter_idx < g->num_counters);

		OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
		OUT_RING(ring, g->countables[entry->cid].selector);
	}

	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* and snapshot the start values */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		/* 64b copy of the counter register pair into sample[i].start: */
		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOCW(ring, query_sample_idx(aq, i, start));
	}
}
314
/* Snapshot the perf-counters' end values into sample[i].stop, then
 * accumulate per-entry deltas into sample[i].result on the CP.  Uses
 * the same counter-assignment walk as perfcntr_resume().
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	fd_wfi(batch, ring);

	/* TODO do we need to bother to turn anything off? */

	/* snapshot the end values: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOCW(ring, query_sample_idx(aq, i, stop));
	}

	/* and compute the result: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		/* result += stop - start: */
		OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
		OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
				CP_MEM_TO_MEM_0_NEG_C);
		OUT_RELOCW(ring, query_sample_idx(aq, i, result));      /* dst */
		OUT_RELOC(ring, query_sample_idx(aq, i, result));       /* srcA */
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));         /* srcB */
		OUT_RELOC(ring, query_sample_idx(aq, i, start));        /* srcC */
	}
}
354
355 static void
356 perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
357 union pipe_query_result *result)
358 {
359 struct fd_batch_query_data *data = aq->query_data;
360 struct fd5_query_sample *sp = buf;
361
362 for (unsigned i = 0; i < data->num_query_entries; i++) {
363 result->batch[i].u64 = sp[i].result;
364 }
365 }
366
/* Batch-query provider.  Note: no .size here — the sample buffer size
 * depends on the number of tracked queries and is set per-query in
 * fd5_create_batch_query().
 */
static const struct fd_acc_sample_provider perfcntr = {
		.query_type = FD_QUERY_FIRST_PERFCNTR,
		.active = FD_STAGE_ALL,
		.resume = perfcntr_resume,
		.pause = perfcntr_pause,
		.result = perfcntr_accumulate_result,
};
374
375 static struct pipe_query *
376 fd5_create_batch_query(struct pipe_context *pctx,
377 unsigned num_queries, unsigned *query_types)
378 {
379 struct fd_context *ctx = fd_context(pctx);
380 struct fd_screen *screen = ctx->screen;
381 struct fd_query *q;
382 struct fd_acc_query *aq;
383 struct fd_batch_query_data *data;
384
385 data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
386 num_queries * sizeof(data->query_entries[0]));
387
388 data->screen = screen;
389 data->num_query_entries = num_queries;
390
391 /* validate the requested query_types and ensure we don't try
392 * to request more query_types of a given group than we have
393 * counters:
394 */
395 unsigned counters_per_group[screen->num_perfcntr_groups];
396 memset(counters_per_group, 0, sizeof(counters_per_group));
397
398 for (unsigned i = 0; i < num_queries; i++) {
399 unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
400
401 /* verify valid query_type, ie. is it actually a perfcntr? */
402 if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
403 (idx >= screen->num_perfcntr_queries)) {
404 debug_printf("invalid batch query query_type: %u\n", query_types[i]);
405 goto error;
406 }
407
408 struct fd_batch_query_entry *entry = &data->query_entries[i];
409 struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
410
411 entry->gid = pq->group_id;
412
413 /* the perfcntr_queries[] table flattens all the countables
414 * for each group in series, ie:
415 *
416 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
417 *
418 * So to find the countable index just step back through the
419 * table to find the first entry with the same group-id.
420 */
421 while (pq > screen->perfcntr_queries) {
422 pq--;
423 if (pq->group_id == entry->gid)
424 entry->cid++;
425 }
426
427 if (counters_per_group[entry->gid] >=
428 screen->perfcntr_groups[entry->gid].num_counters) {
429 debug_printf("too many counters for group %u\n", entry->gid);
430 goto error;
431 }
432
433 counters_per_group[entry->gid]++;
434 }
435
436 q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
437 aq = fd_acc_query(q);
438
439 /* sample buffer size is based on # of queries: */
440 aq->size = num_queries * sizeof(struct fd5_query_sample);
441 aq->query_data = data;
442
443 return (struct pipe_query *)q;
444
445 error:
446 free(data);
447 return NULL;
448 }
449
450 void
451 fd5_query_context_init(struct pipe_context *pctx)
452 {
453 struct fd_context *ctx = fd_context(pctx);
454
455 ctx->create_query = fd_acc_create_query;
456 ctx->query_set_stage = fd_acc_query_set_stage;
457
458 pctx->create_batch_query = fd5_create_batch_query;
459
460 fd_acc_query_register_provider(pctx, &occlusion_counter);
461 fd_acc_query_register_provider(pctx, &occlusion_predicate);
462 fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
463
464 fd_acc_query_register_provider(pctx, &time_elapsed);
465 fd_acc_query_register_provider(pctx, &timestamp);
466 }