/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support. This allows measuring various simple statistics
 * via counters on the GPU. We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

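/**
 * Return a gen_mi_value pointing at the 64-bit word at \p offset within
 * this query's snapshot buffer (addressed as writable).
 */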
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

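/**
 * Set the query's "snapshots landed" field, either with a direct MI store
 * (non-pipelined queries) or a PIPE_CONTROL write that is ordered after
 * the query results (pipelined queries).
 */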
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write a pipelined counter snapshot (PS_DEPTH_COUNT or the timestamp,
 * depending on \p flags) to the given buffer offset via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

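/**
 * Record the current value of the counter underlying \p q at the given
 * offset in the snapshot buffer, either with a pipelined PIPE_CONTROL
 * write (occlusion and timestamp queries) or with a register-to-memory
 * store of the relevant statistics register.
 */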
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

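/**
 * Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED registers
 * for each stream covered by an overflow query. \p end selects whether
 * this is the begin (false) or end (true) snapshot.
 */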
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

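/**
 * Compute the difference between two raw GPU timestamps, accounting for
 * the counter wrapping around at 2^TIMESTAMP_BITS.
 */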
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

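/**
 * A stream has overflowed if the number of primitives that needed storage
 * differs from the number actually written to the streamout buffers.
 */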
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

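/**
 * Compute the final query result on the CPU from the start/end snapshots,
 * including timestamp scaling and any workaround adjustments, and mark
 * the query as ready.
 */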
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                      gen_mi_isub(b, C(prim_storage_needed, 1),
                                     C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

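/**
 * Queries whose result is a true/false value rather than a counter.
 */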
static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                           gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

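/**
 * The pipe_context::create_query driver hook. Compute shader invocation
 * queries run on the compute batch; everything else uses the render batch.
 */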
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

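/**
 * The pipe_context::destroy_query driver hook. Frees the query object and
 * releases its sync point reference.
 */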
static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   iris_syncpt_reference(screen, &query->syncpt, NULL);
   free(query);
}

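/**
 * The pipe_context::begin_query driver hook. Allocates a snapshot buffer
 * from the query uploader and records the starting counter values.
 */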
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

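/**
 * The pipe_context::end_query driver hook. Records the ending counter
 * snapshots and marks the query available. Timestamp queries have no
 * begin, so the single (start) snapshot is written here by reusing
 * iris_begin_query.
 */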
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready. Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

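/**
 * The pipe_context::get_query_result driver hook. Flushes the batch that
 * produces the snapshots if needed, then either waits for them to land or
 * returns false when \p wait is not set.
 */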
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

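/**
 * The pipe_context::get_query_result_resource driver hook: write the query
 * result (or its availability, when \p index is -1) into a buffer object,
 * computing it on the CPU if it is already known and with MI_MATH on the
 * GPU otherwise.
 */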
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result. If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens. Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

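/**
 * The pipe_context::set_active_query_state driver hook, used to pause and
 * resume statistics counters. Flags the relevant state as dirty so later
 * draws can re-emit packets with counters enabled or disabled.
 */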
static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

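/* Set an already-known predicate state (render or don't render). */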
static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

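/**
 * Compute the predicate for a query result on the GPU, store it in
 * MI_PREDICATE_RESULT, and stash a copy in memory for later compute
 * dispatches (see the comment at the end of this function).
 */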
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations. However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register. So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

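/**
 * The pipe_context::render_condition driver hook. If the result is already
 * known on the CPU, simply enables or disables rendering; otherwise, sets
 * up GPU predication based on the query's snapshots.
 */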
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

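/**
 * Resolve an outstanding GPU-predicated render condition to a CPU-known
 * value, waiting for the query result if necessary; likely intended for
 * paths that cannot honor the hardware predicate bit.
 */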
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

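/**
 * Wire up the query-related driver hooks for this generation's variant of
 * the context.
 */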
void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}