iris: Annotate all BO uses with domain and sequence number information.
[mesa.git] src/gallium/drivers/iris/iris_query.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

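/**
 * Driver-side state for a single query object.
 *
 * Most queries snapshot a counter at begin and end time into a small buffer
 * allocated from the query uploader (query_state_ref / map), and compute the
 * result as the difference of the two snapshots.  Batch-style performance
 * queries instead wrap an iris_monitor_object.
 */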
struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncobj *syncobj;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

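/**
 * Return a gen_mi_builder operand referencing a 64-bit field in the query's
 * snapshot buffer, e.g.:
 *
 *    query_mem64(q, offsetof(struct iris_query_snapshots, start))
 *
 * The address is flagged as an IRIS_DOMAIN_OTHER_WRITE access so the
 * batch's dependency tracking knows about the write.
 */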
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true,
      .access = IRIS_DOMAIN_OTHER_WRITE
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

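/**
 * Write the "snapshots landed" flag for a query.
 *
 * Non-pipelined queries can write the flag directly via store_data_imm64;
 * pipelined queries use a PIPE_CONTROL immediate write with FLUSH_ENABLE so
 * the flag lands only after the query results do.
 */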
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      batch->screen->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write a pipelined snapshot (PS_DEPTH_COUNT or a timestamp) to the given
 * offset in the query buffer via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

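/**
 * Record a counter snapshot for the query at the given buffer offset.
 *
 * Occlusion and timestamp queries are written via pipelined PIPE_CONTROL
 * writes; statistics-style queries read the relevant counter register with
 * store_register_mem64.  Non-pipelined queries first stall so the counters
 * reflect all previously submitted work.
 */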
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      batch->screen->vtbl.store_register_mem64(batch,
                                               q->index == 0 ?
                                               GENX(CL_INVOCATION_COUNT_num) :
                                               SO_PRIM_STORAGE_NEEDED(q->index),
                                               bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      batch->screen->vtbl.store_register_mem64(batch,
                                               SO_NUM_PRIMS_WRITTEN(q->index),
                                               bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      batch->screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

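/**
 * Record the begin (end == false) or end (end == true) snapshots for a
 * streamout overflow query: SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED
 * for each stream the query covers (one stream for SO_OVERFLOW_PREDICATE,
 * all four for SO_OVERFLOW_ANY_PREDICATE).
 */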
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      batch->screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                               bo, g_idx, false);
      batch->screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                               bo, w_idx, false);
   }
}

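/**
 * Return time1 - time0, accounting for the fact that the raw GPU timestamp
 * is only TIMESTAMP_BITS (36) bits wide and wraps around.  For example, if
 * time0 is just below 2^36 and time1 has wrapped back to a small value,
 * the delta is (2^36 - time0) + time1 rather than a huge bogus number.
 */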
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

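/**
 * Did stream \p s overflow during the query?  It overflowed if the amount
 * of storage needed for primitives changed by a different amount than the
 * number of primitives actually written, i.e. some primitives did not fit
 * in the streamout buffers.
 */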
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

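/**
 * Compute q->result on the CPU from the mapped start/end snapshots and mark
 * the query ready.  Timestamps are scaled to nanoseconds and masked to
 * TIMESTAMP_BITS; Gen8 PS invocation counts are divided by 4 per
 * WaDividePSInvocationCountBy4.
 */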
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                         gen_mi_isub(b, C(prim_storage_needed, 1),
                                        C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                              gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

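/**
 * Create a normal (non-monitor) query.  Compute shader invocation statistics
 * are tracked on the compute batch; everything else lives on the render
 * batch.
 */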
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;
   q->monitor = NULL;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

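/**
 * Create a driver-specific "batch query" grouping several performance
 * counters; the actual counter handling is delegated to the iris_monitor
 * code.
 */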
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncobj_reference(screen, &query->syncobj, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   free(query);
}

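/**
 * Begin a query: allocate fresh snapshot space from the query uploader,
 * clear the "snapshots landed" flag, and record the starting snapshot(s).
 */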
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

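/**
 * End a query: record the ending snapshot(s) and mark availability.
 *
 * TIMESTAMP queries have no begin, so the buffer is allocated here and a
 * single snapshot is taken; GPU_FINISHED queries just grab a deferred fence
 * from the flush.
 */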
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncobj(batch, &q->syncobj);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncobj(batch, &q->syncobj);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

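/**
 * Fetch a query result on the CPU.  If the snapshots haven't landed yet,
 * this flushes the batch that produces them (if still queued) and then
 * either waits on the query's syncobj or returns false, depending on
 * \p wait.
 */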
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncobj == iris_batch_get_signal_syncobj(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

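/**
 * Write a query result (or its availability, for index == -1) into a buffer
 * object.  If the result is already known on the CPU it is stored directly;
 * otherwise it is computed on the GPU with MI_MATH, optionally predicated
 * on the snapshots having landed when the caller doesn't want to wait.
 */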
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncobj == iris_batch_get_signal_syncobj(batch))
         iris_batch_flush(batch);

      batch->screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
                                       query_bo, snapshots_landed_offset,
                                       result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         batch->screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         batch->screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   iris_batch_sync_region_start(batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ?
      gen_mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :
      gen_mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }

   iris_batch_sync_region_end(batch);
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_WM;
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
                             IRIS_STAGE_DIRTY_TCS |
                             IRIS_STAGE_DIRTY_TES |
                             IRIS_STAGE_DIRTY_VS;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_batch_sync_region_start(batch);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;

   iris_batch_sync_region_end(batch);
}

static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

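/**
 * Resolve conditional rendering on the CPU: if we're currently using
 * hardware predication (USE_BIT), wait for the query result and turn it
 * into a plain RENDER / DONT_RENDER decision.  Hooked up via
 * screen->vtbl.resolve_conditional_render.
 */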
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   screen->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}