1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * ============================= GENXML CODE =============================
27 * [This file is compiled once per generation.]
28 * =======================================================================
29 *
30 * Query object support. This allows measuring various simple statistics
31 * via counters on the GPU. We use GenX code for MI_MATH calculations.
32 */
33
34 #include <stdio.h>
35 #include <errno.h>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_context.h"
39 #include "pipe/p_screen.h"
40 #include "util/u_inlines.h"
41 #include "util/u_upload_mgr.h"
42 #include "iris_context.h"
43 #include "iris_defines.h"
44 #include "iris_fence.h"
45 #include "iris_monitor.h"
46 #include "iris_resource.h"
47 #include "iris_screen.h"
48
49 #include "iris_genx_macros.h"
50
51 #define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
52 #define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
53
54 struct iris_query {
55 enum pipe_query_type type;
56 int index;
57
58 bool ready;
59
60 bool stalled;
61
62 uint64_t result;
63
64 struct iris_state_ref query_state_ref;
65 struct iris_query_snapshots *map;
66 struct iris_syncobj *syncobj;
67
68 int batch_idx;
69
70 struct iris_monitor_object *monitor;
71
72 /* Fence for PIPE_QUERY_GPU_FINISHED. */
73 struct pipe_fence_handle *fence;
74 };
75
76 struct iris_query_snapshots {
77 /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
78 uint64_t predicate_result;
79
80 /** Have the start/end snapshots landed? */
81 uint64_t snapshots_landed;
82
83 /** Starting and ending counter snapshots */
84 uint64_t start;
85 uint64_t end;
86 };
87
88 struct iris_query_so_overflow {
89 uint64_t predicate_result;
90 uint64_t snapshots_landed;
91
92 struct {
93 uint64_t prim_storage_needed[2];
94 uint64_t num_prims[2];
95 } stream[4];
96 };
97
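/**
 * Wrap a 64-bit field of this query's snapshot buffer as a gen_mi_builder
 * operand, so MI_MATH expressions can read it or store results into it.
 */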
98 static struct gen_mi_value
99 query_mem64(struct iris_query *q, uint32_t offset)
100 {
101 struct iris_address addr = {
102 .bo = iris_resource_bo(q->query_state_ref.res),
103 .offset = q->query_state_ref.offset + offset,
104 .access = IRIS_DOMAIN_OTHER_WRITE
105 };
106 return gen_mi_mem64(addr);
107 }
108
109 /**
110 * Is this type of query written by PIPE_CONTROL?
111 */
112 static bool
113 iris_is_query_pipelined(struct iris_query *q)
114 {
115 switch (q->type) {
116 case PIPE_QUERY_OCCLUSION_COUNTER:
117 case PIPE_QUERY_OCCLUSION_PREDICATE:
118 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
119 case PIPE_QUERY_TIMESTAMP:
120 case PIPE_QUERY_TIMESTAMP_DISJOINT:
121 case PIPE_QUERY_TIME_ELAPSED:
122 return true;
123
124 default:
125 return false;
126 }
127 }
128
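/**
 * Set the query's "snapshots landed" flag.  Pipelined queries write it via
 * a PIPE_CONTROL with FLUSH_ENABLE so the flag lands only after the counter
 * snapshots themselves; other queries use a plain immediate-data store.
 */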
129 static void
130 mark_available(struct iris_context *ice, struct iris_query *q)
131 {
132 struct iris_batch *batch = &ice->batches[q->batch_idx];
133 unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
134 unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
135 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
136 offset += q->query_state_ref.offset;
137
138 if (!iris_is_query_pipelined(q)) {
139 batch->screen->vtbl.store_data_imm64(batch, bo, offset, true);
140 } else {
141 /* Order available *after* the query results. */
142 flags |= PIPE_CONTROL_FLUSH_ENABLE;
143 iris_emit_pipe_control_write(batch, "query: mark available",
144 flags, bo, offset, true);
145 }
146 }
147
148 /**
149  * Write PS_DEPTH_COUNT or TIMESTAMP to the query buffer at 'offset' via a PIPE_CONTROL.
150 */
151 static void
152 iris_pipelined_write(struct iris_batch *batch,
153 struct iris_query *q,
154 enum pipe_control_flags flags,
155 unsigned offset)
156 {
157 const struct gen_device_info *devinfo = &batch->screen->devinfo;
158 const unsigned optional_cs_stall =
159 GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
160 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
161
162 iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
163 flags | optional_cs_stall,
164 bo, offset, 0ull);
165 }
166
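/**
 * Record a counter snapshot (a "start" or "end" value) at the given offset
 * in the query buffer.  Pipelined query types use a PIPE_CONTROL write of
 * PS_DEPTH_COUNT or TIMESTAMP; statistics queries stall and then store the
 * relevant counter register to memory.
 */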
167 static void
168 write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
169 {
170 struct iris_batch *batch = &ice->batches[q->batch_idx];
171 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
172
173 if (!iris_is_query_pipelined(q)) {
174 iris_emit_pipe_control_flush(batch,
175 "query: non-pipelined snapshot write",
176 PIPE_CONTROL_CS_STALL |
177 PIPE_CONTROL_STALL_AT_SCOREBOARD);
178 q->stalled = true;
179 }
180
181 switch (q->type) {
182 case PIPE_QUERY_OCCLUSION_COUNTER:
183 case PIPE_QUERY_OCCLUSION_PREDICATE:
184 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
185 if (GEN_GEN >= 10) {
186 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
187 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
188 * Count sync operation."
189 */
190 iris_emit_pipe_control_flush(batch,
191 "workaround: depth stall before writing "
192 "PS_DEPTH_COUNT",
193 PIPE_CONTROL_DEPTH_STALL);
194 }
195 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
196 PIPE_CONTROL_WRITE_DEPTH_COUNT |
197 PIPE_CONTROL_DEPTH_STALL,
198 offset);
199 break;
200 case PIPE_QUERY_TIME_ELAPSED:
201 case PIPE_QUERY_TIMESTAMP:
202 case PIPE_QUERY_TIMESTAMP_DISJOINT:
203 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
204 PIPE_CONTROL_WRITE_TIMESTAMP,
205 offset);
206 break;
207 case PIPE_QUERY_PRIMITIVES_GENERATED:
208 batch->screen->vtbl.store_register_mem64(batch,
209 q->index == 0 ?
210 GENX(CL_INVOCATION_COUNT_num) :
211 SO_PRIM_STORAGE_NEEDED(q->index),
212 bo, offset, false);
213 break;
214 case PIPE_QUERY_PRIMITIVES_EMITTED:
215 batch->screen->vtbl.store_register_mem64(batch,
216 SO_NUM_PRIMS_WRITTEN(q->index),
217 bo, offset, false);
218 break;
219 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
220 static const uint32_t index_to_reg[] = {
221 GENX(IA_VERTICES_COUNT_num),
222 GENX(IA_PRIMITIVES_COUNT_num),
223 GENX(VS_INVOCATION_COUNT_num),
224 GENX(GS_INVOCATION_COUNT_num),
225 GENX(GS_PRIMITIVES_COUNT_num),
226 GENX(CL_INVOCATION_COUNT_num),
227 GENX(CL_PRIMITIVES_COUNT_num),
228 GENX(PS_INVOCATION_COUNT_num),
229 GENX(HS_INVOCATION_COUNT_num),
230 GENX(DS_INVOCATION_COUNT_num),
231 GENX(CS_INVOCATION_COUNT_num),
232 };
233 const uint32_t reg = index_to_reg[q->index];
234
235 batch->screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
236 break;
237 }
238 default:
239 assert(false);
240 }
241 }
242
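/**
 * Snapshot SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED for every stream
 * covered by an SO overflow query: one stream for SO_OVERFLOW_PREDICATE, or
 * all four for the ANY_PREDICATE variant.
 */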
243 static void
244 write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
245 {
246 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
247 uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
248 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
249 uint32_t offset = q->query_state_ref.offset;
250
251 iris_emit_pipe_control_flush(batch,
252 "query: write SO overflow snapshots",
253 PIPE_CONTROL_CS_STALL |
254 PIPE_CONTROL_STALL_AT_SCOREBOARD);
255 for (uint32_t i = 0; i < count; i++) {
256 int s = q->index + i;
257 int g_idx = offset + offsetof(struct iris_query_so_overflow,
258 stream[s].num_prims[end]);
259 int w_idx = offset + offsetof(struct iris_query_so_overflow,
260 stream[s].prim_storage_needed[end]);
261 batch->screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
262 bo, g_idx, false);
263 batch->screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
264 bo, w_idx, false);
265 }
266 }
267
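/**
 * Compute the delta between two raw timestamps, allowing for the counter
 * being only TIMESTAMP_BITS wide: if the end value is smaller than the
 * start, assume the counter wrapped around exactly once.
 */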
268 static uint64_t
269 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
270 {
271 if (time0 > time1) {
272 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
273 } else {
274 return time1 - time0;
275 }
276 }
277
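/**
 * A stream overflowed if the primitives needing storage changed by a
 * different amount than the primitives actually written during the query.
 */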
278 static bool
279 stream_overflowed(struct iris_query_so_overflow *so, int s)
280 {
281 return (so->stream[s].prim_storage_needed[1] -
282 so->stream[s].prim_storage_needed[0]) !=
283 (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
284 }
285
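/**
 * Turn the landed snapshots into the API-visible result on the CPU:
 * deltas for counters, booleans for predicates, and scaled nanoseconds for
 * timestamps, including the Gen8 PS invocations divide-by-4 workaround.
 */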
286 static void
287 calculate_result_on_cpu(const struct gen_device_info *devinfo,
288 struct iris_query *q)
289 {
290 switch (q->type) {
291 case PIPE_QUERY_OCCLUSION_PREDICATE:
292 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
293 q->result = q->map->end != q->map->start;
294 break;
295 case PIPE_QUERY_TIMESTAMP:
296 case PIPE_QUERY_TIMESTAMP_DISJOINT:
297 /* The timestamp is the single starting snapshot. */
298 q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
299 q->result &= (1ull << TIMESTAMP_BITS) - 1;
300 break;
301 case PIPE_QUERY_TIME_ELAPSED:
302 q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
303 q->result = gen_device_info_timebase_scale(devinfo, q->result);
304 q->result &= (1ull << TIMESTAMP_BITS) - 1;
305 break;
306 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
307 q->result = stream_overflowed((void *) q->map, q->index);
308 break;
309 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
310 q->result = false;
311 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
312 q->result |= stream_overflowed((void *) q->map, i);
313 break;
314 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
315 q->result = q->map->end - q->map->start;
316
317 /* WaDividePSInvocationCountBy4:HSW,BDW */
318 if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
319 q->result /= 4;
320 break;
321 case PIPE_QUERY_OCCLUSION_COUNTER:
322 case PIPE_QUERY_PRIMITIVES_GENERATED:
323 case PIPE_QUERY_PRIMITIVES_EMITTED:
324 default:
325 q->result = q->map->end - q->map->start;
326 break;
327 }
328
329 q->ready = true;
330 }
331
332 /**
333 * Calculate the streamout overflow for stream \p idx:
334 *
335 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
336 */
337 static struct gen_mi_value
338 calc_overflow_for_stream(struct gen_mi_builder *b,
339 struct iris_query *q,
340 int idx)
341 {
342 #define C(counter, i) query_mem64(q, \
343 offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))
344
345 return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
346 gen_mi_isub(b, C(prim_storage_needed, 1),
347 C(prim_storage_needed, 0)));
348 #undef C
349 }
350
351 /**
352 * Calculate whether any stream has overflowed.
353 */
354 static struct gen_mi_value
355 calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
356 {
357 struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
358 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
359 stream_result[i] = calc_overflow_for_stream(b, q, i);
360
361 struct gen_mi_value result = stream_result[0];
362 for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
363 result = gen_mi_ior(b, result, stream_result[i]);
364
365 return result;
366 }
367
368 static bool
369 query_is_boolean(enum pipe_query_type type)
370 {
371 switch (type) {
372 case PIPE_QUERY_OCCLUSION_PREDICATE:
373 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
374 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
375 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
376 return true;
377 default:
378 return false;
379 }
380 }
381
382 /**
383 * Calculate the result using MI_MATH.
384 */
385 static struct gen_mi_value
386 calculate_result_on_gpu(const struct gen_device_info *devinfo,
387 struct gen_mi_builder *b,
388 struct iris_query *q)
389 {
390 struct gen_mi_value result;
391 struct gen_mi_value start_val =
392 query_mem64(q, offsetof(struct iris_query_snapshots, start));
393 struct gen_mi_value end_val =
394 query_mem64(q, offsetof(struct iris_query_snapshots, end));
395
396 switch (q->type) {
397 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
398 result = calc_overflow_for_stream(b, q, q->index);
399 break;
400 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
401 result = calc_overflow_any_stream(b, q);
402 break;
403 case PIPE_QUERY_TIMESTAMP: {
404 /* TODO: This discards any fractional bits of the timebase scale.
405 * We would need to do a bit of fixed point math on the CS ALU, or
406 * launch an actual shader to calculate this with full precision.
407 */
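      /* For example, on a part with a 12 MHz timestamp clock the exact
       * scale would be 1000000000 / 12000000 = 83.33 ns per tick, but the
       * integer division below truncates that to 83, so GPU-computed
       * timestamps lose a little precision versus the CPU path in
       * calculate_result_on_cpu().
       */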
408 uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
409 result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
410 gen_mi_imul_imm(b, start_val, scale));
411 break;
412 }
413 case PIPE_QUERY_TIME_ELAPSED: {
414 /* TODO: This discards fractional bits (see above). */
415 uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
416 result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
417 break;
418 }
419 default:
420 result = gen_mi_isub(b, end_val, start_val);
421 break;
422 }
423
424 /* WaDividePSInvocationCountBy4:HSW,BDW */
425 if (GEN_GEN == 8 &&
426 q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
427 q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
428 result = gen_mi_ushr32_imm(b, result, 2);
429
430 if (query_is_boolean(q->type))
431 result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));
432
433 return result;
434 }
435
436 static struct pipe_query *
437 iris_create_query(struct pipe_context *ctx,
438 unsigned query_type,
439 unsigned index)
440 {
441 struct iris_query *q = calloc(1, sizeof(struct iris_query));
442
443 q->type = query_type;
444 q->index = index;
445 q->monitor = NULL;
446
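   /* Compute shader invocation counts are read on the compute batch;
    * every other query type is tracked on the render batch.
    */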
447 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
448 q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
449 q->batch_idx = IRIS_BATCH_COMPUTE;
450 else
451 q->batch_idx = IRIS_BATCH_RENDER;
452 return (struct pipe_query *) q;
453 }
454
455 static struct pipe_query *
456 iris_create_batch_query(struct pipe_context *ctx,
457 unsigned num_queries,
458 unsigned *query_types)
459 {
460 struct iris_context *ice = (void *) ctx;
461 struct iris_query *q = calloc(1, sizeof(struct iris_query));
462 if (unlikely(!q))
463 return NULL;
464 q->type = PIPE_QUERY_DRIVER_SPECIFIC;
465 q->index = -1;
466 q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
467 if (unlikely(!q->monitor)) {
468 free(q);
469 return NULL;
470 }
471
472 return (struct pipe_query *) q;
473 }
474
475 static void
476 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
477 {
478 struct iris_query *query = (void *) p_query;
479 struct iris_screen *screen = (void *) ctx->screen;
480 if (query->monitor) {
481 iris_destroy_monitor_object(ctx, query->monitor);
482 query->monitor = NULL;
483 } else {
484 iris_syncobj_reference(screen, &query->syncobj, NULL);
485 screen->base.fence_reference(ctx->screen, &query->fence, NULL);
486 }
487 free(query);
488 }
489
490
491 static bool
492 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
493 {
494 struct iris_context *ice = (void *) ctx;
495 struct iris_query *q = (void *) query;
496
497 if (q->monitor)
498 return iris_begin_monitor(ctx, q->monitor);
499
500 void *ptr = NULL;
501 uint32_t size;
502
503 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
504 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
505 size = sizeof(struct iris_query_so_overflow);
506 else
507 size = sizeof(struct iris_query_snapshots);
508
509 u_upload_alloc(ice->query_buffer_uploader, 0,
510 size, size, &q->query_state_ref.offset,
511 &q->query_state_ref.res, &ptr);
512
513 if (!iris_resource_bo(q->query_state_ref.res))
514 return false;
515
516 q->map = ptr;
517 if (!q->map)
518 return false;
519
520 q->result = 0ull;
521 q->ready = false;
522 WRITE_ONCE(q->map->snapshots_landed, false);
523
524 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
525 ice->state.prims_generated_query_active = true;
526 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
527 }
528
529 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
530 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
531 write_overflow_values(ice, q, false);
532 else
533 write_value(ice, q,
534 q->query_state_ref.offset +
535 offsetof(struct iris_query_snapshots, start));
536
537 return true;
538 }
539
540 static bool
541 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
542 {
543 struct iris_context *ice = (void *) ctx;
544 struct iris_query *q = (void *) query;
545
546 if (q->monitor)
547 return iris_end_monitor(ctx, q->monitor);
548
549 if (q->type == PIPE_QUERY_GPU_FINISHED) {
550 ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
551 return true;
552 }
553
554 struct iris_batch *batch = &ice->batches[q->batch_idx];
555
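   /* TIMESTAMP is an end-only query with no begin_query() snapshot, so
    * allocate the buffer and write its single snapshot now.  It lands in
    * the "start" field, which calculate_result_on_cpu() reads back.
    */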
556 if (q->type == PIPE_QUERY_TIMESTAMP) {
557 iris_begin_query(ctx, query);
558 iris_batch_reference_signal_syncobj(batch, &q->syncobj);
559 mark_available(ice, q);
560 return true;
561 }
562
563 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
564 ice->state.prims_generated_query_active = false;
565 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
566 }
567
568 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
569 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
570 write_overflow_values(ice, q, true);
571 else
572 write_value(ice, q,
573 q->query_state_ref.offset +
574 offsetof(struct iris_query_snapshots, end));
575
576 iris_batch_reference_signal_syncobj(batch, &q->syncobj);
577 mark_available(ice, q);
578
579 return true;
580 }
581
582 /**
583 * See if the snapshots have landed for a query, and if so, compute the
584 * result and mark it ready. Does not flush (unlike iris_get_query_result).
585 */
586 static void
587 iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
588 {
589 struct iris_screen *screen = (void *) ice->ctx.screen;
590 const struct gen_device_info *devinfo = &screen->devinfo;
591
592 if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
593 calculate_result_on_cpu(devinfo, q);
594 }
595 }
596
597 static bool
598 iris_get_query_result(struct pipe_context *ctx,
599 struct pipe_query *query,
600 bool wait,
601 union pipe_query_result *result)
602 {
603 struct iris_context *ice = (void *) ctx;
604 struct iris_query *q = (void *) query;
605
606 if (q->monitor)
607 return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);
608
609 struct iris_screen *screen = (void *) ctx->screen;
610 const struct gen_device_info *devinfo = &screen->devinfo;
611
612 if (unlikely(screen->no_hw)) {
613 result->u64 = 0;
614 return true;
615 }
616
617 if (q->type == PIPE_QUERY_GPU_FINISHED) {
618 struct pipe_screen *screen = ctx->screen;
619
620 result->b = screen->fence_finish(screen, ctx, q->fence,
621 wait ? PIPE_TIMEOUT_INFINITE : 0);
622 return result->b;
623 }
624
625 if (!q->ready) {
626 struct iris_batch *batch = &ice->batches[q->batch_idx];
627 if (q->syncobj == iris_batch_get_signal_syncobj(batch))
628 iris_batch_flush(batch);
629
630 while (!READ_ONCE(q->map->snapshots_landed)) {
631 if (wait)
632 iris_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
633 else
634 return false;
635 }
636
637 assert(READ_ONCE(q->map->snapshots_landed));
638 calculate_result_on_cpu(devinfo, q);
639 }
640
641 assert(q->ready);
642
643 result->u64 = q->result;
644
645 return true;
646 }
647
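/**
 * Write a query result (or, for index == -1, its availability) into a
 * buffer object: stored directly when the value is already known on the
 * CPU, otherwise computed on the GPU with MI_MATH and optionally predicated
 * on the snapshots having landed.
 */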
648 static void
649 iris_get_query_result_resource(struct pipe_context *ctx,
650 struct pipe_query *query,
651 bool wait,
652 enum pipe_query_value_type result_type,
653 int index,
654 struct pipe_resource *p_res,
655 unsigned offset)
656 {
657 struct iris_context *ice = (void *) ctx;
658 struct iris_query *q = (void *) query;
659 struct iris_batch *batch = &ice->batches[q->batch_idx];
660 const struct gen_device_info *devinfo = &batch->screen->devinfo;
661 struct iris_resource *res = (void *) p_res;
662 struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
663 struct iris_bo *dst_bo = iris_resource_bo(p_res);
664 unsigned snapshots_landed_offset =
665 offsetof(struct iris_query_snapshots, snapshots_landed);
666
667 res->bind_history |= PIPE_BIND_QUERY_BUFFER;
668
669 if (index == -1) {
670 /* They're asking for the availability of the result. If we still
671 * have commands queued up which produce the result, submit them
672 * now so that progress happens. Either way, copy the snapshots
673 * landed field to the destination resource.
674 */
675 if (q->syncobj == iris_batch_get_signal_syncobj(batch))
676 iris_batch_flush(batch);
677
678 batch->screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
679 query_bo, snapshots_landed_offset,
680 result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
681 return;
682 }
683
684 if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
685 /* The final snapshots happen to have landed, so let's just compute
686 * the result on the CPU now...
687 */
688 calculate_result_on_cpu(devinfo, q);
689 }
690
691 if (q->ready) {
692 /* We happen to have the result on the CPU, so just copy it. */
693 if (result_type <= PIPE_QUERY_TYPE_U32) {
694 batch->screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
695 } else {
696 batch->screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
697 }
698
699       /* Make sure the result lands before they bind the QBO elsewhere
700 * and use the result.
701 */
702 // XXX: Why? i965 doesn't do this.
703 iris_emit_pipe_control_flush(batch,
704 "query: unknown QBO flushing hack",
705 PIPE_CONTROL_CS_STALL);
706 return;
707 }
708
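   /* If the caller doesn't want to wait and we haven't already stalled for
    * the snapshots, predicate the store below on snapshots_landed so we
    * never write a result computed from incomplete snapshots.
    */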
709 bool predicated = !wait && !q->stalled;
710
711 struct gen_mi_builder b;
712 gen_mi_builder_init(&b, batch);
713
714 iris_batch_sync_region_start(batch);
715
716 struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
717 struct gen_mi_value dst =
718 result_type <= PIPE_QUERY_TYPE_U32 ?
719 gen_mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :
720 gen_mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));
721
722 if (predicated) {
723 gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
724 gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
725 gen_mi_store_if(&b, dst, result);
726 } else {
727 gen_mi_store(&b, dst, result);
728 }
729
730 iris_batch_sync_region_end(batch);
731 }
732
733 static void
734 iris_set_active_query_state(struct pipe_context *ctx, bool enable)
735 {
736 struct iris_context *ice = (void *) ctx;
737
738 if (ice->state.statistics_counters_enabled == enable)
739 return;
740
741 // XXX: most packets aren't paying attention to this yet, because it'd
742 // have to be done dynamically at draw time, which is a pain
743 ice->state.statistics_counters_enabled = enable;
744 ice->state.dirty |= IRIS_DIRTY_CLIP |
745 IRIS_DIRTY_RASTER |
746 IRIS_DIRTY_STREAMOUT |
747 IRIS_DIRTY_WM;
748 ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
749 IRIS_STAGE_DIRTY_TCS |
750 IRIS_STAGE_DIRTY_TES |
751 IRIS_STAGE_DIRTY_VS;
752 }
753
754 static void
755 set_predicate_enable(struct iris_context *ice, bool value)
756 {
757 if (value)
758 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
759 else
760 ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
761 }
762
763 static void
764 set_predicate_for_result(struct iris_context *ice,
765 struct iris_query *q,
766 bool inverted)
767 {
768 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
769 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
770
771 iris_batch_sync_region_start(batch);
772
773 /* The CPU doesn't have the query result yet; use hardware predication */
774 ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
775
776 /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
777 iris_emit_pipe_control_flush(batch,
778 "conditional rendering: set predicate",
779 PIPE_CONTROL_FLUSH_ENABLE);
780 q->stalled = true;
781
782 struct gen_mi_builder b;
783 gen_mi_builder_init(&b, batch);
784
785 struct gen_mi_value result;
786
787 switch (q->type) {
788 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
789 result = calc_overflow_for_stream(&b, q, q->index);
790 break;
791 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
792 result = calc_overflow_any_stream(&b, q);
793 break;
794 default: {
795 /* PIPE_QUERY_OCCLUSION_* */
796 struct gen_mi_value start =
797 query_mem64(q, offsetof(struct iris_query_snapshots, start));
798 struct gen_mi_value end =
799 query_mem64(q, offsetof(struct iris_query_snapshots, end));
800 result = gen_mi_isub(&b, end, start);
801 break;
802 }
803 }
804
805 result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
806 result = gen_mi_iand(&b, result, gen_mi_imm(1));
807
808 /* We immediately set the predicate on the render batch, as all the
809 * counters come from 3D operations. However, we may need to predicate
810 * a compute dispatch, which executes in a different GEM context and has
811 * a different MI_PREDICATE_RESULT register. So, we save the result to
812 * memory and reload it in iris_launch_grid.
813 */
814 gen_mi_value_ref(&b, result);
815 gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
816 gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
817 predicate_result)), result);
818 ice->state.compute_predicate = bo;
819
820 iris_batch_sync_region_end(batch);
821 }
822
823 static void
824 iris_render_condition(struct pipe_context *ctx,
825 struct pipe_query *query,
826 bool condition,
827 enum pipe_render_cond_flag mode)
828 {
829 struct iris_context *ice = (void *) ctx;
830 struct iris_query *q = (void *) query;
831
832 /* The old condition isn't relevant; we'll update it if necessary */
833 ice->state.compute_predicate = NULL;
834 ice->condition.query = q;
835 ice->condition.condition = condition;
836
837 if (!q) {
838 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
839 return;
840 }
841
842 iris_check_query_no_flush(ice, q);
843
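   /* If the result is already known on the CPU, pick a static render /
    * don't-render state; otherwise write MI_PREDICATE_RESULT on the GPU,
    * which may demote a "no wait" request as noted below.
    */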
844 if (q->result || q->ready) {
845 set_predicate_enable(ice, (q->result != 0) ^ condition);
846 } else {
847 if (mode == PIPE_RENDER_COND_NO_WAIT ||
848 mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
849 perf_debug(&ice->dbg, "Conditional rendering demoted from "
850 "\"no wait\" to \"wait\".");
851 }
852 set_predicate_for_result(ice, q, condition);
853 }
854 }
855
856 static void
857 iris_resolve_conditional_render(struct iris_context *ice)
858 {
859 struct pipe_context *ctx = (void *) ice;
860 struct iris_query *q = ice->condition.query;
861 struct pipe_query *query = (void *) q;
862 union pipe_query_result result;
863
864 if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
865 return;
866
867 assert(q);
868
869 iris_get_query_result(ctx, query, true, &result);
870 set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
871 }
872
873 void
874 genX(init_query)(struct iris_context *ice)
875 {
876 struct pipe_context *ctx = &ice->ctx;
877 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
878
879 ctx->create_query = iris_create_query;
880 ctx->create_batch_query = iris_create_batch_query;
881 ctx->destroy_query = iris_destroy_query;
882 ctx->begin_query = iris_begin_query;
883 ctx->end_query = iris_end_query;
884 ctx->get_query_result = iris_get_query_result;
885 ctx->get_query_result_resource = iris_get_query_result_resource;
886 ctx->set_active_query_state = iris_set_active_query_state;
887 ctx->render_condition = iris_render_condition;
888
889 screen->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
890 }