iris/perf: implement iris_create_monitor_object
mesa.git: src/gallium/drivers/iris/iris_query.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support. This allows measuring various simple statistics
 * via counters on the GPU. We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;

   struct iris_monitor_object *monitor;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

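/**
 * Return a gen_mi_value referencing the 64-bit field at @offset within
 * this query's snapshot buffer (query_state_ref), marked writable.
 */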
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

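/**
 * Mark a query's result as available by setting snapshots_landed = true.
 *
 * Non-pipelined queries get a simple immediate write; pipelined queries
 * issue the write via PIPE_CONTROL so it lands only after the actual
 * counter snapshots do.
 */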
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write a pipelined counter snapshot (PS_DEPTH_COUNT or TIMESTAMP,
 * depending on @flags) to the given offset in the query buffer via a
 * PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

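/**
 * Write the counter snapshot for this query type to the given offset in
 * the query buffer (the "start" or "end" slot).  Non-pipelined counters
 * are preceded by a stall so that prior work is fully accounted for.
 */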
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

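/**
 * Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED counters
 * for each stream covered by a stream-output overflow query.  @end
 * selects whether the values go in the start (false) or end (true) slots.
 */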
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

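/**
 * Compute the difference between two raw timestamps, accounting for the
 * counter wrapping around at 2^TIMESTAMP_BITS.
 */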
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

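/**
 * A stream has overflowed if the primitives needing storage and the
 * primitives actually written diverge between the start/end snapshots.
 */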
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

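/**
 * Compute q->result from the CPU-visible snapshots and mark the query
 * ready.  Callers must ensure the snapshots have actually landed first.
 */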
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                      gen_mi_isub(b, C(prim_storage_needed, 1),
                                  C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

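/**
 * Predicate-style queries produce a boolean result rather than a counter.
 */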
static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                           gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

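/**
 * Allocate a new query object.  Compute shader invocation statistics are
 * measured on the compute batch; all other queries use the render batch.
 */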
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   iris_syncpt_reference(screen, &query->syncpt, NULL);
   free(query);
}


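/**
 * Begin a query: allocate snapshot space from the query buffer uploader
 * and record the starting counter value(s).
 */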
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

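/**
 * End a query: record the ending counter value(s) and mark the result as
 * available.  TIMESTAMP queries have no begin, so the snapshot space is
 * allocated and the single "start" value written here instead.
 */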
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

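/**
 * Fetch a query's result, flushing the batch that produces it if needed
 * and, when @wait is set, waiting on its syncpoint until the snapshots
 * have landed.
 */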
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

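/**
 * Write a query's result (or its availability, for index == -1) into a
 * buffer object at the given offset.  If the result is already known on
 * the CPU, it is stored directly; otherwise it is computed on the GPU
 * using MI_MATH, optionally predicated on the snapshots having landed.
 */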
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   //      have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

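/**
 * Compute the query result on the GPU and load it into MI_PREDICATE_RESULT
 * for conditional rendering, also saving it to memory so a later compute
 * dispatch can reload it (see iris_launch_grid).
 */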
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

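/**
 * Set up conditional rendering based on a query.  If the result is
 * already known on the CPU, rendering is enabled or disabled directly;
 * otherwise the predicate is computed on the GPU, which may demote a
 * "no wait" request to a wait.
 */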
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

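/**
 * Resolve the conditional-rendering predicate on the CPU: wait for the
 * query result and convert it into a plain render/don't-render decision
 * (used where hardware predication isn't an option).
 */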
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

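/**
 * Install the query-related entry points on the pipe_context and the
 * driver vtable.
 */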
void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}