/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

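/**
 * Driver representation of a query object, holding the snapshot buffer
 * reference, CPU-side result state, and synchronization information.
 */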
struct iris_query {
   enum pipe_query_type type;
   int index;

   /** Has the result been computed and stored in \c result? */
   bool ready;

   /** Did we stall or flush the pipeline when writing this query's snapshots? */
   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   /** Which batch (render or compute) the query runs on. */
   int batch_idx;

   /** Performance monitor for batch queries; NULL for normal queries. */
   struct iris_monitor_object *monitor;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

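/**
 * Return an MI value pointing at a 64-bit field at \p offset within the
 * query's snapshot buffer.
 */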
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

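/**
 * Set the query's "snapshots landed" availability flag, ordered after any
 * pending snapshot writes (via PIPE_CONTROL for pipelined queries).
 */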
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write PS_DEPTH_COUNT or TIMESTAMP into the query's buffer at the given
 * offset via a pipelined PIPE_CONTROL write.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

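/**
 * Write the appropriate counter snapshot for a query (the start or end
 * value, depending on \p offset) into the query's snapshot buffer.
 */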
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

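/**
 * Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED registers
 * for each stream covered by a streamout overflow query.
 */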
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

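/**
 * Return the difference between two raw timestamps, accounting for the
 * counter wrapping around at TIMESTAMP_BITS bits.
 */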
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

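/**
 * A stream overflowed if the number of primitives needing storage differs
 * from the number actually written during the query.
 */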
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

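/**
 * Compute q->result on the CPU from the mapped start/end snapshots and
 * mark the query as ready.
 */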
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                         gen_mi_isub(b, C(prim_storage_needed, 1),
                                        C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                              gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

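/**
 * The pipe->create_query() driver hook.  Allocates a query object and
 * chooses which batch (render or compute) it will run on.
 */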
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;
   q->monitor = NULL;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

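/**
 * The pipe->create_batch_query() driver hook, used for groups of
 * driver-specific performance monitor counters.
 */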
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncpt_reference(screen, &query->syncpt, NULL);
   }
   free(query);
}

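/**
 * The pipe->begin_query() driver hook.  Allocates the snapshot buffer
 * and records the starting counter values.
 */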
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

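/**
 * The pipe->end_query() driver hook.  Records the ending counter values
 * and arranges for the availability flag to be written.
 */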
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

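/**
 * The pipe->get_query_result_resource() driver hook.  Writes the query
 * result (or, for index == -1, its availability) into a buffer object,
 * using MI_MATH on the GPU if the result isn't already known on the CPU.
 */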
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

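/**
 * The pipe->set_active_query_state() driver hook.  Toggles whether the
 * pipeline statistics counters should accumulate.
 */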
static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

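/**
 * Calculate a conditional rendering predicate on the GPU from a query's
 * snapshots and load it into MI_PREDICATE_RESULT, also saving it to
 * memory so compute dispatches can reload it later.
 */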
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

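/**
 * The pipe->render_condition() driver hook.  Sets up conditional rendering
 * from a query's result, using the CPU value if it's already available and
 * GPU predication otherwise.
 */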
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

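/**
 * Resolve an outstanding GPU-based render condition into a CPU yes/no
 * decision by waiting for the query result.
 */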
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}