iris: Add _MI_ALU helpers that don't paste
[mesa.git] / src/gallium/drivers/iris/iris_query.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
41
42 #define IA_VERTICES_COUNT 0x2310
43 #define IA_PRIMITIVES_COUNT 0x2318
44 #define VS_INVOCATION_COUNT 0x2320
45 #define HS_INVOCATION_COUNT 0x2300
46 #define DS_INVOCATION_COUNT 0x2308
47 #define GS_INVOCATION_COUNT 0x2328
48 #define GS_PRIMITIVES_COUNT 0x2330
49 #define CL_INVOCATION_COUNT 0x2338
50 #define CL_PRIMITIVES_COUNT 0x2340
51 #define PS_INVOCATION_COUNT 0x2348
52 #define CS_INVOCATION_COUNT 0x2290
53 #define PS_DEPTH_COUNT 0x2350
54
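/* Per-stream stream-output statistics registers; (n) is the vertex
 * stream index, 0 through 3.
 */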
55 #define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
56
57 #define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
58
59 #define CS_GPR(n) (0x2600 + (n) * 8)
60
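/* MI_MATH runs a small ALU program on the command streamer's 64-bit
 * general purpose registers, which CS_GPR(n) above addresses in MMIO.
 */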
61 #define MI_MATH (0x1a << 23)
62
63 #define MI_ALU_LOAD 0x080
64 #define MI_ALU_LOADINV 0x480
65 #define MI_ALU_LOAD0 0x081
66 #define MI_ALU_LOAD1 0x481
67 #define MI_ALU_ADD 0x100
68 #define MI_ALU_SUB 0x101
69 #define MI_ALU_AND 0x102
70 #define MI_ALU_OR 0x103
71 #define MI_ALU_XOR 0x104
72 #define MI_ALU_STORE 0x180
73 #define MI_ALU_STOREINV 0x580
74
75 #define MI_ALU_R0 0x00
76 #define MI_ALU_R1 0x01
77 #define MI_ALU_R2 0x02
78 #define MI_ALU_R3 0x03
79 #define MI_ALU_R4 0x04
80 #define MI_ALU_SRCA 0x20
81 #define MI_ALU_SRCB 0x21
82 #define MI_ALU_ACCU 0x31
83 #define MI_ALU_ZF 0x32
84 #define MI_ALU_CF 0x33
85
86 #define _MI_ALU(op, x, y) (((op) << 20) | ((x) << 10) | (y))
87
88 #define _MI_ALU0(op) _MI_ALU(MI_ALU_##op, 0, 0)
89 #define _MI_ALU1(op, x) _MI_ALU(MI_ALU_##op, x, 0)
90 #define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
91
92 #define MI_ALU0(op) _MI_ALU0(op)
93 #define MI_ALU1(op, x) _MI_ALU1(op, MI_ALU_##x)
94 #define MI_ALU2(op, x, y) _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
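/* The MI_ALU{0,1,2} wrappers above token-paste MI_ALU_ onto each operand,
 * so callers can write e.g. MI_ALU2(LOAD, SRCA, R0).  The _MI_ALU{0,1,2}
 * variants paste only the opcode, letting callers pass operands that are
 * already full MI_ALU_* values.
 *
 * For example, MI_ALU2(LOAD, SRCA, R0) expands to
 * (MI_ALU_LOAD << 20) | (MI_ALU_SRCA << 10) | MI_ALU_R0.
 */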
95
96 struct iris_query {
97 enum pipe_query_type type;
98 int index;
99
100 bool ready;
101
102 bool stalled;
103
104 uint64_t result;
105
106 struct iris_bo *bo;
107 struct iris_query_snapshots *map;
108
109 int batch_idx;
110 };
111
112 struct iris_query_snapshots {
113 /** iris_render_condition's saved MI_PREDICATE_DATA value. */
114 uint64_t predicate_data;
115
116 /** Have the start/end snapshots landed? */
117 uint64_t snapshots_landed;
118
119 /** Starting and ending counter snapshots */
120 uint64_t start;
121 uint64_t end;
122 };
123
124 struct iris_query_so_overflow {
125 uint64_t predicate_data;
126 uint64_t snapshots_landed;
127
128 struct {
129 uint64_t prim_storage_needed[2];
130 uint64_t num_prims[2];
131 } stream[4];
132 };
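/* Note: predicate_data and snapshots_landed sit at the same offsets in both
 * snapshot structs above, so mark_available() and the render-condition code
 * can use the iris_query_snapshots offsets for every query type.
 */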
133
134 /**
135 * Is this type of query written by PIPE_CONTROL?
136 */
137 static bool
138 iris_is_query_pipelined(struct iris_query *q)
139 {
140 switch (q->type) {
141 case PIPE_QUERY_OCCLUSION_COUNTER:
142 case PIPE_QUERY_OCCLUSION_PREDICATE:
143 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
144 case PIPE_QUERY_TIMESTAMP:
145 case PIPE_QUERY_TIMESTAMP_DISJOINT:
146 case PIPE_QUERY_TIME_ELAPSED:
147 return true;
148
149 default:
150 return false;
151 }
152 }
153
154 static void
155 mark_available(struct iris_context *ice, struct iris_query *q)
156 {
157 struct iris_batch *batch = &ice->batches[q->batch_idx];
158 unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
159 unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
160
161 if (!iris_is_query_pipelined(q)) {
162 ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
163 } else {
164 /* Order available *after* the query results. */
165 flags |= PIPE_CONTROL_FLUSH_ENABLE;
166 iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
167 }
168 }
169
170 /**
171  * Write a query snapshot (PS_DEPTH_COUNT or a timestamp) to q->bo at the given offset via a PIPE_CONTROL.
172 */
173 static void
174 iris_pipelined_write(struct iris_batch *batch,
175 struct iris_query *q,
176 enum pipe_control_flags flags,
177 unsigned offset)
178 {
179 const struct gen_device_info *devinfo = &batch->screen->devinfo;
180 const unsigned optional_cs_stall =
181 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
182
183 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
184 q->bo, offset, 0ull);
185 }
186
187 static void
188 write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
189 {
190 struct iris_batch *batch = &ice->batches[q->batch_idx];
191 const struct gen_device_info *devinfo = &batch->screen->devinfo;
192
193 if (!iris_is_query_pipelined(q)) {
194 iris_emit_pipe_control_flush(batch,
195 PIPE_CONTROL_CS_STALL |
196 PIPE_CONTROL_STALL_AT_SCOREBOARD);
197 q->stalled = true;
198 }
199
200 switch (q->type) {
201 case PIPE_QUERY_OCCLUSION_COUNTER:
202 case PIPE_QUERY_OCCLUSION_PREDICATE:
203 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
204 if (devinfo->gen >= 10) {
205 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
206 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
207 * Count sync operation."
208 */
209 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
210 }
211 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
212 PIPE_CONTROL_WRITE_DEPTH_COUNT |
213 PIPE_CONTROL_DEPTH_STALL,
214 offset);
215 break;
216 case PIPE_QUERY_TIME_ELAPSED:
217 case PIPE_QUERY_TIMESTAMP:
218 case PIPE_QUERY_TIMESTAMP_DISJOINT:
219 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
220 PIPE_CONTROL_WRITE_TIMESTAMP,
221 offset);
222 break;
223 case PIPE_QUERY_PRIMITIVES_GENERATED:
224 ice->vtbl.store_register_mem64(batch,
225 q->index == 0 ? CL_INVOCATION_COUNT :
226 SO_PRIM_STORAGE_NEEDED(q->index),
227 q->bo, offset, false);
228 break;
229 case PIPE_QUERY_PRIMITIVES_EMITTED:
230 ice->vtbl.store_register_mem64(batch,
231 SO_NUM_PRIMS_WRITTEN(q->index),
232 q->bo, offset, false);
233 break;
234 case PIPE_QUERY_PIPELINE_STATISTICS: {
235 static const uint32_t index_to_reg[] = {
236 IA_VERTICES_COUNT,
237 IA_PRIMITIVES_COUNT,
238 VS_INVOCATION_COUNT,
239 GS_INVOCATION_COUNT,
240 GS_PRIMITIVES_COUNT,
241 CL_INVOCATION_COUNT,
242 CL_PRIMITIVES_COUNT,
243 PS_INVOCATION_COUNT,
244 HS_INVOCATION_COUNT,
245 DS_INVOCATION_COUNT,
246 CS_INVOCATION_COUNT,
247 };
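      /* The table is indexed by q->index, which follows the same ordering as
       * the pipeline_statistics switch in iris_get_query_result().
       */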
248 const uint32_t reg = index_to_reg[q->index];
249
250 ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
251 break;
252 }
253 default:
254 assert(false);
255 }
256 }
257
258 static void
259 write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
260 {
261 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
262 uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
263
264 iris_emit_pipe_control_flush(batch,
265 PIPE_CONTROL_CS_STALL |
266 PIPE_CONTROL_STALL_AT_SCOREBOARD);
267 for (uint32_t i = 0; i < count; i++) {
268 int s = q->index + i;
269 int g_idx = offsetof(struct iris_query_so_overflow,
270 stream[s].num_prims[end]);
271 int w_idx = offsetof(struct iris_query_so_overflow,
272 stream[s].prim_storage_needed[end]);
273 ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
274 q->bo, g_idx, false);
275 ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
276 q->bo, w_idx, false);
277 }
278 }
279
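/**
 * Convert GPU timestamp ticks to nanoseconds using the device's
 * timestamp frequency.
 */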
280 uint64_t
281 iris_timebase_scale(const struct gen_device_info *devinfo,
282 uint64_t gpu_timestamp)
283 {
284 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
285 }
286
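/**
 * Return time1 - time0, accounting for the raw GPU timestamp counter
 * wrapping around (it is only TIMESTAMP_BITS wide).
 */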
287 static uint64_t
288 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
289 {
290 if (time0 > time1) {
291 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
292 } else {
293 return time1 - time0;
294 }
295 }
296
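/**
 * A stream overflowed if the primitives needing storage and the primitives
 * actually written diverged between the start and end snapshots.
 */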
297 static bool
298 stream_overflowed(struct iris_query_so_overflow *so, int s)
299 {
300 return (so->stream[s].prim_storage_needed[1] -
301 so->stream[s].prim_storage_needed[0]) !=
302 (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
303 }
304
305 static void
306 calculate_result_on_cpu(const struct gen_device_info *devinfo,
307 struct iris_query *q)
308 {
309 switch (q->type) {
310 case PIPE_QUERY_OCCLUSION_PREDICATE:
311 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
312 q->result = q->map->end != q->map->start;
313 break;
314 case PIPE_QUERY_TIMESTAMP:
315 case PIPE_QUERY_TIMESTAMP_DISJOINT:
316 /* The timestamp is the single starting snapshot. */
317 q->result = iris_timebase_scale(devinfo, q->map->start);
318 q->result &= (1ull << TIMESTAMP_BITS) - 1;
319 break;
320 case PIPE_QUERY_TIME_ELAPSED:
321 q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
322 q->result = iris_timebase_scale(devinfo, q->result);
323 q->result &= (1ull << TIMESTAMP_BITS) - 1;
324 break;
325 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
326 q->result = stream_overflowed((void *) q->map, q->index);
327 break;
328 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
329 q->result = false;
330 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
331 q->result |= stream_overflowed((void *) q->map, i);
332 break;
333 case PIPE_QUERY_OCCLUSION_COUNTER:
334 case PIPE_QUERY_PRIMITIVES_GENERATED:
335 case PIPE_QUERY_PRIMITIVES_EMITTED:
336 case PIPE_QUERY_PIPELINE_STATISTICS:
337 default:
338 q->result = q->map->end - q->map->start;
339 break;
340 }
341
342 q->ready = true;
343 }
344
345 /*
346 * GPR0 = (GPR0 == 0) ? 0 : 1;
347 */
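/*
 * This works by adding GPR0 to zero, which sets the ALU zero flag iff GPR0
 * was 0; STOREINV writes the inverted flag back to GPR0, and ANDing with
 * GPR1 (loaded with 1 below) masks that down to a single 0/1 value.
 */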
348 static void
349 gpr0_to_bool(struct iris_context *ice)
350 {
351 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
352
353 ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);
354
355 static const uint32_t math[] = {
356 MI_MATH | (9 - 2),
357 MI_ALU2(LOAD, SRCA, R0),
358 MI_ALU1(LOAD0, SRCB),
359 MI_ALU0(ADD),
360 MI_ALU2(STOREINV, R0, ZF),
361 MI_ALU2(LOAD, SRCA, R0),
362 MI_ALU2(LOAD, SRCB, R1),
363 MI_ALU0(AND),
364 MI_ALU2(STORE, R0, ACCU),
365 };
366 iris_batch_emit(batch, math, sizeof(math));
367 }
368
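/*
 * Load the overflow snapshots for stream @idx into CS GPRs:
 * GPR1/GPR2 = prim_storage_needed at begin/end,
 * GPR3/GPR4 = num_prims at begin/end.
 */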
369 static void
370 load_overflow_data_to_cs_gprs(struct iris_context *ice,
371 struct iris_query *q,
372 int idx)
373 {
374 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
375
376 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
377 offsetof(struct iris_query_so_overflow,
378 stream[idx].prim_storage_needed[0]));
379 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
380 offsetof(struct iris_query_so_overflow,
381 stream[idx].prim_storage_needed[1]));
382
383 ice->vtbl.load_register_mem64(batch, CS_GPR(3), q->bo,
384 offsetof(struct iris_query_so_overflow,
385 stream[idx].num_prims[0]));
386 ice->vtbl.load_register_mem64(batch, CS_GPR(4), q->bo,
387 offsetof(struct iris_query_so_overflow,
388 stream[idx].num_prims[1]));
389 }
390
391 /*
392 * R3 = R4 - R3;
393 * R1 = R2 - R1;
394 * R1 = R3 - R1;
395 * R0 = R0 | R1;
396 */
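/*
 * With the GPR assignments from load_overflow_data_to_cs_gprs(), this ORs
 * a nonzero value into GPR0 whenever the storage-needed and prims-written
 * deltas differ for the stream (the MI_MATH analogue of stream_overflowed()).
 */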
397 static void
398 calc_overflow_for_stream(struct iris_context *ice)
399 {
400 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
401 static const uint32_t maths[] = {
402 MI_MATH | (17 - 2),
403 MI_ALU2(LOAD, SRCA, R4),
404 MI_ALU2(LOAD, SRCB, R3),
405 MI_ALU0(SUB),
406 MI_ALU2(STORE, R3, ACCU),
407 MI_ALU2(LOAD, SRCA, R2),
408 MI_ALU2(LOAD, SRCB, R1),
409 MI_ALU0(SUB),
410 MI_ALU2(STORE, R1, ACCU),
411 MI_ALU2(LOAD, SRCA, R3),
412 MI_ALU2(LOAD, SRCB, R1),
413 MI_ALU0(SUB),
414 MI_ALU2(STORE, R1, ACCU),
415 MI_ALU2(LOAD, SRCA, R1),
416 MI_ALU2(LOAD, SRCB, R0),
417 MI_ALU0(OR),
418 MI_ALU2(STORE, R0, ACCU),
419 };
420
421 iris_batch_emit(batch, maths, sizeof(maths));
422 }
423
424 static void
425 overflow_result_to_gpr0(struct iris_context *ice, struct iris_query *q)
426 {
427 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
428
429 ice->vtbl.load_register_imm64(batch, CS_GPR(0), 0ull);
430
431 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
432 load_overflow_data_to_cs_gprs(ice, q, q->index);
433 calc_overflow_for_stream(ice);
434 } else {
435 for (int i = 0; i < MAX_VERTEX_STREAMS; i++) {
436 load_overflow_data_to_cs_gprs(ice, q, i);
437 calc_overflow_for_stream(ice);
438 }
439 }
440
441 gpr0_to_bool(ice);
442 }
443
444 /**
445 * Calculate the result and store it to CS_GPR0.
446 */
447 static void
448 calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
449 {
450 struct iris_batch *batch = &ice->batches[q->batch_idx];
451
452 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
453 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
454 overflow_result_to_gpr0(ice, q);
455 return;
456 }
457
458 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
459 offsetof(struct iris_query_snapshots, start));
460 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
461 offsetof(struct iris_query_snapshots, end));
462
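   /* GPR0 = GPR2 - GPR1, i.e. the end snapshot minus the start snapshot. */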
463 static const uint32_t math[] = {
464 MI_MATH | (5 - 2),
465 MI_ALU2(LOAD, SRCA, R2),
466 MI_ALU2(LOAD, SRCB, R1),
467 MI_ALU0(SUB),
468 MI_ALU2(STORE, R0, ACCU),
469 };
470 iris_batch_emit(batch, math, sizeof(math));
471
472 if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
473 q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
474 gpr0_to_bool(ice);
475 }
476
477 static struct pipe_query *
478 iris_create_query(struct pipe_context *ctx,
479 unsigned query_type,
480 unsigned index)
481 {
482 struct iris_query *q = calloc(1, sizeof(struct iris_query));
483
484 q->type = query_type;
485 q->index = index;
486
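   /* Statistics index 10 is CS_INVOCATION_COUNT (cs_invocations), which is
    * bumped by compute dispatches; iris emits those on the compute batch,
    * so the snapshots have to be taken there too.
    */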
487 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 10)
488 q->batch_idx = IRIS_BATCH_COMPUTE;
489 else
490 q->batch_idx = IRIS_BATCH_RENDER;
491 return (struct pipe_query *) q;
492 }
493
494 static void
495 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
496 {
497 struct iris_query *query = (void *) p_query;
498 iris_bo_unreference(query->bo);
499 free(query);
500 }
501
502
503 static boolean
504 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
505 {
506 struct iris_screen *screen = (void *) ctx->screen;
507 struct iris_context *ice = (void *) ctx;
508 struct iris_query *q = (void *) query;
509
510 iris_bo_unreference(q->bo);
511 q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
512 IRIS_MEMZONE_OTHER);
513 if (!q->bo)
514 return false;
515
516 q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
517 if (!q->map)
518 return false;
519
520 q->result = 0ull;
521 q->ready = false;
522 q->map->snapshots_landed = false;
523
524 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
525 ice->state.prims_generated_query_active = true;
526 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
527 }
528
529 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
530 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
531 write_overflow_values(ice, q, false);
532 else
533 write_value(ice, q, offsetof(struct iris_query_snapshots, start));
534
535 return true;
536 }
537
538 static bool
539 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
540 {
541 struct iris_context *ice = (void *) ctx;
542 struct iris_query *q = (void *) query;
543
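   /* TIMESTAMP queries have no interval; the single snapshot is written at
    * end_query time.  Reuse iris_begin_query() to allocate the BO and take
    * the snapshot, which calculate_result_on_cpu() reads from 'start'.
    */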
544 if (q->type == PIPE_QUERY_TIMESTAMP) {
545 iris_begin_query(ctx, query);
546 mark_available(ice, q);
547 return true;
548 }
549
550 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
551 ice->state.prims_generated_query_active = false;
552 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
553 }
554
555 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
556 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
557 write_overflow_values(ice, q, true);
558 else
559 write_value(ice, q, offsetof(struct iris_query_snapshots, end));
560 mark_available(ice, q);
561
562 return true;
563 }
564
565 /**
566 * See if the snapshots have landed for a query, and if so, compute the
567 * result and mark it ready. Does not flush (unlike iris_get_query_result).
568 */
569 static void
570 iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
571 {
572 struct iris_screen *screen = (void *) ice->ctx.screen;
573 const struct gen_device_info *devinfo = &screen->devinfo;
574
575 if (!q->ready && q->map->snapshots_landed) {
576 calculate_result_on_cpu(devinfo, q);
577 }
578 }
579
580 static boolean
581 iris_get_query_result(struct pipe_context *ctx,
582 struct pipe_query *query,
583 boolean wait,
584 union pipe_query_result *result)
585 {
586 struct iris_context *ice = (void *) ctx;
587 struct iris_query *q = (void *) query;
588 struct iris_screen *screen = (void *) ctx->screen;
589 const struct gen_device_info *devinfo = &screen->devinfo;
590
591 if (!q->ready) {
592 if (iris_batch_references(&ice->batches[q->batch_idx], q->bo))
593 iris_batch_flush(&ice->batches[q->batch_idx]);
594
595 if (!q->map->snapshots_landed) {
596 if (wait)
597 iris_bo_wait_rendering(q->bo);
598 else
599 return false;
600 }
601
602 assert(q->map->snapshots_landed);
603 calculate_result_on_cpu(devinfo, q);
604 }
605
606 assert(q->ready);
607
608 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
609 switch (q->index) {
610 case 0:
611 result->pipeline_statistics.ia_vertices = q->result;
612 break;
613 case 1:
614 result->pipeline_statistics.ia_primitives = q->result;
615 break;
616 case 2:
617 result->pipeline_statistics.vs_invocations = q->result;
618 break;
619 case 3:
620 result->pipeline_statistics.gs_invocations = q->result;
621 break;
622 case 4:
623 result->pipeline_statistics.gs_primitives = q->result;
624 break;
625 case 5:
626 result->pipeline_statistics.c_invocations = q->result;
627 break;
628 case 6:
629 result->pipeline_statistics.c_primitives = q->result;
630 break;
631 case 7:
632 result->pipeline_statistics.ps_invocations = q->result;
633 break;
634 case 8:
635 result->pipeline_statistics.hs_invocations = q->result;
636 break;
637 case 9:
638 result->pipeline_statistics.ds_invocations = q->result;
639 break;
640 case 10:
641 result->pipeline_statistics.cs_invocations = q->result;
642 break;
643 }
644 } else {
645 result->u64 = q->result;
646 }
647
648 return true;
649 }
650
651 static void
652 iris_get_query_result_resource(struct pipe_context *ctx,
653 struct pipe_query *query,
654 boolean wait,
655 enum pipe_query_value_type result_type,
656 int index,
657 struct pipe_resource *p_res,
658 unsigned offset)
659 {
660 struct iris_context *ice = (void *) ctx;
661 struct iris_query *q = (void *) query;
662 struct iris_batch *batch = &ice->batches[q->batch_idx];
663 const struct gen_device_info *devinfo = &batch->screen->devinfo;
664 struct iris_resource *res = (void *) p_res;
665 unsigned snapshots_landed_offset =
666 offsetof(struct iris_query_snapshots, snapshots_landed);
667
668 res->bind_history |= PIPE_BIND_QUERY_BUFFER;
669
670 if (index == -1) {
671 /* They're asking for the availability of the result. If we still
672 * have commands queued up which produce the result, submit them
673 * now so that progress happens. Either way, copy the snapshots
674 * landed field to the destination resource.
675 */
676 if (iris_batch_references(batch, q->bo))
677 iris_batch_flush(batch);
678
679 ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
680 q->bo, snapshots_landed_offset,
681 result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
682 return;
683 }
684
685 if (!q->ready && q->map->snapshots_landed) {
686 /* The final snapshots happen to have landed, so let's just compute
687 * the result on the CPU now...
688 */
689 calculate_result_on_cpu(devinfo, q);
690 }
691
692 if (q->ready) {
693 /* We happen to have the result on the CPU, so just copy it. */
694 if (result_type <= PIPE_QUERY_TYPE_U32) {
695 ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
696 q->result);
697 } else {
698 ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
699 q->result);
700 }
701
702          /* Make sure the result lands before they bind the QBO elsewhere
703 * and use the result.
704 */
705 // XXX: Why? i965 doesn't do this.
706 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
707 return;
708 }
709
710    /* Calculate the result into CS_GPR0 */
711 calculate_result_on_gpu(ice, q);
712
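   /* If the caller doesn't want to wait and we haven't already stalled,
    * predicate the register write below on snapshots_landed, so GPR0 is
    * only copied to the destination once the result is actually valid.
    */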
713 bool predicated = !wait && !q->stalled;
714
715 if (predicated) {
716 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
717 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
718 snapshots_landed_offset);
719 uint32_t predicate = MI_PREDICATE |
720 MI_PREDICATE_LOADOP_LOADINV |
721 MI_PREDICATE_COMBINEOP_SET |
722 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
723 iris_batch_emit(batch, &predicate, sizeof(uint32_t));
724 }
725
726 if (result_type <= PIPE_QUERY_TYPE_U32) {
727 ice->vtbl.store_register_mem32(batch, CS_GPR(0),
728 iris_resource_bo(p_res),
729 offset, predicated);
730 } else {
731 ice->vtbl.store_register_mem64(batch, CS_GPR(0),
732 iris_resource_bo(p_res),
733 offset, predicated);
734 }
735 }
736
737 static void
738 iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
739 {
740 struct iris_context *ice = (void *) ctx;
741
742 if (ice->state.statistics_counters_enabled == enable)
743 return;
744
745 // XXX: most packets aren't paying attention to this yet, because it'd
746 // have to be done dynamically at draw time, which is a pain
747 ice->state.statistics_counters_enabled = enable;
748 ice->state.dirty |= IRIS_DIRTY_CLIP |
749 IRIS_DIRTY_GS |
750 IRIS_DIRTY_RASTER |
751 IRIS_DIRTY_STREAMOUT |
752 IRIS_DIRTY_TCS |
753 IRIS_DIRTY_TES |
754 IRIS_DIRTY_VS |
755 IRIS_DIRTY_WM;
756 }
757
758 static void
759 set_predicate_enable(struct iris_context *ice, bool value)
760 {
761 if (value)
762 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
763 else
764 ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
765 }
766
767 static void
768 set_predicate_for_result(struct iris_context *ice,
769 struct iris_query *q,
770 bool inverted)
771 {
772 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
773
774 /* The CPU doesn't have the query result yet; use hardware predication */
775 ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
776
777 /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
778 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
779 q->stalled = true;
780
781 switch (q->type) {
782 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
783 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
784 overflow_result_to_gpr0(ice, q);
785
786 ice->vtbl.load_register_reg64(batch, MI_PREDICATE_SRC0, CS_GPR(0));
787 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
788 break;
789 default:
790 /* PIPE_QUERY_OCCLUSION_* */
791 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
792 offsetof(struct iris_query_snapshots, start));
793 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo,
794 offsetof(struct iris_query_snapshots, end));
795 break;
796 }
797
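   /* In both cases, SRC0 == SRC1 corresponds to a "false" query result
    * (no samples passed, or no overflow), so LOADINV makes the predicate
    * pass when the result is true; the inverted case uses LOAD instead.
    */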
798 uint32_t mi_predicate = MI_PREDICATE |
799 MI_PREDICATE_COMBINEOP_SET |
800 MI_PREDICATE_COMPAREOP_SRCS_EQUAL |
801 (inverted ? MI_PREDICATE_LOADOP_LOAD
802 : MI_PREDICATE_LOADOP_LOADINV);
803 iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
804
805 /* We immediately set the predicate on the render batch, as all the
806 * counters come from 3D operations. However, we may need to predicate
807 * a compute dispatch, which executes in a different GEM context and has
808 * a different MI_PREDICATE_DATA register. So, we save the result to
809 * memory and reload it in iris_launch_grid.
810 */
811 unsigned offset = offsetof(struct iris_query_snapshots, predicate_data);
812 ice->vtbl.store_register_mem64(batch, MI_PREDICATE_DATA,
813 q->bo, offset, false);
814 ice->state.compute_predicate = q->bo;
815 }
816
817 static void
818 iris_render_condition(struct pipe_context *ctx,
819 struct pipe_query *query,
820 boolean condition,
821 enum pipe_render_cond_flag mode)
822 {
823 struct iris_context *ice = (void *) ctx;
824 struct iris_query *q = (void *) query;
825
826 if (!q) {
827 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
828 return;
829 }
830
831 iris_check_query_no_flush(ice, q);
832
833 if (q->result || q->ready) {
834 set_predicate_enable(ice, (q->result != 0) ^ condition);
835 } else {
836 if (mode == PIPE_RENDER_COND_NO_WAIT ||
837 mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
838 perf_debug(&ice->dbg, "Conditional rendering demoted from "
839 "\"no wait\" to \"wait\".");
840 }
841 set_predicate_for_result(ice, q, condition);
842 }
843 }
844
845 void
846 iris_init_query_functions(struct pipe_context *ctx)
847 {
848 ctx->create_query = iris_create_query;
849 ctx->destroy_query = iris_destroy_query;
850 ctx->begin_query = iris_begin_query;
851 ctx->end_query = iris_end_query;
852 ctx->get_query_result = iris_get_query_result;
853 ctx->get_query_result_resource = iris_get_query_result_resource;
854 ctx->set_active_query_state = iris_set_active_query_state;
855 ctx->render_condition = iris_render_condition;
856 }