iris: execute compute-related queries on the compute batch.
[mesa.git] / src / gallium / drivers / iris / iris_query.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
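/*
 * From the state tracker's point of view, the flow is roughly the following
 * (a sketch of the Gallium query interface, not code from this file):
 *
 *    struct pipe_query *q =
 *       ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0);
 *    ctx->begin_query(ctx, q);
 *    ... emit draws ...
 *    ctx->end_query(ctx, q);
 *
 *    union pipe_query_result result;
 *    ctx->get_query_result(ctx, q, true (wait), &result);
 *    ctx->destroy_query(ctx, q);
 */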
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
41
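/*
 * MMIO offsets of the pipeline statistics and streamout counter registers
 * that the code below snapshots with MI_STORE_REGISTER_MEM (via
 * vtbl.store_register_mem64).
 */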
42 #define IA_VERTICES_COUNT 0x2310
43 #define IA_PRIMITIVES_COUNT 0x2318
44 #define VS_INVOCATION_COUNT 0x2320
45 #define HS_INVOCATION_COUNT 0x2300
46 #define DS_INVOCATION_COUNT 0x2308
47 #define GS_INVOCATION_COUNT 0x2328
48 #define GS_PRIMITIVES_COUNT 0x2330
49 #define CL_INVOCATION_COUNT 0x2338
50 #define CL_PRIMITIVES_COUNT 0x2340
51 #define PS_INVOCATION_COUNT 0x2348
52 #define CS_INVOCATION_COUNT 0x2290
53 #define PS_DEPTH_COUNT 0x2350
54
55 #define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
56
57 #define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
58
59 #define CS_GPR(n) (0x2600 + (n) * 8)
60
61 #define MI_MATH (0x1a << 23)
62
63 #define MI_ALU_LOAD 0x080
64 #define MI_ALU_LOADINV 0x480
65 #define MI_ALU_LOAD0 0x081
66 #define MI_ALU_LOAD1 0x481
67 #define MI_ALU_ADD 0x100
68 #define MI_ALU_SUB 0x101
69 #define MI_ALU_AND 0x102
70 #define MI_ALU_OR 0x103
71 #define MI_ALU_XOR 0x104
72 #define MI_ALU_STORE 0x180
73 #define MI_ALU_STOREINV 0x580
74
75 #define MI_ALU_R0 0x00
76 #define MI_ALU_R1 0x01
77 #define MI_ALU_R2 0x02
78 #define MI_ALU_R3 0x03
79 #define MI_ALU_R4 0x04
80 #define MI_ALU_SRCA 0x20
81 #define MI_ALU_SRCB 0x21
82 #define MI_ALU_ACCU 0x31
83 #define MI_ALU_ZF 0x32
84 #define MI_ALU_CF 0x33
85
86 #define MI_ALU0(op) ((MI_ALU_##op << 20))
87 #define MI_ALU1(op, x) ((MI_ALU_##op << 20) | (MI_ALU_##x << 10))
88 #define MI_ALU2(op, x, y) \
89 ((MI_ALU_##op << 20) | (MI_ALU_##x << 10) | (MI_ALU_##y))
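/*
 * Each dword following the MI_MATH header encodes one ALU instruction: the
 * operation in bits 31:20, operand 1 in bits 19:10, and operand 2 in bits
 * 9:0.  For example, MI_ALU2(LOAD, SRCA, R0) is "load GPR0 into source A".
 */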
90
91 struct iris_query {
92 enum pipe_query_type type;
93 int index;
94
95 bool ready;
96
97 uint64_t result;
98
99 struct iris_bo *bo;
100 struct iris_query_snapshots *map;
101
102 int batch_idx;
103 };
104
105 struct iris_query_snapshots {
106 uint64_t snapshots_landed;
107 uint64_t start;
108 uint64_t end;
109 };
110
111 struct iris_query_so_overflow {
112 uint64_t snapshots_landed;
113 struct {
114 uint64_t prim_storage_needed[2];
115 uint64_t num_prims[2];
116 } stream[4];
117 };
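/*
 * Both result layouts above start with snapshots_landed, so offset 0 works
 * as the availability flag regardless of which layout a given query's BO
 * actually uses.
 */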
118
119 /**
120 * Is this type of query written by PIPE_CONTROL?
121 */
122 static bool
123 iris_is_query_pipelined(struct iris_query *q)
124 {
125 switch (q->type) {
126 case PIPE_QUERY_OCCLUSION_COUNTER:
127 case PIPE_QUERY_OCCLUSION_PREDICATE:
128 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
129 case PIPE_QUERY_TIMESTAMP:
130 case PIPE_QUERY_TIMESTAMP_DISJOINT:
131 case PIPE_QUERY_TIME_ELAPSED:
132 return true;
133
134 default:
135 return false;
136 }
137 }
138
139 static void
140 mark_available(struct iris_context *ice, struct iris_query *q)
141 {
142 struct iris_batch *batch = &ice->batches[q->batch_idx];
143 unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
144 unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
145
146 if (!iris_is_query_pipelined(q)) {
147 ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
148 } else {
149 /* Order available *after* the query results. */
150 flags |= PIPE_CONTROL_FLUSH_ENABLE;
151 iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
152 }
153 }
154
155 /**
156  * Write a pipelined snapshot (PS_DEPTH_COUNT or a timestamp) to q->bo at the given offset via a PIPE_CONTROL.
157 */
158 static void
159 iris_pipelined_write(struct iris_batch *batch,
160 struct iris_query *q,
161 enum pipe_control_flags flags,
162 unsigned offset)
163 {
164 const struct gen_device_info *devinfo = &batch->screen->devinfo;
165 const unsigned optional_cs_stall =
166 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
167
168 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
169 q->bo, offset, 0ull);
170 }
171
172 static void
173 write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
174 {
175 struct iris_batch *batch = &ice->batches[q->batch_idx];
176 const struct gen_device_info *devinfo = &batch->screen->devinfo;
177
178 if (!iris_is_query_pipelined(q)) {
179 iris_emit_pipe_control_flush(batch,
180 PIPE_CONTROL_CS_STALL |
181 PIPE_CONTROL_STALL_AT_SCOREBOARD);
182 }
183
184 switch (q->type) {
185 case PIPE_QUERY_OCCLUSION_COUNTER:
186 case PIPE_QUERY_OCCLUSION_PREDICATE:
187 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
188 if (devinfo->gen >= 10) {
189 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
190 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
191 * Count sync operation."
192 */
193 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
194 }
195 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
196 PIPE_CONTROL_WRITE_DEPTH_COUNT |
197 PIPE_CONTROL_DEPTH_STALL,
198 offset);
199 break;
200 case PIPE_QUERY_TIME_ELAPSED:
201 case PIPE_QUERY_TIMESTAMP:
202 case PIPE_QUERY_TIMESTAMP_DISJOINT:
203 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
204 PIPE_CONTROL_WRITE_TIMESTAMP,
205 offset);
206 break;
207 case PIPE_QUERY_PRIMITIVES_GENERATED:
208 ice->vtbl.store_register_mem64(batch,
209 q->index == 0 ? CL_INVOCATION_COUNT :
210 SO_PRIM_STORAGE_NEEDED(q->index),
211 q->bo, offset, false);
212 break;
213 case PIPE_QUERY_PRIMITIVES_EMITTED:
214 ice->vtbl.store_register_mem64(batch,
215 SO_NUM_PRIMS_WRITTEN(q->index),
216 q->bo, offset, false);
217 break;
218 case PIPE_QUERY_PIPELINE_STATISTICS: {
219 static const uint32_t index_to_reg[] = {
220 IA_VERTICES_COUNT,
221 IA_PRIMITIVES_COUNT,
222 VS_INVOCATION_COUNT,
223 GS_INVOCATION_COUNT,
224 GS_PRIMITIVES_COUNT,
225 CL_INVOCATION_COUNT,
226 CL_PRIMITIVES_COUNT,
227 PS_INVOCATION_COUNT,
228 HS_INVOCATION_COUNT,
229 DS_INVOCATION_COUNT,
230 CS_INVOCATION_COUNT,
231 };
232 const uint32_t reg = index_to_reg[q->index];
233
234 ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
235 break;
236 }
237 default:
238 assert(false);
239 }
240 }
241
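/**
 * Snapshot the SO overflow counters (SO_NUM_PRIMS_WRITTEN and
 * SO_PRIM_STORAGE_NEEDED) for one stream, or all four streams for the
 * "any stream" predicate, into the start (end = false) or end (end = true)
 * slots of the overflow layout.
 */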
242 static void
243 write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
244 {
245 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
246 uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
247
248 iris_emit_pipe_control_flush(batch,
249 PIPE_CONTROL_CS_STALL |
250 PIPE_CONTROL_STALL_AT_SCOREBOARD);
251 for (uint32_t i = 0; i < count; i++) {
252 int s = q->index + i;
253 int g_idx = offsetof(struct iris_query_so_overflow,
254 stream[s].num_prims[end]);
255 int w_idx = offsetof(struct iris_query_so_overflow,
256 stream[s].prim_storage_needed[end]);
257 ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
258 q->bo, g_idx, false);
259 ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
260 q->bo, w_idx, false);
261 }
262 }
263
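/**
 * Convert a raw GPU timestamp into nanoseconds using the device's
 * timestamp frequency.
 */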
264 uint64_t
265 iris_timebase_scale(const struct gen_device_info *devinfo,
266 uint64_t gpu_timestamp)
267 {
268 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
269 }
270
271 static uint64_t
272 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
273 {
274 if (time0 > time1) {
275 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
276 } else {
277 return time1 - time0;
278 }
279 }
280
281 static bool
282 stream_overflowed(struct iris_query_so_overflow *so, int s)
283 {
284 return (so->stream[s].prim_storage_needed[1] -
285 so->stream[s].prim_storage_needed[0]) !=
286 (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
287 }
288
289 static void
290 calculate_result_on_cpu(const struct gen_device_info *devinfo,
291 struct iris_query *q)
292 {
293 switch (q->type) {
294 case PIPE_QUERY_OCCLUSION_PREDICATE:
295 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
296 q->result = q->map->end != q->map->start;
297 break;
298 case PIPE_QUERY_TIMESTAMP:
299 case PIPE_QUERY_TIMESTAMP_DISJOINT:
300 /* The timestamp is the single starting snapshot. */
301 q->result = iris_timebase_scale(devinfo, q->map->start);
302 q->result &= (1ull << TIMESTAMP_BITS) - 1;
303 break;
304 case PIPE_QUERY_TIME_ELAPSED:
305 q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
306 q->result = iris_timebase_scale(devinfo, q->result);
307 q->result &= (1ull << TIMESTAMP_BITS) - 1;
308 break;
309 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
310 q->result = stream_overflowed((void *) q->map, q->index);
311 break;
312 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
313 q->result = false;
314 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
315 q->result |= stream_overflowed((void *) q->map, i);
316 break;
317 case PIPE_QUERY_OCCLUSION_COUNTER:
318 case PIPE_QUERY_PRIMITIVES_GENERATED:
319 case PIPE_QUERY_PRIMITIVES_EMITTED:
320 case PIPE_QUERY_PIPELINE_STATISTICS:
321 default:
322 q->result = q->map->end - q->map->start;
323 break;
324 }
325
326 q->ready = true;
327 }
328
329 /*
330 * GPR0 = (GPR0 == 0) ? 0 : 1;
331 */
332 static void
333 gpr0_to_bool(struct iris_context *ice)
334 {
335 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
336
337 ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);
338
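/* Adding zero to GPR0 makes the ALU zero flag (ZF) reflect whether GPR0 was
 * zero; STOREINV then writes the inverted flag (all ones or zero) back to
 * GPR0, and the final AND with GPR1 (= 1, loaded above) clamps the result
 * to exactly 0 or 1.
 */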
339 static const uint32_t math[] = {
340 MI_MATH | (9 - 2),
341 MI_ALU2(LOAD, SRCA, R0),
342 MI_ALU1(LOAD0, SRCB),
343 MI_ALU0(ADD),
344 MI_ALU2(STOREINV, R0, ZF),
345 MI_ALU2(LOAD, SRCA, R0),
346 MI_ALU2(LOAD, SRCB, R1),
347 MI_ALU0(AND),
348 MI_ALU2(STORE, R0, ACCU),
349 };
350 iris_batch_emit(batch, math, sizeof(math));
351 }
352
353 static void
354 load_overflow_data_to_cs_gprs(struct iris_context *ice,
355 struct iris_query *q,
356 int idx)
357 {
358 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
359
360 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
361 offsetof(struct iris_query_so_overflow,
362 stream[idx].prim_storage_needed[0]));
363 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
364 offsetof(struct iris_query_so_overflow,
365 stream[idx].prim_storage_needed[1]));
366
367 ice->vtbl.load_register_mem64(batch, CS_GPR(3), q->bo,
368 offsetof(struct iris_query_so_overflow,
369 stream[idx].num_prims[0]));
370 ice->vtbl.load_register_mem64(batch, CS_GPR(4), q->bo,
371 offsetof(struct iris_query_so_overflow,
372 stream[idx].num_prims[1]));
373 }
374
375 /*
376 * R3 = R4 - R3;
377 * R1 = R2 - R1;
378 * R1 = R3 - R1;
379 * R0 = R0 | R1;
380 */
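/*
 * With the GPRs loaded by load_overflow_data_to_cs_gprs() (R1/R2 =
 * prim_storage_needed[0]/[1], R3/R4 = num_prims[0]/[1]), this computes
 * (num_prims delta) - (prim_storage_needed delta) and ORs the result into
 * R0, i.e. the MI_MATH counterpart of stream_overflowed() above.
 */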
381 static void
382 calc_overflow_for_stream(struct iris_context *ice)
383 {
384 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
385 static const uint32_t maths[] = {
386 MI_MATH | (17 - 2),
387 MI_ALU2(LOAD, SRCA, R4),
388 MI_ALU2(LOAD, SRCB, R3),
389 MI_ALU0(SUB),
390 MI_ALU2(STORE, R3, ACCU),
391 MI_ALU2(LOAD, SRCA, R2),
392 MI_ALU2(LOAD, SRCB, R1),
393 MI_ALU0(SUB),
394 MI_ALU2(STORE, R1, ACCU),
395 MI_ALU2(LOAD, SRCA, R3),
396 MI_ALU2(LOAD, SRCB, R1),
397 MI_ALU0(SUB),
398 MI_ALU2(STORE, R1, ACCU),
399 MI_ALU2(LOAD, SRCA, R1),
400 MI_ALU2(LOAD, SRCB, R0),
401 MI_ALU0(OR),
402 MI_ALU2(STORE, R0, ACCU),
403 };
404
405 iris_batch_emit(batch, maths, sizeof(maths));
406 }
407
408 static void
409 overflow_result_to_gpr0(struct iris_context *ice, struct iris_query *q)
410 {
411 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
412
413 ice->vtbl.load_register_imm64(batch, CS_GPR(0), 0ull);
414
415 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
416 load_overflow_data_to_cs_gprs(ice, q, q->index);
417 calc_overflow_for_stream(ice);
418 } else {
419 for (int i = 0; i < MAX_VERTEX_STREAMS; i++) {
420 load_overflow_data_to_cs_gprs(ice, q, i);
421 calc_overflow_for_stream(ice);
422 }
423 }
424
425 gpr0_to_bool(ice);
426 }
427
428 /**
429 * Calculate the result and store it to CS_GPR0.
430 */
431 static void
432 calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
433 {
434 struct iris_batch *batch = &ice->batches[q->batch_idx];
435
436 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
437 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
438 overflow_result_to_gpr0(ice, q);
439 return;
440 }
441
442 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
443 offsetof(struct iris_query_snapshots, start));
444 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
445 offsetof(struct iris_query_snapshots, end));
446
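/* GPR0 = GPR2 - GPR1, i.e. end - start. */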
447 static const uint32_t math[] = {
448 MI_MATH | (5 - 2),
449 MI_ALU2(LOAD, SRCA, R2),
450 MI_ALU2(LOAD, SRCB, R1),
451 MI_ALU0(SUB),
452 MI_ALU2(STORE, R0, ACCU),
453 };
454 iris_batch_emit(batch, math, sizeof(math));
455
456 if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
457 q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
458 gpr0_to_bool(ice);
459 }
460
461 static struct pipe_query *
462 iris_create_query(struct pipe_context *ctx,
463 unsigned query_type,
464 unsigned index)
465 {
466 struct iris_query *q = calloc(1, sizeof(struct iris_query));
467
468 q->type = query_type;
469 q->index = index;
470
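/* Pipeline statistics index 10 is CS_INVOCATION_COUNT (see index_to_reg and
 * iris_get_query_result below); compute work is emitted on the compute
 * batch, so that counter has to be snapshotted there.
 */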
471 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 10)
472 q->batch_idx = IRIS_BATCH_COMPUTE;
473 else
474 q->batch_idx = IRIS_BATCH_RENDER;
475 return (struct pipe_query *) q;
476 }
477
478 static void
479 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
480 {
481 struct iris_query *query = (void *) p_query;
482 iris_bo_unreference(query->bo);
483 free(query);
484 }
485
486
487 static boolean
488 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
489 {
490 struct iris_screen *screen = (void *) ctx->screen;
491 struct iris_context *ice = (void *) ctx;
492 struct iris_query *q = (void *) query;
493
494 iris_bo_unreference(q->bo);
495 q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
496 IRIS_MEMZONE_OTHER);
497 if (!q->bo)
498 return false;
499
500 q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
501 if (!q->map)
502 return false;
503
504 q->result = 0ull;
505 q->ready = false;
506 q->map->snapshots_landed = false;
507
508 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
509 ice->state.prims_generated_query_active = true;
510 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
511 }
512
513 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
514 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
515 write_overflow_values(ice, q, false);
516 else
517 write_value(ice, q, offsetof(struct iris_query_snapshots, start));
518
519 return true;
520 }
521
522 static bool
523 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
524 {
525 struct iris_context *ice = (void *) ctx;
526 struct iris_query *q = (void *) query;
527
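/* TIMESTAMP queries have no begin_query; take the single snapshot now
 * (reusing iris_begin_query to allocate and write the BO) and mark it
 * available immediately.
 */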
528 if (q->type == PIPE_QUERY_TIMESTAMP) {
529 iris_begin_query(ctx, query);
530 mark_available(ice, q);
531 return true;
532 }
533
534 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
535          ice->state.prims_generated_query_active = false;
536 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
537 }
538
539 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
540 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
541 write_overflow_values(ice, q, true);
542 else
543 write_value(ice, q, offsetof(struct iris_query_snapshots, end));
544 mark_available(ice, q);
545
546 return true;
547 }
548
549 static boolean
550 iris_get_query_result(struct pipe_context *ctx,
551 struct pipe_query *query,
552 boolean wait,
553 union pipe_query_result *result)
554 {
555 struct iris_context *ice = (void *) ctx;
556 struct iris_query *q = (void *) query;
557 struct iris_screen *screen = (void *) ctx->screen;
558 const struct gen_device_info *devinfo = &screen->devinfo;
559
560 if (!q->ready) {
561 if (iris_batch_references(&ice->batches[q->batch_idx], q->bo))
562 iris_batch_flush(&ice->batches[q->batch_idx]);
563
564 if (!q->map->snapshots_landed) {
565 if (wait)
566 iris_bo_wait_rendering(q->bo);
567 else
568 return false;
569 }
570
571 assert(q->map->snapshots_landed);
572 calculate_result_on_cpu(devinfo, q);
573 }
574
575 assert(q->ready);
576
577 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
578 switch (q->index) {
579 case 0:
580 result->pipeline_statistics.ia_vertices = q->result;
581 break;
582 case 1:
583 result->pipeline_statistics.ia_primitives = q->result;
584 break;
585 case 2:
586 result->pipeline_statistics.vs_invocations = q->result;
587 break;
588 case 3:
589 result->pipeline_statistics.gs_invocations = q->result;
590 break;
591 case 4:
592 result->pipeline_statistics.gs_primitives = q->result;
593 break;
594 case 5:
595 result->pipeline_statistics.c_invocations = q->result;
596 break;
597 case 6:
598 result->pipeline_statistics.c_primitives = q->result;
599 break;
600 case 7:
601 result->pipeline_statistics.ps_invocations = q->result;
602 break;
603 case 8:
604 result->pipeline_statistics.hs_invocations = q->result;
605 break;
606 case 9:
607 result->pipeline_statistics.ds_invocations = q->result;
608 break;
609 case 10:
610 result->pipeline_statistics.cs_invocations = q->result;
611 break;
612 }
613 } else {
614 result->u64 = q->result;
615 }
616
617 return true;
618 }
619
620 static void
621 iris_get_query_result_resource(struct pipe_context *ctx,
622 struct pipe_query *query,
623 boolean wait,
624 enum pipe_query_value_type result_type,
625 int index,
626 struct pipe_resource *p_res,
627 unsigned offset)
628 {
629 struct iris_context *ice = (void *) ctx;
630 struct iris_query *q = (void *) query;
631 struct iris_batch *batch = &ice->batches[q->batch_idx];
632 const struct gen_device_info *devinfo = &batch->screen->devinfo;
633 struct iris_resource *res = (void *) p_res;
634 unsigned snapshots_landed_offset =
635 offsetof(struct iris_query_snapshots, snapshots_landed);
636
637 res->bind_history |= PIPE_BIND_QUERY_BUFFER;
638
639 if (index == -1) {
640 /* They're asking for the availability of the result. If we still
641 * have commands queued up which produce the result, submit them
642 * now so that progress happens. Either way, copy the snapshots
643 * landed field to the destination resource.
644 */
645 if (iris_batch_references(batch, q->bo))
646 iris_batch_flush(batch);
647
648 ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
649 q->bo, snapshots_landed_offset,
650 result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
651 return;
652 }
653
654 if (!q->ready && q->map->snapshots_landed) {
655 /* The final snapshots happen to have landed, so let's just compute
656 * the result on the CPU now...
657 */
658 calculate_result_on_cpu(devinfo, q);
659 }
660
661 if (q->ready) {
662 /* We happen to have the result on the CPU, so just copy it. */
663 if (result_type <= PIPE_QUERY_TYPE_U32) {
664 ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
665 q->result);
666 } else {
667 ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
668 q->result);
669 }
670
671       /* Make sure the result lands before they bind the QBO elsewhere
672 * and use the result.
673 */
674 // XXX: Why? i965 doesn't do this.
675 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
676 return;
677 }
678
679 /* Calculate the result to CS_GPR0 */
680 calculate_result_on_gpu(ice, q);
681
682 bool predicated = !wait && iris_is_query_pipelined(q);
683
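/* When not waiting, predicate the result write below on the availability
 * snapshot; the intent is that the destination is left untouched if the
 * snapshots have not landed yet.
 */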
684 if (predicated) {
685 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
686 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
687 snapshots_landed_offset);
688 uint32_t predicate = MI_PREDICATE |
689 MI_PREDICATE_LOADOP_LOADINV |
690 MI_PREDICATE_COMBINEOP_SET |
691 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
692 iris_batch_emit(batch, &predicate, sizeof(uint32_t));
693 }
694
695 if (result_type <= PIPE_QUERY_TYPE_U32) {
696 ice->vtbl.store_register_mem32(batch, CS_GPR(0),
697 iris_resource_bo(p_res),
698 offset, predicated);
699 } else {
700 ice->vtbl.store_register_mem64(batch, CS_GPR(0),
701 iris_resource_bo(p_res),
702 offset, predicated);
703 }
704 }
705
706 static void
707 iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
708 {
709 struct iris_context *ice = (void *) ctx;
710
711 if (ice->state.statistics_counters_enabled == enable)
712 return;
713
714 // XXX: most packets aren't paying attention to this yet, because it'd
715 // have to be done dynamically at draw time, which is a pain
716 ice->state.statistics_counters_enabled = enable;
717 ice->state.dirty |= IRIS_DIRTY_CLIP |
718 IRIS_DIRTY_GS |
719 IRIS_DIRTY_RASTER |
720 IRIS_DIRTY_STREAMOUT |
721 IRIS_DIRTY_TCS |
722 IRIS_DIRTY_TES |
723 IRIS_DIRTY_VS |
724 IRIS_DIRTY_WM;
725 }
726
727 void
728 iris_init_query_functions(struct pipe_context *ctx)
729 {
730 ctx->create_query = iris_create_query;
731 ctx->destroy_query = iris_destroy_query;
732 ctx->begin_query = iris_begin_query;
733 ctx->end_query = iris_end_query;
734 ctx->get_query_result = iris_get_query_result;
735 ctx->get_query_result_resource = iris_get_query_result_resource;
736 ctx->set_active_query_state = iris_set_active_query_state;
737 }