/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * @file iris_query.c
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_resource.h"
#include "iris_screen.h"
#define IA_VERTICES_COUNT          0x2310
#define IA_PRIMITIVES_COUNT        0x2318
#define VS_INVOCATION_COUNT        0x2320
#define HS_INVOCATION_COUNT        0x2300
#define DS_INVOCATION_COUNT        0x2308
#define GS_INVOCATION_COUNT        0x2328
#define GS_PRIMITIVES_COUNT        0x2330
#define CL_INVOCATION_COUNT        0x2338
#define CL_PRIMITIVES_COUNT        0x2340
#define PS_INVOCATION_COUNT        0x2348
#define CS_INVOCATION_COUNT        0x2290
#define PS_DEPTH_COUNT             0x2350

#define SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)

#define SO_NUM_PRIMS_WRITTEN(n)    (0x5200 + (n) * 8)

#define CS_GPR(n)                  (0x2600 + (n) * 8)
#define MI_MATH (0x1a << 23)

#define MI_ALU_LOAD      0x080
#define MI_ALU_LOADINV   0x480
#define MI_ALU_LOAD0     0x081
#define MI_ALU_LOAD1     0x481
#define MI_ALU_ADD       0x100
#define MI_ALU_SUB       0x101
#define MI_ALU_AND       0x102
#define MI_ALU_OR        0x103
#define MI_ALU_XOR       0x104
#define MI_ALU_STORE     0x180
#define MI_ALU_STOREINV  0x580

#define MI_ALU_R0   0x00
#define MI_ALU_R1   0x01
#define MI_ALU_R2   0x02
#define MI_ALU_R3   0x03
#define MI_ALU_R4   0x04
#define MI_ALU_SRCA 0x20
#define MI_ALU_SRCB 0x21
#define MI_ALU_ACCU 0x31
#define MI_ALU_ZF   0x32
#define MI_ALU_CF   0x33

#define _MI_ALU(op, x, y)  (((op) << 20) | ((x) << 10) | (y))

#define _MI_ALU0(op)       _MI_ALU(MI_ALU_##op, 0, 0)
#define _MI_ALU1(op, x)    _MI_ALU(MI_ALU_##op, x, 0)
#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)

#define MI_ALU0(op)        _MI_ALU0(op)
#define MI_ALU1(op, x)     _MI_ALU1(op, MI_ALU_##x)
#define MI_ALU2(op, x, y)  _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
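
/* For illustration (not in the original source): MI_ALU2(LOAD, SRCA, R0)
 * expands to _MI_ALU(MI_ALU_LOAD, MI_ALU_SRCA, MI_ALU_R0), i.e.
 * (0x080 << 20) | (0x20 << 10) | 0x00 == 0x08008000: one MI_MATH ALU
 * instruction DWord packing an opcode and two operand encodings.
 */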
struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;
   bool stalled;

   uint64_t result;

   struct iris_bo *bo;
   struct iris_query_snapshots *map;

   int batch_idx;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_DATA value. */
   uint64_t predicate_data;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_data;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};
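
/* Note that iris_query_so_overflow's first two fields mirror
 * iris_query_snapshots, so code like stream_overflowed() below can cast
 * the same mapped snapshot buffer to either view.
 */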
/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
   }
}
/**
 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;

   iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
                                q->bo, offset, 0ull);
}
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (devinfo->gen >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ? CL_INVOCATION_COUNT :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     q->bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     q->bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS: {
      /* Index order matches the pipeline_statistics result fields filled
       * in by iris_get_query_result() below.
       */
      static const uint32_t index_to_reg[] = {
         IA_VERTICES_COUNT,
         IA_PRIMITIVES_COUNT,
         VS_INVOCATION_COUNT,
         GS_INVOCATION_COUNT,
         GS_PRIMITIVES_COUNT,
         CL_INVOCATION_COUNT,
         CL_PRIMITIVES_COUNT,
         PS_INVOCATION_COUNT,
         HS_INVOCATION_COUNT,
         DS_INVOCATION_COUNT,
         CS_INVOCATION_COUNT,
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;

   iris_emit_pipe_control_flush(batch,
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offsetof(struct iris_query_so_overflow,
                           stream[s].num_prims[end]);
      int w_idx = offsetof(struct iris_query_so_overflow,
                           stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     q->bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     q->bo, w_idx, false);
   }
}
static uint64_t
iris_timebase_scale(const struct gen_device_info *devinfo,
                    uint64_t gpu_timestamp)
{
   return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
}
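
/* Worked example (illustrative numbers): with a 12 MHz timestamp frequency
 * (timestamp_frequency == 12000000), a raw delta of 120 ticks scales to
 * (1000000000 * 120) / 12000000 = 10000 ns.  The real frequency comes from
 * gen_device_info at runtime.
 */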
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}
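
/* The raw timestamp counter is only TIMESTAMP_BITS wide (see iris_defines.h),
 * so it can wrap between the start and end snapshots, making time1 < time0.
 * Adding 1 << TIMESTAMP_BITS recovers the elapsed tick count, assuming at
 * most one wrap between the two snapshots.
 */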
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = iris_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = iris_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PIPELINE_STATISTICS:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
/**
 * GPR0 = (GPR0 == 0) ? 0 : 1;
 */
static void
gpr0_to_bool(struct iris_context *ice)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);

   static const uint32_t math[] = {
      MI_MATH | (9 - 2),
      MI_ALU2(LOAD, SRCA, R0),
      MI_ALU1(LOAD0, SRCB),
      MI_ALU0(ADD),
      MI_ALU2(STOREINV, R0, ZF),
      MI_ALU2(LOAD, SRCA, R0),
      MI_ALU2(LOAD, SRCB, R1),
      MI_ALU0(AND),
      MI_ALU2(STORE, R0, ACCU),
   };
   iris_batch_emit(batch, math, sizeof(math));
}
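
/* Reading the MI_MATH program above as pseudocode (the ADD/AND opcodes and
 * the length header are reconstructed; any flag-setting op works for the
 * first step):
 *
 *    ACCU = R0 + 0       -- ADD sets ZF iff GPR0 was zero
 *    R0   = ~ZF          -- STOREINV: all-ones iff GPR0 was nonzero
 *    R0   = R0 & R1      -- AND with the constant 1 preloaded into GPR1
 *
 * leaving a canonical 0/1 boolean in GPR0.
 */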
static void
load_overflow_data_to_cs_gprs(struct iris_context *ice,
                              struct iris_query *q,
                              int idx)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
                                 offsetof(struct iris_query_so_overflow,
                                          stream[idx].prim_storage_needed[0]));
   ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
                                 offsetof(struct iris_query_so_overflow,
                                          stream[idx].prim_storage_needed[1]));

   ice->vtbl.load_register_mem64(batch, CS_GPR(3), q->bo,
                                 offsetof(struct iris_query_so_overflow,
                                          stream[idx].num_prims[0]));
   ice->vtbl.load_register_mem64(batch, CS_GPR(4), q->bo,
                                 offsetof(struct iris_query_so_overflow,
                                          stream[idx].num_prims[1]));
}
/*
 * A stream has overflowed when the primitives-written delta differs from
 * the storage-needed delta (see stream_overflowed() above):
 *
 * GPR0 |= ((num_prims[1] - num_prims[0]) !=
 *          (prim_storage_needed[1] - prim_storage_needed[0]));
 */
static void
calc_overflow_for_stream(struct iris_context *ice)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   static const uint32_t maths[] = {
      MI_MATH | (17 - 2),
      MI_ALU2(LOAD, SRCA, R4),
      MI_ALU2(LOAD, SRCB, R3),
      MI_ALU0(SUB),                 /* num_prims[1] - num_prims[0] */
      MI_ALU2(STORE, R3, ACCU),
      MI_ALU2(LOAD, SRCA, R2),
      MI_ALU2(LOAD, SRCB, R1),
      MI_ALU0(SUB),                 /* storage_needed[1] - storage_needed[0] */
      MI_ALU2(STORE, R1, ACCU),
      MI_ALU2(LOAD, SRCA, R3),
      MI_ALU2(LOAD, SRCB, R1),
      MI_ALU0(XOR),                 /* nonzero iff the two deltas differ */
      MI_ALU2(STORE, R1, ACCU),
      MI_ALU2(LOAD, SRCA, R1),
      MI_ALU2(LOAD, SRCB, R0),
      MI_ALU0(OR),                  /* accumulate into the running result */
      MI_ALU2(STORE, R0, ACCU),
   };

   iris_batch_emit(batch, maths, sizeof(maths));
}
static void
overflow_result_to_gpr0(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   ice->vtbl.load_register_imm64(batch, CS_GPR(0), 0ull);

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      load_overflow_data_to_cs_gprs(ice, q, q->index);
      calc_overflow_for_stream(ice);
   } else {
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++) {
         load_overflow_data_to_cs_gprs(ice, q, i);
         calc_overflow_for_stream(ice);
      }
   }

   gpr0_to_bool(ice);
}
/**
 * Calculate the result and store it to CS_GPR0.
 */
static void
calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      overflow_result_to_gpr0(ice, q);
      return;
   }

   ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
                                 offsetof(struct iris_query_snapshots, start));
   ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
                                 offsetof(struct iris_query_snapshots, end));

   static const uint32_t math[] = {
      MI_MATH | (5 - 2),
      MI_ALU2(LOAD, SRCA, R2),
      MI_ALU2(LOAD, SRCB, R1),
      MI_ALU0(SUB),                 /* end - start */
      MI_ALU2(STORE, R0, ACCU),
   };
   iris_batch_emit(batch, math, sizeof(math));

   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
      gpr0_to_bool(ice);
}
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   /* Pipeline statistics index 10 (CS invocations) comes from the compute
    * pipe; everything else comes from the render pipe.
    */
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 10)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}
static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   iris_bo_unreference(query->bo);
   free(query);
}
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   iris_bo_unreference(q->bo);
   q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
                         IRIS_MEMZONE_OTHER);
   if (!q->bo)
      return false;

   q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   q->map->snapshots_landed = false;

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q, offsetof(struct iris_query_snapshots, start));

   return true;
}
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q, offsetof(struct iris_query_snapshots, end));
   mark_available(ice, q);

   return true;
}
/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && q->map->snapshots_landed) {
      calculate_result_on_cpu(devinfo, q);
   }
}
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready) {
      if (iris_batch_references(&ice->batches[q->batch_idx], q->bo))
         iris_batch_flush(&ice->batches[q->batch_idx]);

      if (!q->map->snapshots_landed) {
         if (wait)
            iris_bo_wait_rendering(q->bo);
         else
            return false;
      }

      assert(q->map->snapshots_landed);
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
      switch (q->index) {
      case 0:
         result->pipeline_statistics.ia_vertices = q->result;
         break;
      case 1:
         result->pipeline_statistics.ia_primitives = q->result;
         break;
      case 2:
         result->pipeline_statistics.vs_invocations = q->result;
         break;
      case 3:
         result->pipeline_statistics.gs_invocations = q->result;
         break;
      case 4:
         result->pipeline_statistics.gs_primitives = q->result;
         break;
      case 5:
         result->pipeline_statistics.c_invocations = q->result;
         break;
      case 6:
         result->pipeline_statistics.c_primitives = q->result;
         break;
      case 7:
         result->pipeline_statistics.ps_invocations = q->result;
         break;
      case 8:
         result->pipeline_statistics.hs_invocations = q->result;
         break;
      case 9:
         result->pipeline_statistics.ds_invocations = q->result;
         break;
      case 10:
         result->pipeline_statistics.cs_invocations = q->result;
         break;
      }
   } else {
      result->u64 = q->result;
   }

   return true;
}
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (iris_batch_references(batch, q->bo))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
                             q->bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && q->map->snapshots_landed) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
                                    q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
                                    q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
      return;
   }

   /* Calculate the result to CS_GPR0 */
   calculate_result_on_gpu(ice, q);

   bool predicated = !wait && !q->stalled;

   if (predicated) {
      ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
                                    snapshots_landed_offset);
      uint32_t predicate = MI_PREDICATE |
                           MI_PREDICATE_LOADOP_LOADINV |
                           MI_PREDICATE_COMBINEOP_SET |
                           MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
      iris_batch_emit(batch, &predicate, sizeof(uint32_t));
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      ice->vtbl.store_register_mem32(batch, CS_GPR(0),
                                     iris_resource_bo(p_res),
                                     offset, predicated);
   } else {
      ice->vtbl.store_register_mem64(batch, CS_GPR(0),
                                     iris_resource_bo(p_res),
                                     offset, predicated);
   }
}
static void
iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}
static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      overflow_result_to_gpr0(ice, q);

      ice->vtbl.load_register_reg64(batch, MI_PREDICATE_SRC0, CS_GPR(0));
      ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
      break;
   default:
      /* PIPE_QUERY_OCCLUSION_* */
      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
         offsetof(struct iris_query_snapshots, start));
      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo,
         offsetof(struct iris_query_snapshots, end));
      break;
   }

   uint32_t mi_predicate = MI_PREDICATE |
                           MI_PREDICATE_COMBINEOP_SET |
                           MI_PREDICATE_COMPAREOP_SRCS_EQUAL |
                           (inverted ? MI_PREDICATE_LOADOP_LOAD
                                     : MI_PREDICATE_LOADOP_LOADINV);
   iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_DATA register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   unsigned offset = offsetof(struct iris_query_snapshots, predicate_data);
   ice->vtbl.store_register_mem64(batch, MI_PREDICATE_DATA,
                                  q->bo, offset, false);
   ice->state.compute_predicate = q->bo;
}
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      boolean condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}
void
iris_init_query_functions(struct pipe_context *ctx)
{
   ctx->create_query = iris_create_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;
}