/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

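/* Each streamout stream has its own pair of 64-bit counter registers;
 * stream N's register is the stream-0 register address plus N * 8 bytes.
 */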
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;
   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

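/**
 * Mark a query's result as available by writing its snapshots_landed flag.
 * For pipelined queries, PIPE_CONTROL_FLUSH_ENABLE orders the availability
 * write after the result snapshots themselves, so CPU polling of
 * snapshots_landed can never observe "available" before the data is valid.
 */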
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

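/**
 * Snapshot the current value of this query's counter into its buffer at
 * the given offset.  Pipelined counters (occlusion, timestamps) are
 * written with PIPE_CONTROL; the statistics registers are read with
 * MI_STORE_REGISTER_MEM after a stall so the value is current.
 */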
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

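/* The raw timestamp counter is only TIMESTAMP_BITS wide, so it can wrap
 * between the two snapshots.  If end < start, the counter wrapped exactly
 * once and the true delta is (2^TIMESTAMP_BITS - time0) + time1.
 */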
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

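/* A stream overflowed if the primitives that needed storage don't match
 * the primitives actually written over the query interval -- i.e. some
 * primitives were dropped because the streamout buffers filled up.
 */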
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                         gen_mi_isub(b, C(prim_storage_needed, 1),
                                        C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                              gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

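/**
 * "Batch queries" are driver-specific: they wrap an iris_monitor_object,
 * which gathers performance-monitor counters instead of the standard
 * pipeline statistics.
 */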
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncpt_reference(screen, &query->syncpt, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   free(query);
}

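/**
 * Begin a query: allocate fresh snapshot storage from the query buffer
 * uploader, clear the availability flag, and write the starting snapshot.
 */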
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

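/**
 * End a query: write the ending snapshot and mark the result available.
 * PIPE_QUERY_TIMESTAMP has no separate begin, so it takes its single
 * snapshot here.
 */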
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   result->u64 = q->result;

   return true;
}

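/**
 * Write a query's result into a buffer object (QBO).  If the result is
 * already known on the CPU, store it directly; otherwise compute it on
 * the GPU with MI_MATH, optionally predicating the store on the
 * snapshots having landed so an unfinished query writes nothing.
 */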
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                       gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

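/**
 * The pipe_context::render_condition hook: make subsequent rendering
 * conditional on a query result.  If the result is already known on the
 * CPU we simply enable or disable rendering; otherwise we fall back to
 * hardware predication via MI_PREDICATE.
 */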
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}