/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"
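/* Each transform feedback stream has its own pair of 64-bit counter
 * registers, spaced 8 bytes apart, so stream <n>'s register lives at the
 * stream-0 offset plus n * 8 (e.g. SO_NUM_PRIMS_WRITTEN(2) is the stream-0
 * register address plus 16).
 */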
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};
struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};
struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};
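/* In each stream's counter pairs, index [0] holds the begin-query snapshot
 * and [1] the end-query snapshot; write_overflow_values() picks the slot
 * via its "end" argument.
 */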
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}
/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}
/**
 * Write a pipelined snapshot (e.g. PS_DEPTH_COUNT or a timestamp) to the
 * query buffer at the given offset, via a PIPE_CONTROL post-sync write.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}
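/* write_value() routes each query type to the right snapshot mechanism:
 * occlusion and timestamp queries use pipelined PIPE_CONTROL post-sync
 * writes, while primitive and pipeline-statistics queries store the
 * relevant counter register (MI_STORE_REGISTER_MEM via store_register_mem64).
 * For example, PIPE_QUERY_PRIMITIVES_EMITTED on stream 1 snapshots
 * SO_NUM_PRIMS_WRITTEN(1), i.e. the stream-0 register plus 8 bytes.
 */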
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}
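/* The streamout overflow queries snapshot both counters for each stream:
 * PIPE_QUERY_SO_OVERFLOW_PREDICATE covers the single stream q->index,
 * while SO_OVERFLOW_ANY_PREDICATE covers all four streams (count below).
 */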
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}
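/* The raw TIMESTAMP register is a free-running 36-bit counter
 * (TIMESTAMP_BITS), so the end snapshot can be numerically smaller than
 * the start snapshot if the counter wrapped mid-query.  For example, with
 * time0 = (1ull << 36) - 100 and time1 = 50, the delta below is
 * (1ull << 36) + 50 - ((1ull << 36) - 100) = 150 ticks.
 */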
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}
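/* A stream overflowed if the number of primitives generated (the
 * SO_PRIM_STORAGE_NEEDED delta) differs from the number actually written
 * to the streamout buffer (the SO_NUM_PRIMS_WRITTEN delta); e.g. 100
 * generated but only 96 written means 4 primitives were dropped.
 */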
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
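/* The helpers below compute the same results on the GPU instead, using the
 * command streamer's MI_MATH ALU through the gen_mi_builder interface, so
 * predication and QBO writes can consume a query result without a CPU
 * round trip.
 */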
/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                      gen_mi_isub(b, C(prim_storage_needed, 1),
                                     C(prim_storage_needed, 0)));
#undef C
}
/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}
static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}
/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                           gen_mi_imul_imm(b, start_val, scale));
      break;
   }
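   /* Truncating the ns-per-tick scale loses precision: e.g. at a 12 MHz
    * timestamp frequency, 1000000000 / 12000000 is 83.33 but the integer
    * scale used here is 83, undercounting by roughly 0.4%.
    */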
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}
static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncpt_reference(screen, &query->syncpt, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   free(query);
}
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   void *ptr = NULL;
   uint32_t size;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];
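   /* Timestamp queries have no begin-time snapshot; the single value is
    * captured at end_query time.  Reusing iris_begin_query() here allocates
    * the snapshot buffer and records the timestamp into the "start" slot,
    * which calculate_result_on_cpu() reads for PIPE_QUERY_TIMESTAMP.
    */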
   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}
/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   result->u64 = q->result;

   return true;
}
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;
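   /* If the caller didn't ask us to wait and nothing has stalled on the
    * results, the snapshots may not have landed yet.  Load snapshots_landed
    * into MI_PREDICATE_RESULT and use a predicated store so the destination
    * is only written once the result is actually available; otherwise the
    * QBO keeps its previous contents.
    */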
   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}
static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}
static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}
void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}