/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * ============================= GENXML CODE =============================
 *            [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */
#include <stddef.h>
#include <stdlib.h>

#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"
/* MMIO addresses of the per-stream streamout counter registers.  Each
 * stream's register is 8 bytes past the previous stream's.
 */
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
55 enum pipe_query_type type
;
64 struct iris_state_ref query_state_ref
;
65 struct iris_query_snapshots
*map
;
66 struct iris_syncpt
*syncpt
;
70 struct iris_monitor_object
*monitor
;
/**
 * GPU-written snapshot area for ordinary (non-SO-overflow) queries.
 *
 * The \c start / \c end fields are reconstructed from the offsetof() uses
 * later in this file.
 */
struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};
/**
 * GPU-written snapshot area for SO_OVERFLOW predicates: per-stream
 * begin/end pairs of both streamout counters.  The stream[4] wrapper is
 * reconstructed from the stream[s].num_prims[end] accesses below
 * (4 = all vertex streams for the ANY predicate).
 */
struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      /** [0] = start snapshot, [1] = end snapshot */
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};
95 static struct gen_mi_value
96 query_mem64(struct iris_query
*q
, uint32_t offset
)
98 struct iris_address addr
= {
99 .bo
= iris_resource_bo(q
->query_state_ref
.res
),
100 .offset
= q
->query_state_ref
.offset
+ offset
,
103 return gen_mi_mem64(addr
);
107 * Is this type of query written by PIPE_CONTROL?
110 iris_is_query_pipelined(struct iris_query
*q
)
113 case PIPE_QUERY_OCCLUSION_COUNTER
:
114 case PIPE_QUERY_OCCLUSION_PREDICATE
:
115 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
116 case PIPE_QUERY_TIMESTAMP
:
117 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
118 case PIPE_QUERY_TIME_ELAPSED
:
127 mark_available(struct iris_context
*ice
, struct iris_query
*q
)
129 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
130 unsigned flags
= PIPE_CONTROL_WRITE_IMMEDIATE
;
131 unsigned offset
= offsetof(struct iris_query_snapshots
, snapshots_landed
);
132 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
133 offset
+= q
->query_state_ref
.offset
;
135 if (!iris_is_query_pipelined(q
)) {
136 ice
->vtbl
.store_data_imm64(batch
, bo
, offset
, true);
138 /* Order available *after* the query results. */
139 flags
|= PIPE_CONTROL_FLUSH_ENABLE
;
140 iris_emit_pipe_control_write(batch
, "query: mark available",
141 flags
, bo
, offset
, true);
146 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
149 iris_pipelined_write(struct iris_batch
*batch
,
150 struct iris_query
*q
,
151 enum pipe_control_flags flags
,
154 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
155 const unsigned optional_cs_stall
=
156 GEN_GEN
== 9 && devinfo
->gt
== 4 ? PIPE_CONTROL_CS_STALL
: 0;
157 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
159 iris_emit_pipe_control_write(batch
, "query: pipelined snapshot write",
160 flags
| optional_cs_stall
,
165 write_value(struct iris_context
*ice
, struct iris_query
*q
, unsigned offset
)
167 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
168 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
170 if (!iris_is_query_pipelined(q
)) {
171 iris_emit_pipe_control_flush(batch
,
172 "query: non-pipelined snapshot write",
173 PIPE_CONTROL_CS_STALL
|
174 PIPE_CONTROL_STALL_AT_SCOREBOARD
);
179 case PIPE_QUERY_OCCLUSION_COUNTER
:
180 case PIPE_QUERY_OCCLUSION_PREDICATE
:
181 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
183 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
184 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
185 * Count sync operation."
187 iris_emit_pipe_control_flush(batch
,
188 "workaround: depth stall before writing "
190 PIPE_CONTROL_DEPTH_STALL
);
192 iris_pipelined_write(&ice
->batches
[IRIS_BATCH_RENDER
], q
,
193 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
194 PIPE_CONTROL_DEPTH_STALL
,
197 case PIPE_QUERY_TIME_ELAPSED
:
198 case PIPE_QUERY_TIMESTAMP
:
199 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
200 iris_pipelined_write(&ice
->batches
[IRIS_BATCH_RENDER
], q
,
201 PIPE_CONTROL_WRITE_TIMESTAMP
,
204 case PIPE_QUERY_PRIMITIVES_GENERATED
:
205 ice
->vtbl
.store_register_mem64(batch
,
207 GENX(CL_INVOCATION_COUNT_num
) :
208 SO_PRIM_STORAGE_NEEDED(q
->index
),
211 case PIPE_QUERY_PRIMITIVES_EMITTED
:
212 ice
->vtbl
.store_register_mem64(batch
,
213 SO_NUM_PRIMS_WRITTEN(q
->index
),
216 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
: {
217 static const uint32_t index_to_reg
[] = {
218 GENX(IA_VERTICES_COUNT_num
),
219 GENX(IA_PRIMITIVES_COUNT_num
),
220 GENX(VS_INVOCATION_COUNT_num
),
221 GENX(GS_INVOCATION_COUNT_num
),
222 GENX(GS_PRIMITIVES_COUNT_num
),
223 GENX(CL_INVOCATION_COUNT_num
),
224 GENX(CL_PRIMITIVES_COUNT_num
),
225 GENX(PS_INVOCATION_COUNT_num
),
226 GENX(HS_INVOCATION_COUNT_num
),
227 GENX(DS_INVOCATION_COUNT_num
),
228 GENX(CS_INVOCATION_COUNT_num
),
230 const uint32_t reg
= index_to_reg
[q
->index
];
232 ice
->vtbl
.store_register_mem64(batch
, reg
, bo
, offset
, false);
241 write_overflow_values(struct iris_context
*ice
, struct iris_query
*q
, bool end
)
243 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
244 uint32_t count
= q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
? 1 : 4;
245 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
246 uint32_t offset
= q
->query_state_ref
.offset
;
248 iris_emit_pipe_control_flush(batch
,
249 "query: write SO overflow snapshots",
250 PIPE_CONTROL_CS_STALL
|
251 PIPE_CONTROL_STALL_AT_SCOREBOARD
);
252 for (uint32_t i
= 0; i
< count
; i
++) {
253 int s
= q
->index
+ i
;
254 int g_idx
= offset
+ offsetof(struct iris_query_so_overflow
,
255 stream
[s
].num_prims
[end
]);
256 int w_idx
= offset
+ offsetof(struct iris_query_so_overflow
,
257 stream
[s
].prim_storage_needed
[end
]);
258 ice
->vtbl
.store_register_mem64(batch
, SO_NUM_PRIMS_WRITTEN(s
),
260 ice
->vtbl
.store_register_mem64(batch
, SO_PRIM_STORAGE_NEEDED(s
),
266 iris_raw_timestamp_delta(uint64_t time0
, uint64_t time1
)
269 return (1ULL << TIMESTAMP_BITS
) + time1
- time0
;
271 return time1
- time0
;
276 stream_overflowed(struct iris_query_so_overflow
*so
, int s
)
278 return (so
->stream
[s
].prim_storage_needed
[1] -
279 so
->stream
[s
].prim_storage_needed
[0]) !=
280 (so
->stream
[s
].num_prims
[1] - so
->stream
[s
].num_prims
[0]);
284 calculate_result_on_cpu(const struct gen_device_info
*devinfo
,
285 struct iris_query
*q
)
288 case PIPE_QUERY_OCCLUSION_PREDICATE
:
289 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
290 q
->result
= q
->map
->end
!= q
->map
->start
;
292 case PIPE_QUERY_TIMESTAMP
:
293 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
294 /* The timestamp is the single starting snapshot. */
295 q
->result
= gen_device_info_timebase_scale(devinfo
, q
->map
->start
);
296 q
->result
&= (1ull << TIMESTAMP_BITS
) - 1;
298 case PIPE_QUERY_TIME_ELAPSED
:
299 q
->result
= iris_raw_timestamp_delta(q
->map
->start
, q
->map
->end
);
300 q
->result
= gen_device_info_timebase_scale(devinfo
, q
->result
);
301 q
->result
&= (1ull << TIMESTAMP_BITS
) - 1;
303 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
304 q
->result
= stream_overflowed((void *) q
->map
, q
->index
);
306 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
308 for (int i
= 0; i
< MAX_VERTEX_STREAMS
; i
++)
309 q
->result
|= stream_overflowed((void *) q
->map
, i
);
311 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
:
312 q
->result
= q
->map
->end
- q
->map
->start
;
314 /* WaDividePSInvocationCountBy4:HSW,BDW */
315 if (GEN_GEN
== 8 && q
->index
== PIPE_STAT_QUERY_PS_INVOCATIONS
)
318 case PIPE_QUERY_OCCLUSION_COUNTER
:
319 case PIPE_QUERY_PRIMITIVES_GENERATED
:
320 case PIPE_QUERY_PRIMITIVES_EMITTED
:
322 q
->result
= q
->map
->end
- q
->map
->start
;
330 * Calculate the streamout overflow for stream \p idx:
332 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
334 static struct gen_mi_value
335 calc_overflow_for_stream(struct gen_mi_builder
*b
,
336 struct iris_query
*q
,
339 #define C(counter, i) query_mem64(q, \
340 offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))
342 return gen_mi_isub(b
, gen_mi_isub(b
, C(num_prims
, 1), C(num_prims
, 0)),
343 gen_mi_isub(b
, C(prim_storage_needed
, 1),
344 C(prim_storage_needed
, 0)));
349 * Calculate whether any stream has overflowed.
351 static struct gen_mi_value
352 calc_overflow_any_stream(struct gen_mi_builder
*b
, struct iris_query
*q
)
354 struct gen_mi_value stream_result
[MAX_VERTEX_STREAMS
];
355 for (int i
= 0; i
< MAX_VERTEX_STREAMS
; i
++)
356 stream_result
[i
] = calc_overflow_for_stream(b
, q
, i
);
358 struct gen_mi_value result
= stream_result
[0];
359 for (int i
= 1; i
< MAX_VERTEX_STREAMS
; i
++)
360 result
= gen_mi_ior(b
, result
, stream_result
[i
]);
366 query_is_boolean(enum pipe_query_type type
)
369 case PIPE_QUERY_OCCLUSION_PREDICATE
:
370 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
371 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
372 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
380 * Calculate the result using MI_MATH.
382 static struct gen_mi_value
383 calculate_result_on_gpu(const struct gen_device_info
*devinfo
,
384 struct gen_mi_builder
*b
,
385 struct iris_query
*q
)
387 struct gen_mi_value result
;
388 struct gen_mi_value start_val
=
389 query_mem64(q
, offsetof(struct iris_query_snapshots
, start
));
390 struct gen_mi_value end_val
=
391 query_mem64(q
, offsetof(struct iris_query_snapshots
, end
));
394 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
395 result
= calc_overflow_for_stream(b
, q
, q
->index
);
397 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
398 result
= calc_overflow_any_stream(b
, q
);
400 case PIPE_QUERY_TIMESTAMP
: {
401 /* TODO: This discards any fractional bits of the timebase scale.
402 * We would need to do a bit of fixed point math on the CS ALU, or
403 * launch an actual shader to calculate this with full precision.
405 uint32_t scale
= 1000000000ull / devinfo
->timestamp_frequency
;
406 result
= gen_mi_iand(b
, gen_mi_imm((1ull << 36) - 1),
407 gen_mi_imul_imm(b
, start_val
, scale
));
410 case PIPE_QUERY_TIME_ELAPSED
: {
411 /* TODO: This discards fractional bits (see above). */
412 uint32_t scale
= 1000000000ull / devinfo
->timestamp_frequency
;
413 result
= gen_mi_imul_imm(b
, gen_mi_isub(b
, end_val
, start_val
), scale
);
417 result
= gen_mi_isub(b
, end_val
, start_val
);
421 /* WaDividePSInvocationCountBy4:HSW,BDW */
423 q
->type
== PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
&&
424 q
->index
== PIPE_STAT_QUERY_PS_INVOCATIONS
)
425 result
= gen_mi_ushr32_imm(b
, result
, 2);
427 if (query_is_boolean(q
->type
))
428 result
= gen_mi_iand(b
, gen_mi_nz(b
, result
), gen_mi_imm(1));
433 static struct pipe_query
*
434 iris_create_query(struct pipe_context
*ctx
,
438 struct iris_query
*q
= calloc(1, sizeof(struct iris_query
));
440 q
->type
= query_type
;
443 if (q
->type
== PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
&&
444 q
->index
== PIPE_STAT_QUERY_CS_INVOCATIONS
)
445 q
->batch_idx
= IRIS_BATCH_COMPUTE
;
447 q
->batch_idx
= IRIS_BATCH_RENDER
;
448 return (struct pipe_query
*) q
;
452 iris_destroy_query(struct pipe_context
*ctx
, struct pipe_query
*p_query
)
454 struct iris_query
*query
= (void *) p_query
;
455 struct iris_screen
*screen
= (void *) ctx
->screen
;
456 iris_syncpt_reference(screen
, &query
->syncpt
, NULL
);
462 iris_begin_query(struct pipe_context
*ctx
, struct pipe_query
*query
)
464 struct iris_context
*ice
= (void *) ctx
;
465 struct iris_query
*q
= (void *) query
;
469 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
||
470 q
->type
== PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
)
471 size
= sizeof(struct iris_query_so_overflow
);
473 size
= sizeof(struct iris_query_snapshots
);
475 u_upload_alloc(ice
->query_buffer_uploader
, 0,
476 size
, size
, &q
->query_state_ref
.offset
,
477 &q
->query_state_ref
.res
, &ptr
);
479 if (!iris_resource_bo(q
->query_state_ref
.res
))
488 WRITE_ONCE(q
->map
->snapshots_landed
, false);
490 if (q
->type
== PIPE_QUERY_PRIMITIVES_GENERATED
&& q
->index
== 0) {
491 ice
->state
.prims_generated_query_active
= true;
492 ice
->state
.dirty
|= IRIS_DIRTY_STREAMOUT
| IRIS_DIRTY_CLIP
;
495 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
||
496 q
->type
== PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
)
497 write_overflow_values(ice
, q
, false);
500 q
->query_state_ref
.offset
+
501 offsetof(struct iris_query_snapshots
, start
));
507 iris_end_query(struct pipe_context
*ctx
, struct pipe_query
*query
)
509 struct iris_context
*ice
= (void *) ctx
;
510 struct iris_query
*q
= (void *) query
;
511 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
513 if (q
->type
== PIPE_QUERY_TIMESTAMP
) {
514 iris_begin_query(ctx
, query
);
515 iris_batch_reference_signal_syncpt(batch
, &q
->syncpt
);
516 mark_available(ice
, q
);
520 if (q
->type
== PIPE_QUERY_PRIMITIVES_GENERATED
&& q
->index
== 0) {
521 ice
->state
.prims_generated_query_active
= false;
522 ice
->state
.dirty
|= IRIS_DIRTY_STREAMOUT
| IRIS_DIRTY_CLIP
;
525 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
||
526 q
->type
== PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
)
527 write_overflow_values(ice
, q
, true);
530 q
->query_state_ref
.offset
+
531 offsetof(struct iris_query_snapshots
, end
));
533 iris_batch_reference_signal_syncpt(batch
, &q
->syncpt
);
534 mark_available(ice
, q
);
540 * See if the snapshots have landed for a query, and if so, compute the
541 * result and mark it ready. Does not flush (unlike iris_get_query_result).
544 iris_check_query_no_flush(struct iris_context
*ice
, struct iris_query
*q
)
546 struct iris_screen
*screen
= (void *) ice
->ctx
.screen
;
547 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
549 if (!q
->ready
&& READ_ONCE(q
->map
->snapshots_landed
)) {
550 calculate_result_on_cpu(devinfo
, q
);
555 iris_get_query_result(struct pipe_context
*ctx
,
556 struct pipe_query
*query
,
558 union pipe_query_result
*result
)
560 struct iris_context
*ice
= (void *) ctx
;
561 struct iris_query
*q
= (void *) query
;
562 struct iris_screen
*screen
= (void *) ctx
->screen
;
563 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
565 if (unlikely(screen
->no_hw
)) {
571 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
572 if (q
->syncpt
== iris_batch_get_signal_syncpt(batch
))
573 iris_batch_flush(batch
);
575 while (!READ_ONCE(q
->map
->snapshots_landed
)) {
577 iris_wait_syncpt(ctx
->screen
, q
->syncpt
, INT64_MAX
);
582 assert(READ_ONCE(q
->map
->snapshots_landed
));
583 calculate_result_on_cpu(devinfo
, q
);
588 result
->u64
= q
->result
;
594 iris_get_query_result_resource(struct pipe_context
*ctx
,
595 struct pipe_query
*query
,
597 enum pipe_query_value_type result_type
,
599 struct pipe_resource
*p_res
,
602 struct iris_context
*ice
= (void *) ctx
;
603 struct iris_query
*q
= (void *) query
;
604 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
605 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
606 struct iris_resource
*res
= (void *) p_res
;
607 struct iris_bo
*query_bo
= iris_resource_bo(q
->query_state_ref
.res
);
608 struct iris_bo
*dst_bo
= iris_resource_bo(p_res
);
609 unsigned snapshots_landed_offset
=
610 offsetof(struct iris_query_snapshots
, snapshots_landed
);
612 res
->bind_history
|= PIPE_BIND_QUERY_BUFFER
;
615 /* They're asking for the availability of the result. If we still
616 * have commands queued up which produce the result, submit them
617 * now so that progress happens. Either way, copy the snapshots
618 * landed field to the destination resource.
620 if (q
->syncpt
== iris_batch_get_signal_syncpt(batch
))
621 iris_batch_flush(batch
);
623 ice
->vtbl
.copy_mem_mem(batch
, dst_bo
, offset
,
624 query_bo
, snapshots_landed_offset
,
625 result_type
<= PIPE_QUERY_TYPE_U32
? 4 : 8);
629 if (!q
->ready
&& READ_ONCE(q
->map
->snapshots_landed
)) {
630 /* The final snapshots happen to have landed, so let's just compute
631 * the result on the CPU now...
633 calculate_result_on_cpu(devinfo
, q
);
637 /* We happen to have the result on the CPU, so just copy it. */
638 if (result_type
<= PIPE_QUERY_TYPE_U32
) {
639 ice
->vtbl
.store_data_imm32(batch
, dst_bo
, offset
, q
->result
);
641 ice
->vtbl
.store_data_imm64(batch
, dst_bo
, offset
, q
->result
);
644 /* Make sure the result lands before they use bind the QBO elsewhere
645 * and use the result.
647 // XXX: Why? i965 doesn't do this.
648 iris_emit_pipe_control_flush(batch
,
649 "query: unknown QBO flushing hack",
650 PIPE_CONTROL_CS_STALL
);
654 bool predicated
= !wait
&& !q
->stalled
;
656 struct gen_mi_builder b
;
657 gen_mi_builder_init(&b
, batch
);
659 struct gen_mi_value result
= calculate_result_on_gpu(devinfo
, &b
, q
);
660 struct gen_mi_value dst
=
661 result_type
<= PIPE_QUERY_TYPE_U32
? gen_mi_mem32(rw_bo(dst_bo
, offset
))
662 : gen_mi_mem64(rw_bo(dst_bo
, offset
));
665 gen_mi_store(&b
, gen_mi_reg32(MI_PREDICATE_RESULT
),
666 gen_mi_mem64(ro_bo(query_bo
, snapshots_landed_offset
)));
667 gen_mi_store_if(&b
, dst
, result
);
669 gen_mi_store(&b
, dst
, result
);
674 iris_set_active_query_state(struct pipe_context
*ctx
, bool enable
)
676 struct iris_context
*ice
= (void *) ctx
;
678 if (ice
->state
.statistics_counters_enabled
== enable
)
681 // XXX: most packets aren't paying attention to this yet, because it'd
682 // have to be done dynamically at draw time, which is a pain
683 ice
->state
.statistics_counters_enabled
= enable
;
684 ice
->state
.dirty
|= IRIS_DIRTY_CLIP
|
687 IRIS_DIRTY_STREAMOUT
|
695 set_predicate_enable(struct iris_context
*ice
, bool value
)
698 ice
->state
.predicate
= IRIS_PREDICATE_STATE_RENDER
;
700 ice
->state
.predicate
= IRIS_PREDICATE_STATE_DONT_RENDER
;
704 set_predicate_for_result(struct iris_context
*ice
,
705 struct iris_query
*q
,
708 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
709 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
711 /* The CPU doesn't have the query result yet; use hardware predication */
712 ice
->state
.predicate
= IRIS_PREDICATE_STATE_USE_BIT
;
714 /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
715 iris_emit_pipe_control_flush(batch
,
716 "conditional rendering: set predicate",
717 PIPE_CONTROL_FLUSH_ENABLE
);
720 struct gen_mi_builder b
;
721 gen_mi_builder_init(&b
, batch
);
723 struct gen_mi_value result
;
726 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
727 result
= calc_overflow_for_stream(&b
, q
, q
->index
);
729 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
730 result
= calc_overflow_any_stream(&b
, q
);
733 /* PIPE_QUERY_OCCLUSION_* */
734 struct gen_mi_value start
=
735 query_mem64(q
, offsetof(struct iris_query_snapshots
, start
));
736 struct gen_mi_value end
=
737 query_mem64(q
, offsetof(struct iris_query_snapshots
, end
));
738 result
= gen_mi_isub(&b
, end
, start
);
743 result
= inverted
? gen_mi_z(&b
, result
) : gen_mi_nz(&b
, result
);
744 result
= gen_mi_iand(&b
, result
, gen_mi_imm(1));
746 /* We immediately set the predicate on the render batch, as all the
747 * counters come from 3D operations. However, we may need to predicate
748 * a compute dispatch, which executes in a different GEM context and has
749 * a different MI_PREDICATE_RESULT register. So, we save the result to
750 * memory and reload it in iris_launch_grid.
752 gen_mi_value_ref(&b
, result
);
753 gen_mi_store(&b
, gen_mi_reg32(MI_PREDICATE_RESULT
), result
);
754 gen_mi_store(&b
, query_mem64(q
, offsetof(struct iris_query_snapshots
,
755 predicate_result
)), result
);
756 ice
->state
.compute_predicate
= bo
;
760 iris_render_condition(struct pipe_context
*ctx
,
761 struct pipe_query
*query
,
763 enum pipe_render_cond_flag mode
)
765 struct iris_context
*ice
= (void *) ctx
;
766 struct iris_query
*q
= (void *) query
;
768 /* The old condition isn't relevant; we'll update it if necessary */
769 ice
->state
.compute_predicate
= NULL
;
770 ice
->condition
.query
= q
;
771 ice
->condition
.condition
= condition
;
774 ice
->state
.predicate
= IRIS_PREDICATE_STATE_RENDER
;
778 iris_check_query_no_flush(ice
, q
);
780 if (q
->result
|| q
->ready
) {
781 set_predicate_enable(ice
, (q
->result
!= 0) ^ condition
);
783 if (mode
== PIPE_RENDER_COND_NO_WAIT
||
784 mode
== PIPE_RENDER_COND_BY_REGION_NO_WAIT
) {
785 perf_debug(&ice
->dbg
, "Conditional rendering demoted from "
786 "\"no wait\" to \"wait\".");
788 set_predicate_for_result(ice
, q
, condition
);
793 iris_resolve_conditional_render(struct iris_context
*ice
)
795 struct pipe_context
*ctx
= (void *) ice
;
796 struct iris_query
*q
= ice
->condition
.query
;
797 struct pipe_query
*query
= (void *) q
;
798 union pipe_query_result result
;
800 if (ice
->state
.predicate
!= IRIS_PREDICATE_STATE_USE_BIT
)
805 iris_get_query_result(ctx
, query
, true, &result
);
806 set_predicate_enable(ice
, (q
->result
!= 0) ^ ice
->condition
.condition
);
810 genX(init_query
)(struct iris_context
*ice
)
812 struct pipe_context
*ctx
= &ice
->ctx
;
814 ctx
->create_query
= iris_create_query
;
815 ctx
->destroy_query
= iris_destroy_query
;
816 ctx
->begin_query
= iris_begin_query
;
817 ctx
->end_query
= iris_end_query
;
818 ctx
->get_query_result
= iris_get_query_result
;
819 ctx
->get_query_result_resource
= iris_get_query_result_resource
;
820 ctx
->set_active_query_state
= iris_set_active_query_state
;
821 ctx
->render_condition
= iris_render_condition
;
823 ice
->vtbl
.resolve_conditional_render
= iris_resolve_conditional_render
;