/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */
36 #include "perf/gen_perf.h"
37 #include "pipe/p_defines.h"
38 #include "pipe/p_state.h"
39 #include "pipe/p_context.h"
40 #include "pipe/p_screen.h"
41 #include "util/u_inlines.h"
42 #include "util/u_upload_mgr.h"
43 #include "iris_context.h"
44 #include "iris_defines.h"
45 #include "iris_fence.h"
46 #include "iris_resource.h"
47 #include "iris_screen.h"
49 #include "iris_genx_macros.h"
/* MMIO addresses for stream \p n's streamout counter register pair.
 * Each stream's registers are laid out 8 bytes after the previous stream's.
 */
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
55 enum pipe_query_type type
;
64 struct iris_state_ref query_state_ref
;
65 struct iris_query_snapshots
*map
;
66 struct iris_syncpt
*syncpt
;
71 struct iris_query_snapshots
{
72 /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
73 uint64_t predicate_result
;
75 /** Have the start/end snapshots landed? */
76 uint64_t snapshots_landed
;
78 /** Starting and ending counter snapshots */
83 struct iris_query_so_overflow
{
84 uint64_t predicate_result
;
85 uint64_t snapshots_landed
;
88 uint64_t prim_storage_needed
[2];
89 uint64_t num_prims
[2];
93 static struct gen_mi_value
94 query_mem64(struct iris_query
*q
, uint32_t offset
)
96 struct iris_address addr
= {
97 .bo
= iris_resource_bo(q
->query_state_ref
.res
),
98 .offset
= q
->query_state_ref
.offset
+ offset
,
101 return gen_mi_mem64(addr
);
105 * Is this type of query written by PIPE_CONTROL?
108 iris_is_query_pipelined(struct iris_query
*q
)
111 case PIPE_QUERY_OCCLUSION_COUNTER
:
112 case PIPE_QUERY_OCCLUSION_PREDICATE
:
113 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
114 case PIPE_QUERY_TIMESTAMP
:
115 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
116 case PIPE_QUERY_TIME_ELAPSED
:
125 mark_available(struct iris_context
*ice
, struct iris_query
*q
)
127 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
128 unsigned flags
= PIPE_CONTROL_WRITE_IMMEDIATE
;
129 unsigned offset
= offsetof(struct iris_query_snapshots
, snapshots_landed
);
130 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
131 offset
+= q
->query_state_ref
.offset
;
133 if (!iris_is_query_pipelined(q
)) {
134 ice
->vtbl
.store_data_imm64(batch
, bo
, offset
, true);
136 /* Order available *after* the query results. */
137 flags
|= PIPE_CONTROL_FLUSH_ENABLE
;
138 iris_emit_pipe_control_write(batch
, "query: mark available",
139 flags
, bo
, offset
, true);
144 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
147 iris_pipelined_write(struct iris_batch
*batch
,
148 struct iris_query
*q
,
149 enum pipe_control_flags flags
,
152 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
153 const unsigned optional_cs_stall
=
154 GEN_GEN
== 9 && devinfo
->gt
== 4 ? PIPE_CONTROL_CS_STALL
: 0;
155 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
157 iris_emit_pipe_control_write(batch
, "query: pipelined snapshot write",
158 flags
| optional_cs_stall
,
163 write_value(struct iris_context
*ice
, struct iris_query
*q
, unsigned offset
)
165 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
166 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
168 if (!iris_is_query_pipelined(q
)) {
169 iris_emit_pipe_control_flush(batch
,
170 "query: non-pipelined snapshot write",
171 PIPE_CONTROL_CS_STALL
|
172 PIPE_CONTROL_STALL_AT_SCOREBOARD
);
177 case PIPE_QUERY_OCCLUSION_COUNTER
:
178 case PIPE_QUERY_OCCLUSION_PREDICATE
:
179 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
181 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
182 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
183 * Count sync operation."
185 iris_emit_pipe_control_flush(batch
,
186 "workaround: depth stall before writing "
188 PIPE_CONTROL_DEPTH_STALL
);
190 iris_pipelined_write(&ice
->batches
[IRIS_BATCH_RENDER
], q
,
191 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
192 PIPE_CONTROL_DEPTH_STALL
,
195 case PIPE_QUERY_TIME_ELAPSED
:
196 case PIPE_QUERY_TIMESTAMP
:
197 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
198 iris_pipelined_write(&ice
->batches
[IRIS_BATCH_RENDER
], q
,
199 PIPE_CONTROL_WRITE_TIMESTAMP
,
202 case PIPE_QUERY_PRIMITIVES_GENERATED
:
203 ice
->vtbl
.store_register_mem64(batch
,
205 GENX(CL_INVOCATION_COUNT_num
) :
206 SO_PRIM_STORAGE_NEEDED(q
->index
),
209 case PIPE_QUERY_PRIMITIVES_EMITTED
:
210 ice
->vtbl
.store_register_mem64(batch
,
211 SO_NUM_PRIMS_WRITTEN(q
->index
),
214 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
: {
215 static const uint32_t index_to_reg
[] = {
216 GENX(IA_VERTICES_COUNT_num
),
217 GENX(IA_PRIMITIVES_COUNT_num
),
218 GENX(VS_INVOCATION_COUNT_num
),
219 GENX(GS_INVOCATION_COUNT_num
),
220 GENX(GS_PRIMITIVES_COUNT_num
),
221 GENX(CL_INVOCATION_COUNT_num
),
222 GENX(CL_PRIMITIVES_COUNT_num
),
223 GENX(PS_INVOCATION_COUNT_num
),
224 GENX(HS_INVOCATION_COUNT_num
),
225 GENX(DS_INVOCATION_COUNT_num
),
226 GENX(CS_INVOCATION_COUNT_num
),
228 const uint32_t reg
= index_to_reg
[q
->index
];
230 ice
->vtbl
.store_register_mem64(batch
, reg
, bo
, offset
, false);
239 write_overflow_values(struct iris_context
*ice
, struct iris_query
*q
, bool end
)
241 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
242 uint32_t count
= q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
? 1 : 4;
243 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
244 uint32_t offset
= q
->query_state_ref
.offset
;
246 iris_emit_pipe_control_flush(batch
,
247 "query: write SO overflow snapshots",
248 PIPE_CONTROL_CS_STALL
|
249 PIPE_CONTROL_STALL_AT_SCOREBOARD
);
250 for (uint32_t i
= 0; i
< count
; i
++) {
251 int s
= q
->index
+ i
;
252 int g_idx
= offset
+ offsetof(struct iris_query_so_overflow
,
253 stream
[s
].num_prims
[end
]);
254 int w_idx
= offset
+ offsetof(struct iris_query_so_overflow
,
255 stream
[s
].prim_storage_needed
[end
]);
256 ice
->vtbl
.store_register_mem64(batch
, SO_NUM_PRIMS_WRITTEN(s
),
258 ice
->vtbl
.store_register_mem64(batch
, SO_PRIM_STORAGE_NEEDED(s
),
264 iris_raw_timestamp_delta(uint64_t time0
, uint64_t time1
)
267 return (1ULL << TIMESTAMP_BITS
) + time1
- time0
;
269 return time1
- time0
;
274 stream_overflowed(struct iris_query_so_overflow
*so
, int s
)
276 return (so
->stream
[s
].prim_storage_needed
[1] -
277 so
->stream
[s
].prim_storage_needed
[0]) !=
278 (so
->stream
[s
].num_prims
[1] - so
->stream
[s
].num_prims
[0]);
282 calculate_result_on_cpu(const struct gen_device_info
*devinfo
,
283 struct iris_query
*q
)
286 case PIPE_QUERY_OCCLUSION_PREDICATE
:
287 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
288 q
->result
= q
->map
->end
!= q
->map
->start
;
290 case PIPE_QUERY_TIMESTAMP
:
291 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
292 /* The timestamp is the single starting snapshot. */
293 q
->result
= gen_device_info_timebase_scale(devinfo
, q
->map
->start
);
294 q
->result
&= (1ull << TIMESTAMP_BITS
) - 1;
296 case PIPE_QUERY_TIME_ELAPSED
:
297 q
->result
= iris_raw_timestamp_delta(q
->map
->start
, q
->map
->end
);
298 q
->result
= gen_device_info_timebase_scale(devinfo
, q
->result
);
299 q
->result
&= (1ull << TIMESTAMP_BITS
) - 1;
301 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
302 q
->result
= stream_overflowed((void *) q
->map
, q
->index
);
304 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
306 for (int i
= 0; i
< MAX_VERTEX_STREAMS
; i
++)
307 q
->result
|= stream_overflowed((void *) q
->map
, i
);
309 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
:
310 q
->result
= q
->map
->end
- q
->map
->start
;
312 /* WaDividePSInvocationCountBy4:HSW,BDW */
313 if (GEN_GEN
== 8 && q
->index
== PIPE_STAT_QUERY_PS_INVOCATIONS
)
316 case PIPE_QUERY_OCCLUSION_COUNTER
:
317 case PIPE_QUERY_PRIMITIVES_GENERATED
:
318 case PIPE_QUERY_PRIMITIVES_EMITTED
:
320 q
->result
= q
->map
->end
- q
->map
->start
;
328 * Calculate the streamout overflow for stream \p idx:
330 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
332 static struct gen_mi_value
333 calc_overflow_for_stream(struct gen_mi_builder
*b
,
334 struct iris_query
*q
,
337 #define C(counter, i) query_mem64(q, \
338 offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))
340 return gen_mi_isub(b
, gen_mi_isub(b
, C(num_prims
, 1), C(num_prims
, 0)),
341 gen_mi_isub(b
, C(prim_storage_needed
, 1),
342 C(prim_storage_needed
, 0)));
347 * Calculate whether any stream has overflowed.
349 static struct gen_mi_value
350 calc_overflow_any_stream(struct gen_mi_builder
*b
, struct iris_query
*q
)
352 struct gen_mi_value stream_result
[MAX_VERTEX_STREAMS
];
353 for (int i
= 0; i
< MAX_VERTEX_STREAMS
; i
++)
354 stream_result
[i
] = calc_overflow_for_stream(b
, q
, i
);
356 struct gen_mi_value result
= stream_result
[0];
357 for (int i
= 1; i
< MAX_VERTEX_STREAMS
; i
++)
358 result
= gen_mi_ior(b
, result
, stream_result
[i
]);
364 query_is_boolean(enum pipe_query_type type
)
367 case PIPE_QUERY_OCCLUSION_PREDICATE
:
368 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
369 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
370 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
378 * Calculate the result using MI_MATH.
380 static struct gen_mi_value
381 calculate_result_on_gpu(const struct gen_device_info
*devinfo
,
382 struct gen_mi_builder
*b
,
383 struct iris_query
*q
)
385 struct gen_mi_value result
;
386 struct gen_mi_value start_val
=
387 query_mem64(q
, offsetof(struct iris_query_snapshots
, start
));
388 struct gen_mi_value end_val
=
389 query_mem64(q
, offsetof(struct iris_query_snapshots
, end
));
392 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
393 result
= calc_overflow_for_stream(b
, q
, q
->index
);
395 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
396 result
= calc_overflow_any_stream(b
, q
);
398 case PIPE_QUERY_TIMESTAMP
: {
399 /* TODO: This discards any fractional bits of the timebase scale.
400 * We would need to do a bit of fixed point math on the CS ALU, or
401 * launch an actual shader to calculate this with full precision.
403 uint32_t scale
= 1000000000ull / devinfo
->timestamp_frequency
;
404 result
= gen_mi_iand(b
, gen_mi_imm((1ull << 36) - 1),
405 gen_mi_imul_imm(b
, start_val
, scale
));
408 case PIPE_QUERY_TIME_ELAPSED
: {
409 /* TODO: This discards fractional bits (see above). */
410 uint32_t scale
= 1000000000ull / devinfo
->timestamp_frequency
;
411 result
= gen_mi_imul_imm(b
, gen_mi_isub(b
, end_val
, start_val
), scale
);
415 result
= gen_mi_isub(b
, end_val
, start_val
);
419 /* WaDividePSInvocationCountBy4:HSW,BDW */
421 q
->type
== PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
&&
422 q
->index
== PIPE_STAT_QUERY_PS_INVOCATIONS
)
423 result
= gen_mi_ushr32_imm(b
, result
, 2);
425 if (query_is_boolean(q
->type
))
426 result
= gen_mi_iand(b
, gen_mi_nz(b
, result
), gen_mi_imm(1));
431 static struct pipe_query
*
432 iris_create_query(struct pipe_context
*ctx
,
436 struct iris_query
*q
= calloc(1, sizeof(struct iris_query
));
438 q
->type
= query_type
;
441 if (q
->type
== PIPE_QUERY_PIPELINE_STATISTICS_SINGLE
&&
442 q
->index
== PIPE_STAT_QUERY_CS_INVOCATIONS
)
443 q
->batch_idx
= IRIS_BATCH_COMPUTE
;
445 q
->batch_idx
= IRIS_BATCH_RENDER
;
446 return (struct pipe_query
*) q
;
450 iris_destroy_query(struct pipe_context
*ctx
, struct pipe_query
*p_query
)
452 struct iris_query
*query
= (void *) p_query
;
453 struct iris_screen
*screen
= (void *) ctx
->screen
;
454 iris_syncpt_reference(screen
, &query
->syncpt
, NULL
);
460 iris_begin_query(struct pipe_context
*ctx
, struct pipe_query
*query
)
462 struct iris_context
*ice
= (void *) ctx
;
463 struct iris_query
*q
= (void *) query
;
467 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
||
468 q
->type
== PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
)
469 size
= sizeof(struct iris_query_so_overflow
);
471 size
= sizeof(struct iris_query_snapshots
);
473 u_upload_alloc(ice
->query_buffer_uploader
, 0,
474 size
, size
, &q
->query_state_ref
.offset
,
475 &q
->query_state_ref
.res
, &ptr
);
477 if (!iris_resource_bo(q
->query_state_ref
.res
))
486 WRITE_ONCE(q
->map
->snapshots_landed
, false);
488 if (q
->type
== PIPE_QUERY_PRIMITIVES_GENERATED
&& q
->index
== 0) {
489 ice
->state
.prims_generated_query_active
= true;
490 ice
->state
.dirty
|= IRIS_DIRTY_STREAMOUT
| IRIS_DIRTY_CLIP
;
493 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
||
494 q
->type
== PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
)
495 write_overflow_values(ice
, q
, false);
498 q
->query_state_ref
.offset
+
499 offsetof(struct iris_query_snapshots
, start
));
505 iris_end_query(struct pipe_context
*ctx
, struct pipe_query
*query
)
507 struct iris_context
*ice
= (void *) ctx
;
508 struct iris_query
*q
= (void *) query
;
509 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
511 if (q
->type
== PIPE_QUERY_TIMESTAMP
) {
512 iris_begin_query(ctx
, query
);
513 iris_batch_reference_signal_syncpt(batch
, &q
->syncpt
);
514 mark_available(ice
, q
);
518 if (q
->type
== PIPE_QUERY_PRIMITIVES_GENERATED
&& q
->index
== 0) {
519 ice
->state
.prims_generated_query_active
= false;
520 ice
->state
.dirty
|= IRIS_DIRTY_STREAMOUT
| IRIS_DIRTY_CLIP
;
523 if (q
->type
== PIPE_QUERY_SO_OVERFLOW_PREDICATE
||
524 q
->type
== PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
)
525 write_overflow_values(ice
, q
, true);
528 q
->query_state_ref
.offset
+
529 offsetof(struct iris_query_snapshots
, end
));
531 iris_batch_reference_signal_syncpt(batch
, &q
->syncpt
);
532 mark_available(ice
, q
);
538 * See if the snapshots have landed for a query, and if so, compute the
539 * result and mark it ready. Does not flush (unlike iris_get_query_result).
542 iris_check_query_no_flush(struct iris_context
*ice
, struct iris_query
*q
)
544 struct iris_screen
*screen
= (void *) ice
->ctx
.screen
;
545 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
547 if (!q
->ready
&& READ_ONCE(q
->map
->snapshots_landed
)) {
548 calculate_result_on_cpu(devinfo
, q
);
553 iris_get_query_result(struct pipe_context
*ctx
,
554 struct pipe_query
*query
,
556 union pipe_query_result
*result
)
558 struct iris_context
*ice
= (void *) ctx
;
559 struct iris_query
*q
= (void *) query
;
560 struct iris_screen
*screen
= (void *) ctx
->screen
;
561 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
563 if (unlikely(screen
->no_hw
)) {
569 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
570 if (q
->syncpt
== iris_batch_get_signal_syncpt(batch
))
571 iris_batch_flush(batch
);
573 while (!READ_ONCE(q
->map
->snapshots_landed
)) {
575 iris_wait_syncpt(ctx
->screen
, q
->syncpt
, INT64_MAX
);
580 assert(READ_ONCE(q
->map
->snapshots_landed
));
581 calculate_result_on_cpu(devinfo
, q
);
586 result
->u64
= q
->result
;
592 iris_get_query_result_resource(struct pipe_context
*ctx
,
593 struct pipe_query
*query
,
595 enum pipe_query_value_type result_type
,
597 struct pipe_resource
*p_res
,
600 struct iris_context
*ice
= (void *) ctx
;
601 struct iris_query
*q
= (void *) query
;
602 struct iris_batch
*batch
= &ice
->batches
[q
->batch_idx
];
603 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
604 struct iris_resource
*res
= (void *) p_res
;
605 struct iris_bo
*query_bo
= iris_resource_bo(q
->query_state_ref
.res
);
606 struct iris_bo
*dst_bo
= iris_resource_bo(p_res
);
607 unsigned snapshots_landed_offset
=
608 offsetof(struct iris_query_snapshots
, snapshots_landed
);
610 res
->bind_history
|= PIPE_BIND_QUERY_BUFFER
;
613 /* They're asking for the availability of the result. If we still
614 * have commands queued up which produce the result, submit them
615 * now so that progress happens. Either way, copy the snapshots
616 * landed field to the destination resource.
618 if (q
->syncpt
== iris_batch_get_signal_syncpt(batch
))
619 iris_batch_flush(batch
);
621 ice
->vtbl
.copy_mem_mem(batch
, dst_bo
, offset
,
622 query_bo
, snapshots_landed_offset
,
623 result_type
<= PIPE_QUERY_TYPE_U32
? 4 : 8);
627 if (!q
->ready
&& READ_ONCE(q
->map
->snapshots_landed
)) {
628 /* The final snapshots happen to have landed, so let's just compute
629 * the result on the CPU now...
631 calculate_result_on_cpu(devinfo
, q
);
635 /* We happen to have the result on the CPU, so just copy it. */
636 if (result_type
<= PIPE_QUERY_TYPE_U32
) {
637 ice
->vtbl
.store_data_imm32(batch
, dst_bo
, offset
, q
->result
);
639 ice
->vtbl
.store_data_imm64(batch
, dst_bo
, offset
, q
->result
);
642 /* Make sure the result lands before they use bind the QBO elsewhere
643 * and use the result.
645 // XXX: Why? i965 doesn't do this.
646 iris_emit_pipe_control_flush(batch
,
647 "query: unknown QBO flushing hack",
648 PIPE_CONTROL_CS_STALL
);
652 bool predicated
= !wait
&& !q
->stalled
;
654 struct gen_mi_builder b
;
655 gen_mi_builder_init(&b
, batch
);
657 struct gen_mi_value result
= calculate_result_on_gpu(devinfo
, &b
, q
);
658 struct gen_mi_value dst
=
659 result_type
<= PIPE_QUERY_TYPE_U32
? gen_mi_mem32(rw_bo(dst_bo
, offset
))
660 : gen_mi_mem64(rw_bo(dst_bo
, offset
));
663 gen_mi_store(&b
, gen_mi_reg32(MI_PREDICATE_RESULT
),
664 gen_mi_mem64(ro_bo(query_bo
, snapshots_landed_offset
)));
665 gen_mi_store_if(&b
, dst
, result
);
667 gen_mi_store(&b
, dst
, result
);
672 iris_set_active_query_state(struct pipe_context
*ctx
, bool enable
)
674 struct iris_context
*ice
= (void *) ctx
;
676 if (ice
->state
.statistics_counters_enabled
== enable
)
679 // XXX: most packets aren't paying attention to this yet, because it'd
680 // have to be done dynamically at draw time, which is a pain
681 ice
->state
.statistics_counters_enabled
= enable
;
682 ice
->state
.dirty
|= IRIS_DIRTY_CLIP
|
685 IRIS_DIRTY_STREAMOUT
|
693 set_predicate_enable(struct iris_context
*ice
, bool value
)
696 ice
->state
.predicate
= IRIS_PREDICATE_STATE_RENDER
;
698 ice
->state
.predicate
= IRIS_PREDICATE_STATE_DONT_RENDER
;
702 set_predicate_for_result(struct iris_context
*ice
,
703 struct iris_query
*q
,
706 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
707 struct iris_bo
*bo
= iris_resource_bo(q
->query_state_ref
.res
);
709 /* The CPU doesn't have the query result yet; use hardware predication */
710 ice
->state
.predicate
= IRIS_PREDICATE_STATE_USE_BIT
;
712 /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
713 iris_emit_pipe_control_flush(batch
,
714 "conditional rendering: set predicate",
715 PIPE_CONTROL_FLUSH_ENABLE
);
718 struct gen_mi_builder b
;
719 gen_mi_builder_init(&b
, batch
);
721 struct gen_mi_value result
;
724 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
725 result
= calc_overflow_for_stream(&b
, q
, q
->index
);
727 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE
:
728 result
= calc_overflow_any_stream(&b
, q
);
731 /* PIPE_QUERY_OCCLUSION_* */
732 struct gen_mi_value start
=
733 query_mem64(q
, offsetof(struct iris_query_snapshots
, start
));
734 struct gen_mi_value end
=
735 query_mem64(q
, offsetof(struct iris_query_snapshots
, end
));
736 result
= gen_mi_isub(&b
, end
, start
);
741 result
= inverted
? gen_mi_z(&b
, result
) : gen_mi_nz(&b
, result
);
742 result
= gen_mi_iand(&b
, result
, gen_mi_imm(1));
744 /* We immediately set the predicate on the render batch, as all the
745 * counters come from 3D operations. However, we may need to predicate
746 * a compute dispatch, which executes in a different GEM context and has
747 * a different MI_PREDICATE_RESULT register. So, we save the result to
748 * memory and reload it in iris_launch_grid.
750 gen_mi_value_ref(&b
, result
);
751 gen_mi_store(&b
, gen_mi_reg32(MI_PREDICATE_RESULT
), result
);
752 gen_mi_store(&b
, query_mem64(q
, offsetof(struct iris_query_snapshots
,
753 predicate_result
)), result
);
754 ice
->state
.compute_predicate
= bo
;
758 iris_render_condition(struct pipe_context
*ctx
,
759 struct pipe_query
*query
,
761 enum pipe_render_cond_flag mode
)
763 struct iris_context
*ice
= (void *) ctx
;
764 struct iris_query
*q
= (void *) query
;
766 /* The old condition isn't relevant; we'll update it if necessary */
767 ice
->state
.compute_predicate
= NULL
;
768 ice
->condition
.query
= q
;
769 ice
->condition
.condition
= condition
;
772 ice
->state
.predicate
= IRIS_PREDICATE_STATE_RENDER
;
776 iris_check_query_no_flush(ice
, q
);
778 if (q
->result
|| q
->ready
) {
779 set_predicate_enable(ice
, (q
->result
!= 0) ^ condition
);
781 if (mode
== PIPE_RENDER_COND_NO_WAIT
||
782 mode
== PIPE_RENDER_COND_BY_REGION_NO_WAIT
) {
783 perf_debug(&ice
->dbg
, "Conditional rendering demoted from "
784 "\"no wait\" to \"wait\".");
786 set_predicate_for_result(ice
, q
, condition
);
791 iris_resolve_conditional_render(struct iris_context
*ice
)
793 struct pipe_context
*ctx
= (void *) ice
;
794 struct iris_query
*q
= ice
->condition
.query
;
795 struct pipe_query
*query
= (void *) q
;
796 union pipe_query_result result
;
798 if (ice
->state
.predicate
!= IRIS_PREDICATE_STATE_USE_BIT
)
803 iris_get_query_result(ctx
, query
, true, &result
);
804 set_predicate_enable(ice
, (q
->result
!= 0) ^ ice
->condition
.condition
);
808 genX(init_query
)(struct iris_context
*ice
)
810 struct pipe_context
*ctx
= &ice
->ctx
;
812 ctx
->create_query
= iris_create_query
;
813 ctx
->destroy_query
= iris_destroy_query
;
814 ctx
->begin_query
= iris_begin_query
;
815 ctx
->end_query
= iris_end_query
;
816 ctx
->get_query_result
= iris_get_query_result
;
817 ctx
->get_query_result_resource
= iris_get_query_result_resource
;
818 ctx
->set_active_query_state
= iris_set_active_query_state
;
819 ctx
->render_condition
= iris_render_condition
;
821 ice
->vtbl
.resolve_conditional_render
= iris_resolve_conditional_render
;