/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.
 */
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
/* MMIO addresses of the hardware pipeline statistics counter registers
 * (sources for MI_STORE_REGISTER_MEM).
 */
#define IA_VERTICES_COUNT          0x2310
#define IA_PRIMITIVES_COUNT        0x2318
#define VS_INVOCATION_COUNT        0x2320
#define HS_INVOCATION_COUNT        0x2300
#define DS_INVOCATION_COUNT        0x2308
#define GS_INVOCATION_COUNT        0x2328
#define GS_PRIMITIVES_COUNT        0x2330
#define CL_INVOCATION_COUNT        0x2338
#define CL_PRIMITIVES_COUNT        0x2340
#define PS_INVOCATION_COUNT        0x2348
#define CS_INVOCATION_COUNT        0x2290
#define PS_DEPTH_COUNT             0x2350

/* Stream-output counters, one pair of registers per stream (n). */
#define SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)    (0x5200 + (n) * 8)

/* Command streamer general purpose registers; each is 64 bits wide. */
#define CS_GPR(n) (0x2600 + (n) * 8)

/* MI_MATH command opcode (DWord 0, bits 28:23). */
#define MI_MATH (0x1a << 23)

/* MI_MATH ALU instruction opcodes. */
#define MI_ALU_LOAD      0x080
#define MI_ALU_LOADINV   0x480
#define MI_ALU_LOAD0     0x081
#define MI_ALU_LOAD1     0x481
#define MI_ALU_ADD       0x100
#define MI_ALU_SUB       0x101
#define MI_ALU_AND       0x102
#define MI_ALU_OR        0x103
#define MI_ALU_XOR       0x104
#define MI_ALU_STORE     0x180
#define MI_ALU_STOREINV  0x580

/* MI_MATH ALU operand encodings (registers and special sources). */
#define MI_ALU_R0   0x00
#define MI_ALU_R1   0x01
#define MI_ALU_R2   0x02
#define MI_ALU_R3   0x03
#define MI_ALU_R4   0x04
#define MI_ALU_SRCA 0x20
#define MI_ALU_SRCB 0x21
#define MI_ALU_ACCU 0x31
#define MI_ALU_ZF   0x32
#define MI_ALU_CF   0x33

/* Build one MI_MATH ALU DWord: opcode in 31:20, operand1 in 19:10,
 * operand2 in 9:0.
 */
#define MI_ALU0(op)       ((MI_ALU_##op << 20))
#define MI_ALU1(op, x)    ((MI_ALU_##op << 20) | (MI_ALU_##x << 10))
#define MI_ALU2(op, x, y) \
   ((MI_ALU_##op << 20) | (MI_ALU_##x << 10) | (MI_ALU_##y))
92 enum pipe_query_type type
;
100 struct iris_query_snapshots
*map
;
103 struct iris_query_snapshots
{
104 uint64_t snapshots_landed
;
110 * Is this type of query written by PIPE_CONTROL?
113 iris_is_query_pipelined(struct iris_query
*q
)
116 case PIPE_QUERY_OCCLUSION_COUNTER
:
117 case PIPE_QUERY_OCCLUSION_PREDICATE
:
118 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
119 case PIPE_QUERY_TIMESTAMP
:
120 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
121 case PIPE_QUERY_TIME_ELAPSED
:
130 mark_available(struct iris_context
*ice
, struct iris_query
*q
)
132 struct iris_batch
*batch
= &ice
->render_batch
;
133 unsigned flags
= PIPE_CONTROL_WRITE_IMMEDIATE
;
134 unsigned offset
= offsetof(struct iris_query_snapshots
, snapshots_landed
);
136 if (!iris_is_query_pipelined(q
)) {
137 ice
->vtbl
.store_data_imm64(batch
, q
->bo
, offset
, true);
139 /* Order available *after* the query results. */
140 flags
|= PIPE_CONTROL_FLUSH_ENABLE
;
141 iris_emit_pipe_control_write(batch
, flags
, q
->bo
, offset
, true);
146 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
149 iris_pipelined_write(struct iris_batch
*batch
,
150 struct iris_query
*q
,
151 enum pipe_control_flags flags
,
154 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
155 const unsigned optional_cs_stall
=
156 devinfo
->gen
== 9 && devinfo
->gt
== 4 ? PIPE_CONTROL_CS_STALL
: 0;
158 iris_emit_pipe_control_write(batch
, flags
| optional_cs_stall
,
159 q
->bo
, offset
, 0ull);
163 write_value(struct iris_context
*ice
, struct iris_query
*q
, unsigned offset
)
165 struct iris_batch
*batch
= &ice
->render_batch
;
166 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
168 if (!iris_is_query_pipelined(q
)) {
169 iris_emit_pipe_control_flush(batch
,
170 PIPE_CONTROL_CS_STALL
|
171 PIPE_CONTROL_STALL_AT_SCOREBOARD
);
175 case PIPE_QUERY_OCCLUSION_COUNTER
:
176 case PIPE_QUERY_OCCLUSION_PREDICATE
:
177 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
178 if (devinfo
->gen
>= 10) {
179 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
180 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
181 * Count sync operation."
183 iris_emit_pipe_control_flush(batch
, PIPE_CONTROL_DEPTH_STALL
);
185 iris_pipelined_write(&ice
->render_batch
, q
,
186 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
187 PIPE_CONTROL_DEPTH_STALL
,
190 case PIPE_QUERY_TIME_ELAPSED
:
191 case PIPE_QUERY_TIMESTAMP
:
192 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
193 iris_pipelined_write(&ice
->render_batch
, q
,
194 PIPE_CONTROL_WRITE_TIMESTAMP
,
197 case PIPE_QUERY_PRIMITIVES_GENERATED
:
198 ice
->vtbl
.store_register_mem64(batch
,
199 q
->index
== 0 ? CL_INVOCATION_COUNT
:
200 SO_PRIM_STORAGE_NEEDED(q
->index
),
201 q
->bo
, offset
, false);
203 case PIPE_QUERY_PRIMITIVES_EMITTED
:
204 ice
->vtbl
.store_register_mem64(batch
,
205 SO_NUM_PRIMS_WRITTEN(q
->index
),
206 q
->bo
, offset
, false);
208 case PIPE_QUERY_PIPELINE_STATISTICS
: {
209 static const uint32_t index_to_reg
[] = {
222 const uint32_t reg
= index_to_reg
[q
->index
];
224 ice
->vtbl
.store_register_mem64(batch
, reg
, q
->bo
, offset
, false);
233 iris_timebase_scale(const struct gen_device_info
*devinfo
,
234 uint64_t gpu_timestamp
)
236 return (1000000000ull * gpu_timestamp
) / devinfo
->timestamp_frequency
;
240 iris_raw_timestamp_delta(uint64_t time0
, uint64_t time1
)
243 return (1ULL << TIMESTAMP_BITS
) + time1
- time0
;
245 return time1
- time0
;
250 calculate_result_on_cpu(const struct gen_device_info
*devinfo
,
251 struct iris_query
*q
)
254 case PIPE_QUERY_OCCLUSION_PREDICATE
:
255 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
256 q
->result
= q
->map
->end
!= q
->map
->start
;
258 case PIPE_QUERY_TIMESTAMP
:
259 case PIPE_QUERY_TIMESTAMP_DISJOINT
:
260 /* The timestamp is the single starting snapshot. */
261 q
->result
= iris_timebase_scale(devinfo
, q
->map
->start
);
262 q
->result
&= (1ull << TIMESTAMP_BITS
) - 1;
264 case PIPE_QUERY_TIME_ELAPSED
:
265 q
->result
= iris_raw_timestamp_delta(q
->map
->start
, q
->map
->end
);
266 q
->result
= iris_timebase_scale(devinfo
, q
->result
);
267 q
->result
&= (1ull << TIMESTAMP_BITS
) - 1;
269 case PIPE_QUERY_OCCLUSION_COUNTER
:
270 case PIPE_QUERY_PRIMITIVES_GENERATED
:
271 case PIPE_QUERY_PRIMITIVES_EMITTED
:
272 case PIPE_QUERY_PIPELINE_STATISTICS
:
274 q
->result
= q
->map
->end
- q
->map
->start
;
282 * GPR0 = (GPR0 == 0) ? 0 : 1;
285 gpr0_to_bool(struct iris_context
*ice
)
287 struct iris_batch
*batch
= &ice
->render_batch
;
289 ice
->vtbl
.load_register_imm64(batch
, CS_GPR(1), 1ull);
291 static const uint32_t math
[] = {
293 MI_ALU2(LOAD
, SRCA
, R0
),
294 MI_ALU1(LOAD0
, SRCB
),
296 MI_ALU2(STOREINV
, R0
, ZF
),
297 MI_ALU2(LOAD
, SRCA
, R0
),
298 MI_ALU2(LOAD
, SRCB
, R1
),
300 MI_ALU2(STORE
, R0
, ACCU
),
302 iris_batch_emit(batch
, math
, sizeof(math
));
306 * Calculate the result and store it to CS_GPR0.
309 calculate_result_on_gpu(struct iris_context
*ice
, struct iris_query
*q
)
311 struct iris_batch
*batch
= &ice
->render_batch
;
313 ice
->vtbl
.load_register_mem64(batch
, CS_GPR(1), q
->bo
,
314 offsetof(struct iris_query_snapshots
, start
));
315 ice
->vtbl
.load_register_mem64(batch
, CS_GPR(2), q
->bo
,
316 offsetof(struct iris_query_snapshots
, end
));
318 static const uint32_t math
[] = {
320 MI_ALU2(LOAD
, SRCA
, R2
),
321 MI_ALU2(LOAD
, SRCB
, R1
),
323 MI_ALU2(STORE
, R0
, ACCU
),
325 iris_batch_emit(batch
, math
, sizeof(math
));
327 if (q
->type
== PIPE_QUERY_OCCLUSION_PREDICATE
||
328 q
->type
== PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
)
332 static struct pipe_query
*
333 iris_create_query(struct pipe_context
*ctx
,
337 struct iris_query
*q
= calloc(1, sizeof(struct iris_query
));
339 q
->type
= query_type
;
342 return (struct pipe_query
*) q
;
346 iris_destroy_query(struct pipe_context
*ctx
, struct pipe_query
*p_query
)
348 struct iris_query
*query
= (void *) p_query
;
349 iris_bo_unreference(query
->bo
);
355 iris_begin_query(struct pipe_context
*ctx
, struct pipe_query
*query
)
357 struct iris_screen
*screen
= (void *) ctx
->screen
;
358 struct iris_context
*ice
= (void *) ctx
;
359 struct iris_query
*q
= (void *) query
;
361 iris_bo_unreference(q
->bo
);
362 q
->bo
= iris_bo_alloc(screen
->bufmgr
, "query object", 4096,
367 q
->map
= iris_bo_map(&ice
->dbg
, q
->bo
, MAP_READ
| MAP_WRITE
| MAP_ASYNC
);
373 q
->map
->snapshots_landed
= false;
375 if (q
->type
== PIPE_QUERY_PRIMITIVES_GENERATED
&& q
->index
== 0) {
376 ice
->state
.prims_generated_query_active
= true;
377 ice
->state
.dirty
|= IRIS_DIRTY_STREAMOUT
;
380 write_value(ice
, q
, offsetof(struct iris_query_snapshots
, start
));
386 iris_end_query(struct pipe_context
*ctx
, struct pipe_query
*query
)
388 struct iris_context
*ice
= (void *) ctx
;
389 struct iris_query
*q
= (void *) query
;
391 if (q
->type
== PIPE_QUERY_TIMESTAMP
) {
392 iris_begin_query(ctx
, query
);
393 mark_available(ice
, q
);
397 if (q
->type
== PIPE_QUERY_PRIMITIVES_GENERATED
&& q
->index
== 0) {
398 ice
->state
.prims_generated_query_active
= true;
399 ice
->state
.dirty
|= IRIS_DIRTY_STREAMOUT
;
402 write_value(ice
, q
, offsetof(struct iris_query_snapshots
, end
));
403 mark_available(ice
, q
);
409 iris_get_query_result(struct pipe_context
*ctx
,
410 struct pipe_query
*query
,
412 union pipe_query_result
*result
)
414 struct iris_context
*ice
= (void *) ctx
;
415 struct iris_query
*q
= (void *) query
;
416 struct iris_screen
*screen
= (void *) ctx
->screen
;
417 const struct gen_device_info
*devinfo
= &screen
->devinfo
;
420 if (iris_batch_references(&ice
->render_batch
, q
->bo
))
421 iris_batch_flush(&ice
->render_batch
);
423 if (!q
->map
->snapshots_landed
) {
425 iris_bo_wait_rendering(q
->bo
);
430 assert(q
->map
->snapshots_landed
);
431 calculate_result_on_cpu(devinfo
, q
);
436 if (q
->type
== PIPE_QUERY_PIPELINE_STATISTICS
) {
439 result
->pipeline_statistics
.ia_vertices
= q
->result
;
442 result
->pipeline_statistics
.ia_primitives
= q
->result
;
445 result
->pipeline_statistics
.vs_invocations
= q
->result
;
448 result
->pipeline_statistics
.gs_invocations
= q
->result
;
451 result
->pipeline_statistics
.gs_primitives
= q
->result
;
454 result
->pipeline_statistics
.c_invocations
= q
->result
;
457 result
->pipeline_statistics
.c_primitives
= q
->result
;
460 result
->pipeline_statistics
.ps_invocations
= q
->result
;
463 result
->pipeline_statistics
.hs_invocations
= q
->result
;
466 result
->pipeline_statistics
.ds_invocations
= q
->result
;
469 result
->pipeline_statistics
.cs_invocations
= q
->result
;
473 result
->u64
= q
->result
;
480 iris_get_query_result_resource(struct pipe_context
*ctx
,
481 struct pipe_query
*query
,
483 enum pipe_query_value_type result_type
,
485 struct pipe_resource
*p_res
,
488 struct iris_context
*ice
= (void *) ctx
;
489 struct iris_query
*q
= (void *) query
;
490 struct iris_batch
*batch
= &ice
->render_batch
;
491 const struct gen_device_info
*devinfo
= &batch
->screen
->devinfo
;
492 unsigned snapshots_landed_offset
=
493 offsetof(struct iris_query_snapshots
, snapshots_landed
);
496 /* They're asking for the availability of the result. If we still
497 * have commands queued up which produce the result, submit them
498 * now so that progress happens. Either way, copy the snapshots
499 * landed field to the destination resource.
501 if (iris_batch_references(batch
, q
->bo
))
502 iris_batch_flush(batch
);
504 ice
->vtbl
.copy_mem_mem(batch
, iris_resource_bo(p_res
), offset
,
505 q
->bo
, snapshots_landed_offset
,
506 result_type
<= PIPE_QUERY_TYPE_U32
? 4 : 8);
510 if (!q
->ready
&& q
->map
->snapshots_landed
) {
511 /* The final snapshots happen to have landed, so let's just compute
512 * the result on the CPU now...
514 calculate_result_on_cpu(devinfo
, q
);
518 /* We happen to have the result on the CPU, so just copy it. */
519 if (result_type
<= PIPE_QUERY_TYPE_U32
) {
520 ice
->vtbl
.store_data_imm32(batch
, iris_resource_bo(p_res
), offset
,
523 ice
->vtbl
.store_data_imm64(batch
, iris_resource_bo(p_res
), offset
,
527 /* Make sure the result lands before they use bind the QBO elsewhere
528 * and use the result.
530 // XXX: Why? i965 doesn't do this.
531 iris_emit_pipe_control_flush(batch
, PIPE_CONTROL_CS_STALL
);
535 /* Calculate the result to CS_GPR0 */
536 calculate_result_on_gpu(ice
, q
);
538 bool predicated
= !wait
&& iris_is_query_pipelined(q
);
541 ice
->vtbl
.load_register_imm64(batch
, MI_PREDICATE_SRC1
, 0ull);
542 ice
->vtbl
.load_register_mem64(batch
, MI_PREDICATE_SRC0
, q
->bo
,
543 snapshots_landed_offset
);
544 uint32_t predicate
= MI_PREDICATE
|
545 MI_PREDICATE_LOADOP_LOADINV
|
546 MI_PREDICATE_COMBINEOP_SET
|
547 MI_PREDICATE_COMPAREOP_SRCS_EQUAL
;
548 iris_batch_emit(batch
, &predicate
, sizeof(uint32_t));
551 if (result_type
<= PIPE_QUERY_TYPE_U32
) {
552 ice
->vtbl
.store_register_mem32(batch
, CS_GPR(0),
553 iris_resource_bo(p_res
),
556 ice
->vtbl
.store_register_mem64(batch
, CS_GPR(0),
557 iris_resource_bo(p_res
),
563 iris_set_active_query_state(struct pipe_context
*pipe
, boolean enable
)
565 /* Do nothing, intentionally - only u_blitter uses this. */
569 iris_init_query_functions(struct pipe_context
*ctx
)
571 ctx
->create_query
= iris_create_query
;
572 ctx
->destroy_query
= iris_destroy_query
;
573 ctx
->begin_query
= iris_begin_query
;
574 ctx
->end_query
= iris_end_query
;
575 ctx
->get_query_result
= iris_get_query_result
;
576 ctx
->get_query_result_resource
= iris_get_query_result_resource
;
577 ctx
->set_active_query_state
= iris_set_active_query_state
;