iris: Move snapshots_landed to the front.
[mesa.git] / src / gallium / drivers / iris / iris_query.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
41
/* MMIO offsets of the hardware pipeline-statistics counter registers.
 * These are snapshotted into the query buffer via MI_STORE_REGISTER_MEM
 * (see write_value() below).
 */
#define IA_VERTICES_COUNT 0x2310
#define IA_PRIMITIVES_COUNT 0x2318
#define VS_INVOCATION_COUNT 0x2320
#define HS_INVOCATION_COUNT 0x2300
#define DS_INVOCATION_COUNT 0x2308
#define GS_INVOCATION_COUNT 0x2328
#define GS_PRIMITIVES_COUNT 0x2330
#define CL_INVOCATION_COUNT 0x2338
#define CL_PRIMITIVES_COUNT 0x2340
#define PS_INVOCATION_COUNT 0x2348
#define CS_INVOCATION_COUNT 0x2290
#define PS_DEPTH_COUNT 0x2350

/* Per-stream transform feedback counters; n is the stream index and each
 * register is 64 bits wide (hence the stride of 8 bytes).
 */
#define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)

#define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)

/* Command streamer general purpose register n (64-bit registers). */
#define CS_GPR(n) (0x2600 + (n) * 8)

/* MI_MATH command opcode (DWord 0; the length field is OR'd in by callers). */
#define MI_MATH (0x1a << 23)

/* MI_MATH ALU instruction opcodes (pre-shifted operand-count encodings). */
#define MI_ALU_LOAD 0x080
#define MI_ALU_LOADINV 0x480
#define MI_ALU_LOAD0 0x081
#define MI_ALU_LOAD1 0x481
#define MI_ALU_ADD 0x100
#define MI_ALU_SUB 0x101
#define MI_ALU_AND 0x102
#define MI_ALU_OR 0x103
#define MI_ALU_XOR 0x104
#define MI_ALU_STORE 0x180
#define MI_ALU_STOREINV 0x580

/* MI_MATH ALU operand encodings: general registers, ALU sources/outputs. */
#define MI_ALU_R0 0x00
#define MI_ALU_R1 0x01
#define MI_ALU_R2 0x02
#define MI_ALU_R3 0x03
#define MI_ALU_R4 0x04
#define MI_ALU_SRCA 0x20
#define MI_ALU_SRCB 0x21
#define MI_ALU_ACCU 0x31
#define MI_ALU_ZF 0x32
#define MI_ALU_CF 0x33

/* Helpers to pack an MI_MATH ALU instruction with 0, 1, or 2 operands. */
#define MI_ALU0(op) ((MI_ALU_##op << 20))
#define MI_ALU1(op, x) ((MI_ALU_##op << 20) | (MI_ALU_##x << 10))
#define MI_ALU2(op, x, y) \
   ((MI_ALU_##op << 20) | (MI_ALU_##x << 10) | (MI_ALU_##y))
/**
 * Driver-private query object, cast to/from struct pipe_query.
 */
struct iris_query {
   enum pipe_query_type type;
   int index;              /* sub-index: statistics counter or SO stream */

   bool ready;             /* has `result` been computed on the CPU? */

   uint64_t result;        /* final value, valid once `ready` is set */

   struct iris_bo *bo;     /* GPU buffer holding the snapshots */
   struct iris_query_snapshots *map;  /* persistent CPU mapping of bo */
};
102
/**
 * Layout of the query buffer (q->bo / q->map).
 *
 * snapshots_landed is deliberately the first field (offset 0); it is the
 * availability flag, written last by mark_available() after both counter
 * snapshots, and polled/loaded to decide whether results are ready.
 */
struct iris_query_snapshots {
   uint64_t snapshots_landed;
   uint64_t start;
   uint64_t end;
};
108
109 /**
110 * Is this type of query written by PIPE_CONTROL?
111 */
112 static bool
113 iris_is_query_pipelined(struct iris_query *q)
114 {
115 switch (q->type) {
116 case PIPE_QUERY_OCCLUSION_COUNTER:
117 case PIPE_QUERY_OCCLUSION_PREDICATE:
118 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
119 case PIPE_QUERY_TIMESTAMP:
120 case PIPE_QUERY_TIMESTAMP_DISJOINT:
121 case PIPE_QUERY_TIME_ELAPSED:
122 return true;
123
124 default:
125 return false;
126 }
127 }
128
/**
 * Set the snapshots_landed availability flag in the query buffer.
 *
 * For non-pipelined queries the snapshot writes are immediate, so a plain
 * MI_STORE_DATA_IMM suffices.  For pipelined queries the flag write must be
 * ordered after the (post-sync) result writes, so we use a PIPE_CONTROL
 * immediate write with flush-enable.
 */
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->render_batch;
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
   }
}
144
145 /**
146 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
147 */
148 static void
149 iris_pipelined_write(struct iris_batch *batch,
150 struct iris_query *q,
151 enum pipe_control_flags flags,
152 unsigned offset)
153 {
154 const struct gen_device_info *devinfo = &batch->screen->devinfo;
155 const unsigned optional_cs_stall =
156 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
157
158 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
159 q->bo, offset, 0ull);
160 }
161
/**
 * Record the current value of the counter backing q->type as a 64-bit
 * snapshot at \p offset within the query buffer (q->bo).
 */
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->render_batch;
   const struct gen_device_info *devinfo = &batch->screen->devinfo;

   if (!iris_is_query_pipelined(q)) {
      /* Non-pipelined snapshots read a register directly, so stall until
       * outstanding work that could affect the counter has finished.
       */
      iris_emit_pipe_control_flush(batch,
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (devinfo->gen >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          * bit set prior to programming a PIPE_CONTROL with Write PS Depth
          * Count sync operation."
          */
         iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->render_batch, q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->render_batch, q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* Stream 0 uses the clipper invocation count; other streams use the
       * per-stream "primitive storage needed" SO counter.
       */
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ? CL_INVOCATION_COUNT :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     q->bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     q->bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS: {
      /* Maps q->index (the pipe_query pipeline-statistics slot) to the
       * corresponding hardware counter register; must stay in sync with the
       * switch in iris_get_query_result().
       */
      static const uint32_t index_to_reg[] = {
         IA_VERTICES_COUNT,
         IA_PRIMITIVES_COUNT,
         VS_INVOCATION_COUNT,
         GS_INVOCATION_COUNT,
         GS_PRIMITIVES_COUNT,
         CL_INVOCATION_COUNT,
         CL_PRIMITIVES_COUNT,
         PS_INVOCATION_COUNT,
         HS_INVOCATION_COUNT,
         DS_INVOCATION_COUNT,
         CS_INVOCATION_COUNT,
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}
231
232 uint64_t
233 iris_timebase_scale(const struct gen_device_info *devinfo,
234 uint64_t gpu_timestamp)
235 {
236 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
237 }
238
239 static uint64_t
240 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
241 {
242 if (time0 > time1) {
243 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
244 } else {
245 return time1 - time0;
246 }
247 }
248
/**
 * Compute q->result on the CPU from the mapped snapshots and mark the
 * query ready.  Callers ensure the snapshots have landed first.
 */
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      /* Predicates only report whether any samples passed at all. */
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = iris_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      /* Wrap-safe raw delta first, then scale ticks to nanoseconds. */
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = iris_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PIPELINE_STATISTICS:
   default:
      /* Counter queries are simply the difference of the two snapshots. */
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
280
/*
 * GPR0 = (GPR0 == 0) ? 0 : 1;
 *
 * Done on the GPU with an MI_MATH ALU program: compute R0 + 0 to set the
 * zero flag, store its inverse back into R0 (all-ones if R0 was nonzero),
 * then AND with the constant 1 in R1 to clamp the result to 0 or 1.
 */
static void
gpr0_to_bool(struct iris_context *ice)
{
   struct iris_batch *batch = &ice->render_batch;

   /* R1 = 1, used as the mask for the final AND. */
   ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);

   static const uint32_t math[] = {
      MI_MATH | (9 - 2),               /* 9 DWords total; length bias of 2 */
      MI_ALU2(LOAD, SRCA, R0),         /* SRCA = R0 */
      MI_ALU1(LOAD0, SRCB),            /* SRCB = 0 */
      MI_ALU0(ADD),                    /* ACCU = R0 + 0, sets ZF if R0 == 0 */
      MI_ALU2(STOREINV, R0, ZF),       /* R0 = ~ZF (all-ones if R0 != 0) */
      MI_ALU2(LOAD, SRCA, R0),
      MI_ALU2(LOAD, SRCB, R1),
      MI_ALU0(AND),                    /* ACCU = R0 & 1 */
      MI_ALU2(STORE, R0, ACCU),        /* R0 = 0 or 1 */
   };
   iris_batch_emit(batch, math, sizeof(math));
}
304
/**
 * Calculate the result and store it to CS_GPR0.
 *
 * Loads the two snapshots into GPR1/GPR2, computes end - start with
 * MI_MATH, and for occlusion predicates reduces the difference to 0/1.
 */
static void
calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->render_batch;

   ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
                                 offsetof(struct iris_query_snapshots, start));
   ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
                                 offsetof(struct iris_query_snapshots, end));

   static const uint32_t math[] = {
      MI_MATH | (5 - 2),           /* 5 DWords total; length bias of 2 */
      MI_ALU2(LOAD, SRCA, R2),     /* SRCA = end */
      MI_ALU2(LOAD, SRCB, R1),     /* SRCB = start */
      MI_ALU0(SUB),                /* ACCU = end - start */
      MI_ALU2(STORE, R0, ACCU),    /* R0 = result */
   };
   iris_batch_emit(batch, math, sizeof(math));

   /* Predicates want a boolean answer, not a sample count. */
   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
      gpr0_to_bool(ice);
}
331
332 static struct pipe_query *
333 iris_create_query(struct pipe_context *ctx,
334 unsigned query_type,
335 unsigned index)
336 {
337 struct iris_query *q = calloc(1, sizeof(struct iris_query));
338
339 q->type = query_type;
340 q->index = index;
341
342 return (struct pipe_query *) q;
343 }
344
345 static void
346 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
347 {
348 struct iris_query *query = (void *) p_query;
349 iris_bo_unreference(query->bo);
350 free(query);
351 }
352
353
/**
 * Gallium ->begin_query() hook: allocate a fresh snapshot buffer, reset
 * CPU-side state, and record the starting snapshot.
 */
static boolean
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* Queries may be re-begun; drop any buffer from a previous begin. */
   iris_bo_unreference(q->bo);
   q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
                         IRIS_MEMZONE_OTHER);
   if (!q->bo)
      return false;

   /* Keep a persistent CPU mapping so results can be read back later;
    * MAP_ASYNC avoids stalling on GPU work still using the buffer.
    */
   q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
   if (!q->map)
      return false;

   /* Reset the cached result and the GPU-visible availability flag. */
   q->result = 0ull;
   q->ready = false;
   q->map->snapshots_landed = false;

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      /* Stream-0 prims-generated queries affect streamout programming. */
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
   }

   write_value(ice, q, offsetof(struct iris_query_snapshots, start));

   return true;
}
384
385 static bool
386 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
387 {
388 struct iris_context *ice = (void *) ctx;
389 struct iris_query *q = (void *) query;
390
391 if (q->type == PIPE_QUERY_TIMESTAMP) {
392 iris_begin_query(ctx, query);
393 mark_available(ice, q);
394 return true;
395 }
396
397 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
398 ice->state.prims_generated_query_active = true;
399 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
400 }
401
402 write_value(ice, q, offsetof(struct iris_query_snapshots, end));
403 mark_available(ice, q);
404
405 return true;
406 }
407
/**
 * Gallium ->get_query_result() hook: fetch the result on the CPU.
 *
 * Returns false if \p wait is not set and the snapshots have not landed
 * yet; otherwise flushes/waits as needed and fills in \p result.
 */
static boolean
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      boolean wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready) {
      /* Commands producing the result may still be queued; submit them. */
      if (iris_batch_references(&ice->render_batch, q->bo))
         iris_batch_flush(&ice->render_batch);

      if (!q->map->snapshots_landed) {
         if (wait)
            iris_bo_wait_rendering(q->bo);
         else
            return false;
      }

      assert(q->map->snapshots_landed);
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
      /* Route the single counter into the field matching q->index; this
       * ordering mirrors index_to_reg in write_value().
       */
      switch (q->index) {
      case 0:
         result->pipeline_statistics.ia_vertices = q->result;
         break;
      case 1:
         result->pipeline_statistics.ia_primitives = q->result;
         break;
      case 2:
         result->pipeline_statistics.vs_invocations = q->result;
         break;
      case 3:
         result->pipeline_statistics.gs_invocations = q->result;
         break;
      case 4:
         result->pipeline_statistics.gs_primitives = q->result;
         break;
      case 5:
         result->pipeline_statistics.c_invocations = q->result;
         break;
      case 6:
         result->pipeline_statistics.c_primitives = q->result;
         break;
      case 7:
         result->pipeline_statistics.ps_invocations = q->result;
         break;
      case 8:
         result->pipeline_statistics.hs_invocations = q->result;
         break;
      case 9:
         result->pipeline_statistics.ds_invocations = q->result;
         break;
      case 10:
         result->pipeline_statistics.cs_invocations = q->result;
         break;
      }
   } else {
      result->u64 = q->result;
   }

   return true;
}
478
/**
 * Gallium ->get_query_result_resource() hook: write the query result (or
 * its availability, when index == -1) into a buffer resource on the GPU.
 */
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               boolean wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->render_batch;
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   if (index == -1) {
      /* They're asking for the availability of the result. If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens. Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (iris_batch_references(batch, q->bo))
         iris_batch_flush(batch);

      /* Copy 4 or 8 bytes of the flag depending on the requested type. */
      ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
                             q->bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && q->map->snapshots_landed) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
                                    q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
                                    q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
      return;
   }

   /* Calculate the result to CS_GPR0 */
   calculate_result_on_gpu(ice, q);

   bool predicated = !wait && iris_is_query_pipelined(q);

   if (predicated) {
      /* Predicate the store below on the availability flag so the write
       * is skipped when the snapshots haven't landed yet.  NOTE(review):
       * see the PRM for the exact MI_PREDICATE load/compare semantics.
       */
      ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
                                    snapshots_landed_offset);
      uint32_t predicate = MI_PREDICATE |
                           MI_PREDICATE_LOADOP_LOADINV |
                           MI_PREDICATE_COMBINEOP_SET |
                           MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
      iris_batch_emit(batch, &predicate, sizeof(uint32_t));
   }

   /* Copy the GPU-computed result (CS_GPR0) into the destination. */
   if (result_type <= PIPE_QUERY_TYPE_U32) {
      ice->vtbl.store_register_mem32(batch, CS_GPR(0),
                                     iris_resource_bo(p_res),
                                     offset, predicated);
   } else {
      ice->vtbl.store_register_mem64(batch, CS_GPR(0),
                                     iris_resource_bo(p_res),
                                     offset, predicated);
   }
}
561
562 static void
563 iris_set_active_query_state(struct pipe_context *pipe, boolean enable)
564 {
565 /* Do nothing, intentionally - only u_blitter uses this. */
566 }
567
568 void
569 iris_init_query_functions(struct pipe_context *ctx)
570 {
571 ctx->create_query = iris_create_query;
572 ctx->destroy_query = iris_destroy_query;
573 ctx->begin_query = iris_begin_query;
574 ctx->end_query = iris_end_query;
575 ctx->get_query_result = iris_get_query_result;
576 ctx->get_query_result_resource = iris_get_query_result_resource;
577 ctx->set_active_query_state = iris_set_active_query_state;
578 }