mesa.git: src/gallium/drivers/radeon/r600_query.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 * Copyright 2014 Marek Olšák <marek.olsak@amd.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "r600_query.h"
26 #include "r600_cs.h"
27 #include "util/u_memory.h"
28 #include "util/u_upload_mgr.h"
29 #include "os/os_time.h"
30 #include "tgsi/tgsi_text.h"
31
32 #define R600_MAX_STREAMS 4
33
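/* Byte offsets of the begin/end values and of the result fence within one
 * hardware result slot, plus the stride and count of per-RB or per-stream
 * value pairs.  Filled in by r600_get_hw_query_params() and consumed by the
 * result compute shader setup in r600_query_hw_get_result_resource().
 */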
34 struct r600_hw_query_params {
35 unsigned start_offset;
36 unsigned end_offset;
37 unsigned fence_offset;
38 unsigned pair_stride;
39 unsigned pair_count;
40 };
41
42 /* Queries without buffer handling or suspend/resume. */
43 struct r600_query_sw {
44 struct r600_query b;
45
46 uint64_t begin_result;
47 uint64_t end_result;
48
49 uint64_t begin_time;
50 uint64_t end_time;
51
52 /* Fence for GPU_FINISHED. */
53 struct pipe_fence_handle *fence;
54 };
55
56 static void r600_query_sw_destroy(struct r600_common_screen *rscreen,
57 struct r600_query *rquery)
58 {
59 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
60
61 rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL);
62 FREE(query);
63 }
64
65 static enum radeon_value_id winsys_id_from_type(unsigned type)
66 {
67 switch (type) {
68 case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
69 case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
70 case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
71 case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
72 case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
73 case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
74 case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
75 case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
76 case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER;
77 case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
78 case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
79 case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
80 case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
81 case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
82 case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
83 case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
84 case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
85 case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
86 case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
87 default: unreachable("query type does not correspond to winsys id");
88 }
89 }
90
91 static bool r600_query_sw_begin(struct r600_common_context *rctx,
92 struct r600_query *rquery)
93 {
94 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
95 enum radeon_value_id ws_id;
96
97 switch(query->b.type) {
98 case PIPE_QUERY_TIMESTAMP_DISJOINT:
99 case PIPE_QUERY_GPU_FINISHED:
100 break;
101 case R600_QUERY_DRAW_CALLS:
102 query->begin_result = rctx->num_draw_calls;
103 break;
104 case R600_QUERY_DECOMPRESS_CALLS:
105 query->begin_result = rctx->num_decompress_calls;
106 break;
107 case R600_QUERY_MRT_DRAW_CALLS:
108 query->begin_result = rctx->num_mrt_draw_calls;
109 break;
110 case R600_QUERY_PRIM_RESTART_CALLS:
111 query->begin_result = rctx->num_prim_restart_calls;
112 break;
113 case R600_QUERY_SPILL_DRAW_CALLS:
114 query->begin_result = rctx->num_spill_draw_calls;
115 break;
116 case R600_QUERY_COMPUTE_CALLS:
117 query->begin_result = rctx->num_compute_calls;
118 break;
119 case R600_QUERY_SPILL_COMPUTE_CALLS:
120 query->begin_result = rctx->num_spill_compute_calls;
121 break;
122 case R600_QUERY_DMA_CALLS:
123 query->begin_result = rctx->num_dma_calls;
124 break;
125 case R600_QUERY_CP_DMA_CALLS:
126 query->begin_result = rctx->num_cp_dma_calls;
127 break;
128 case R600_QUERY_NUM_VS_FLUSHES:
129 query->begin_result = rctx->num_vs_flushes;
130 break;
131 case R600_QUERY_NUM_PS_FLUSHES:
132 query->begin_result = rctx->num_ps_flushes;
133 break;
134 case R600_QUERY_NUM_CS_FLUSHES:
135 query->begin_result = rctx->num_cs_flushes;
136 break;
137 case R600_QUERY_NUM_CB_CACHE_FLUSHES:
138 query->begin_result = rctx->num_cb_cache_flushes;
139 break;
140 case R600_QUERY_NUM_DB_CACHE_FLUSHES:
141 query->begin_result = rctx->num_db_cache_flushes;
142 break;
143 case R600_QUERY_NUM_L2_INVALIDATES:
144 query->begin_result = rctx->num_L2_invalidates;
145 break;
146 case R600_QUERY_NUM_L2_WRITEBACKS:
147 query->begin_result = rctx->num_L2_writebacks;
148 break;
149 case R600_QUERY_NUM_RESIDENT_HANDLES:
150 query->begin_result = rctx->num_resident_handles;
151 break;
152 case R600_QUERY_TC_OFFLOADED_SLOTS:
153 query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
154 break;
155 case R600_QUERY_TC_DIRECT_SLOTS:
156 query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
157 break;
158 case R600_QUERY_TC_NUM_SYNCS:
159 query->begin_result = rctx->tc ? rctx->tc->num_syncs : 0;
160 break;
161 case R600_QUERY_REQUESTED_VRAM:
162 case R600_QUERY_REQUESTED_GTT:
163 case R600_QUERY_MAPPED_VRAM:
164 case R600_QUERY_MAPPED_GTT:
165 case R600_QUERY_VRAM_USAGE:
166 case R600_QUERY_VRAM_VIS_USAGE:
167 case R600_QUERY_GTT_USAGE:
168 case R600_QUERY_GPU_TEMPERATURE:
169 case R600_QUERY_CURRENT_GPU_SCLK:
170 case R600_QUERY_CURRENT_GPU_MCLK:
171 case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
172 case R600_QUERY_NUM_MAPPED_BUFFERS:
173 query->begin_result = 0;
174 break;
175 case R600_QUERY_BUFFER_WAIT_TIME:
176 case R600_QUERY_NUM_GFX_IBS:
177 case R600_QUERY_NUM_SDMA_IBS:
178 case R600_QUERY_NUM_BYTES_MOVED:
179 case R600_QUERY_NUM_EVICTIONS:
180 case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
181 enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
182 query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
183 break;
184 }
185 case R600_QUERY_GFX_BO_LIST_SIZE:
186 ws_id = winsys_id_from_type(query->b.type);
187 query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
188 query->begin_time = rctx->ws->query_value(rctx->ws,
189 RADEON_NUM_GFX_IBS);
190 break;
191 case R600_QUERY_CS_THREAD_BUSY:
192 ws_id = winsys_id_from_type(query->b.type);
193 query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
194 query->begin_time = os_time_get_nano();
195 break;
196 case R600_QUERY_GALLIUM_THREAD_BUSY:
197 query->begin_result =
198 rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
199 query->begin_time = os_time_get_nano();
200 break;
201 case R600_QUERY_GPU_LOAD:
202 case R600_QUERY_GPU_SHADERS_BUSY:
203 case R600_QUERY_GPU_TA_BUSY:
204 case R600_QUERY_GPU_GDS_BUSY:
205 case R600_QUERY_GPU_VGT_BUSY:
206 case R600_QUERY_GPU_IA_BUSY:
207 case R600_QUERY_GPU_SX_BUSY:
208 case R600_QUERY_GPU_WD_BUSY:
209 case R600_QUERY_GPU_BCI_BUSY:
210 case R600_QUERY_GPU_SC_BUSY:
211 case R600_QUERY_GPU_PA_BUSY:
212 case R600_QUERY_GPU_DB_BUSY:
213 case R600_QUERY_GPU_CP_BUSY:
214 case R600_QUERY_GPU_CB_BUSY:
215 case R600_QUERY_GPU_SDMA_BUSY:
216 case R600_QUERY_GPU_PFP_BUSY:
217 case R600_QUERY_GPU_MEQ_BUSY:
218 case R600_QUERY_GPU_ME_BUSY:
219 case R600_QUERY_GPU_SURF_SYNC_BUSY:
220 case R600_QUERY_GPU_CP_DMA_BUSY:
221 case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
222 query->begin_result = r600_begin_counter(rctx->screen,
223 query->b.type);
224 break;
225 case R600_QUERY_NUM_COMPILATIONS:
226 query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
227 break;
228 case R600_QUERY_NUM_SHADERS_CREATED:
229 query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
230 break;
231 case R600_QUERY_NUM_SHADER_CACHE_HITS:
232 query->begin_result =
233 p_atomic_read(&rctx->screen->num_shader_cache_hits);
234 break;
235 case R600_QUERY_GPIN_ASIC_ID:
236 case R600_QUERY_GPIN_NUM_SIMD:
237 case R600_QUERY_GPIN_NUM_RB:
238 case R600_QUERY_GPIN_NUM_SPI:
239 case R600_QUERY_GPIN_NUM_SE:
240 break;
241 default:
242 unreachable("r600_query_sw_begin: bad query type");
243 }
244
245 return true;
246 }
247
248 static bool r600_query_sw_end(struct r600_common_context *rctx,
249 struct r600_query *rquery)
250 {
251 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
252 enum radeon_value_id ws_id;
253
254 switch(query->b.type) {
255 case PIPE_QUERY_TIMESTAMP_DISJOINT:
256 break;
257 case PIPE_QUERY_GPU_FINISHED:
258 rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
259 break;
260 case R600_QUERY_DRAW_CALLS:
261 query->end_result = rctx->num_draw_calls;
262 break;
263 case R600_QUERY_DECOMPRESS_CALLS:
264 query->end_result = rctx->num_decompress_calls;
265 break;
266 case R600_QUERY_MRT_DRAW_CALLS:
267 query->end_result = rctx->num_mrt_draw_calls;
268 break;
269 case R600_QUERY_PRIM_RESTART_CALLS:
270 query->end_result = rctx->num_prim_restart_calls;
271 break;
272 case R600_QUERY_SPILL_DRAW_CALLS:
273 query->end_result = rctx->num_spill_draw_calls;
274 break;
275 case R600_QUERY_COMPUTE_CALLS:
276 query->end_result = rctx->num_compute_calls;
277 break;
278 case R600_QUERY_SPILL_COMPUTE_CALLS:
279 query->end_result = rctx->num_spill_compute_calls;
280 break;
281 case R600_QUERY_DMA_CALLS:
282 query->end_result = rctx->num_dma_calls;
283 break;
284 case R600_QUERY_CP_DMA_CALLS:
285 query->end_result = rctx->num_cp_dma_calls;
286 break;
287 case R600_QUERY_NUM_VS_FLUSHES:
288 query->end_result = rctx->num_vs_flushes;
289 break;
290 case R600_QUERY_NUM_PS_FLUSHES:
291 query->end_result = rctx->num_ps_flushes;
292 break;
293 case R600_QUERY_NUM_CS_FLUSHES:
294 query->end_result = rctx->num_cs_flushes;
295 break;
296 case R600_QUERY_NUM_CB_CACHE_FLUSHES:
297 query->end_result = rctx->num_cb_cache_flushes;
298 break;
299 case R600_QUERY_NUM_DB_CACHE_FLUSHES:
300 query->end_result = rctx->num_db_cache_flushes;
301 break;
302 case R600_QUERY_NUM_L2_INVALIDATES:
303 query->end_result = rctx->num_L2_invalidates;
304 break;
305 case R600_QUERY_NUM_L2_WRITEBACKS:
306 query->end_result = rctx->num_L2_writebacks;
307 break;
308 case R600_QUERY_NUM_RESIDENT_HANDLES:
309 query->end_result = rctx->num_resident_handles;
310 break;
311 case R600_QUERY_TC_OFFLOADED_SLOTS:
312 query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
313 break;
314 case R600_QUERY_TC_DIRECT_SLOTS:
315 query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
316 break;
317 case R600_QUERY_TC_NUM_SYNCS:
318 query->end_result = rctx->tc ? rctx->tc->num_syncs : 0;
319 break;
320 case R600_QUERY_REQUESTED_VRAM:
321 case R600_QUERY_REQUESTED_GTT:
322 case R600_QUERY_MAPPED_VRAM:
323 case R600_QUERY_MAPPED_GTT:
324 case R600_QUERY_VRAM_USAGE:
325 case R600_QUERY_VRAM_VIS_USAGE:
326 case R600_QUERY_GTT_USAGE:
327 case R600_QUERY_GPU_TEMPERATURE:
328 case R600_QUERY_CURRENT_GPU_SCLK:
329 case R600_QUERY_CURRENT_GPU_MCLK:
330 case R600_QUERY_BUFFER_WAIT_TIME:
331 case R600_QUERY_NUM_MAPPED_BUFFERS:
332 case R600_QUERY_NUM_GFX_IBS:
333 case R600_QUERY_NUM_SDMA_IBS:
334 case R600_QUERY_NUM_BYTES_MOVED:
335 case R600_QUERY_NUM_EVICTIONS:
336 case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
337 enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
338 query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
339 break;
340 }
341 case R600_QUERY_GFX_BO_LIST_SIZE:
342 ws_id = winsys_id_from_type(query->b.type);
343 query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
344 query->end_time = rctx->ws->query_value(rctx->ws,
345 RADEON_NUM_GFX_IBS);
346 break;
347 case R600_QUERY_CS_THREAD_BUSY:
348 ws_id = winsys_id_from_type(query->b.type);
349 query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
350 query->end_time = os_time_get_nano();
351 break;
352 case R600_QUERY_GALLIUM_THREAD_BUSY:
353 query->end_result =
354 rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
355 query->end_time = os_time_get_nano();
356 break;
357 case R600_QUERY_GPU_LOAD:
358 case R600_QUERY_GPU_SHADERS_BUSY:
359 case R600_QUERY_GPU_TA_BUSY:
360 case R600_QUERY_GPU_GDS_BUSY:
361 case R600_QUERY_GPU_VGT_BUSY:
362 case R600_QUERY_GPU_IA_BUSY:
363 case R600_QUERY_GPU_SX_BUSY:
364 case R600_QUERY_GPU_WD_BUSY:
365 case R600_QUERY_GPU_BCI_BUSY:
366 case R600_QUERY_GPU_SC_BUSY:
367 case R600_QUERY_GPU_PA_BUSY:
368 case R600_QUERY_GPU_DB_BUSY:
369 case R600_QUERY_GPU_CP_BUSY:
370 case R600_QUERY_GPU_CB_BUSY:
371 case R600_QUERY_GPU_SDMA_BUSY:
372 case R600_QUERY_GPU_PFP_BUSY:
373 case R600_QUERY_GPU_MEQ_BUSY:
374 case R600_QUERY_GPU_ME_BUSY:
375 case R600_QUERY_GPU_SURF_SYNC_BUSY:
376 case R600_QUERY_GPU_CP_DMA_BUSY:
377 case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
378 query->end_result = r600_end_counter(rctx->screen,
379 query->b.type,
380 query->begin_result);
381 query->begin_result = 0;
382 break;
383 case R600_QUERY_NUM_COMPILATIONS:
384 query->end_result = p_atomic_read(&rctx->screen->num_compilations);
385 break;
386 case R600_QUERY_NUM_SHADERS_CREATED:
387 query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
388 break;
389 case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
390 query->end_result = rctx->last_tex_ps_draw_ratio;
391 break;
392 case R600_QUERY_NUM_SHADER_CACHE_HITS:
393 query->end_result =
394 p_atomic_read(&rctx->screen->num_shader_cache_hits);
395 break;
396 case R600_QUERY_GPIN_ASIC_ID:
397 case R600_QUERY_GPIN_NUM_SIMD:
398 case R600_QUERY_GPIN_NUM_RB:
399 case R600_QUERY_GPIN_NUM_SPI:
400 case R600_QUERY_GPIN_NUM_SE:
401 break;
402 default:
403 unreachable("r600_query_sw_end: bad query type");
404 }
405
406 return true;
407 }
408
409 static bool r600_query_sw_get_result(struct r600_common_context *rctx,
410 struct r600_query *rquery,
411 bool wait,
412 union pipe_query_result *result)
413 {
414 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
415
416 switch (query->b.type) {
417 case PIPE_QUERY_TIMESTAMP_DISJOINT:
418 /* Convert from cycles per millisecond to cycles per second (Hz). */
419 result->timestamp_disjoint.frequency =
420 (uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
421 result->timestamp_disjoint.disjoint = false;
422 return true;
423 case PIPE_QUERY_GPU_FINISHED: {
424 struct pipe_screen *screen = rctx->b.screen;
425 struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b;
426
427 result->b = screen->fence_finish(screen, ctx, query->fence,
428 wait ? PIPE_TIMEOUT_INFINITE : 0);
429 return result->b;
430 }
431
432 case R600_QUERY_GFX_BO_LIST_SIZE:
433 result->u64 = (query->end_result - query->begin_result) /
434 (query->end_time - query->begin_time);
435 return true;
436 case R600_QUERY_CS_THREAD_BUSY:
437 case R600_QUERY_GALLIUM_THREAD_BUSY:
438 result->u64 = (query->end_result - query->begin_result) * 100 /
439 (query->end_time - query->begin_time);
440 return true;
441 case R600_QUERY_GPIN_ASIC_ID:
442 result->u32 = 0;
443 return true;
444 case R600_QUERY_GPIN_NUM_SIMD:
445 result->u32 = rctx->screen->info.num_good_compute_units;
446 return true;
447 case R600_QUERY_GPIN_NUM_RB:
448 result->u32 = rctx->screen->info.num_render_backends;
449 return true;
450 case R600_QUERY_GPIN_NUM_SPI:
451 result->u32 = 1; /* all supported chips have one SPI per SE */
452 return true;
453 case R600_QUERY_GPIN_NUM_SE:
454 result->u32 = rctx->screen->info.max_se;
455 return true;
456 }
457
458 result->u64 = query->end_result - query->begin_result;
459
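/* Normalize winsys units: the buffer wait time comes back in nanoseconds and
 * the temperature in millidegrees, so divide by 1000; sclk/mclk come back in
 * MHz, so scale them up to Hz.
 */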
460 switch (query->b.type) {
461 case R600_QUERY_BUFFER_WAIT_TIME:
462 case R600_QUERY_GPU_TEMPERATURE:
463 result->u64 /= 1000;
464 break;
465 case R600_QUERY_CURRENT_GPU_SCLK:
466 case R600_QUERY_CURRENT_GPU_MCLK:
467 result->u64 *= 1000000;
468 break;
469 }
470
471 return true;
472 }
473
474
475 static struct r600_query_ops sw_query_ops = {
476 .destroy = r600_query_sw_destroy,
477 .begin = r600_query_sw_begin,
478 .end = r600_query_sw_end,
479 .get_result = r600_query_sw_get_result,
480 .get_result_resource = NULL
481 };
482
483 static struct pipe_query *r600_query_sw_create(unsigned query_type)
484 {
485 struct r600_query_sw *query;
486
487 query = CALLOC_STRUCT(r600_query_sw);
488 if (!query)
489 return NULL;
490
491 query->b.type = query_type;
492 query->b.ops = &sw_query_ops;
493
494 return (struct pipe_query *)query;
495 }
496
497 void r600_query_hw_destroy(struct r600_common_screen *rscreen,
498 struct r600_query *rquery)
499 {
500 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
501 struct r600_query_buffer *prev = query->buffer.previous;
502
503 /* Release all query buffers. */
504 while (prev) {
505 struct r600_query_buffer *qbuf = prev;
506 prev = prev->previous;
507 r600_resource_reference(&qbuf->buf, NULL);
508 FREE(qbuf);
509 }
510
511 r600_resource_reference(&query->buffer.buf, NULL);
512 r600_resource_reference(&query->workaround_buf, NULL);
513 FREE(rquery);
514 }
515
516 static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen,
517 struct r600_query_hw *query)
518 {
519 unsigned buf_size = MAX2(query->result_size,
520 rscreen->info.min_alloc_size);
521
522 /* Queries are normally read by the CPU after
523 * being written by the gpu, hence staging is probably a good
524 * usage pattern.
525 */
526 struct r600_resource *buf = (struct r600_resource*)
527 pipe_buffer_create(&rscreen->b, 0,
528 PIPE_USAGE_STAGING, buf_size);
529 if (!buf)
530 return NULL;
531
532 if (!query->ops->prepare_buffer(rscreen, query, buf)) {
533 r600_resource_reference(&buf, NULL);
534 return NULL;
535 }
536
537 return buf;
538 }
539
540 static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
541 struct r600_query_hw *query,
542 struct r600_resource *buffer)
543 {
544 /* Callers ensure that the buffer is currently unused by the GPU. */
545 uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL,
546 PIPE_TRANSFER_WRITE |
547 PIPE_TRANSFER_UNSYNCHRONIZED);
548 if (!results)
549 return false;
550
551 memset(results, 0, buffer->b.b.width0);
552
553 if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
554 query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
555 unsigned max_rbs = rscreen->info.num_render_backends;
556 unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
557 unsigned num_results;
558 unsigned i, j;
559
560 /* Set top bits for unused backends. */
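/* Disabled RBs never write their slot; pre-setting bit 31 of the begin/end
 * high dwords makes r600_query_read_result() treat them as valid zero
 * samples instead of "result not ready".
 */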
561 num_results = buffer->b.b.width0 / query->result_size;
562 for (j = 0; j < num_results; j++) {
563 for (i = 0; i < max_rbs; i++) {
564 if (!(enabled_rb_mask & (1<<i))) {
565 results[(i * 4)+1] = 0x80000000;
566 results[(i * 4)+3] = 0x80000000;
567 }
568 }
569 results += 4 * max_rbs;
570 }
571 }
572
573 return true;
574 }
575
576 static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
577 struct r600_query *rquery,
578 bool wait,
579 enum pipe_query_value_type result_type,
580 int index,
581 struct pipe_resource *resource,
582 unsigned offset);
583
584 static struct r600_query_ops query_hw_ops = {
585 .destroy = r600_query_hw_destroy,
586 .begin = r600_query_hw_begin,
587 .end = r600_query_hw_end,
588 .get_result = r600_query_hw_get_result,
589 .get_result_resource = r600_query_hw_get_result_resource,
590 };
591
592 static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
593 struct r600_query_hw *query,
594 struct r600_resource *buffer,
595 uint64_t va);
596 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
597 struct r600_query_hw *query,
598 struct r600_resource *buffer,
599 uint64_t va);
600 static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
601 struct r600_query_hw *, void *buffer,
602 union pipe_query_result *result);
603 static void r600_query_hw_clear_result(struct r600_query_hw *,
604 union pipe_query_result *);
605
606 static struct r600_query_hw_ops query_hw_default_hw_ops = {
607 .prepare_buffer = r600_query_hw_prepare_buffer,
608 .emit_start = r600_query_hw_do_emit_start,
609 .emit_stop = r600_query_hw_do_emit_stop,
610 .clear_result = r600_query_hw_clear_result,
611 .add_result = r600_query_hw_add_result,
612 };
613
614 bool r600_query_hw_init(struct r600_common_screen *rscreen,
615 struct r600_query_hw *query)
616 {
617 query->buffer.buf = r600_new_query_buffer(rscreen, query);
618 if (!query->buffer.buf)
619 return false;
620
621 return true;
622 }
623
624 static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen,
625 unsigned query_type,
626 unsigned index)
627 {
628 struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
629 if (!query)
630 return NULL;
631
632 query->b.type = query_type;
633 query->b.ops = &query_hw_ops;
634 query->ops = &query_hw_default_hw_ops;
635
636 switch (query_type) {
637 case PIPE_QUERY_OCCLUSION_COUNTER:
638 case PIPE_QUERY_OCCLUSION_PREDICATE:
639 query->result_size = 16 * rscreen->info.num_render_backends;
640 query->result_size += 16; /* for the fence + alignment */
641 query->num_cs_dw_begin = 6;
642 query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
643 break;
644 case PIPE_QUERY_TIME_ELAPSED:
645 query->result_size = 24;
646 query->num_cs_dw_begin = 8;
647 query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
648 break;
649 case PIPE_QUERY_TIMESTAMP:
650 query->result_size = 16;
651 query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
652 query->flags = R600_QUERY_HW_FLAG_NO_START;
653 break;
654 case PIPE_QUERY_PRIMITIVES_EMITTED:
655 case PIPE_QUERY_PRIMITIVES_GENERATED:
656 case PIPE_QUERY_SO_STATISTICS:
657 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
658 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
659 query->result_size = 32;
660 query->num_cs_dw_begin = 6;
661 query->num_cs_dw_end = 6;
662 query->stream = index;
663 break;
664 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
665 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
666 query->result_size = 32 * R600_MAX_STREAMS;
667 query->num_cs_dw_begin = 6 * R600_MAX_STREAMS;
668 query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
669 break;
670 case PIPE_QUERY_PIPELINE_STATISTICS:
671 /* 11 values on EG, 8 on R600. */
672 query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
673 query->result_size += 8; /* for the fence + alignment */
674 query->num_cs_dw_begin = 6;
675 query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
676 break;
677 default:
678 assert(0);
679 FREE(query);
680 return NULL;
681 }
682
683 if (!r600_query_hw_init(rscreen, query)) {
684 FREE(query);
685 return NULL;
686 }
687
688 return (struct pipe_query *)query;
689 }
690
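/* Track how many (perfect) occlusion queries are active so the driver can
 * reprogram its DB counting state whenever either count toggles between zero
 * and non-zero.
 */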
691 static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
692 unsigned type, int diff)
693 {
694 if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
695 type == PIPE_QUERY_OCCLUSION_PREDICATE) {
696 bool old_enable = rctx->num_occlusion_queries != 0;
697 bool old_perfect_enable =
698 rctx->num_perfect_occlusion_queries != 0;
699 bool enable, perfect_enable;
700
701 rctx->num_occlusion_queries += diff;
702 assert(rctx->num_occlusion_queries >= 0);
703
704 if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
705 rctx->num_perfect_occlusion_queries += diff;
706 assert(rctx->num_perfect_occlusion_queries >= 0);
707 }
708
709 enable = rctx->num_occlusion_queries != 0;
710 perfect_enable = rctx->num_perfect_occlusion_queries != 0;
711
712 if (enable != old_enable || perfect_enable != old_perfect_enable) {
713 rctx->set_occlusion_query_state(&rctx->b, enable);
714 }
715 }
716 }
717
718 static unsigned event_type_for_stream(unsigned stream)
719 {
720 switch (stream) {
721 default:
722 case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
723 case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
724 case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
725 case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
726 }
727 }
728
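/* EVENT_WRITE with a SAMPLE_STREAMOUTSTATS* event makes the GPU dump the
 * streamout statistics for the given stream (NumPrimitivesWritten and
 * PrimitiveStorageNeeded, two 64-bit values) at the supplied VA.
 */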
729 static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
730 unsigned stream)
731 {
732 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
733 radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3));
734 radeon_emit(cs, va);
735 radeon_emit(cs, va >> 32);
736 }
737
738 static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
739 struct r600_query_hw *query,
740 struct r600_resource *buffer,
741 uint64_t va)
742 {
743 struct radeon_winsys_cs *cs = ctx->gfx.cs;
744
745 switch (query->b.type) {
746 case PIPE_QUERY_OCCLUSION_COUNTER:
747 case PIPE_QUERY_OCCLUSION_PREDICATE:
748 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
749 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
750 radeon_emit(cs, va);
751 radeon_emit(cs, va >> 32);
752 break;
753 case PIPE_QUERY_PRIMITIVES_EMITTED:
754 case PIPE_QUERY_PRIMITIVES_GENERATED:
755 case PIPE_QUERY_SO_STATISTICS:
756 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
757 emit_sample_streamout(cs, va, query->stream);
758 break;
759 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
760 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
761 emit_sample_streamout(cs, va + 32 * stream, stream);
762 break;
763 case PIPE_QUERY_TIME_ELAPSED:
764 if (ctx->chip_class >= SI) {
765 /* Write the timestamp from the CP not waiting for
766 * outstanding draws (top-of-pipe).
767 */
768 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
769 radeon_emit(cs, COPY_DATA_COUNT_SEL |
770 COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
771 COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
772 radeon_emit(cs, 0);
773 radeon_emit(cs, 0);
774 radeon_emit(cs, va);
775 radeon_emit(cs, va >> 32);
776 } else {
777 /* Write the timestamp after the last draw is done.
778 * (bottom-of-pipe)
779 */
780 r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
781 0, 3, NULL, va, 0, query->b.type);
782 }
783 break;
784 case PIPE_QUERY_PIPELINE_STATISTICS:
785 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
786 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
787 radeon_emit(cs, va);
788 radeon_emit(cs, va >> 32);
789 break;
790 default:
791 assert(0);
792 }
793 r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
794 RADEON_PRIO_QUERY);
795 }
796
797 static void r600_query_hw_emit_start(struct r600_common_context *ctx,
798 struct r600_query_hw *query)
799 {
800 uint64_t va;
801
802 if (!query->buffer.buf)
803 return; // previous buffer allocation failure
804
805 r600_update_occlusion_query_state(ctx, query->b.type, 1);
806 r600_update_prims_generated_query_state(ctx, query->b.type, 1);
807
808 ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
809 true);
810
811 /* Get a new query buffer if needed. */
812 if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
813 struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
814 *qbuf = query->buffer;
815 query->buffer.results_end = 0;
816 query->buffer.previous = qbuf;
817 query->buffer.buf = r600_new_query_buffer(ctx->screen, query);
818 if (!query->buffer.buf)
819 return;
820 }
821
822 /* emit begin query */
823 va = query->buffer.buf->gpu_address + query->buffer.results_end;
824
825 query->ops->emit_start(ctx, query, query->buffer.buf, va);
826
827 ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
828 }
829
830 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
831 struct r600_query_hw *query,
832 struct r600_resource *buffer,
833 uint64_t va)
834 {
835 struct radeon_winsys_cs *cs = ctx->gfx.cs;
836 uint64_t fence_va = 0;
837
838 switch (query->b.type) {
839 case PIPE_QUERY_OCCLUSION_COUNTER:
840 case PIPE_QUERY_OCCLUSION_PREDICATE:
841 va += 8;
842 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
843 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
844 radeon_emit(cs, va);
845 radeon_emit(cs, va >> 32);
846
847 fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
848 break;
849 case PIPE_QUERY_PRIMITIVES_EMITTED:
850 case PIPE_QUERY_PRIMITIVES_GENERATED:
851 case PIPE_QUERY_SO_STATISTICS:
852 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
853 va += 16;
854 emit_sample_streamout(cs, va, query->stream);
855 break;
856 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
857 va += 16;
858 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
859 emit_sample_streamout(cs, va + 32 * stream, stream);
860 break;
861 case PIPE_QUERY_TIME_ELAPSED:
862 va += 8;
863 /* fall through */
864 case PIPE_QUERY_TIMESTAMP:
865 r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
866 0, 3, NULL, va, 0, query->b.type);
867 fence_va = va + 8;
868 break;
869 case PIPE_QUERY_PIPELINE_STATISTICS: {
870 unsigned sample_size = (query->result_size - 8) / 2;
871
872 va += sample_size;
873 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
874 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
875 radeon_emit(cs, va);
876 radeon_emit(cs, va >> 32);
877
878 fence_va = va + sample_size;
879 break;
880 }
881 default:
882 assert(0);
883 }
884 r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
885 RADEON_PRIO_QUERY);
886
887 if (fence_va)
888 r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
889 query->buffer.buf, fence_va, 0x80000000,
890 query->b.type);
891 }
892
893 static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
894 struct r600_query_hw *query)
895 {
896 uint64_t va;
897
898 if (!query->buffer.buf)
899 return; // previous buffer allocation failure
900
 901 /* Queries with a begin already reserved CS space for the end packets in r600_query_hw_emit_start(); only NO_START queries reserve it here. */
902 if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
903 ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false);
904 }
905
906 /* emit end query */
907 va = query->buffer.buf->gpu_address + query->buffer.results_end;
908
909 query->ops->emit_stop(ctx, query, query->buffer.buf, va);
910
911 query->buffer.results_end += query->result_size;
912
913 if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
914 ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
915
916 r600_update_occlusion_query_state(ctx, query->b.type, -1);
917 r600_update_prims_generated_query_state(ctx, query->b.type, -1);
918 }
919
920 static void emit_set_predicate(struct r600_common_context *ctx,
921 struct r600_resource *buf, uint64_t va,
922 uint32_t op)
923 {
924 struct radeon_winsys_cs *cs = ctx->gfx.cs;
925
926 if (ctx->chip_class >= GFX9) {
927 radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
928 radeon_emit(cs, op);
929 radeon_emit(cs, va);
930 radeon_emit(cs, va >> 32);
931 } else {
932 radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
933 radeon_emit(cs, va);
934 radeon_emit(cs, op | ((va >> 32) & 0xFF));
935 }
936 r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
937 RADEON_PRIO_QUERY);
938 }
939
940 static void r600_emit_query_predication(struct r600_common_context *ctx,
941 struct r600_atom *atom)
942 {
943 struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
944 struct r600_query_buffer *qbuf;
945 uint32_t op;
946 bool flag_wait, invert;
947
948 if (!query)
949 return;
950
951 invert = ctx->render_cond_invert;
952 flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
953 ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
954
955 if (query->workaround_buf) {
956 op = PRED_OP(PREDICATION_OP_BOOL64);
957 } else {
958 switch (query->b.type) {
959 case PIPE_QUERY_OCCLUSION_COUNTER:
960 case PIPE_QUERY_OCCLUSION_PREDICATE:
961 op = PRED_OP(PREDICATION_OP_ZPASS);
962 break;
963 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
964 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
965 op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
966 invert = !invert;
967 break;
968 default:
969 assert(0);
970 return;
971 }
972 }
973
974 /* if true then invert, see GL_ARB_conditional_render_inverted */
975 if (invert)
976 op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
977 else
978 op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
979
980 /* Use the value written by compute shader as a workaround. Note that
981 * the wait flag does not apply in this predication mode.
982 *
983 * The shader outputs the result value to L2. Workarounds only affect VI
984 * and later, where the CP reads data from L2, so we don't need an
985 * additional flush.
986 */
987 if (query->workaround_buf) {
988 uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
989 emit_set_predicate(ctx, query->workaround_buf, va, op);
990 return;
991 }
992
993 op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
994
995 /* emit predicate packets for all data blocks */
996 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
997 unsigned results_base = 0;
998 uint64_t va_base = qbuf->buf->gpu_address;
999
1000 while (results_base < qbuf->results_end) {
1001 uint64_t va = va_base + results_base;
1002
1003 if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
1004 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
1005 emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
1006
1007 /* set CONTINUE bit for all packets except the first */
1008 op |= PREDICATION_CONTINUE;
1009 }
1010 } else {
1011 emit_set_predicate(ctx, qbuf->buf, va, op);
1012 op |= PREDICATION_CONTINUE;
1013 }
1014
1015 results_base += query->result_size;
1016 }
1017 }
1018 }
1019
1020 static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
1021 {
1022 struct r600_common_screen *rscreen =
1023 (struct r600_common_screen *)ctx->screen;
1024
1025 if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
1026 query_type == PIPE_QUERY_GPU_FINISHED ||
1027 query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
1028 return r600_query_sw_create(query_type);
1029
1030 return r600_query_hw_create(rscreen, query_type, index);
1031 }
1032
1033 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
1034 {
1035 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1036 struct r600_query *rquery = (struct r600_query *)query;
1037
1038 rquery->ops->destroy(rctx->screen, rquery);
1039 }
1040
1041 static boolean r600_begin_query(struct pipe_context *ctx,
1042 struct pipe_query *query)
1043 {
1044 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1045 struct r600_query *rquery = (struct r600_query *)query;
1046
1047 return rquery->ops->begin(rctx, rquery);
1048 }
1049
1050 void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
1051 struct r600_query_hw *query)
1052 {
1053 struct r600_query_buffer *prev = query->buffer.previous;
1054
1055 /* Discard the old query buffers. */
1056 while (prev) {
1057 struct r600_query_buffer *qbuf = prev;
1058 prev = prev->previous;
1059 r600_resource_reference(&qbuf->buf, NULL);
1060 FREE(qbuf);
1061 }
1062
1063 query->buffer.results_end = 0;
1064 query->buffer.previous = NULL;
1065
1066 /* Obtain a new buffer if the current one can't be mapped without a stall. */
1067 if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
1068 !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
1069 r600_resource_reference(&query->buffer.buf, NULL);
1070 query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
1071 } else {
1072 if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf))
1073 r600_resource_reference(&query->buffer.buf, NULL);
1074 }
1075 }
1076
1077 bool r600_query_hw_begin(struct r600_common_context *rctx,
1078 struct r600_query *rquery)
1079 {
1080 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1081
1082 if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
1083 assert(0);
1084 return false;
1085 }
1086
1087 if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
1088 r600_query_hw_reset_buffers(rctx, query);
1089
1090 r600_resource_reference(&query->workaround_buf, NULL);
1091
1092 r600_query_hw_emit_start(rctx, query);
1093 if (!query->buffer.buf)
1094 return false;
1095
1096 LIST_ADDTAIL(&query->list, &rctx->active_queries);
1097 return true;
1098 }
1099
1100 static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
1101 {
1102 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1103 struct r600_query *rquery = (struct r600_query *)query;
1104
1105 return rquery->ops->end(rctx, rquery);
1106 }
1107
1108 bool r600_query_hw_end(struct r600_common_context *rctx,
1109 struct r600_query *rquery)
1110 {
1111 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1112
1113 if (query->flags & R600_QUERY_HW_FLAG_NO_START)
1114 r600_query_hw_reset_buffers(rctx, query);
1115
1116 r600_query_hw_emit_stop(rctx, query);
1117
1118 if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
1119 LIST_DELINIT(&query->list);
1120
1121 if (!query->buffer.buf)
1122 return false;
1123
1124 return true;
1125 }
1126
1127 static void r600_get_hw_query_params(struct r600_common_context *rctx,
1128 struct r600_query_hw *rquery, int index,
1129 struct r600_hw_query_params *params)
1130 {
1131 unsigned max_rbs = rctx->screen->info.num_render_backends;
1132
1133 params->pair_stride = 0;
1134 params->pair_count = 1;
1135
1136 switch (rquery->b.type) {
1137 case PIPE_QUERY_OCCLUSION_COUNTER:
1138 case PIPE_QUERY_OCCLUSION_PREDICATE:
1139 params->start_offset = 0;
1140 params->end_offset = 8;
1141 params->fence_offset = max_rbs * 16;
1142 params->pair_stride = 16;
1143 params->pair_count = max_rbs;
1144 break;
1145 case PIPE_QUERY_TIME_ELAPSED:
1146 params->start_offset = 0;
1147 params->end_offset = 8;
1148 params->fence_offset = 16;
1149 break;
1150 case PIPE_QUERY_TIMESTAMP:
1151 params->start_offset = 0;
1152 params->end_offset = 0;
1153 params->fence_offset = 8;
1154 break;
1155 case PIPE_QUERY_PRIMITIVES_EMITTED:
1156 params->start_offset = 8;
1157 params->end_offset = 24;
1158 params->fence_offset = params->end_offset + 4;
1159 break;
1160 case PIPE_QUERY_PRIMITIVES_GENERATED:
1161 params->start_offset = 0;
1162 params->end_offset = 16;
1163 params->fence_offset = params->end_offset + 4;
1164 break;
1165 case PIPE_QUERY_SO_STATISTICS:
1166 params->start_offset = 8 - index * 8;
1167 params->end_offset = 24 - index * 8;
1168 params->fence_offset = params->end_offset + 4;
1169 break;
1170 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1171 params->pair_count = R600_MAX_STREAMS;
1172 params->pair_stride = 32;
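/* fall through */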
1173 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1174 params->start_offset = 0;
1175 params->end_offset = 16;
1176
1177 /* We can re-use the high dword of the last 64-bit value as a
1178 * fence: it is initialized as 0, and the high bit is set by
1179 * the write of the streamout stats event.
1180 */
1181 params->fence_offset = rquery->result_size - 4;
1182 break;
1183 case PIPE_QUERY_PIPELINE_STATISTICS:
1184 {
1185 /* Offsets apply to EG+ */
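/* Indexed by the pipeline-statistics counter index; each entry is the byte
 * offset of that counter's begin value within one hardware sample (the end
 * values follow 88 bytes later, see r600_query_hw_add_result()).
 */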
1186 static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
1187 params->start_offset = offsets[index];
1188 params->end_offset = 88 + offsets[index];
1189 params->fence_offset = 2 * 88;
1190 break;
1191 }
1192 default:
1193 unreachable("r600_get_hw_query_params unsupported");
1194 }
1195 }
1196
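/* Read one (start, end) pair of 64-bit counters, each stored as a lo/hi dword
 * pair at the given dword indices, and return end - start.  With
 * test_status_bit, bit 31 of each high dword is the "value written" flag set
 * by the GPU (or pre-set for disabled RBs); if either flag is missing, the
 * sample is ignored and 0 is returned.
 */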
1197 static uint64_t r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
1198 bool test_status_bit)
1199 {
1200 uint32_t *current_result = (uint32_t*)map;
1201 uint64_t start, end;
1202
1203 start = (uint64_t)current_result[start_index] |
1204 (uint64_t)current_result[start_index+1] << 32;
1205 end = (uint64_t)current_result[end_index] |
1206 (uint64_t)current_result[end_index+1] << 32;
1207
1208 if (!test_status_bit ||
1209 ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
1210 return end - start;
1211 }
1212 return 0;
1213 }
1214
1215 static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
1216 struct r600_query_hw *query,
1217 void *buffer,
1218 union pipe_query_result *result)
1219 {
1220 unsigned max_rbs = rscreen->info.num_render_backends;
1221
1222 switch (query->b.type) {
1223 case PIPE_QUERY_OCCLUSION_COUNTER: {
1224 for (unsigned i = 0; i < max_rbs; ++i) {
1225 unsigned results_base = i * 16;
1226 result->u64 +=
1227 r600_query_read_result(buffer + results_base, 0, 2, true);
1228 }
1229 break;
1230 }
1231 case PIPE_QUERY_OCCLUSION_PREDICATE: {
1232 for (unsigned i = 0; i < max_rbs; ++i) {
1233 unsigned results_base = i * 16;
1234 result->b = result->b ||
1235 r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
1236 }
1237 break;
1238 }
1239 case PIPE_QUERY_TIME_ELAPSED:
1240 result->u64 += r600_query_read_result(buffer, 0, 2, false);
1241 break;
1242 case PIPE_QUERY_TIMESTAMP:
1243 result->u64 = *(uint64_t*)buffer;
1244 break;
1245 case PIPE_QUERY_PRIMITIVES_EMITTED:
1246 /* SAMPLE_STREAMOUTSTATS stores this structure:
1247 * {
1248 * u64 NumPrimitivesWritten;
1249 * u64 PrimitiveStorageNeeded;
1250 * }
1251 * We only need NumPrimitivesWritten here. */
1252 result->u64 += r600_query_read_result(buffer, 2, 6, true);
1253 break;
1254 case PIPE_QUERY_PRIMITIVES_GENERATED:
1255 /* Here we read PrimitiveStorageNeeded. */
1256 result->u64 += r600_query_read_result(buffer, 0, 4, true);
1257 break;
1258 case PIPE_QUERY_SO_STATISTICS:
1259 result->so_statistics.num_primitives_written +=
1260 r600_query_read_result(buffer, 2, 6, true);
1261 result->so_statistics.primitives_storage_needed +=
1262 r600_query_read_result(buffer, 0, 4, true);
1263 break;
1264 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1265 result->b = result->b ||
1266 r600_query_read_result(buffer, 2, 6, true) !=
1267 r600_query_read_result(buffer, 0, 4, true);
1268 break;
1269 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1270 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
1271 result->b = result->b ||
1272 r600_query_read_result(buffer, 2, 6, true) !=
1273 r600_query_read_result(buffer, 0, 4, true);
1274 buffer = (char *)buffer + 32;
1275 }
1276 break;
1277 case PIPE_QUERY_PIPELINE_STATISTICS:
1278 if (rscreen->chip_class >= EVERGREEN) {
1279 result->pipeline_statistics.ps_invocations +=
1280 r600_query_read_result(buffer, 0, 22, false);
1281 result->pipeline_statistics.c_primitives +=
1282 r600_query_read_result(buffer, 2, 24, false);
1283 result->pipeline_statistics.c_invocations +=
1284 r600_query_read_result(buffer, 4, 26, false);
1285 result->pipeline_statistics.vs_invocations +=
1286 r600_query_read_result(buffer, 6, 28, false);
1287 result->pipeline_statistics.gs_invocations +=
1288 r600_query_read_result(buffer, 8, 30, false);
1289 result->pipeline_statistics.gs_primitives +=
1290 r600_query_read_result(buffer, 10, 32, false);
1291 result->pipeline_statistics.ia_primitives +=
1292 r600_query_read_result(buffer, 12, 34, false);
1293 result->pipeline_statistics.ia_vertices +=
1294 r600_query_read_result(buffer, 14, 36, false);
1295 result->pipeline_statistics.hs_invocations +=
1296 r600_query_read_result(buffer, 16, 38, false);
1297 result->pipeline_statistics.ds_invocations +=
1298 r600_query_read_result(buffer, 18, 40, false);
1299 result->pipeline_statistics.cs_invocations +=
1300 r600_query_read_result(buffer, 20, 42, false);
1301 } else {
1302 result->pipeline_statistics.ps_invocations +=
1303 r600_query_read_result(buffer, 0, 16, false);
1304 result->pipeline_statistics.c_primitives +=
1305 r600_query_read_result(buffer, 2, 18, false);
1306 result->pipeline_statistics.c_invocations +=
1307 r600_query_read_result(buffer, 4, 20, false);
1308 result->pipeline_statistics.vs_invocations +=
1309 r600_query_read_result(buffer, 6, 22, false);
1310 result->pipeline_statistics.gs_invocations +=
1311 r600_query_read_result(buffer, 8, 24, false);
1312 result->pipeline_statistics.gs_primitives +=
1313 r600_query_read_result(buffer, 10, 26, false);
1314 result->pipeline_statistics.ia_primitives +=
1315 r600_query_read_result(buffer, 12, 28, false);
1316 result->pipeline_statistics.ia_vertices +=
1317 r600_query_read_result(buffer, 14, 30, false);
1318 }
1319 #if 0 /* for testing */
1320 printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
1321 "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
1322 "Clipper prims=%llu, PS=%llu, CS=%llu\n",
1323 result->pipeline_statistics.ia_vertices,
1324 result->pipeline_statistics.ia_primitives,
1325 result->pipeline_statistics.vs_invocations,
1326 result->pipeline_statistics.hs_invocations,
1327 result->pipeline_statistics.ds_invocations,
1328 result->pipeline_statistics.gs_invocations,
1329 result->pipeline_statistics.gs_primitives,
1330 result->pipeline_statistics.c_invocations,
1331 result->pipeline_statistics.c_primitives,
1332 result->pipeline_statistics.ps_invocations,
1333 result->pipeline_statistics.cs_invocations);
1334 #endif
1335 break;
1336 default:
1337 assert(0);
1338 }
1339 }
1340
1341 static boolean r600_get_query_result(struct pipe_context *ctx,
1342 struct pipe_query *query, boolean wait,
1343 union pipe_query_result *result)
1344 {
1345 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1346 struct r600_query *rquery = (struct r600_query *)query;
1347
1348 return rquery->ops->get_result(rctx, rquery, wait, result);
1349 }
1350
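#if 0 /* usage sketch */
/* Illustrative only, not part of the driver: a rough sketch of how a gallium
 * frontend might drive the query hooks implemented in this file, assuming a
 * valid pipe_context. Returns the measured GPU time in nanoseconds.
 */
static uint64_t example_time_elapsed_ns(struct pipe_context *pipe)
{
	union pipe_query_result result = {0};
	struct pipe_query *q = pipe->create_query(pipe, PIPE_QUERY_TIME_ELAPSED, 0);

	if (!q)
		return 0;

	pipe->begin_query(pipe, q);
	/* ... draws to be measured go here ... */
	pipe->end_query(pipe, q);

	/* wait=true blocks until the GPU has written the result. */
	if (!pipe->get_query_result(pipe, q, true, &result))
		result.u64 = 0;

	pipe->destroy_query(pipe, q);
	return result.u64;
}
#endif
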
1351 static void r600_get_query_result_resource(struct pipe_context *ctx,
1352 struct pipe_query *query,
1353 boolean wait,
1354 enum pipe_query_value_type result_type,
1355 int index,
1356 struct pipe_resource *resource,
1357 unsigned offset)
1358 {
1359 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1360 struct r600_query *rquery = (struct r600_query *)query;
1361
1362 rquery->ops->get_result_resource(rctx, rquery, wait, result_type, index,
1363 resource, offset);
1364 }
1365
1366 static void r600_query_hw_clear_result(struct r600_query_hw *query,
1367 union pipe_query_result *result)
1368 {
1369 util_query_clear_result(result, query->b.type);
1370 }
1371
1372 bool r600_query_hw_get_result(struct r600_common_context *rctx,
1373 struct r600_query *rquery,
1374 bool wait, union pipe_query_result *result)
1375 {
1376 struct r600_common_screen *rscreen = rctx->screen;
1377 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1378 struct r600_query_buffer *qbuf;
1379
1380 query->ops->clear_result(query, result);
1381
1382 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
1383 unsigned usage = PIPE_TRANSFER_READ |
1384 (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
1385 unsigned results_base = 0;
1386 void *map;
1387
1388 if (rquery->b.flushed)
1389 map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
1390 else
1391 map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
1392
1393 if (!map)
1394 return false;
1395
1396 while (results_base != qbuf->results_end) {
1397 query->ops->add_result(rscreen, query, map + results_base,
1398 result);
1399 results_base += query->result_size;
1400 }
1401 }
1402
1403 /* Convert the time to expected units. */
1404 if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
1405 rquery->type == PIPE_QUERY_TIMESTAMP) {
1406 result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq;
1407 }
1408 return true;
1409 }
1410
1411 /* Create the compute shader that is used to collect the results.
1412 *
1413 * One compute grid with a single thread is launched for every query result
1414 * buffer. The thread (optionally) reads a previous summary buffer, then
1415 * accumulates data from the query result buffer, and writes the result either
1416 * to a summary buffer to be consumed by the next grid invocation or to the
1417 * user-supplied buffer.
1418 *
1419 * Data layout:
1420 *
1421 * CONST
1422 * 0.x = end_offset
1423 * 0.y = result_stride
1424 * 0.z = result_count
1425 * 0.w = bit field:
1426 * 1: read previously accumulated values
1427 * 2: write accumulated values for chaining
1428 * 4: write result available
1429 * 8: convert result to boolean (0/1)
1430 * 16: only read one dword and use that as result
1431 * 32: apply timestamp conversion
1432 * 64: store full 64 bits result
1433 * 128: store signed 32 bits result
1434 * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs
1435 * 1.x = fence_offset
1436 * 1.y = pair_stride
1437 * 1.z = pair_count
1438 *
1439 * BUFFER[0] = query result buffer
1440 * BUFFER[1] = previous summary buffer
1441 * BUFFER[2] = next summary buffer or user-supplied buffer
1442 */
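/* For example, reading a 64-bit PIPE_QUERY_TIME_ELAPSED result from a single,
 * unchained buffer uses config = 32 | 64: apply the timestamp conversion and
 * store the full 64-bit value.
 */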
1443 static void r600_create_query_result_shader(struct r600_common_context *rctx)
1444 {
1445 /* TEMP[0].xy = accumulated result so far
1446 * TEMP[0].z = result not available
1447 *
1448 * TEMP[1].x = current result index
1449 * TEMP[1].y = current pair index
1450 */
1451 static const char text_tmpl[] =
1452 "COMP\n"
1453 "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
1454 "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
1455 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
1456 "DCL BUFFER[0]\n"
1457 "DCL BUFFER[1]\n"
1458 "DCL BUFFER[2]\n"
1459 "DCL CONST[0..1]\n"
1460 "DCL TEMP[0..5]\n"
1461 "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
1462 "IMM[1] UINT32 {1, 2, 4, 8}\n"
1463 "IMM[2] UINT32 {16, 32, 64, 128}\n"
1464 "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
1465 "IMM[4] UINT32 {256, 0, 0, 0}\n"
1466
1467 "AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
1468 "UIF TEMP[5]\n"
1469 /* Check result availability. */
1470 "LOAD TEMP[1].x, BUFFER[0], CONST[1].xxxx\n"
1471 "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
1472 "MOV TEMP[1], TEMP[0].zzzz\n"
1473 "NOT TEMP[0].z, TEMP[0].zzzz\n"
1474
1475 /* Load result if available. */
1476 "UIF TEMP[1]\n"
1477 "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
1478 "ENDIF\n"
1479 "ELSE\n"
1480 /* Load previously accumulated result if requested. */
1481 "MOV TEMP[0], IMM[0].xxxx\n"
1482 "AND TEMP[4], CONST[0].wwww, IMM[1].xxxx\n"
1483 "UIF TEMP[4]\n"
1484 "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
1485 "ENDIF\n"
1486
1487 "MOV TEMP[1].x, IMM[0].xxxx\n"
1488 "BGNLOOP\n"
1489 /* Break if accumulated result so far is not available. */
1490 "UIF TEMP[0].zzzz\n"
1491 "BRK\n"
1492 "ENDIF\n"
1493
1494 /* Break if result_index >= result_count. */
1495 "USGE TEMP[5], TEMP[1].xxxx, CONST[0].zzzz\n"
1496 "UIF TEMP[5]\n"
1497 "BRK\n"
1498 "ENDIF\n"
1499
1500 /* Load fence and check result availability */
1501 "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy, CONST[1].xxxx\n"
1502 "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
1503 "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
1504 "NOT TEMP[0].z, TEMP[0].zzzz\n"
1505 "UIF TEMP[0].zzzz\n"
1506 "BRK\n"
1507 "ENDIF\n"
1508
1509 "MOV TEMP[1].y, IMM[0].xxxx\n"
1510 "BGNLOOP\n"
1511 /* Load start and end. */
1512 "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy\n"
1513 "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[1].yyyy, TEMP[5].xxxx\n"
1514 "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
1515
1516 "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0].xxxx\n"
1517 "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
1518
1519 "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
1520
1521 "AND TEMP[5].z, CONST[0].wwww, IMM[4].xxxx\n"
1522 "UIF TEMP[5].zzzz\n"
1523 /* Load second start/end half-pair and
1524 * take the difference
1525 */
1526 "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
1527 "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
1528 "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
1529
1530 "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
1531 "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
1532 "ENDIF\n"
1533
1534 "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
1535
1536 /* Increment pair index */
1537 "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
1538 "USGE TEMP[5], TEMP[1].yyyy, CONST[1].zzzz\n"
1539 "UIF TEMP[5]\n"
1540 "BRK\n"
1541 "ENDIF\n"
1542 "ENDLOOP\n"
1543
1544 /* Increment result index */
1545 "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
1546 "ENDLOOP\n"
1547 "ENDIF\n"
1548
1549 "AND TEMP[4], CONST[0].wwww, IMM[1].yyyy\n"
1550 "UIF TEMP[4]\n"
1551 /* Store accumulated data for chaining. */
1552 "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
1553 "ELSE\n"
1554 "AND TEMP[4], CONST[0].wwww, IMM[1].zzzz\n"
1555 "UIF TEMP[4]\n"
1556 /* Store result availability. */
1557 "NOT TEMP[0].z, TEMP[0]\n"
1558 "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
1559 "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
1560
1561 "AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
1562 "UIF TEMP[4]\n"
1563 "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
1564 "ENDIF\n"
1565 "ELSE\n"
1566 /* Store result if it is available. */
1567 "NOT TEMP[4], TEMP[0].zzzz\n"
1568 "UIF TEMP[4]\n"
1569 /* Apply timestamp conversion */
1570 "AND TEMP[4], CONST[0].wwww, IMM[2].yyyy\n"
1571 "UIF TEMP[4]\n"
1572 "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
1573 "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
1574 "ENDIF\n"
1575
1576 /* Convert to boolean */
1577 "AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
1578 "UIF TEMP[4]\n"
1579 "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
1580 "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
1581 "MOV TEMP[0].y, IMM[0].xxxx\n"
1582 "ENDIF\n"
1583
1584 "AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
1585 "UIF TEMP[4]\n"
1586 "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
1587 "ELSE\n"
1588 /* Clamping */
1589 "UIF TEMP[0].yyyy\n"
1590 "MOV TEMP[0].x, IMM[0].wwww\n"
1591 "ENDIF\n"
1592
1593 "AND TEMP[4], CONST[0].wwww, IMM[2].wwww\n"
1594 "UIF TEMP[4]\n"
1595 "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
1596 "ENDIF\n"
1597
1598 "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
1599 "ENDIF\n"
1600 "ENDIF\n"
1601 "ENDIF\n"
1602 "ENDIF\n"
1603
1604 "END\n";
1605
1606 char text[sizeof(text_tmpl) + 32];
1607 struct tgsi_token tokens[1024];
1608 struct pipe_compute_state state = {};
1609
1610 /* Hard code the frequency into the shader so that the backend can
1611 * use the full range of optimizations for divide-by-constant.
1612 */
1613 snprintf(text, sizeof(text), text_tmpl,
1614 rctx->screen->info.clock_crystal_freq);
1615
1616 if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
1617 assert(false);
1618 return;
1619 }
1620
1621 state.ir_type = PIPE_SHADER_IR_TGSI;
1622 state.prog = tokens;
1623
1624 rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
1625 }
1626
1627 static void r600_restore_qbo_state(struct r600_common_context *rctx,
1628 struct r600_qbo_state *st)
1629 {
1630 rctx->b.bind_compute_state(&rctx->b, st->saved_compute);
1631
1632 rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
1633 pipe_resource_reference(&st->saved_const0.buffer, NULL);
1634
1635 rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
1636 for (unsigned i = 0; i < 3; ++i)
1637 pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
1638 }
1639
1640 static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
1641 struct r600_query *rquery,
1642 bool wait,
1643 enum pipe_query_value_type result_type,
1644 int index,
1645 struct pipe_resource *resource,
1646 unsigned offset)
1647 {
1648 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1649 struct r600_query_buffer *qbuf;
1650 struct r600_query_buffer *qbuf_prev;
1651 struct pipe_resource *tmp_buffer = NULL;
1652 unsigned tmp_buffer_offset = 0;
1653 struct r600_qbo_state saved_state = {};
1654 struct pipe_grid_info grid = {};
1655 struct pipe_constant_buffer constant_buffer = {};
1656 struct pipe_shader_buffer ssbo[3];
1657 struct r600_hw_query_params params;
1658 struct {
1659 uint32_t end_offset;
1660 uint32_t result_stride;
1661 uint32_t result_count;
1662 uint32_t config;
1663 uint32_t fence_offset;
1664 uint32_t pair_stride;
1665 uint32_t pair_count;
1666 } consts;
1667
1668 if (!rctx->query_result_shader) {
1669 r600_create_query_result_shader(rctx);
1670 if (!rctx->query_result_shader)
1671 return;
1672 }
1673
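/* Scratch summary buffer (BUFFER[1]/BUFFER[2] in the shader) used to carry
 * the accumulated result between grid launches when the query results span
 * more than one buffer.
 */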
1674 if (query->buffer.previous) {
1675 u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
1676 &tmp_buffer_offset, &tmp_buffer);
1677 if (!tmp_buffer)
1678 return;
1679 }
1680
1681 rctx->save_qbo_state(&rctx->b, &saved_state);
1682
1683 r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
1684 consts.end_offset = params.end_offset - params.start_offset;
1685 consts.fence_offset = params.fence_offset - params.start_offset;
1686 consts.result_stride = query->result_size;
1687 consts.pair_stride = params.pair_stride;
1688 consts.pair_count = params.pair_count;
1689
1690 constant_buffer.buffer_size = sizeof(consts);
1691 constant_buffer.user_buffer = &consts;
1692
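/* Shader buffer bindings: BUFFER[0] is the query buffer being read,
 * BUFFER[1] carries the accumulated result between chained buffers, and
 * BUFFER[2] is the write destination. BUFFER[2] aliases the temporary
 * accumulator except for the last buffer in the chain, where it is
 * redirected to the caller's resource in the loop below.
 */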
1693 ssbo[1].buffer = tmp_buffer;
1694 ssbo[1].buffer_offset = tmp_buffer_offset;
1695 ssbo[1].buffer_size = 16;
1696
1697 ssbo[2] = ssbo[1];
1698
1699 rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);
1700
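/* A single 1x1x1 dispatch is enough; the result shader itself iterates
 * over all results stored in the bound query buffer.
 */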
1701 grid.block[0] = 1;
1702 grid.block[1] = 1;
1703 grid.block[2] = 1;
1704 grid.grid[0] = 1;
1705 grid.grid[1] = 1;
1706 grid.grid[2] = 1;
1707
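/* CONST[0].w is a bit field selecting the result shader's behaviour
 * (it is tested against immediates in the TGSI template above):
 *   4 = report result availability instead of the value (index < 0)
 *   8 = reduce the result to a boolean (predicates)
 *  32 = apply the timestamp conversion
 *  64 = store a 64-bit result
 * 128 = store a signed 32-bit result
 * 256 = stream-overflow mode
 * Bits 1, 2 and 16 (accumulator chaining and the timestamp-only read)
 * are set per query buffer in the loop below.
 */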
1708 consts.config = 0;
1709 if (index < 0)
1710 consts.config |= 4;
1711 if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE)
1712 consts.config |= 8;
1713 else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
1714 query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
1715 consts.config |= 8 | 256;
1716 else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
1717 query->b.type == PIPE_QUERY_TIME_ELAPSED)
1718 consts.config |= 32;
1719
1720 switch (result_type) {
1721 case PIPE_QUERY_TYPE_U64:
1722 case PIPE_QUERY_TYPE_I64:
1723 consts.config |= 64;
1724 break;
1725 case PIPE_QUERY_TYPE_I32:
1726 consts.config |= 128;
1727 break;
1728 case PIPE_QUERY_TYPE_U32:
1729 break;
1730 }
1731
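/* The query results were written by the CP; set the barrier flag so they
 * are visible to the compute shader's L2 reads.
 */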
1732 rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;
1733
1734 for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
1735 if (query->b.type != PIPE_QUERY_TIMESTAMP) {
1736 qbuf_prev = qbuf->previous;
1737 consts.result_count = qbuf->results_end / query->result_size;
1738 consts.config &= ~3;
1739 if (qbuf != &query->buffer)
1740 consts.config |= 1;
1741 if (qbuf->previous)
1742 consts.config |= 2;
1743 } else {
1744 /* Only read the last timestamp. */
1745 qbuf_prev = NULL;
1746 consts.result_count = 0;
1747 consts.config |= 16;
1748 params.start_offset += qbuf->results_end - query->result_size;
1749 }
1750
1751 rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
1752
1753 ssbo[0].buffer = &qbuf->buf->b.b;
1754 ssbo[0].buffer_offset = params.start_offset;
1755 ssbo[0].buffer_size = qbuf->results_end - params.start_offset;
1756
1757 if (!qbuf->previous) {
1758 ssbo[2].buffer = resource;
1759 ssbo[2].buffer_offset = offset;
1760 ssbo[2].buffer_size = 8;
1761
1762 ((struct r600_resource *)resource)->TC_L2_dirty = true;
1763 }
1764
1765 rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
1766
1767 if (wait && qbuf == &query->buffer) {
1768 uint64_t va;
1769
1770 /* Wait for result availability. Wait only for readiness
1771 * of the last entry, since the fence writes should be
1772 * serialized in the CP.
1773 */
1774 va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
1775 va += params.fence_offset;
1776
1777 r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
1778 }
1779
1780 rctx->b.launch_grid(&rctx->b, &grid);
1781 rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
1782 }
1783
1784 r600_restore_qbo_state(rctx, &saved_state);
1785 pipe_resource_reference(&tmp_buffer, NULL);
1786 }
1787
1788 static void r600_render_condition(struct pipe_context *ctx,
1789 struct pipe_query *query,
1790 boolean condition,
1791 enum pipe_render_cond_flag mode)
1792 {
1793 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1794 struct r600_query_hw *rquery = (struct r600_query_hw *)query;
1795 struct r600_query_buffer *qbuf;
1796 struct r600_atom *atom = &rctx->render_cond_atom;
1797
1798 /* Compute the size of SET_PREDICATION packets. */
1799 atom->num_dw = 0;
1800 if (query) {
1801 bool needs_workaround = false;
1802
1803 /* There is a firmware regression in VI which causes successive
1804 * SET_PREDICATION packets to give the wrong answer for
1805 * non-inverted stream overflow predication.
1806 */
1807 if (rctx->chip_class >= VI && !condition &&
1808 (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
1809 (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
1810 (rquery->buffer.previous ||
1811 rquery->buffer.results_end > rquery->result_size)))) {
1812 needs_workaround = true;
1813 }
1814
1815 if (needs_workaround && !rquery->workaround_buf) {
1816 bool old_force_off = rctx->render_cond_force_off;
1817 rctx->render_cond_force_off = true;
1818
1819 u_suballocator_alloc(
1820 rctx->allocator_zeroed_memory, 8, 8,
1821 &rquery->workaround_offset,
1822 (struct pipe_resource **)&rquery->workaround_buf);
1823
1824 /* Reset to NULL so that the compute grid launched below doesn't
1825 * emit a redundant SET_PREDICATION.
1826 */
1827 rctx->render_cond = NULL;
1828
1829 ctx->get_query_result_resource(
1830 ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
1831 &rquery->workaround_buf->b.b, rquery->workaround_offset);
1832
1833 atom->num_dw = 5;
1834
1835 rctx->render_cond_force_off = old_force_off;
1836 } else {
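/* One 5-dword SET_PREDICATION packet per stored query result;
 * overflow-any predication repeats them for each stream.
 */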
1837 for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
1838 atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
1839
1840 if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
1841 atom->num_dw *= R600_MAX_STREAMS;
1842 }
1843 }
1844
1845 rctx->render_cond = query;
1846 rctx->render_cond_invert = condition;
1847 rctx->render_cond_mode = mode;
1848
1849 rctx->set_atom_dirty(rctx, atom, query != NULL);
1850 }
1851
1852 void r600_suspend_queries(struct r600_common_context *ctx)
1853 {
1854 struct r600_query_hw *query;
1855
1856 LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
1857 r600_query_hw_emit_stop(ctx, query);
1858 }
1859 assert(ctx->num_cs_dw_queries_suspend == 0);
1860 }
1861
1862 static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
1863 struct list_head *query_list)
1864 {
1865 struct r600_query_hw *query;
1866 unsigned num_dw = 0;
1867
1868 LIST_FOR_EACH_ENTRY(query, query_list, list) {
1869 /* begin + end */
1870 num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
1871
1872 /* Workaround for the fact that
1873 * num_cs_dw_queries_suspend is incremented for every
1874 * resumed query, which raises the bar in need_cs_space for
1875 * queries about to be resumed.
1876 */
1877 num_dw += query->num_cs_dw_end;
1878 }
1879 /* primitives generated query */
1880 num_dw += ctx->streamout.enable_atom.num_dw;
1881 /* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
1882 num_dw += 13;
1883
1884 return num_dw;
1885 }
1886
1887 void r600_resume_queries(struct r600_common_context *ctx)
1888 {
1889 struct r600_query_hw *query;
1890 unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
1891
1892 assert(ctx->num_cs_dw_queries_suspend == 0);
1893
1894 /* Check CS space here. Resuming must not be interrupted by flushes. */
1895 ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true);
1896
1897 LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
1898 r600_query_hw_emit_start(ctx, query);
1899 }
1900 }
1901
1902 /* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
1903 void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
1904 {
1905 struct r600_common_context *ctx =
1906 (struct r600_common_context*)rscreen->aux_context;
1907 struct radeon_winsys_cs *cs = ctx->gfx.cs;
1908 struct r600_resource *buffer;
1909 uint32_t *results;
1910 unsigned i, mask = 0;
1911 unsigned max_rbs = ctx->screen->info.num_render_backends;
1912
1913 assert(rscreen->chip_class <= CAYMAN);
1914
1915 /* if backend_map query is supported by the kernel */
1916 if (rscreen->info.r600_gb_backend_map_valid) {
1917 unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
1918 unsigned backend_map = rscreen->info.r600_gb_backend_map;
1919 unsigned item_width, item_mask;
1920
1921 if (ctx->chip_class >= EVERGREEN) {
1922 item_width = 4;
1923 item_mask = 0x7;
1924 } else {
1925 item_width = 2;
1926 item_mask = 0x3;
1927 }
1928
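/* backend_map packs one enabled-RB index per tile pipe, item_width bits
 * each; collect the set of RBs it references.
 */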
1929 while (num_tile_pipes--) {
1930 i = backend_map & item_mask;
1931 mask |= (1<<i);
1932 backend_map >>= item_width;
1933 }
1934 if (mask != 0) {
1935 rscreen->info.enabled_rb_mask = mask;
1936 return;
1937 }
1938 }
1939
1940 /* otherwise use the fallback path for older kernels */
1941
1942 /* create a buffer for the ZPASS_DONE event data (16 bytes per render backend) */
1943 buffer = (struct r600_resource*)
1944 pipe_buffer_create(ctx->b.screen, 0,
1945 PIPE_USAGE_STAGING, max_rbs * 16);
1946 if (!buffer)
1947 return;
1948
1949 /* initialize buffer with zeroes */
1950 results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
1951 if (results) {
1952 memset(results, 0, max_rbs * 4 * 4);
1953
1954 /* emit EVENT_WRITE for ZPASS_DONE */
1955 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
1956 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
1957 radeon_emit(cs, buffer->gpu_address);
1958 radeon_emit(cs, buffer->gpu_address >> 32);
1959
1960 r600_emit_reloc(ctx, &ctx->gfx, buffer,
1961 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
1962
1963 /* analyze results */
1964 results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
1965 if (results) {
1966 for(i = 0; i < max_rbs; i++) {
1967 /* at least the highest bit will be set if the backend is used */
1968 if (results[i*4 + 1])
1969 mask |= (1<<i);
1970 }
1971 }
1972 }
1973
1974 r600_resource_reference(&buffer, NULL);
1975
1976 if (mask)
1977 rscreen->info.enabled_rb_mask = mask;
1978 }
1979
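/* Helpers for building pipe_driver_query_info entries: X() creates an
 * ungrouped entry, XG() places the entry in one of the SW query groups.
 */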
1980 #define XFULL(name_, query_type_, type_, result_type_, group_id_) \
1981 { \
1982 .name = name_, \
1983 .query_type = R600_QUERY_##query_type_, \
1984 .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
1985 .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
1986 .group_id = group_id_ \
1987 }
1988
1989 #define X(name_, query_type_, type_, result_type_) \
1990 XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
1991
1992 #define XG(group_, name_, query_type_, type_, result_type_) \
1993 XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
1994
1995 static struct pipe_driver_query_info r600_driver_query_list[] = {
1996 X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
1997 X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
1998 X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
1999 X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
2000 X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
2001 X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
2002 X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
2003 X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
2004 X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
2005 X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
2006 X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
2007 X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
2008 X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
2009 X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
2010 X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
2011 X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
2012 X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
2013 X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
2014 X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
2015 X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
2016 X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
2017 X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
2018 X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
2019 X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
2020 X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
2021 X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
2022 X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
2023 X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
2024 X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
2025 X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
2026 X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
2027 X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
2028 X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
2029 X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
2030 X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
2031 X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
2032 X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
2033 X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
2034 X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
2035 X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
2036 X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
2037
2038 /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
2039 * which use them as a fallback path to detect the GPU type.
2040 *
2041 * Note: The names of these queries are significant for GPUPerfStudio
2042 * (and possibly their order as well). */
2043 XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
2044 XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
2045 XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
2046 XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
2047 XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
2048
2049 X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
2050 X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
2051 X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
2052
2053 /* The following queries must be at the end of the list because their
2054 * availability is adjusted dynamically based on the DRM version. */
2055 X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
2056 X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
2057 X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
2058 X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
2059 X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
2060 X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
2061 X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
2062 X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
2063 X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
2064 X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
2065 X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
2066 X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
2067 X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
2068 X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
2069 X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
2070 X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
2071 X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
2072 X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
2073 X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
2074 X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
2075 X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
2076 };
2077
2078 #undef X
2079 #undef XG
2080 #undef XFULL
2081
2082 static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
2083 {
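/* Hide trailing entries of r600_driver_query_list (see the ordering note
 * above the list) when the kernel interfaces backing them are not
 * available on older DRM versions.
 */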
2084 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
2085 return ARRAY_SIZE(r600_driver_query_list);
2086 else if (rscreen->info.drm_major == 3) {
2087 if (rscreen->chip_class >= VI)
2088 return ARRAY_SIZE(r600_driver_query_list);
2089 else
2090 return ARRAY_SIZE(r600_driver_query_list) - 7;
2091 }
2092 else
2093 return ARRAY_SIZE(r600_driver_query_list) - 25;
2094 }
2095
2096 static int r600_get_driver_query_info(struct pipe_screen *screen,
2097 unsigned index,
2098 struct pipe_driver_query_info *info)
2099 {
2100 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
2101 unsigned num_queries = r600_get_num_queries(rscreen);
2102
2103 if (!info) {
2104 unsigned num_perfcounters =
2105 r600_get_perfcounter_info(rscreen, 0, NULL);
2106
2107 return num_queries + num_perfcounters;
2108 }
2109
2110 if (index >= num_queries)
2111 return r600_get_perfcounter_info(rscreen, index - num_queries, info);
2112
2113 *info = r600_driver_query_list[index];
2114
2115 switch (info->query_type) {
2116 case R600_QUERY_REQUESTED_VRAM:
2117 case R600_QUERY_VRAM_USAGE:
2118 case R600_QUERY_MAPPED_VRAM:
2119 info->max_value.u64 = rscreen->info.vram_size;
2120 break;
2121 case R600_QUERY_REQUESTED_GTT:
2122 case R600_QUERY_GTT_USAGE:
2123 case R600_QUERY_MAPPED_GTT:
2124 info->max_value.u64 = rscreen->info.gart_size;
2125 break;
2126 case R600_QUERY_GPU_TEMPERATURE:
2127 info->max_value.u64 = 125;
2128 break;
2129 case R600_QUERY_VRAM_VIS_USAGE:
2130 info->max_value.u64 = rscreen->info.vram_vis_size;
2131 break;
2132 }
2133
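/* SW query groups are exposed after the perfcounter groups (see
 * r600_get_driver_query_group_info below), so shift the group id.
 */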
2134 if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
2135 info->group_id += rscreen->perfcounters->num_groups;
2136
2137 return 1;
2138 }
2139
2140 /* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
2141 * performance counter groups, so be careful when changing this and related
2142 * functions.
2143 */
2144 static int r600_get_driver_query_group_info(struct pipe_screen *screen,
2145 unsigned index,
2146 struct pipe_driver_query_group_info *info)
2147 {
2148 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
2149 unsigned num_pc_groups = 0;
2150
2151 if (rscreen->perfcounters)
2152 num_pc_groups = rscreen->perfcounters->num_groups;
2153
2154 if (!info)
2155 return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
2156
2157 if (index < num_pc_groups)
2158 return r600_get_perfcounter_group_info(rscreen, index, info);
2159
2160 index -= num_pc_groups;
2161 if (index >= R600_NUM_SW_QUERY_GROUPS)
2162 return 0;
2163
2164 info->name = "GPIN";
2165 info->max_active_queries = 5;
2166 info->num_queries = 5;
2167 return 1;
2168 }
2169
2170 void r600_query_init(struct r600_common_context *rctx)
2171 {
2172 rctx->b.create_query = r600_create_query;
2173 rctx->b.create_batch_query = r600_create_batch_query;
2174 rctx->b.destroy_query = r600_destroy_query;
2175 rctx->b.begin_query = r600_begin_query;
2176 rctx->b.end_query = r600_end_query;
2177 rctx->b.get_query_result = r600_get_query_result;
2178 rctx->b.get_query_result_resource = r600_get_query_result_resource;
2179 rctx->render_cond_atom.emit = r600_emit_query_predication;
2180
2181 if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
2182 rctx->b.render_condition = r600_render_condition;
2183
2184 LIST_INITHEAD(&rctx->active_queries);
2185 }
2186
2187 void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
2188 {
2189 rscreen->b.get_driver_query_info = r600_get_driver_query_info;
2190 rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
2191 }