radeonsi: avoid redundant SET_PREDICATION packet with QBO workaround
mesa.git: src/gallium/drivers/radeon/r600_query.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 * Copyright 2014 Marek Olšák <marek.olsak@amd.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "r600_query.h"
26 #include "r600_cs.h"
27 #include "util/u_memory.h"
28 #include "util/u_upload_mgr.h"
29 #include "os/os_time.h"
30 #include "tgsi/tgsi_text.h"
31
32 #define R600_MAX_STREAMS 4
33
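/* Byte offsets within a single hardware query result slot, filled by
 * r600_get_hw_query_params() and consumed by the compute-shader based
 * get_result_resource path below.
 */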
34 struct r600_hw_query_params {
35 unsigned start_offset;
36 unsigned end_offset;
37 unsigned fence_offset;
38 unsigned pair_stride;
39 unsigned pair_count;
40 };
41
42 /* Queries without buffer handling or suspend/resume. */
43 struct r600_query_sw {
44 struct r600_query b;
45
46 uint64_t begin_result;
47 uint64_t end_result;
48
49 uint64_t begin_time;
50 uint64_t end_time;
51
52 /* Fence for GPU_FINISHED. */
53 struct pipe_fence_handle *fence;
54 };
55
56 static void r600_query_sw_destroy(struct r600_common_screen *rscreen,
57 struct r600_query *rquery)
58 {
59 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
60
61 rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL);
62 FREE(query);
63 }
64
65 static enum radeon_value_id winsys_id_from_type(unsigned type)
66 {
67 switch (type) {
68 case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
69 case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
70 case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
71 case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
72 case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
73 case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
74 case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
75 case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
76 case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER;
77 case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
78 case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
79 case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
80 case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
81 case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
82 case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
83 case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
84 case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
85 case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
86 case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
87 default: unreachable("query type does not correspond to winsys id");
88 }
89 }
90
91 static bool r600_query_sw_begin(struct r600_common_context *rctx,
92 struct r600_query *rquery)
93 {
94 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
95 enum radeon_value_id ws_id;
96
97 switch(query->b.type) {
98 case PIPE_QUERY_TIMESTAMP_DISJOINT:
99 case PIPE_QUERY_GPU_FINISHED:
100 break;
101 case R600_QUERY_DRAW_CALLS:
102 query->begin_result = rctx->num_draw_calls;
103 break;
104 case R600_QUERY_MRT_DRAW_CALLS:
105 query->begin_result = rctx->num_mrt_draw_calls;
106 break;
107 case R600_QUERY_PRIM_RESTART_CALLS:
108 query->begin_result = rctx->num_prim_restart_calls;
109 break;
110 case R600_QUERY_SPILL_DRAW_CALLS:
111 query->begin_result = rctx->num_spill_draw_calls;
112 break;
113 case R600_QUERY_COMPUTE_CALLS:
114 query->begin_result = rctx->num_compute_calls;
115 break;
116 case R600_QUERY_SPILL_COMPUTE_CALLS:
117 query->begin_result = rctx->num_spill_compute_calls;
118 break;
119 case R600_QUERY_DMA_CALLS:
120 query->begin_result = rctx->num_dma_calls;
121 break;
122 case R600_QUERY_CP_DMA_CALLS:
123 query->begin_result = rctx->num_cp_dma_calls;
124 break;
125 case R600_QUERY_NUM_VS_FLUSHES:
126 query->begin_result = rctx->num_vs_flushes;
127 break;
128 case R600_QUERY_NUM_PS_FLUSHES:
129 query->begin_result = rctx->num_ps_flushes;
130 break;
131 case R600_QUERY_NUM_CS_FLUSHES:
132 query->begin_result = rctx->num_cs_flushes;
133 break;
134 case R600_QUERY_NUM_CB_CACHE_FLUSHES:
135 query->begin_result = rctx->num_cb_cache_flushes;
136 break;
137 case R600_QUERY_NUM_DB_CACHE_FLUSHES:
138 query->begin_result = rctx->num_db_cache_flushes;
139 break;
140 case R600_QUERY_NUM_L2_INVALIDATES:
141 query->begin_result = rctx->num_L2_invalidates;
142 break;
143 case R600_QUERY_NUM_L2_WRITEBACKS:
144 query->begin_result = rctx->num_L2_writebacks;
145 break;
146 case R600_QUERY_NUM_RESIDENT_HANDLES:
147 query->begin_result = rctx->num_resident_handles;
148 break;
149 case R600_QUERY_TC_OFFLOADED_SLOTS:
150 query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
151 break;
152 case R600_QUERY_TC_DIRECT_SLOTS:
153 query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
154 break;
155 case R600_QUERY_TC_NUM_SYNCS:
156 query->begin_result = rctx->tc ? rctx->tc->num_syncs : 0;
157 break;
158 case R600_QUERY_REQUESTED_VRAM:
159 case R600_QUERY_REQUESTED_GTT:
160 case R600_QUERY_MAPPED_VRAM:
161 case R600_QUERY_MAPPED_GTT:
162 case R600_QUERY_VRAM_USAGE:
163 case R600_QUERY_VRAM_VIS_USAGE:
164 case R600_QUERY_GTT_USAGE:
165 case R600_QUERY_GPU_TEMPERATURE:
166 case R600_QUERY_CURRENT_GPU_SCLK:
167 case R600_QUERY_CURRENT_GPU_MCLK:
168 case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
169 case R600_QUERY_NUM_MAPPED_BUFFERS:
170 query->begin_result = 0;
171 break;
172 case R600_QUERY_BUFFER_WAIT_TIME:
173 case R600_QUERY_NUM_GFX_IBS:
174 case R600_QUERY_NUM_SDMA_IBS:
175 case R600_QUERY_NUM_BYTES_MOVED:
176 case R600_QUERY_NUM_EVICTIONS:
177 case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
178 enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
179 query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
180 break;
181 }
182 case R600_QUERY_GFX_BO_LIST_SIZE:
183 ws_id = winsys_id_from_type(query->b.type);
184 query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
185 query->begin_time = rctx->ws->query_value(rctx->ws,
186 RADEON_NUM_GFX_IBS);
187 break;
188 case R600_QUERY_CS_THREAD_BUSY:
189 ws_id = winsys_id_from_type(query->b.type);
190 query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
191 query->begin_time = os_time_get_nano();
192 break;
193 case R600_QUERY_GALLIUM_THREAD_BUSY:
194 query->begin_result =
195 rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
196 query->begin_time = os_time_get_nano();
197 break;
198 case R600_QUERY_GPU_LOAD:
199 case R600_QUERY_GPU_SHADERS_BUSY:
200 case R600_QUERY_GPU_TA_BUSY:
201 case R600_QUERY_GPU_GDS_BUSY:
202 case R600_QUERY_GPU_VGT_BUSY:
203 case R600_QUERY_GPU_IA_BUSY:
204 case R600_QUERY_GPU_SX_BUSY:
205 case R600_QUERY_GPU_WD_BUSY:
206 case R600_QUERY_GPU_BCI_BUSY:
207 case R600_QUERY_GPU_SC_BUSY:
208 case R600_QUERY_GPU_PA_BUSY:
209 case R600_QUERY_GPU_DB_BUSY:
210 case R600_QUERY_GPU_CP_BUSY:
211 case R600_QUERY_GPU_CB_BUSY:
212 case R600_QUERY_GPU_SDMA_BUSY:
213 case R600_QUERY_GPU_PFP_BUSY:
214 case R600_QUERY_GPU_MEQ_BUSY:
215 case R600_QUERY_GPU_ME_BUSY:
216 case R600_QUERY_GPU_SURF_SYNC_BUSY:
217 case R600_QUERY_GPU_DMA_BUSY:
218 case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
219 case R600_QUERY_GPU_CE_BUSY:
220 query->begin_result = r600_begin_counter(rctx->screen,
221 query->b.type);
222 break;
223 case R600_QUERY_NUM_COMPILATIONS:
224 query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
225 break;
226 case R600_QUERY_NUM_SHADERS_CREATED:
227 query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
228 break;
229 case R600_QUERY_NUM_SHADER_CACHE_HITS:
230 query->begin_result =
231 p_atomic_read(&rctx->screen->num_shader_cache_hits);
232 break;
233 case R600_QUERY_GPIN_ASIC_ID:
234 case R600_QUERY_GPIN_NUM_SIMD:
235 case R600_QUERY_GPIN_NUM_RB:
236 case R600_QUERY_GPIN_NUM_SPI:
237 case R600_QUERY_GPIN_NUM_SE:
238 break;
239 default:
240 unreachable("r600_query_sw_begin: bad query type");
241 }
242
243 return true;
244 }
245
246 static bool r600_query_sw_end(struct r600_common_context *rctx,
247 struct r600_query *rquery)
248 {
249 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
250 enum radeon_value_id ws_id;
251
252 switch(query->b.type) {
253 case PIPE_QUERY_TIMESTAMP_DISJOINT:
254 break;
255 case PIPE_QUERY_GPU_FINISHED:
256 rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
257 break;
258 case R600_QUERY_DRAW_CALLS:
259 query->end_result = rctx->num_draw_calls;
260 break;
261 case R600_QUERY_MRT_DRAW_CALLS:
262 query->end_result = rctx->num_mrt_draw_calls;
263 break;
264 case R600_QUERY_PRIM_RESTART_CALLS:
265 query->end_result = rctx->num_prim_restart_calls;
266 break;
267 case R600_QUERY_SPILL_DRAW_CALLS:
268 query->end_result = rctx->num_spill_draw_calls;
269 break;
270 case R600_QUERY_COMPUTE_CALLS:
271 query->end_result = rctx->num_compute_calls;
272 break;
273 case R600_QUERY_SPILL_COMPUTE_CALLS:
274 query->end_result = rctx->num_spill_compute_calls;
275 break;
276 case R600_QUERY_DMA_CALLS:
277 query->end_result = rctx->num_dma_calls;
278 break;
279 case R600_QUERY_CP_DMA_CALLS:
280 query->end_result = rctx->num_cp_dma_calls;
281 break;
282 case R600_QUERY_NUM_VS_FLUSHES:
283 query->end_result = rctx->num_vs_flushes;
284 break;
285 case R600_QUERY_NUM_PS_FLUSHES:
286 query->end_result = rctx->num_ps_flushes;
287 break;
288 case R600_QUERY_NUM_CS_FLUSHES:
289 query->end_result = rctx->num_cs_flushes;
290 break;
291 case R600_QUERY_NUM_CB_CACHE_FLUSHES:
292 query->end_result = rctx->num_cb_cache_flushes;
293 break;
294 case R600_QUERY_NUM_DB_CACHE_FLUSHES:
295 query->end_result = rctx->num_db_cache_flushes;
296 break;
297 case R600_QUERY_NUM_L2_INVALIDATES:
298 query->end_result = rctx->num_L2_invalidates;
299 break;
300 case R600_QUERY_NUM_L2_WRITEBACKS:
301 query->end_result = rctx->num_L2_writebacks;
302 break;
303 case R600_QUERY_NUM_RESIDENT_HANDLES:
304 query->end_result = rctx->num_resident_handles;
305 break;
306 case R600_QUERY_TC_OFFLOADED_SLOTS:
307 query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
308 break;
309 case R600_QUERY_TC_DIRECT_SLOTS:
310 query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
311 break;
312 case R600_QUERY_TC_NUM_SYNCS:
313 query->end_result = rctx->tc ? rctx->tc->num_syncs : 0;
314 break;
315 case R600_QUERY_REQUESTED_VRAM:
316 case R600_QUERY_REQUESTED_GTT:
317 case R600_QUERY_MAPPED_VRAM:
318 case R600_QUERY_MAPPED_GTT:
319 case R600_QUERY_VRAM_USAGE:
320 case R600_QUERY_VRAM_VIS_USAGE:
321 case R600_QUERY_GTT_USAGE:
322 case R600_QUERY_GPU_TEMPERATURE:
323 case R600_QUERY_CURRENT_GPU_SCLK:
324 case R600_QUERY_CURRENT_GPU_MCLK:
325 case R600_QUERY_BUFFER_WAIT_TIME:
326 case R600_QUERY_NUM_MAPPED_BUFFERS:
327 case R600_QUERY_NUM_GFX_IBS:
328 case R600_QUERY_NUM_SDMA_IBS:
329 case R600_QUERY_NUM_BYTES_MOVED:
330 case R600_QUERY_NUM_EVICTIONS:
331 case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
332 enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
333 query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
334 break;
335 }
336 case R600_QUERY_GFX_BO_LIST_SIZE:
337 ws_id = winsys_id_from_type(query->b.type);
338 query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
339 query->end_time = rctx->ws->query_value(rctx->ws,
340 RADEON_NUM_GFX_IBS);
341 break;
342 case R600_QUERY_CS_THREAD_BUSY:
343 ws_id = winsys_id_from_type(query->b.type);
344 query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
345 query->end_time = os_time_get_nano();
346 break;
347 case R600_QUERY_GALLIUM_THREAD_BUSY:
348 query->end_result =
349 rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
350 query->end_time = os_time_get_nano();
351 break;
352 case R600_QUERY_GPU_LOAD:
353 case R600_QUERY_GPU_SHADERS_BUSY:
354 case R600_QUERY_GPU_TA_BUSY:
355 case R600_QUERY_GPU_GDS_BUSY:
356 case R600_QUERY_GPU_VGT_BUSY:
357 case R600_QUERY_GPU_IA_BUSY:
358 case R600_QUERY_GPU_SX_BUSY:
359 case R600_QUERY_GPU_WD_BUSY:
360 case R600_QUERY_GPU_BCI_BUSY:
361 case R600_QUERY_GPU_SC_BUSY:
362 case R600_QUERY_GPU_PA_BUSY:
363 case R600_QUERY_GPU_DB_BUSY:
364 case R600_QUERY_GPU_CP_BUSY:
365 case R600_QUERY_GPU_CB_BUSY:
366 case R600_QUERY_GPU_SDMA_BUSY:
367 case R600_QUERY_GPU_PFP_BUSY:
368 case R600_QUERY_GPU_MEQ_BUSY:
369 case R600_QUERY_GPU_ME_BUSY:
370 case R600_QUERY_GPU_SURF_SYNC_BUSY:
371 case R600_QUERY_GPU_DMA_BUSY:
372 case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
373 case R600_QUERY_GPU_CE_BUSY:
374 query->end_result = r600_end_counter(rctx->screen,
375 query->b.type,
376 query->begin_result);
377 query->begin_result = 0;
378 break;
379 case R600_QUERY_NUM_COMPILATIONS:
380 query->end_result = p_atomic_read(&rctx->screen->num_compilations);
381 break;
382 case R600_QUERY_NUM_SHADERS_CREATED:
383 query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
384 break;
385 case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
386 query->end_result = rctx->last_tex_ps_draw_ratio;
387 break;
388 case R600_QUERY_NUM_SHADER_CACHE_HITS:
389 query->end_result =
390 p_atomic_read(&rctx->screen->num_shader_cache_hits);
391 break;
392 case R600_QUERY_GPIN_ASIC_ID:
393 case R600_QUERY_GPIN_NUM_SIMD:
394 case R600_QUERY_GPIN_NUM_RB:
395 case R600_QUERY_GPIN_NUM_SPI:
396 case R600_QUERY_GPIN_NUM_SE:
397 break;
398 default:
399 unreachable("r600_query_sw_end: bad query type");
400 }
401
402 return true;
403 }
404
405 static bool r600_query_sw_get_result(struct r600_common_context *rctx,
406 struct r600_query *rquery,
407 bool wait,
408 union pipe_query_result *result)
409 {
410 struct r600_query_sw *query = (struct r600_query_sw *)rquery;
411
412 switch (query->b.type) {
413 case PIPE_QUERY_TIMESTAMP_DISJOINT:
414 /* Convert from cycles per millisecond to cycles per second (Hz). */
415 result->timestamp_disjoint.frequency =
416 (uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
417 result->timestamp_disjoint.disjoint = false;
418 return true;
419 case PIPE_QUERY_GPU_FINISHED: {
420 struct pipe_screen *screen = rctx->b.screen;
421 struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b;
422
423 result->b = screen->fence_finish(screen, ctx, query->fence,
424 wait ? PIPE_TIMEOUT_INFINITE : 0);
425 return result->b;
426 }
427
428 case R600_QUERY_GFX_BO_LIST_SIZE:
429 result->u64 = (query->end_result - query->begin_result) /
430 (query->end_time - query->begin_time);
431 return true;
432 case R600_QUERY_CS_THREAD_BUSY:
433 case R600_QUERY_GALLIUM_THREAD_BUSY:
434 result->u64 = (query->end_result - query->begin_result) * 100 /
435 (query->end_time - query->begin_time);
436 return true;
437 case R600_QUERY_GPIN_ASIC_ID:
438 result->u32 = 0;
439 return true;
440 case R600_QUERY_GPIN_NUM_SIMD:
441 result->u32 = rctx->screen->info.num_good_compute_units;
442 return true;
443 case R600_QUERY_GPIN_NUM_RB:
444 result->u32 = rctx->screen->info.num_render_backends;
445 return true;
446 case R600_QUERY_GPIN_NUM_SPI:
447 result->u32 = 1; /* all supported chips have one SPI per SE */
448 return true;
449 case R600_QUERY_GPIN_NUM_SE:
450 result->u32 = rctx->screen->info.max_se;
451 return true;
452 }
453
454 result->u64 = query->end_result - query->begin_result;
455
456 switch (query->b.type) {
457 case R600_QUERY_BUFFER_WAIT_TIME:
458 case R600_QUERY_GPU_TEMPERATURE:
459 result->u64 /= 1000;
460 break;
461 case R600_QUERY_CURRENT_GPU_SCLK:
462 case R600_QUERY_CURRENT_GPU_MCLK:
463 result->u64 *= 1000000;
464 break;
465 }
466
467 return true;
468 }
469
470
471 static struct r600_query_ops sw_query_ops = {
472 .destroy = r600_query_sw_destroy,
473 .begin = r600_query_sw_begin,
474 .end = r600_query_sw_end,
475 .get_result = r600_query_sw_get_result,
476 .get_result_resource = NULL
477 };
478
479 static struct pipe_query *r600_query_sw_create(unsigned query_type)
480 {
481 struct r600_query_sw *query;
482
483 query = CALLOC_STRUCT(r600_query_sw);
484 if (!query)
485 return NULL;
486
487 query->b.type = query_type;
488 query->b.ops = &sw_query_ops;
489
490 return (struct pipe_query *)query;
491 }
492
493 void r600_query_hw_destroy(struct r600_common_screen *rscreen,
494 struct r600_query *rquery)
495 {
496 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
497 struct r600_query_buffer *prev = query->buffer.previous;
498
499 /* Release all query buffers. */
500 while (prev) {
501 struct r600_query_buffer *qbuf = prev;
502 prev = prev->previous;
503 r600_resource_reference(&qbuf->buf, NULL);
504 FREE(qbuf);
505 }
506
507 r600_resource_reference(&query->buffer.buf, NULL);
508 r600_resource_reference(&query->workaround_buf, NULL);
509 FREE(rquery);
510 }
511
512 static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen,
513 struct r600_query_hw *query)
514 {
515 unsigned buf_size = MAX2(query->result_size,
516 rscreen->info.min_alloc_size);
517
518 /* Queries are normally read by the CPU after
519 * being written by the GPU, so a staging buffer
520 * is a good usage pattern.
521 */
522 struct r600_resource *buf = (struct r600_resource*)
523 pipe_buffer_create(&rscreen->b, 0,
524 PIPE_USAGE_STAGING, buf_size);
525 if (!buf)
526 return NULL;
527
528 if (!query->ops->prepare_buffer(rscreen, query, buf)) {
529 r600_resource_reference(&buf, NULL);
530 return NULL;
531 }
532
533 return buf;
534 }
535
536 static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
537 struct r600_query_hw *query,
538 struct r600_resource *buffer)
539 {
540 /* Callers ensure that the buffer is currently unused by the GPU. */
541 uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL,
542 PIPE_TRANSFER_WRITE |
543 PIPE_TRANSFER_UNSYNCHRONIZED);
544 if (!results)
545 return false;
546
547 memset(results, 0, buffer->b.b.width0);
548
549 if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
550 query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
551 unsigned max_rbs = rscreen->info.num_render_backends;
552 unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
553 unsigned num_results;
554 unsigned i, j;
555
556 /* Set top bits for unused backends. */
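/* Each render backend writes a {begin, end} pair of 64-bit ZPASS
 * counters; bit 63 of each value is set by the GPU once the value is
 * valid. Writing 0x80000000 into the high dwords of disabled backends
 * makes r600_query_read_result() treat them as already complete.
 */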
557 num_results = buffer->b.b.width0 / query->result_size;
558 for (j = 0; j < num_results; j++) {
559 for (i = 0; i < max_rbs; i++) {
560 if (!(enabled_rb_mask & (1<<i))) {
561 results[(i * 4)+1] = 0x80000000;
562 results[(i * 4)+3] = 0x80000000;
563 }
564 }
565 results += 4 * max_rbs;
566 }
567 }
568
569 return true;
570 }
571
572 static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
573 struct r600_query *rquery,
574 bool wait,
575 enum pipe_query_value_type result_type,
576 int index,
577 struct pipe_resource *resource,
578 unsigned offset);
579
580 static struct r600_query_ops query_hw_ops = {
581 .destroy = r600_query_hw_destroy,
582 .begin = r600_query_hw_begin,
583 .end = r600_query_hw_end,
584 .get_result = r600_query_hw_get_result,
585 .get_result_resource = r600_query_hw_get_result_resource,
586 };
587
588 static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
589 struct r600_query_hw *query,
590 struct r600_resource *buffer,
591 uint64_t va);
592 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
593 struct r600_query_hw *query,
594 struct r600_resource *buffer,
595 uint64_t va);
596 static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
597 struct r600_query_hw *, void *buffer,
598 union pipe_query_result *result);
599 static void r600_query_hw_clear_result(struct r600_query_hw *,
600 union pipe_query_result *);
601
602 static struct r600_query_hw_ops query_hw_default_hw_ops = {
603 .prepare_buffer = r600_query_hw_prepare_buffer,
604 .emit_start = r600_query_hw_do_emit_start,
605 .emit_stop = r600_query_hw_do_emit_stop,
606 .clear_result = r600_query_hw_clear_result,
607 .add_result = r600_query_hw_add_result,
608 };
609
610 bool r600_query_hw_init(struct r600_common_screen *rscreen,
611 struct r600_query_hw *query)
612 {
613 query->buffer.buf = r600_new_query_buffer(rscreen, query);
614 if (!query->buffer.buf)
615 return false;
616
617 return true;
618 }
619
620 static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen,
621 unsigned query_type,
622 unsigned index)
623 {
624 struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
625 if (!query)
626 return NULL;
627
628 query->b.type = query_type;
629 query->b.ops = &query_hw_ops;
630 query->ops = &query_hw_default_hw_ops;
631
632 switch (query_type) {
633 case PIPE_QUERY_OCCLUSION_COUNTER:
634 case PIPE_QUERY_OCCLUSION_PREDICATE:
635 query->result_size = 16 * rscreen->info.num_render_backends;
636 query->result_size += 16; /* for the fence + alignment */
637 query->num_cs_dw_begin = 6;
638 query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
639 break;
640 case PIPE_QUERY_TIME_ELAPSED:
641 query->result_size = 24;
642 query->num_cs_dw_begin = 8;
643 query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
644 break;
645 case PIPE_QUERY_TIMESTAMP:
646 query->result_size = 16;
647 query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
648 query->flags = R600_QUERY_HW_FLAG_NO_START;
649 break;
650 case PIPE_QUERY_PRIMITIVES_EMITTED:
651 case PIPE_QUERY_PRIMITIVES_GENERATED:
652 case PIPE_QUERY_SO_STATISTICS:
653 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
654 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
655 query->result_size = 32;
656 query->num_cs_dw_begin = 6;
657 query->num_cs_dw_end = 6;
658 query->stream = index;
659 break;
660 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
661 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
662 query->result_size = 32 * R600_MAX_STREAMS;
663 query->num_cs_dw_begin = 6 * R600_MAX_STREAMS;
664 query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
665 break;
666 case PIPE_QUERY_PIPELINE_STATISTICS:
667 /* 11 values on EG, 8 on R600. */
668 query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
669 query->result_size += 8; /* for the fence + alignment */
670 query->num_cs_dw_begin = 6;
671 query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
672 break;
673 default:
674 assert(0);
675 FREE(query);
676 return NULL;
677 }
678
679 if (!r600_query_hw_init(rscreen, query)) {
680 FREE(query);
681 return NULL;
682 }
683
684 return (struct pipe_query *)query;
685 }
686
687 static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
688 unsigned type, int diff)
689 {
690 if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
691 type == PIPE_QUERY_OCCLUSION_PREDICATE) {
692 bool old_enable = rctx->num_occlusion_queries != 0;
693 bool old_perfect_enable =
694 rctx->num_perfect_occlusion_queries != 0;
695 bool enable, perfect_enable;
696
697 rctx->num_occlusion_queries += diff;
698 assert(rctx->num_occlusion_queries >= 0);
699
700 if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
701 rctx->num_perfect_occlusion_queries += diff;
702 assert(rctx->num_perfect_occlusion_queries >= 0);
703 }
704
705 enable = rctx->num_occlusion_queries != 0;
706 perfect_enable = rctx->num_perfect_occlusion_queries != 0;
707
708 if (enable != old_enable || perfect_enable != old_perfect_enable) {
709 rctx->set_occlusion_query_state(&rctx->b, enable);
710 }
711 }
712 }
713
714 static unsigned event_type_for_stream(unsigned stream)
715 {
716 switch (stream) {
717 default:
718 case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
719 case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
720 case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
721 case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
722 }
723 }
724
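/* EVENT_WRITE with EVENT_INDEX(3) makes the CP sample the streamout
 * statistics of the given stream (NumPrimitivesWritten and
 * PrimitiveStorageNeeded, 16 bytes in total) into memory at va.
 */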
725 static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
726 unsigned stream)
727 {
728 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
729 radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3));
730 radeon_emit(cs, va);
731 radeon_emit(cs, va >> 32);
732 }
733
734 static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
735 struct r600_query_hw *query,
736 struct r600_resource *buffer,
737 uint64_t va)
738 {
739 struct radeon_winsys_cs *cs = ctx->gfx.cs;
740
741 switch (query->b.type) {
742 case PIPE_QUERY_OCCLUSION_COUNTER:
743 case PIPE_QUERY_OCCLUSION_PREDICATE:
744 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
745 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
746 radeon_emit(cs, va);
747 radeon_emit(cs, va >> 32);
748 break;
749 case PIPE_QUERY_PRIMITIVES_EMITTED:
750 case PIPE_QUERY_PRIMITIVES_GENERATED:
751 case PIPE_QUERY_SO_STATISTICS:
752 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
753 emit_sample_streamout(cs, va, query->stream);
754 break;
755 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
756 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
757 emit_sample_streamout(cs, va + 32 * stream, stream);
758 break;
759 case PIPE_QUERY_TIME_ELAPSED:
760 if (ctx->chip_class >= SI) {
761 /* Write the timestamp from the CP without waiting
762 * for outstanding draws (top-of-pipe).
763 */
764 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
765 radeon_emit(cs, COPY_DATA_COUNT_SEL |
766 COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
767 COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
768 radeon_emit(cs, 0);
769 radeon_emit(cs, 0);
770 radeon_emit(cs, va);
771 radeon_emit(cs, va >> 32);
772 } else {
773 /* Write the timestamp after the last draw is done
774 * (bottom-of-pipe).
775 */
776 r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
777 0, 3, NULL, va, 0, 0);
778 }
779 break;
780 case PIPE_QUERY_PIPELINE_STATISTICS:
781 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
782 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
783 radeon_emit(cs, va);
784 radeon_emit(cs, va >> 32);
785 break;
786 default:
787 assert(0);
788 }
789 r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
790 RADEON_PRIO_QUERY);
791 }
792
793 static void r600_query_hw_emit_start(struct r600_common_context *ctx,
794 struct r600_query_hw *query)
795 {
796 uint64_t va;
797
798 if (!query->buffer.buf)
799 return; // previous buffer allocation failure
800
801 r600_update_occlusion_query_state(ctx, query->b.type, 1);
802 r600_update_prims_generated_query_state(ctx, query->b.type, 1);
803
804 ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
805 true);
806
807 /* Get a new query buffer if needed. */
808 if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
809 struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
810 *qbuf = query->buffer;
811 query->buffer.results_end = 0;
812 query->buffer.previous = qbuf;
813 query->buffer.buf = r600_new_query_buffer(ctx->screen, query);
814 if (!query->buffer.buf)
815 return;
816 }
817
818 /* emit begin query */
819 va = query->buffer.buf->gpu_address + query->buffer.results_end;
820
821 query->ops->emit_start(ctx, query, query->buffer.buf, va);
822
823 ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
824 }
825
826 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
827 struct r600_query_hw *query,
828 struct r600_resource *buffer,
829 uint64_t va)
830 {
831 struct radeon_winsys_cs *cs = ctx->gfx.cs;
832 uint64_t fence_va = 0;
833
834 switch (query->b.type) {
835 case PIPE_QUERY_OCCLUSION_COUNTER:
836 case PIPE_QUERY_OCCLUSION_PREDICATE:
837 va += 8;
838 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
839 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
840 radeon_emit(cs, va);
841 radeon_emit(cs, va >> 32);
842
843 fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
844 break;
845 case PIPE_QUERY_PRIMITIVES_EMITTED:
846 case PIPE_QUERY_PRIMITIVES_GENERATED:
847 case PIPE_QUERY_SO_STATISTICS:
848 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
849 va += 16;
850 emit_sample_streamout(cs, va, query->stream);
851 break;
852 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
853 va += 16;
854 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
855 emit_sample_streamout(cs, va + 32 * stream, stream);
856 break;
857 case PIPE_QUERY_TIME_ELAPSED:
858 va += 8;
859 /* fall through */
860 case PIPE_QUERY_TIMESTAMP:
861 r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
862 0, 3, NULL, va, 0, 0);
863 fence_va = va + 8;
864 break;
865 case PIPE_QUERY_PIPELINE_STATISTICS: {
866 unsigned sample_size = (query->result_size - 8) / 2;
867
868 va += sample_size;
869 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
870 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
871 radeon_emit(cs, va);
872 radeon_emit(cs, va >> 32);
873
874 fence_va = va + sample_size;
875 break;
876 }
877 default:
878 assert(0);
879 }
880 r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
881 RADEON_PRIO_QUERY);
882
883 if (fence_va)
884 r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
885 query->buffer.buf, fence_va, 0, 0x80000000);
886 }
887
888 static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
889 struct r600_query_hw *query)
890 {
891 uint64_t va;
892
893 if (!query->buffer.buf)
894 return; // previous buffer allocation failure
895
896 /* Queries with a begin already reserved CS space for the end packet in r600_query_hw_emit_start(). */
897 if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
898 ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false);
899 }
900
901 /* emit end query */
902 va = query->buffer.buf->gpu_address + query->buffer.results_end;
903
904 query->ops->emit_stop(ctx, query, query->buffer.buf, va);
905
906 query->buffer.results_end += query->result_size;
907
908 if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
909 ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
910
911 r600_update_occlusion_query_state(ctx, query->b.type, -1);
912 r600_update_prims_generated_query_state(ctx, query->b.type, -1);
913 }
914
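/* Emit a SET_PREDICATION packet for the 64-bit source address va.
 * On GFX9 the operation and the address are separate payload dwords;
 * older chips pack the operation together with the high address bits.
 */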
915 static void emit_set_predicate(struct r600_common_context *ctx,
916 struct r600_resource *buf, uint64_t va,
917 uint32_t op)
918 {
919 struct radeon_winsys_cs *cs = ctx->gfx.cs;
920
921 if (ctx->chip_class >= GFX9) {
922 radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
923 radeon_emit(cs, op);
924 radeon_emit(cs, va);
925 radeon_emit(cs, va >> 32);
926 } else {
927 radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
928 radeon_emit(cs, va);
929 radeon_emit(cs, op | ((va >> 32) & 0xFF));
930 }
931 r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
932 RADEON_PRIO_QUERY);
933 }
934
935 static void r600_emit_query_predication(struct r600_common_context *ctx,
936 struct r600_atom *atom)
937 {
938 struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
939 struct r600_query_buffer *qbuf;
940 uint32_t op;
941 bool flag_wait, invert;
942
943 if (!query)
944 return;
945
946 invert = ctx->render_cond_invert;
947 flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
948 ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
949
950 if (query->workaround_buf) {
951 op = PRED_OP(PREDICATION_OP_BOOL64);
952 } else {
953 switch (query->b.type) {
954 case PIPE_QUERY_OCCLUSION_COUNTER:
955 case PIPE_QUERY_OCCLUSION_PREDICATE:
956 op = PRED_OP(PREDICATION_OP_ZPASS);
957 break;
958 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
959 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
960 op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
961 invert = !invert;
962 break;
963 default:
964 assert(0);
965 return;
966 }
967 }
968
969 /* If invert is set, flip the predication sense; see GL_ARB_conditional_render_inverted. */
970 if (invert)
971 op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
972 else
973 op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
974
975 /* Use the value written by compute shader as a workaround. Note that
976 * the wait flag does not apply in this predication mode.
977 *
978 * The shader outputs the result value to L2. Workarounds only affect VI
979 * and later, where the CP reads data from L2, so we don't need an
980 * additional flush.
981 */
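/* A single packet suffices here: the compute shader already collapsed
 * the query into one 64-bit value, so the per-buffer loop below is skipped.
 */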
982 if (query->workaround_buf) {
983 uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
984 emit_set_predicate(ctx, query->workaround_buf, va, op);
985 return;
986 }
987
988 op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
989
990 /* emit predicate packets for all data blocks */
991 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
992 unsigned results_base = 0;
993 uint64_t va_base = qbuf->buf->gpu_address;
994
995 while (results_base < qbuf->results_end) {
996 uint64_t va = va_base + results_base;
997
998 if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
999 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
1000 emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
1001
1002 /* set CONTINUE bit for all packets except the first */
1003 op |= PREDICATION_CONTINUE;
1004 }
1005 } else {
1006 emit_set_predicate(ctx, qbuf->buf, va, op);
1007 op |= PREDICATION_CONTINUE;
1008 }
1009
1010 results_base += query->result_size;
1011 }
1012 }
1013 }
1014
1015 static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
1016 {
1017 struct r600_common_screen *rscreen =
1018 (struct r600_common_screen *)ctx->screen;
1019
1020 if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
1021 query_type == PIPE_QUERY_GPU_FINISHED ||
1022 query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
1023 return r600_query_sw_create(query_type);
1024
1025 return r600_query_hw_create(rscreen, query_type, index);
1026 }
1027
1028 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
1029 {
1030 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1031 struct r600_query *rquery = (struct r600_query *)query;
1032
1033 rquery->ops->destroy(rctx->screen, rquery);
1034 }
1035
1036 static boolean r600_begin_query(struct pipe_context *ctx,
1037 struct pipe_query *query)
1038 {
1039 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1040 struct r600_query *rquery = (struct r600_query *)query;
1041
1042 return rquery->ops->begin(rctx, rquery);
1043 }
1044
1045 void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
1046 struct r600_query_hw *query)
1047 {
1048 struct r600_query_buffer *prev = query->buffer.previous;
1049
1050 /* Discard the old query buffers. */
1051 while (prev) {
1052 struct r600_query_buffer *qbuf = prev;
1053 prev = prev->previous;
1054 r600_resource_reference(&qbuf->buf, NULL);
1055 FREE(qbuf);
1056 }
1057
1058 query->buffer.results_end = 0;
1059 query->buffer.previous = NULL;
1060
1061 /* Obtain a new buffer if the current one can't be mapped without a stall. */
1062 if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
1063 !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
1064 r600_resource_reference(&query->buffer.buf, NULL);
1065 query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
1066 } else {
1067 if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf))
1068 r600_resource_reference(&query->buffer.buf, NULL);
1069 }
1070 }
1071
1072 bool r600_query_hw_begin(struct r600_common_context *rctx,
1073 struct r600_query *rquery)
1074 {
1075 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1076
1077 if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
1078 assert(0);
1079 return false;
1080 }
1081
1082 if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
1083 r600_query_hw_reset_buffers(rctx, query);
1084
1085 r600_resource_reference(&query->workaround_buf, NULL);
1086
1087 r600_query_hw_emit_start(rctx, query);
1088 if (!query->buffer.buf)
1089 return false;
1090
1091 LIST_ADDTAIL(&query->list, &rctx->active_queries);
1092 return true;
1093 }
1094
1095 static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
1096 {
1097 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1098 struct r600_query *rquery = (struct r600_query *)query;
1099
1100 return rquery->ops->end(rctx, rquery);
1101 }
1102
1103 bool r600_query_hw_end(struct r600_common_context *rctx,
1104 struct r600_query *rquery)
1105 {
1106 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1107
1108 if (query->flags & R600_QUERY_HW_FLAG_NO_START)
1109 r600_query_hw_reset_buffers(rctx, query);
1110
1111 r600_query_hw_emit_stop(rctx, query);
1112
1113 if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
1114 LIST_DELINIT(&query->list);
1115
1116 if (!query->buffer.buf)
1117 return false;
1118
1119 return true;
1120 }
1121
1122 static void r600_get_hw_query_params(struct r600_common_context *rctx,
1123 struct r600_query_hw *rquery, int index,
1124 struct r600_hw_query_params *params)
1125 {
1126 unsigned max_rbs = rctx->screen->info.num_render_backends;
1127
1128 params->pair_stride = 0;
1129 params->pair_count = 1;
1130
1131 switch (rquery->b.type) {
1132 case PIPE_QUERY_OCCLUSION_COUNTER:
1133 case PIPE_QUERY_OCCLUSION_PREDICATE:
1134 params->start_offset = 0;
1135 params->end_offset = 8;
1136 params->fence_offset = max_rbs * 16;
1137 params->pair_stride = 16;
1138 params->pair_count = max_rbs;
1139 break;
1140 case PIPE_QUERY_TIME_ELAPSED:
1141 params->start_offset = 0;
1142 params->end_offset = 8;
1143 params->fence_offset = 16;
1144 break;
1145 case PIPE_QUERY_TIMESTAMP:
1146 params->start_offset = 0;
1147 params->end_offset = 0;
1148 params->fence_offset = 8;
1149 break;
1150 case PIPE_QUERY_PRIMITIVES_EMITTED:
1151 params->start_offset = 8;
1152 params->end_offset = 24;
1153 params->fence_offset = params->end_offset + 4;
1154 break;
1155 case PIPE_QUERY_PRIMITIVES_GENERATED:
1156 params->start_offset = 0;
1157 params->end_offset = 16;
1158 params->fence_offset = params->end_offset + 4;
1159 break;
1160 case PIPE_QUERY_SO_STATISTICS:
1161 params->start_offset = 8 - index * 8;
1162 params->end_offset = 24 - index * 8;
1163 params->fence_offset = params->end_offset + 4;
1164 break;
1165 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1166 params->pair_count = R600_MAX_STREAMS;
1167 params->pair_stride = 32;
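/* fall through */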
1168 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1169 params->start_offset = 0;
1170 params->end_offset = 16;
1171
1172 /* We can re-use the high dword of the last 64-bit value as a
1173 * fence: it is initialized as 0, and the high bit is set by
1174 * the write of the streamout stats event.
1175 */
1176 params->fence_offset = rquery->result_size - 4;
1177 break;
1178 case PIPE_QUERY_PIPELINE_STATISTICS:
1179 {
1180 /* Offsets apply to EG+ */
1181 static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
1182 params->start_offset = offsets[index];
1183 params->end_offset = 88 + offsets[index];
1184 params->fence_offset = 2 * 88;
1185 break;
1186 }
1187 default:
1188 unreachable("r600_get_hw_query_params unsupported");
1189 }
1190 }
1191
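/* Read a begin/end pair of 64-bit counters (start_index/end_index are
 * dword offsets into the mapped result slot) and return end - start.
 * When test_status_bit is set, bit 63 of both values must have been set
 * by the GPU for the result to count as valid; otherwise 0 is returned.
 */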
1192 static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
1193 bool test_status_bit)
1194 {
1195 uint32_t *current_result = (uint32_t*)map;
1196 uint64_t start, end;
1197
1198 start = (uint64_t)current_result[start_index] |
1199 (uint64_t)current_result[start_index+1] << 32;
1200 end = (uint64_t)current_result[end_index] |
1201 (uint64_t)current_result[end_index+1] << 32;
1202
1203 if (!test_status_bit ||
1204 ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
1205 return end - start;
1206 }
1207 return 0;
1208 }
1209
1210 static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
1211 struct r600_query_hw *query,
1212 void *buffer,
1213 union pipe_query_result *result)
1214 {
1215 unsigned max_rbs = rscreen->info.num_render_backends;
1216
1217 switch (query->b.type) {
1218 case PIPE_QUERY_OCCLUSION_COUNTER: {
1219 for (unsigned i = 0; i < max_rbs; ++i) {
1220 unsigned results_base = i * 16;
1221 result->u64 +=
1222 r600_query_read_result(buffer + results_base, 0, 2, true);
1223 }
1224 break;
1225 }
1226 case PIPE_QUERY_OCCLUSION_PREDICATE: {
1227 for (unsigned i = 0; i < max_rbs; ++i) {
1228 unsigned results_base = i * 16;
1229 result->b = result->b ||
1230 r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
1231 }
1232 break;
1233 }
1234 case PIPE_QUERY_TIME_ELAPSED:
1235 result->u64 += r600_query_read_result(buffer, 0, 2, false);
1236 break;
1237 case PIPE_QUERY_TIMESTAMP:
1238 result->u64 = *(uint64_t*)buffer;
1239 break;
1240 case PIPE_QUERY_PRIMITIVES_EMITTED:
1241 /* SAMPLE_STREAMOUTSTATS stores this structure:
1242 * {
1243 * u64 NumPrimitivesWritten;
1244 * u64 PrimitiveStorageNeeded;
1245 * }
1246 * We only need NumPrimitivesWritten here. */
1247 result->u64 += r600_query_read_result(buffer, 2, 6, true);
1248 break;
1249 case PIPE_QUERY_PRIMITIVES_GENERATED:
1250 /* Here we read PrimitiveStorageNeeded. */
1251 result->u64 += r600_query_read_result(buffer, 0, 4, true);
1252 break;
1253 case PIPE_QUERY_SO_STATISTICS:
1254 result->so_statistics.num_primitives_written +=
1255 r600_query_read_result(buffer, 2, 6, true);
1256 result->so_statistics.primitives_storage_needed +=
1257 r600_query_read_result(buffer, 0, 4, true);
1258 break;
1259 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1260 result->b = result->b ||
1261 r600_query_read_result(buffer, 2, 6, true) !=
1262 r600_query_read_result(buffer, 0, 4, true);
1263 break;
1264 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1265 for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
1266 result->b = result->b ||
1267 r600_query_read_result(buffer, 2, 6, true) !=
1268 r600_query_read_result(buffer, 0, 4, true);
1269 buffer = (char *)buffer + 32;
1270 }
1271 break;
1272 case PIPE_QUERY_PIPELINE_STATISTICS:
1273 if (rscreen->chip_class >= EVERGREEN) {
1274 result->pipeline_statistics.ps_invocations +=
1275 r600_query_read_result(buffer, 0, 22, false);
1276 result->pipeline_statistics.c_primitives +=
1277 r600_query_read_result(buffer, 2, 24, false);
1278 result->pipeline_statistics.c_invocations +=
1279 r600_query_read_result(buffer, 4, 26, false);
1280 result->pipeline_statistics.vs_invocations +=
1281 r600_query_read_result(buffer, 6, 28, false);
1282 result->pipeline_statistics.gs_invocations +=
1283 r600_query_read_result(buffer, 8, 30, false);
1284 result->pipeline_statistics.gs_primitives +=
1285 r600_query_read_result(buffer, 10, 32, false);
1286 result->pipeline_statistics.ia_primitives +=
1287 r600_query_read_result(buffer, 12, 34, false);
1288 result->pipeline_statistics.ia_vertices +=
1289 r600_query_read_result(buffer, 14, 36, false);
1290 result->pipeline_statistics.hs_invocations +=
1291 r600_query_read_result(buffer, 16, 38, false);
1292 result->pipeline_statistics.ds_invocations +=
1293 r600_query_read_result(buffer, 18, 40, false);
1294 result->pipeline_statistics.cs_invocations +=
1295 r600_query_read_result(buffer, 20, 42, false);
1296 } else {
1297 result->pipeline_statistics.ps_invocations +=
1298 r600_query_read_result(buffer, 0, 16, false);
1299 result->pipeline_statistics.c_primitives +=
1300 r600_query_read_result(buffer, 2, 18, false);
1301 result->pipeline_statistics.c_invocations +=
1302 r600_query_read_result(buffer, 4, 20, false);
1303 result->pipeline_statistics.vs_invocations +=
1304 r600_query_read_result(buffer, 6, 22, false);
1305 result->pipeline_statistics.gs_invocations +=
1306 r600_query_read_result(buffer, 8, 24, false);
1307 result->pipeline_statistics.gs_primitives +=
1308 r600_query_read_result(buffer, 10, 26, false);
1309 result->pipeline_statistics.ia_primitives +=
1310 r600_query_read_result(buffer, 12, 28, false);
1311 result->pipeline_statistics.ia_vertices +=
1312 r600_query_read_result(buffer, 14, 30, false);
1313 }
1314 #if 0 /* for testing */
1315 printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
1316 "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
1317 "Clipper prims=%llu, PS=%llu, CS=%llu\n",
1318 result->pipeline_statistics.ia_vertices,
1319 result->pipeline_statistics.ia_primitives,
1320 result->pipeline_statistics.vs_invocations,
1321 result->pipeline_statistics.hs_invocations,
1322 result->pipeline_statistics.ds_invocations,
1323 result->pipeline_statistics.gs_invocations,
1324 result->pipeline_statistics.gs_primitives,
1325 result->pipeline_statistics.c_invocations,
1326 result->pipeline_statistics.c_primitives,
1327 result->pipeline_statistics.ps_invocations,
1328 result->pipeline_statistics.cs_invocations);
1329 #endif
1330 break;
1331 default:
1332 assert(0);
1333 }
1334 }
1335
1336 static boolean r600_get_query_result(struct pipe_context *ctx,
1337 struct pipe_query *query, boolean wait,
1338 union pipe_query_result *result)
1339 {
1340 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1341 struct r600_query *rquery = (struct r600_query *)query;
1342
1343 return rquery->ops->get_result(rctx, rquery, wait, result);
1344 }
1345
1346 static void r600_get_query_result_resource(struct pipe_context *ctx,
1347 struct pipe_query *query,
1348 boolean wait,
1349 enum pipe_query_value_type result_type,
1350 int index,
1351 struct pipe_resource *resource,
1352 unsigned offset)
1353 {
1354 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1355 struct r600_query *rquery = (struct r600_query *)query;
1356
1357 rquery->ops->get_result_resource(rctx, rquery, wait, result_type, index,
1358 resource, offset);
1359 }
1360
1361 static void r600_query_hw_clear_result(struct r600_query_hw *query,
1362 union pipe_query_result *result)
1363 {
1364 util_query_clear_result(result, query->b.type);
1365 }
1366
1367 bool r600_query_hw_get_result(struct r600_common_context *rctx,
1368 struct r600_query *rquery,
1369 bool wait, union pipe_query_result *result)
1370 {
1371 struct r600_common_screen *rscreen = rctx->screen;
1372 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1373 struct r600_query_buffer *qbuf;
1374
1375 query->ops->clear_result(query, result);
1376
1377 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
1378 unsigned usage = PIPE_TRANSFER_READ |
1379 (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
1380 unsigned results_base = 0;
1381 void *map;
1382
1383 if (rquery->b.flushed)
1384 map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
1385 else
1386 map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
1387
1388 if (!map)
1389 return false;
1390
1391 while (results_base != qbuf->results_end) {
1392 query->ops->add_result(rscreen, query, map + results_base,
1393 result);
1394 results_base += query->result_size;
1395 }
1396 }
1397
1398 /* Convert the time to expected units. */
1399 if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
1400 rquery->type == PIPE_QUERY_TIMESTAMP) {
1401 result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq;
1402 }
1403 return true;
1404 }
1405
1406 /* Create the compute shader that is used to collect the results.
1407 *
1408 * One compute grid with a single thread is launched for every query result
1409 * buffer. The thread (optionally) reads a previous summary buffer, then
1410 * accumulates data from the query result buffer, and writes the result either
1411 * to a summary buffer to be consumed by the next grid invocation or to the
1412 * user-supplied buffer.
1413 *
1414 * Data layout:
1415 *
1416 * CONST
1417 * 0.x = end_offset
1418 * 0.y = result_stride
1419 * 0.z = result_count
1420 * 0.w = bit field:
1421 * 1: read previously accumulated values
1422 * 2: write accumulated values for chaining
1423 * 4: write result available
1424 * 8: convert result to boolean (0/1)
1425 * 16: only read one dword and use that as result
1426 * 32: apply timestamp conversion
1427 * 64: store full 64 bits result
1428 * 128: store signed 32 bits result
1429 * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs
1430 * 1.x = fence_offset
1431 * 1.y = pair_stride
1432 * 1.z = pair_count
1433 *
1434 * BUFFER[0] = query result buffer
1435 * BUFFER[1] = previous summary buffer
1436 * BUFFER[2] = next summary buffer or user-supplied buffer
1437 */
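/* For example, reading a 64-bit occlusion-predicate result (index >= 0)
 * sets config = 8 (booleanize) | 64 (store full 64 bits) = 0x48; see how
 * r600_query_hw_get_result_resource() builds the config bits below.
 */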
1438 static void r600_create_query_result_shader(struct r600_common_context *rctx)
1439 {
1440 /* TEMP[0].xy = accumulated result so far
1441 * TEMP[0].z = result not available
1442 *
1443 * TEMP[1].x = current result index
1444 * TEMP[1].y = current pair index
1445 */
1446 static const char text_tmpl[] =
1447 "COMP\n"
1448 "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
1449 "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
1450 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
1451 "DCL BUFFER[0]\n"
1452 "DCL BUFFER[1]\n"
1453 "DCL BUFFER[2]\n"
1454 "DCL CONST[0..1]\n"
1455 "DCL TEMP[0..5]\n"
1456 "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
1457 "IMM[1] UINT32 {1, 2, 4, 8}\n"
1458 "IMM[2] UINT32 {16, 32, 64, 128}\n"
1459 "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
1460 "IMM[4] UINT32 {256, 0, 0, 0}\n"
1461
1462 "AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
1463 "UIF TEMP[5]\n"
1464 /* Check result availability. */
1465 "LOAD TEMP[1].x, BUFFER[0], CONST[1].xxxx\n"
1466 "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
1467 "MOV TEMP[1], TEMP[0].zzzz\n"
1468 "NOT TEMP[0].z, TEMP[0].zzzz\n"
1469
1470 /* Load result if available. */
1471 "UIF TEMP[1]\n"
1472 "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
1473 "ENDIF\n"
1474 "ELSE\n"
1475 /* Load previously accumulated result if requested. */
1476 "MOV TEMP[0], IMM[0].xxxx\n"
1477 "AND TEMP[4], CONST[0].wwww, IMM[1].xxxx\n"
1478 "UIF TEMP[4]\n"
1479 "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
1480 "ENDIF\n"
1481
1482 "MOV TEMP[1].x, IMM[0].xxxx\n"
1483 "BGNLOOP\n"
1484 /* Break if accumulated result so far is not available. */
1485 "UIF TEMP[0].zzzz\n"
1486 "BRK\n"
1487 "ENDIF\n"
1488
1489 /* Break if result_index >= result_count. */
1490 "USGE TEMP[5], TEMP[1].xxxx, CONST[0].zzzz\n"
1491 "UIF TEMP[5]\n"
1492 "BRK\n"
1493 "ENDIF\n"
1494
1495 /* Load fence and check result availability */
1496 "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy, CONST[1].xxxx\n"
1497 "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
1498 "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
1499 "NOT TEMP[0].z, TEMP[0].zzzz\n"
1500 "UIF TEMP[0].zzzz\n"
1501 "BRK\n"
1502 "ENDIF\n"
1503
1504 "MOV TEMP[1].y, IMM[0].xxxx\n"
1505 "BGNLOOP\n"
1506 /* Load start and end. */
1507 "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy\n"
1508 "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[1].yyyy, TEMP[5].xxxx\n"
1509 "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
1510
1511 "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0].xxxx\n"
1512 "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
1513
1514 "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
1515
1516 "AND TEMP[5].z, CONST[0].wwww, IMM[4].xxxx\n"
1517 "UIF TEMP[5].zzzz\n"
1518 /* Load second start/end half-pair and
1519 * take the difference
1520 */
1521 "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
1522 "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
1523 "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
1524
1525 "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
1526 "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
1527 "ENDIF\n"
1528
1529 "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
1530
1531 /* Increment pair index */
1532 "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
1533 "USGE TEMP[5], TEMP[1].yyyy, CONST[1].zzzz\n"
1534 "UIF TEMP[5]\n"
1535 "BRK\n"
1536 "ENDIF\n"
1537 "ENDLOOP\n"
1538
1539 /* Increment result index */
1540 "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
1541 "ENDLOOP\n"
1542 "ENDIF\n"
1543
1544 "AND TEMP[4], CONST[0].wwww, IMM[1].yyyy\n"
1545 "UIF TEMP[4]\n"
1546 /* Store accumulated data for chaining. */
1547 "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
1548 "ELSE\n"
1549 "AND TEMP[4], CONST[0].wwww, IMM[1].zzzz\n"
1550 "UIF TEMP[4]\n"
1551 /* Store result availability. */
1552 "NOT TEMP[0].z, TEMP[0]\n"
1553 "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
1554 "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
1555
1556 "AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
1557 "UIF TEMP[4]\n"
1558 "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
1559 "ENDIF\n"
1560 "ELSE\n"
1561 /* Store result if it is available. */
1562 "NOT TEMP[4], TEMP[0].zzzz\n"
1563 "UIF TEMP[4]\n"
1564 /* Apply timestamp conversion */
1565 "AND TEMP[4], CONST[0].wwww, IMM[2].yyyy\n"
1566 "UIF TEMP[4]\n"
1567 "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
1568 "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
1569 "ENDIF\n"
1570
1571 /* Convert to boolean */
1572 "AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
1573 "UIF TEMP[4]\n"
1574 "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
1575 "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
1576 "MOV TEMP[0].y, IMM[0].xxxx\n"
1577 "ENDIF\n"
1578
1579 "AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
1580 "UIF TEMP[4]\n"
1581 "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
1582 "ELSE\n"
1583 /* Clamping */
1584 "UIF TEMP[0].yyyy\n"
1585 "MOV TEMP[0].x, IMM[0].wwww\n"
1586 "ENDIF\n"
1587
1588 "AND TEMP[4], CONST[0].wwww, IMM[2].wwww\n"
1589 "UIF TEMP[4]\n"
1590 "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
1591 "ENDIF\n"
1592
1593 "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
1594 "ENDIF\n"
1595 "ENDIF\n"
1596 "ENDIF\n"
1597 "ENDIF\n"
1598
1599 "END\n";
1600
1601 char text[sizeof(text_tmpl) + 32];
1602 struct tgsi_token tokens[1024];
1603 struct pipe_compute_state state = {};
1604
1605 /* Hard code the frequency into the shader so that the backend can
1606 * use the full range of optimizations for divide-by-constant.
1607 */
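/* The %u in IMM[3] becomes clock_crystal_freq (in kHz), so the shader
 * computes ticks * 1000000 / freq, i.e. nanoseconds, matching the CPU
 * conversion in r600_query_hw_get_result().
 */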
1608 snprintf(text, sizeof(text), text_tmpl,
1609 rctx->screen->info.clock_crystal_freq);
1610
1611 if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
1612 assert(false);
1613 return;
1614 }
1615
1616 state.ir_type = PIPE_SHADER_IR_TGSI;
1617 state.prog = tokens;
1618
1619 rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
1620 }
1621
1622 static void r600_restore_qbo_state(struct r600_common_context *rctx,
1623 struct r600_qbo_state *st)
1624 {
1625 rctx->b.bind_compute_state(&rctx->b, st->saved_compute);
1626
1627 rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
1628 pipe_resource_reference(&st->saved_const0.buffer, NULL);
1629
1630 rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
1631 for (unsigned i = 0; i < 3; ++i)
1632 pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
1633 }
1634
1635 static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
1636 struct r600_query *rquery,
1637 bool wait,
1638 enum pipe_query_value_type result_type,
1639 int index,
1640 struct pipe_resource *resource,
1641 unsigned offset)
1642 {
1643 struct r600_query_hw *query = (struct r600_query_hw *)rquery;
1644 struct r600_query_buffer *qbuf;
1645 struct r600_query_buffer *qbuf_prev;
1646 struct pipe_resource *tmp_buffer = NULL;
1647 unsigned tmp_buffer_offset = 0;
1648 struct r600_qbo_state saved_state = {};
1649 struct pipe_grid_info grid = {};
1650 struct pipe_constant_buffer constant_buffer = {};
1651 struct pipe_shader_buffer ssbo[3];
1652 struct r600_hw_query_params params;
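/* Field order must match the CONST[0..1] layout documented above
 * r600_create_query_result_shader(): CONST[0] = {end_offset,
 * result_stride, result_count, config}, CONST[1].xyz = {fence_offset,
 * pair_stride, pair_count}.
 */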
1653 struct {
1654 uint32_t end_offset;
1655 uint32_t result_stride;
1656 uint32_t result_count;
1657 uint32_t config;
1658 uint32_t fence_offset;
1659 uint32_t pair_stride;
1660 uint32_t pair_count;
1661 } consts;
1662
1663 if (!rctx->query_result_shader) {
1664 r600_create_query_result_shader(rctx);
1665 if (!rctx->query_result_shader)
1666 return;
1667 }
1668
1669 if (query->buffer.previous) {
1670 u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
1671 &tmp_buffer_offset, &tmp_buffer);
1672 if (!tmp_buffer)
1673 return;
1674 }
1675
1676 rctx->save_qbo_state(&rctx->b, &saved_state);
1677
1678 r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
1679 consts.end_offset = params.end_offset - params.start_offset;
1680 consts.fence_offset = params.fence_offset - params.start_offset;
1681 consts.result_stride = query->result_size;
1682 consts.pair_stride = params.pair_stride;
1683 consts.pair_count = params.pair_count;
1684
1685 constant_buffer.buffer_size = sizeof(consts);
1686 constant_buffer.user_buffer = &consts;
1687
1688 ssbo[1].buffer = tmp_buffer;
1689 ssbo[1].buffer_offset = tmp_buffer_offset;
1690 ssbo[1].buffer_size = 16;
1691
1692 ssbo[2] = ssbo[1];
1693
1694 rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);
1695
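/* A single 1x1x1 dispatch is enough: the shader itself loops over
 * result_count entries of the query buffer.
 */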
1696 grid.block[0] = 1;
1697 grid.block[1] = 1;
1698 grid.block[2] = 1;
1699 grid.grid[0] = 1;
1700 grid.grid[1] = 1;
1701 grid.grid[2] = 1;
1702
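/* config is a bit field interpreted by the result shader:
 *   1: read the previously accumulated value (chained buffers)
 *   2: write the accumulated value for the next pass
 *   4: accumulate result availability instead of the value (index < 0)
 *   8: convert the result to a 0/1 boolean (predicates)
 *  16: read a single value only (timestamps)
 *  32: apply the timestamp conversion
 *  64: store a full 64-bit result
 * 128: store a signed 32-bit result
 * 256: stream-overflow handling
 */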
1703 consts.config = 0;
1704 if (index < 0)
1705 consts.config |= 4;
1706 if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE)
1707 consts.config |= 8;
1708 else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
1709 query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
1710 consts.config |= 8 | 256;
1711 else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
1712 query->b.type == PIPE_QUERY_TIME_ELAPSED)
1713 consts.config |= 32;
1714
1715 switch (result_type) {
1716 case PIPE_QUERY_TYPE_U64:
1717 case PIPE_QUERY_TYPE_I64:
1718 consts.config |= 64;
1719 break;
1720 case PIPE_QUERY_TYPE_I32:
1721 consts.config |= 128;
1722 break;
1723 case PIPE_QUERY_TYPE_U32:
1724 break;
1725 }
1726
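/* The query results were written by the CP; make them visible to the
 * compute shader, which reads them through the TC L2 cache.
 */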
1727 rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;
1728
1729 for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
1730 if (query->b.type != PIPE_QUERY_TIMESTAMP) {
1731 qbuf_prev = qbuf->previous;
1732 consts.result_count = qbuf->results_end / query->result_size;
1733 consts.config &= ~3;
1734 if (qbuf != &query->buffer)
1735 consts.config |= 1;
1736 if (qbuf->previous)
1737 consts.config |= 2;
1738 } else {
1739 /* Only read the last timestamp. */
1740 qbuf_prev = NULL;
1741 consts.result_count = 0;
1742 consts.config |= 16;
1743 params.start_offset += qbuf->results_end - query->result_size;
1744 }
1745
1746 rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
1747
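/* ssbo[0]: the raw results of this query buffer, starting at the
 * counter pair selected by the result index.
 */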
1748 ssbo[0].buffer = &qbuf->buf->b.b;
1749 ssbo[0].buffer_offset = params.start_offset;
1750 ssbo[0].buffer_size = qbuf->results_end - params.start_offset;
1751
1752 if (!qbuf->previous) {
1753 ssbo[2].buffer = resource;
1754 ssbo[2].buffer_offset = offset;
1755 ssbo[2].buffer_size = 8;
1756
1757 ((struct r600_resource *)resource)->TC_L2_dirty = true;
1758 }
1759
1760 rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
1761
1762 if (wait && qbuf == &query->buffer) {
1763 uint64_t va;
1764
1765 /* Wait for result availability. Wait only for readiness
1766 * of the last entry, since the fence writes should be
1767 * serialized in the CP.
1768 */
1769 va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
1770 va += params.fence_offset;
1771
1772 r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
1773 }
1774
1775 rctx->b.launch_grid(&rctx->b, &grid);
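/* The result is now produced through the shader path; request the
 * barrier needed before other blocks consume it.
 */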
1776 rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
1777 }
1778
1779 r600_restore_qbo_state(rctx, &saved_state);
1780 pipe_resource_reference(&tmp_buffer, NULL);
1781 }
1782
1783 static void r600_render_condition(struct pipe_context *ctx,
1784 struct pipe_query *query,
1785 boolean condition,
1786 enum pipe_render_cond_flag mode)
1787 {
1788 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
1789 struct r600_query_hw *rquery = (struct r600_query_hw *)query;
1790 struct r600_query_buffer *qbuf;
1791 struct r600_atom *atom = &rctx->render_cond_atom;
1792
1793 /* Compute the size of SET_PREDICATION packets. */
1794 atom->num_dw = 0;
1795 if (query) {
1796 bool needs_workaround = false;
1797
1798 /* There is a firmware regression in VI which causes successive
1799 * SET_PREDICATION packets to give the wrong answer for
1800 * non-inverted stream overflow predication.
1801 */
1802 if (rctx->chip_class >= VI && !condition &&
1803 (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
1804 (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
1805 (rquery->buffer.previous ||
1806 rquery->buffer.results_end > rquery->result_size)))) {
1807 needs_workaround = true;
1808 }
1809
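/* Work around it by evaluating the predicate once on the compute path
 * (via get_query_result_resource) into a small buffer and emitting a
 * single SET_PREDICATION packet on that, instead of chaining packets
 * over the raw per-buffer results.
 */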
1810 if (needs_workaround && !rquery->workaround_buf) {
1811 bool old_force_off = rctx->render_cond_force_off;
1812 rctx->render_cond_force_off = true;
1813
1814 u_suballocator_alloc(
1815 rctx->allocator_zeroed_memory, 8, 8,
1816 &rquery->workaround_offset,
1817 (struct pipe_resource **)&rquery->workaround_buf);
1818
1819 /* Reset to NULL so that launching the compute grid below
1820 * does not emit a redundant SET_PREDICATION packet.
1821 */
1822 rctx->render_cond = NULL;
1823
1824 ctx->get_query_result_resource(
1825 ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
1826 &rquery->workaround_buf->b.b, rquery->workaround_offset);
1827
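/* A single SET_PREDICATION packet on the 64-bit workaround result. */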
1828 atom->num_dw = 5;
1829
1830 rctx->render_cond_force_off = old_force_off;
1831 } else {
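/* One SET_PREDICATION packet (5 dwords) per stored result pair,
 * and one set of packets per stream for SO_OVERFLOW_ANY.
 */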
1832 for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
1833 atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
1834
1835 if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
1836 atom->num_dw *= R600_MAX_STREAMS;
1837 }
1838 }
1839
1840 rctx->render_cond = query;
1841 rctx->render_cond_invert = condition;
1842 rctx->render_cond_mode = mode;
1843
1844 rctx->set_atom_dirty(rctx, atom, query != NULL);
1845 }
1846
1847 void r600_suspend_queries(struct r600_common_context *ctx)
1848 {
1849 struct r600_query_hw *query;
1850
1851 LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
1852 r600_query_hw_emit_stop(ctx, query);
1853 }
1854 assert(ctx->num_cs_dw_queries_suspend == 0);
1855 }
1856
1857 static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
1858 struct list_head *query_list)
1859 {
1860 struct r600_query_hw *query;
1861 unsigned num_dw = 0;
1862
1863 LIST_FOR_EACH_ENTRY(query, query_list, list) {
1864 /* begin + end */
1865 num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
1866
1867 /* Workaround for the fact that
1868 * num_cs_dw_queries_suspend is incremented for every
1869 * resumed query, which raises the bar in need_cs_space for
1870 * queries about to be resumed.
1871 */
1872 num_dw += query->num_cs_dw_end;
1873 }
1874 /* primitives generated query */
1875 num_dw += ctx->streamout.enable_atom.num_dw;
1876 /* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
1877 num_dw += 13;
1878
1879 return num_dw;
1880 }
1881
1882 void r600_resume_queries(struct r600_common_context *ctx)
1883 {
1884 struct r600_query_hw *query;
1885 unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
1886
1887 assert(ctx->num_cs_dw_queries_suspend == 0);
1888
1889 /* Check CS space here. Resuming must not be interrupted by flushes. */
1890 ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true);
1891
1892 LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
1893 r600_query_hw_emit_start(ctx, query);
1894 }
1895 }
1896
1897 /* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
1898 void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
1899 {
1900 struct r600_common_context *ctx =
1901 (struct r600_common_context*)rscreen->aux_context;
1902 struct radeon_winsys_cs *cs = ctx->gfx.cs;
1903 struct r600_resource *buffer;
1904 uint32_t *results;
1905 unsigned i, mask = 0;
1906 unsigned max_rbs = ctx->screen->info.num_render_backends;
1907
1908 assert(rscreen->chip_class <= CAYMAN);
1909
1910 /* If the kernel reports a valid backend_map, decode it directly. */
1911 if (rscreen->info.r600_gb_backend_map_valid) {
1912 unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
1913 unsigned backend_map = rscreen->info.r600_gb_backend_map;
1914 unsigned item_width, item_mask;
1915
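/* backend_map packs one field per tile pipe: 4 bits wide (3-bit backend
 * index) on Evergreen and later, 2 bits wide before that. Set the mask
 * bit of every backend that appears.
 */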
1916 if (ctx->chip_class >= EVERGREEN) {
1917 item_width = 4;
1918 item_mask = 0x7;
1919 } else {
1920 item_width = 2;
1921 item_mask = 0x3;
1922 }
1923
1924 while (num_tile_pipes--) {
1925 i = backend_map & item_mask;
1926 mask |= (1<<i);
1927 backend_map >>= item_width;
1928 }
1929 if (mask != 0) {
1930 rscreen->info.enabled_rb_mask = mask;
1931 return;
1932 }
1933 }
1934
1935 /* Otherwise, fall back to measuring the mask with a ZPASS_DONE event on older kernels. */
1936
1937 /* create buffer for event data */
1938 buffer = (struct r600_resource*)
1939 pipe_buffer_create(ctx->b.screen, 0,
1940 PIPE_USAGE_STAGING, max_rbs * 16);
1941 if (!buffer)
1942 return;
1943
1944 /* initialize buffer with zeroes */
1945 results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
1946 if (results) {
1947 memset(results, 0, max_rbs * 4 * 4);
1948
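/* ZPASS_DONE makes every enabled backend write its 16-byte slot at
 * buffer + rb * 16; disabled backends leave their slot zero-initialized,
 * which is what the readback below keys on.
 */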
1949 /* emit EVENT_WRITE for ZPASS_DONE */
1950 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
1951 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
1952 radeon_emit(cs, buffer->gpu_address);
1953 radeon_emit(cs, buffer->gpu_address >> 32);
1954
1955 r600_emit_reloc(ctx, &ctx->gfx, buffer,
1956 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
1957
1958 /* analyze results */
1959 results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
1960 if (results) {
1961 for (i = 0; i < max_rbs; i++) {
1962 /* At least the highest bit will be set if the backend is used. */
1963 if (results[i*4 + 1])
1964 mask |= (1<<i);
1965 }
1966 }
1967 }
1968
1969 r600_resource_reference(&buffer, NULL);
1970
1971 if (mask)
1972 rscreen->info.enabled_rb_mask = mask;
1973 }
1974
1975 #define XFULL(name_, query_type_, type_, result_type_, group_id_) \
1976 { \
1977 .name = name_, \
1978 .query_type = R600_QUERY_##query_type_, \
1979 .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
1980 .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
1981 .group_id = group_id_ \
1982 }
1983
1984 #define X(name_, query_type_, type_, result_type_) \
1985 XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
1986
1987 #define XG(group_, name_, query_type_, type_, result_type_) \
1988 XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
1989
1990 static struct pipe_driver_query_info r600_driver_query_list[] = {
1991 X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
1992 X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
1993 X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
1994 X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
1995 X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
1996 X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
1997 X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
1998 X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
1999 X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
2000 X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
2001 X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
2002 X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
2003 X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
2004 X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
2005 X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
2006 X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
2007 X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
2008 X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
2009 X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
2010 X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
2011 X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
2012 X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
2013 X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
2014 X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
2015 X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
2016 X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
2017 X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
2018 X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
2019 X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
2020 X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
2021 X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
2022 X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
2023 X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
2024 X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
2025 X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
2026 X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
2027 X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
2028 X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
2029 X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
2030 X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
2031
2032 /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
2033 * which use them as a fallback path to detect the GPU type.
2034 *
2035 * Note: The names of these queries are significant for GPUPerfStudio
2036 * (and possibly their order as well). */
2037 XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
2038 XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
2039 XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
2040 XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
2041 XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
2042
2043 X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
2044 X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
2045 X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
2046
2047 /* The following queries must be at the end of the list because their
2048 * availability is adjusted dynamically based on the DRM version. */
2049 X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
2050 X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
2051 X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
2052 X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
2053 X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
2054 X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
2055 X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
2056 X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
2057 X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
2058 X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
2059 X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
2060 X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
2061 X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
2062 X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
2063 X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
2064 X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
2065 X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
2066 X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
2067 X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
2068 X("GPU-dma-busy", GPU_DMA_BUSY, UINT64, AVERAGE),
2069 X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
2070 X("GPU-ce-busy", GPU_CE_BUSY, UINT64, AVERAGE),
2071 };
2072
2073 #undef X
2074 #undef XG
2075 #undef XFULL
2076
2077 static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
2078 {
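/* Trim the tail of r600_driver_query_list on configurations that cannot
 * report it: radeon needs DRM >= 2.42 for the last 25 entries, and on
 * amdgpu the last 7 entries are only exposed for VI and newer.
 */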
2079 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
2080 return ARRAY_SIZE(r600_driver_query_list);
2081 else if (rscreen->info.drm_major == 3) {
2082 if (rscreen->chip_class >= VI)
2083 return ARRAY_SIZE(r600_driver_query_list);
2084 else
2085 return ARRAY_SIZE(r600_driver_query_list) - 7;
2086 }
2087 else
2088 return ARRAY_SIZE(r600_driver_query_list) - 25;
2089 }
2090
2091 static int r600_get_driver_query_info(struct pipe_screen *screen,
2092 unsigned index,
2093 struct pipe_driver_query_info *info)
2094 {
2095 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
2096 unsigned num_queries = r600_get_num_queries(rscreen);
2097
2098 if (!info) {
2099 unsigned num_perfcounters =
2100 r600_get_perfcounter_info(rscreen, 0, NULL);
2101
2102 return num_queries + num_perfcounters;
2103 }
2104
2105 if (index >= num_queries)
2106 return r600_get_perfcounter_info(rscreen, index - num_queries, info);
2107
2108 *info = r600_driver_query_list[index];
2109
2110 switch (info->query_type) {
2111 case R600_QUERY_REQUESTED_VRAM:
2112 case R600_QUERY_VRAM_USAGE:
2113 case R600_QUERY_MAPPED_VRAM:
2114 info->max_value.u64 = rscreen->info.vram_size;
2115 break;
2116 case R600_QUERY_REQUESTED_GTT:
2117 case R600_QUERY_GTT_USAGE:
2118 case R600_QUERY_MAPPED_GTT:
2119 info->max_value.u64 = rscreen->info.gart_size;
2120 break;
2121 case R600_QUERY_GPU_TEMPERATURE:
2122 info->max_value.u64 = 125;
2123 break;
2124 case R600_QUERY_VRAM_VIS_USAGE:
2125 info->max_value.u64 = rscreen->info.vram_vis_size;
2126 break;
2127 }
2128
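/* Driver query groups are listed after the perfcounter groups
 * (see r600_get_driver_query_group_info), so shift the group id.
 */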
2129 if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
2130 info->group_id += rscreen->perfcounters->num_groups;
2131
2132 return 1;
2133 }
2134
2135 /* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
2136 * performance counter groups, so be careful when changing this and related
2137 * functions.
2138 */
2139 static int r600_get_driver_query_group_info(struct pipe_screen *screen,
2140 unsigned index,
2141 struct pipe_driver_query_group_info *info)
2142 {
2143 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
2144 unsigned num_pc_groups = 0;
2145
2146 if (rscreen->perfcounters)
2147 num_pc_groups = rscreen->perfcounters->num_groups;
2148
2149 if (!info)
2150 return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
2151
2152 if (index < num_pc_groups)
2153 return r600_get_perfcounter_group_info(rscreen, index, info);
2154
2155 index -= num_pc_groups;
2156 if (index >= R600_NUM_SW_QUERY_GROUPS)
2157 return 0;
2158
2159 info->name = "GPIN";
2160 info->max_active_queries = 5;
2161 info->num_queries = 5;
2162 return 1;
2163 }
2164
2165 void r600_query_init(struct r600_common_context *rctx)
2166 {
2167 rctx->b.create_query = r600_create_query;
2168 rctx->b.create_batch_query = r600_create_batch_query;
2169 rctx->b.destroy_query = r600_destroy_query;
2170 rctx->b.begin_query = r600_begin_query;
2171 rctx->b.end_query = r600_end_query;
2172 rctx->b.get_query_result = r600_get_query_result;
2173 rctx->b.get_query_result_resource = r600_get_query_result_resource;
2174 rctx->render_cond_atom.emit = r600_emit_query_predication;
2175
2176 if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
2177 rctx->b.render_condition = r600_render_condition;
2178
2179 LIST_INITHEAD(&rctx->active_queries);
2180 }
2181
2182 void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
2183 {
2184 rscreen->b.get_driver_query_info = r600_get_driver_query_info;
2185 rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
2186 }