/*
 * Copyright © 2016 Red Hat Inc.
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <assert.h>
#include <string.h>

#include "radv_private.h"
#include "radv_cs.h"
#include "sid.h"
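/*
 * Pool layout, as set up by radv_CreateQueryPool() below: a single BO holding
 * queryCount result slots of pool->stride bytes each, followed by one 32-bit
 * availability word per query starting at pool->availability_offset:
 *
 *   [slot 0][slot 1]...[slot N-1][avail 0][avail 1]...[avail N-1]
 */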
static unsigned get_max_db(struct radv_device *device)
{
	unsigned num_db = device->physical_device->rad_info.num_render_backends;
	MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;

	if (device->physical_device->rad_info.chip_class == SI)
		num_db = 8;
	else
		num_db = MAX2(8, num_db);

	/* Otherwise we need to change the query reset procedure */
	assert(rb_mask == ((1ull << num_db) - 1));

	return num_db;
}
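/*
 * Occlusion result slots hold one 16-byte entry per DB returned by
 * get_max_db(): a 64-bit begin count and a 64-bit end count, each written by
 * a ZPASS_DONE event, with bit 63 presumably serving as the "has been
 * written" flag that radv_GetQueryPoolResults() checks. The extra 16 bytes
 * at the end of the slot are scratch for the CP-accumulated result (see
 * radv_CmdEndQuery()).
 */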
VkResult radv_CreateQueryPool(
	VkDevice                                    _device,
	const VkQueryPoolCreateInfo*                pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkQueryPool*                                pQueryPool)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	uint64_t size;
	struct radv_query_pool *pool = vk_alloc2(&device->alloc, pAllocator,
						 sizeof(*pool), 8,
						 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!pool)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	switch(pCreateInfo->queryType) {
	case VK_QUERY_TYPE_OCCLUSION:
		/* 16 bytes tmp. buffer as the compute packet writes 64 bits, but
		 * the app. may have 32 bits of space. */
		pool->stride = 16 * get_max_db(device) + 16;
		break;
	case VK_QUERY_TYPE_PIPELINE_STATISTICS:
		pool->stride = 16 * 11;
		break;
	case VK_QUERY_TYPE_TIMESTAMP:
		pool->stride = 8;
		break;
	default:
		unreachable("creating unhandled query type");
	}

	pool->type = pCreateInfo->queryType;
	pool->availability_offset = pool->stride * pCreateInfo->queryCount;
	size = pool->availability_offset + 4 * pCreateInfo->queryCount;

	pool->bo = device->ws->buffer_create(device->ws, size,
					     64, RADEON_DOMAIN_GTT, 0);

	if (!pool->bo) {
		vk_free2(&device->alloc, pAllocator, pool);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}

	pool->ptr = device->ws->buffer_map(pool->bo);

	if (!pool->ptr) {
		device->ws->buffer_destroy(pool->bo);
		vk_free2(&device->alloc, pAllocator, pool);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}
	memset(pool->ptr, 0, size);

	*pQueryPool = radv_query_pool_to_handle(pool);
	return VK_SUCCESS;
}
void radv_DestroyQueryPool(
	VkDevice                                    _device,
	VkQueryPool                                 _pool,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_query_pool, pool, _pool);

	if (!pool)
		return;

	device->ws->buffer_destroy(pool->bo);
	vk_free2(&device->alloc, pAllocator, pool);
}
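/*
 * Host-side result fetch. The pool BO is persistently mapped, so the
 * availability words and results are read straight from memory; with
 * VK_QUERY_RESULT_WAIT_BIT this degenerates to busy-waiting on volatile
 * loads until the GPU lands its writes.
 */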
VkResult radv_GetQueryPoolResults(
	VkDevice                                    _device,
	VkQueryPool                                 queryPool,
	uint32_t                                    firstQuery,
	uint32_t                                    queryCount,
	size_t                                      dataSize,
	void*                                       pData,
	VkDeviceSize                                stride,
	VkQueryResultFlags                          flags)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	char *data = pData;
	VkResult result = VK_SUCCESS;

	for(unsigned i = 0; i < queryCount; ++i, data += stride) {
		char *dest = data;
		unsigned query = firstQuery + i;
		char *src = pool->ptr + query * pool->stride;
		uint32_t available;

		switch (pool->type) {
		case VK_QUERY_TYPE_TIMESTAMP: {
			if (flags & VK_QUERY_RESULT_WAIT_BIT) {
				while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
					;
			}

			available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
			if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
				result = VK_NOT_READY;
				break;
			}

			if (flags & VK_QUERY_RESULT_64_BIT) {
				*(uint64_t*)dest = *(uint64_t*)src;
				dest += 8;
			} else {
				*(uint32_t*)dest = *(uint32_t*)src;
				dest += 4;
			}
			break;
		}
		case VK_QUERY_TYPE_OCCLUSION: {
			volatile uint64_t const *src64 = (volatile uint64_t const *)src;
			/* Total passed-sample count across all DBs; do not name
			 * this "result", which would shadow the VkResult above. */
			uint64_t sample_count = 0;
			int db_count = get_max_db(device);
			available = 1;

			for (int i = 0; i < db_count; ++i) {
				uint64_t start, end;

				do {
					start = src64[2 * i];
					end = src64[2 * i + 1];
				} while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));

				if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
					available = 0;
				else
					sample_count += end - start;
			}

			if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
				result = VK_NOT_READY;
				break;
			}

			if (flags & VK_QUERY_RESULT_64_BIT) {
				*(uint64_t*)dest = sample_count;
				dest += 8;
			} else {
				*(uint32_t*)dest = sample_count;
				dest += 4;
			}
			break;
		}
		default:
			unreachable("trying to get results of unhandled query type");
		}

		if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
			*(uint32_t*)dest = available;
			dest += 4;
		}
	}

	return result;
}
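/*
 * GPU-side counterpart of radv_GetQueryPoolResults(): the result is copied
 * into dstBuffer with CP COPY_DATA packets executed on the ME, and WAIT_BIT
 * becomes a WAIT_REG_MEM poll on the availability word in GPU memory rather
 * than a CPU spin.
 */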
void radv_CmdCopyQueryPoolResults(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    firstQuery,
	uint32_t                                    queryCount,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                stride,
	VkQueryResultFlags                          flags)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
	dest_va += dst_buffer->offset + dstOffset;

	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

	for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
		unsigned query = firstQuery + i;
		uint64_t local_src_va = va + query * pool->stride;
		unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;

		MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);

		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
			/* TODO: not sure if there is any case where we won't always be ready yet */
			uint64_t avail_va = va + pool->availability_offset + 4 * query;

			/* This waits on the ME. All copies below are done on the ME */
			radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
			radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
			radeon_emit(cs, avail_va);
			radeon_emit(cs, avail_va >> 32);
			radeon_emit(cs, 1); /* reference value */
			radeon_emit(cs, 0xffffffff); /* mask */
			radeon_emit(cs, 4); /* poll interval */
		}

		switch (pool->type) {
		case VK_QUERY_TYPE_OCCLUSION:
			/* Point at the CP-summed result in the scratch area at
			 * the end of the slot, then use the common copy below. */
			local_src_va += pool->stride - 16;
			/* fall through */
		case VK_QUERY_TYPE_TIMESTAMP:
			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
					COPY_DATA_DST_SEL(COPY_DATA_MEM) |
					((flags & VK_QUERY_RESULT_64_BIT) ? COPY_DATA_COUNT_SEL : 0));
			radeon_emit(cs, local_src_va);
			radeon_emit(cs, local_src_va >> 32);
			radeon_emit(cs, dest_va);
			radeon_emit(cs, dest_va >> 32);
			break;
		default:
			unreachable("trying to get results of unhandled query type");
		}

		/* The flag could still be changed while the data copy is busy and we
		 * then might have invalid data, but a ready flag. However, the availability
		 * writes happen on the ME too, so they should be synchronized. Might need to
		 * revisit this with multiple queues.
		 */
		if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
			uint64_t avail_va = va + pool->availability_offset + 4 * query;
			uint64_t avail_dest_va = dest_va;
			if (pool->type != VK_QUERY_TYPE_PIPELINE_STATISTICS)
				avail_dest_va += elem_size;

			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
					COPY_DATA_DST_SEL(COPY_DATA_MEM));
			radeon_emit(cs, avail_va);
			radeon_emit(cs, avail_va >> 32);
			radeon_emit(cs, avail_dest_va);
			radeon_emit(cs, avail_dest_va >> 32);
		}

		assert(cs->cdw <= cdw_max);
	}
}
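/*
 * A reset is a plain CP DMA zero-fill of the slots and availability words.
 * Zeroing clears the bit-63 "written" flags of the occlusion pairs, which
 * presumably only works if every render backend writes its pair again; that
 * appears to be what the rb_mask assertion in get_max_db() guards against.
 */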
void radv_CmdResetQueryPool(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    firstQuery,
	uint32_t                                    queryCount)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);

	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);

	si_cp_dma_clear_buffer(cmd_buffer, va + firstQuery * pool->stride,
			       queryCount * pool->stride, 0);
	si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4,
			       queryCount * 4, 0);
}
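/*
 * Begin/end occlusion queries bracket the workload with two ZPASS_DONE
 * snapshots: the begin event targets the first qword of each per-DB pair in
 * the slot, the end event (at va + 8, see radv_CmdEndQuery()) the second.
 * The per-DB difference is the passed-sample count.
 */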
void radv_CmdBeginQuery(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    query,
	VkQueryControlFlags                         flags)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	va += pool->stride * query;

	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);

	switch (pool->type) {
	case VK_QUERY_TYPE_OCCLUSION:
		radeon_check_space(cmd_buffer->device->ws, cs, 7);

		++cmd_buffer->state.active_occlusion_queries;
		if (cmd_buffer->state.active_occlusion_queries == 1)
			radv_set_db_count_control(cmd_buffer);

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		break;
	default:
		unreachable("beginning unhandled query type");
	}
}
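/*
 * Ends the query with the second ZPASS_DONE snapshot, then (on primary
 * command buffers only) asks the CP via PKT3_OCCLUSION_QUERY to sum the
 * per-DB pairs into the scratch area at the end of the slot, and finally
 * marks the query available with a WRITE_DATA on the ME.
 */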
void radv_CmdEndQuery(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    query)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	uint64_t avail_va = va + pool->availability_offset + 4 * query;
	va += pool->stride * query;

	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);

	switch (pool->type) {
	case VK_QUERY_TYPE_OCCLUSION:
		radeon_check_space(cmd_buffer->device->ws, cs, 14);

		cmd_buffer->state.active_occlusion_queries--;
		if (cmd_buffer->state.active_occlusion_queries == 0)
			radv_set_db_count_control(cmd_buffer);

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va + 8);
		radeon_emit(cs, (va + 8) >> 32);

		/* hangs for VK_COMMAND_BUFFER_LEVEL_SECONDARY. */
		if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
			radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
			radeon_emit(cs, va);
			radeon_emit(cs, va >> 32);
			radeon_emit(cs, va + pool->stride - 16);
			radeon_emit(cs, (va + pool->stride - 16) >> 32);
		}
		break;
	default:
		unreachable("ending unhandled query type");
	}

	radeon_check_space(cmd_buffer->device->ws, cs, 5);

	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
			S_370_WR_CONFIRM(1) |
			S_370_ENGINE_SEL(V_370_ME));
	radeon_emit(cs, avail_va);
	radeon_emit(cs, avail_va >> 32);
	radeon_emit(cs, 1); /* availability value */
}
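/*
 * Timestamps use a bottom-of-pipe fence event: RELEASE_MEM on compute rings
 * (MEC), EVENT_WRITE_EOP on gfx. In both packets the 3 << 29 field is
 * data_sel = 3, which makes the CP write the 64-bit GPU clock to query_va.
 */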
void radv_CmdWriteTimestamp(
	VkCommandBuffer                             commandBuffer,
	VkPipelineStageFlagBits                     pipelineStage,
	VkQueryPool                                 queryPool,
	uint32_t                                    query)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	uint64_t avail_va = va + pool->availability_offset + 4 * query;
	uint64_t query_va = va + pool->stride * query;

	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);

	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);

	if (mec) {
		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
		radeon_emit(cs, 3 << 29); /* data_sel: write GPU clock */
		radeon_emit(cs, query_va);
		radeon_emit(cs, query_va >> 32);
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
	} else {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
		radeon_emit(cs, query_va);
		radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
	}

	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
	radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
			S_370_WR_CONFIRM(1) |
			S_370_ENGINE_SEL(V_370_ME));
	radeon_emit(cs, avail_va);
	radeon_emit(cs, avail_va >> 32);
	radeon_emit(cs, 1); /* availability value */

	assert(cmd_buffer->cs->cdw <= cdw_max);
}