/*
 * Copyright © 2016 Red Hat Inc.
 * Based on anv:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <string.h>

#include "radv_private.h"
#include "radv_cs.h"
#include "sid.h"
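
/* A query pool BO holds all query slots back to back, followed by one
 * 32-bit availability word per query:
 *
 *   [query 0 slot][query 1 slot]...   (queryCount * pool->stride bytes)
 *   [avail 0][avail 1]...             (queryCount * 4 bytes, starting at
 *                                      pool->availability_offset)
 *
 * An availability word is written as 1 by the GPU once the corresponding
 * result is valid; the host and the result-copy paths both test it.
 */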

static unsigned get_max_db(struct radv_device *device)
{
	unsigned num_db = device->instance->physicalDevice.rad_info.num_render_backends;
	unsigned rb_mask = device->instance->physicalDevice.rad_info.enabled_rb_mask;

	if (device->instance->physicalDevice.rad_info.chip_class == SI)
		num_db = 8;
	else
		num_db = MAX2(8, num_db);

	/* Otherwise we need to change the query reset procedure */
	assert(rb_mask == ((1ull << num_db) - 1));

	return num_db;
}
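
/* Per-type slot layout (see radv_CreateQueryPool below):
 *
 *   occlusion:  one 16-byte begin/end Z-pass counter pair per DB, plus
 *               16 bytes at the end of the slot holding the resolved
 *               result (written by the OCCLUSION_QUERY packet on end).
 *   pipeline statistics: 11 counters, a 16-byte begin/end pair each.
 *   timestamp:  a single 64-bit GPU clock value.
 */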

VkResult radv_CreateQueryPool(
	VkDevice                                    _device,
	const VkQueryPoolCreateInfo*                pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkQueryPool*                                pQueryPool)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	uint64_t size;
	struct radv_query_pool *pool = vk_alloc2(&device->alloc, pAllocator,
						 sizeof(*pool), 8,
						 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!pool)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	switch(pCreateInfo->queryType) {
	case VK_QUERY_TYPE_OCCLUSION:
		/* 16 bytes tmp. buffer as the compute packet writes 64 bits, but
		 * the app. may have 32 bits of space. */
		pool->stride = 16 * get_max_db(device) + 16;
		break;
	case VK_QUERY_TYPE_PIPELINE_STATISTICS:
		pool->stride = 16 * 11;
		break;
	case VK_QUERY_TYPE_TIMESTAMP:
		pool->stride = 8;
		break;
	default:
		unreachable("creating unhandled query type");
	}

	pool->type = pCreateInfo->queryType;
	pool->availability_offset = pool->stride * pCreateInfo->queryCount;
	size = pool->availability_offset + 4 * pCreateInfo->queryCount;

	pool->bo = device->ws->buffer_create(device->ws, size,
					     64, RADEON_DOMAIN_GTT, 0);

	if (!pool->bo) {
		vk_free2(&device->alloc, pAllocator, pool);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}

	pool->ptr = device->ws->buffer_map(pool->bo);

	if (!pool->ptr) {
		device->ws->buffer_destroy(pool->bo);
		vk_free2(&device->alloc, pAllocator, pool);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}
	memset(pool->ptr, 0, size);

	*pQueryPool = radv_query_pool_to_handle(pool);
	return VK_SUCCESS;
}

void radv_DestroyQueryPool(
	VkDevice                                    _device,
	VkQueryPool                                 _pool,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_query_pool, pool, _pool);

	if (!pool)
		return;

	device->ws->buffer_destroy(pool->bo);
	vk_free2(&device->alloc, pAllocator, pool);
}
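
/* CPU-side result fetch. With VK_QUERY_RESULT_WAIT_BIT this spins on the
 * availability word written by the GPU, so the pool BO has to be
 * host-visible; it is allocated in GTT and persistently mapped above.
 */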

VkResult radv_GetQueryPoolResults(
	VkDevice                                    _device,
	VkQueryPool                                 queryPool,
	uint32_t                                    firstQuery,
	uint32_t                                    queryCount,
	size_t                                      dataSize,
	void*                                       pData,
	VkDeviceSize                                stride,
	VkQueryResultFlags                          flags)
{
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	char *data = pData;
	VkResult result = VK_SUCCESS;

	for(unsigned i = 0; i < queryCount; ++i, data += stride) {
		char *dest = data;
		unsigned query = firstQuery + i;
		char *src = pool->ptr + query * pool->stride;
		uint32_t available;

		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
			/* Busy-wait until the GPU flags the query as available. */
			while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
				;
		}

		if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) &&
		    !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
			if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
				*(uint32_t*)dest = 0;
			result = VK_NOT_READY;
			continue;
		}

		available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
		switch (pool->type) {
		case VK_QUERY_TYPE_TIMESTAMP:
			if (flags & VK_QUERY_RESULT_64_BIT) {
				*(uint64_t*)dest = *(uint64_t*)src;
				dest += 8;
			} else {
				*(uint32_t*)dest = *(uint32_t*)src;
				dest += 4;
			}
			break;
		case VK_QUERY_TYPE_OCCLUSION: {
			/* The resolved sample count lives in the last 16 bytes of the slot. */
			uint64_t sample_count = *(uint64_t*)(src + pool->stride - 16);

			if (flags & VK_QUERY_RESULT_64_BIT) {
				*(uint64_t*)dest = sample_count;
				dest += 8;
			} else {
				*(uint32_t*)dest = sample_count;
				dest += 4;
			}
			break;
		}
		default:
			unreachable("trying to get results of unhandled query type");
		}

		if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
			*(uint32_t*)dest = available;
		}
	}

	return result;
}
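
/* GPU-side result copy: results are copied into dstBuffer with CP COPY_DATA
 * packets, optionally gated on the availability word with WAIT_REG_MEM.
 */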

void radv_CmdCopyQueryPoolResults(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    firstQuery,
	uint32_t                                    queryCount,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                stride,
	VkQueryResultFlags                          flags)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
	dest_va += dst_buffer->offset + dstOffset;

	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

	for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
		unsigned query = firstQuery + i;
		uint64_t local_src_va = va + query * pool->stride;
		unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;

		unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);

		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
			/* TODO, not sure if there is any case where we won't always be ready yet */
			uint64_t avail_va = va + pool->availability_offset + 4 * query;

			/* This waits on the ME. All copies below are done on the ME */
			radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
			radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
			radeon_emit(cs, avail_va);
			radeon_emit(cs, avail_va >> 32);
			radeon_emit(cs, 1); /* reference value */
			radeon_emit(cs, 0xffffffff); /* mask */
			radeon_emit(cs, 4); /* poll interval */
		}

		switch (pool->type) {
		case VK_QUERY_TYPE_OCCLUSION:
			local_src_va += pool->stride - 16;
			/* fall through: the resolved result is copied like a timestamp */
		case VK_QUERY_TYPE_TIMESTAMP:
			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
					COPY_DATA_DST_SEL(COPY_DATA_MEM) |
					((flags & VK_QUERY_RESULT_64_BIT) ? COPY_DATA_COUNT_SEL : 0));
			radeon_emit(cs, local_src_va);
			radeon_emit(cs, local_src_va >> 32);
			radeon_emit(cs, dest_va);
			radeon_emit(cs, dest_va >> 32);
			break;
		default:
			unreachable("trying to get results of unhandled query type");
		}

		/* The flag could be still changed while the data copy is busy and we
		 * then might have invalid data, but a ready flag. However, the availability
		 * writes happen on the ME too, so they should be synchronized. Might need to
		 * revisit this with multiple queues.
		 */
		if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
			uint64_t avail_va = va + pool->availability_offset + 4 * query;
			uint64_t avail_dest_va = dest_va;
			if (pool->type != VK_QUERY_TYPE_PIPELINE_STATISTICS)
				avail_dest_va += elem_size;

			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
					COPY_DATA_DST_SEL(COPY_DATA_MEM));
			radeon_emit(cs, avail_va);
			radeon_emit(cs, avail_va >> 32);
			radeon_emit(cs, avail_dest_va);
			radeon_emit(cs, avail_dest_va >> 32);
		}

		assert(cs->cdw <= cdw_max);
	}
}
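
/* Resets just zero the result slots and availability words with CP DMA.
 * This relies on every render backend being enabled (the get_max_db()
 * assert above): a disabled RB would leave its zeroed counter pair
 * unwritten, which would require a different reset procedure.
 */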

void radv_CmdResetQueryPool(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    firstQuery,
	uint32_t                                    queryCount)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);

	cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);

	si_cp_dma_clear_buffer(cmd_buffer, va + firstQuery * pool->stride,
			       queryCount * pool->stride, 0);
	si_cp_dma_clear_buffer(cmd_buffer, va + pool->availability_offset + firstQuery * 4,
			       queryCount * 4, 0);
}
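
/* Occlusion queries are bracketed by two ZPASS_DONE events: begin here and
 * end in radv_CmdEndQuery(). Each event makes every enabled DB dump its
 * Z-pass sample counter into the query slot.
 */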

void radv_CmdBeginQuery(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    query,
	VkQueryControlFlags                         flags)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	va += pool->stride * query;

	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);

	switch (pool->type) {
	case VK_QUERY_TYPE_OCCLUSION:
		radeon_check_space(cmd_buffer->device->ws, cs, 7);

		++cmd_buffer->state.active_occlusion_queries;
		if (cmd_buffer->state.active_occlusion_queries == 1)
			radv_set_db_count_control(cmd_buffer);

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		break;
	default:
		unreachable("beginning unhandled query type");
	}
}

void radv_CmdEndQuery(
	VkCommandBuffer                             commandBuffer,
	VkQueryPool                                 queryPool,
	uint32_t                                    query)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	uint64_t avail_va = va + pool->availability_offset + 4 * query;
	va += pool->stride * query;

	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);

	switch (pool->type) {
	case VK_QUERY_TYPE_OCCLUSION:
		radeon_check_space(cmd_buffer->device->ws, cs, 14);

		cmd_buffer->state.active_occlusion_queries--;
		if (cmd_buffer->state.active_occlusion_queries == 0)
			radv_set_db_count_control(cmd_buffer);

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va + 8);
		radeon_emit(cs, (va + 8) >> 32);

		radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);
		radeon_emit(cs, va + pool->stride - 16);
		radeon_emit(cs, (va + pool->stride - 16) >> 32);
		break;
	default:
		unreachable("ending unhandled query type");
	}

	radeon_check_space(cmd_buffer->device->ws, cs, 5);

	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
			S_370_WR_CONFIRM(1) |
			S_370_ENGINE_SEL(V_370_ME));
	radeon_emit(cs, avail_va);
	radeon_emit(cs, avail_va >> 32);
	radeon_emit(cs, 1); /* query is now available */
}
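
/* Timestamps use EVENT_WRITE_EOP with a bottom-of-pipe timestamp event:
 * DATA_SEL = 3 in the high address dword asks the CP to write the 64-bit
 * GPU clock counter to query_va once all prior work has drained.
 */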

void radv_CmdWriteTimestamp(
	VkCommandBuffer                             commandBuffer,
	VkPipelineStageFlagBits                     pipelineStage,
	VkQueryPool                                 queryPool,
	uint32_t                                    query)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
	uint64_t avail_va = va + pool->availability_offset + 4 * query;
	uint64_t query_va = va + pool->stride * query;

	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);

	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 11);

	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
	radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
	radeon_emit(cs, query_va);
	radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
	radeon_emit(cs, 0);
	radeon_emit(cs, 0);

	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
			S_370_WR_CONFIRM(1) |
			S_370_ENGINE_SEL(V_370_ME));
	radeon_emit(cs, avail_va);
	radeon_emit(cs, avail_va >> 32);
	radeon_emit(cs, 1); /* query is now available */

	assert(cmd_buffer->cs->cdw <= cdw_max);
}