 * Copyright 2016 Red Hat Inc.
4 * Copyright © 2015 Intel Corporation
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
26 #include "tu_private.h"
34 #include "registers/adreno_pm4.xml.h"
35 #include "registers/adreno_common.xml.h"
36 #include "registers/a6xx.xml.h"
38 #include "nir/nir_builder.h"
42 /* It seems like sample counts need to be copied over to 16-byte aligned
44 struct PACKED slot_value
{
49 struct PACKED occlusion_query_slot
{
50 struct slot_value available
; /* 0 when unavailable, 1 when available */
51 struct slot_value begin
;
52 struct slot_value end
;
53 struct slot_value result
;
/* Returns the IOVA of a given uint64_t field in a given slot of a query
 * pool's BO.
 *
 * Fix: fully parenthesize the expansion and the `pool`/`query` arguments.
 * The original expanded to `pool->bo.iova + pool->stride * query + ...`,
 * which mis-associates when `query` is an expression (e.g. `q + 1` expands
 * to `pool->stride * q + 1`) or when the macro is embedded in a larger
 * expression.
 */
#define query_iova(type, pool, query, field)                                 \
   ((pool)->bo.iova + (pool)->stride * (query) + offsetof(type, field) +     \
    offsetof(struct slot_value, value))

#define occlusion_query_iova(pool, query, field)                             \
   query_iova(struct occlusion_query_slot, pool, query, field)
66 tu_CreateQueryPool(VkDevice _device
,
67 const VkQueryPoolCreateInfo
*pCreateInfo
,
68 const VkAllocationCallbacks
*pAllocator
,
69 VkQueryPool
*pQueryPool
)
71 TU_FROM_HANDLE(tu_device
, device
, _device
);
72 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO
);
73 assert(pCreateInfo
->queryCount
> 0);
76 switch (pCreateInfo
->queryType
) {
77 case VK_QUERY_TYPE_OCCLUSION
:
78 slot_size
= sizeof(struct occlusion_query_slot
);
80 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
81 case VK_QUERY_TYPE_TIMESTAMP
:
82 unreachable("Unimplemented query type");
84 assert(!"Invalid query type");
87 struct tu_query_pool
*pool
=
88 vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*pool
), 8,
89 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
92 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
94 VkResult result
= tu_bo_init_new(device
, &pool
->bo
,
95 pCreateInfo
->queryCount
* slot_size
);
96 if (result
!= VK_SUCCESS
) {
97 vk_free2(&device
->alloc
, pAllocator
, pool
);
101 result
= tu_bo_map(device
, &pool
->bo
);
102 if (result
!= VK_SUCCESS
) {
103 tu_bo_finish(device
, &pool
->bo
);
104 vk_free2(&device
->alloc
, pAllocator
, pool
);
108 /* Initialize all query statuses to unavailable */
109 memset(pool
->bo
.map
, 0, pool
->bo
.size
);
111 pool
->type
= pCreateInfo
->queryType
;
112 pool
->stride
= slot_size
;
113 pool
->size
= pCreateInfo
->queryCount
;
114 pool
->pipeline_statistics
= pCreateInfo
->pipelineStatistics
;
115 *pQueryPool
= tu_query_pool_to_handle(pool
);
121 tu_DestroyQueryPool(VkDevice _device
,
123 const VkAllocationCallbacks
*pAllocator
)
125 TU_FROM_HANDLE(tu_device
, device
, _device
);
126 TU_FROM_HANDLE(tu_query_pool
, pool
, _pool
);
131 tu_bo_finish(device
, &pool
->bo
);
132 vk_free2(&device
->alloc
, pAllocator
, pool
);
136 tu_GetQueryPoolResults(VkDevice _device
,
137 VkQueryPool queryPool
,
143 VkQueryResultFlags flags
)
149 tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer
,
150 VkQueryPool queryPool
,
154 VkDeviceSize dstOffset
,
156 VkQueryResultFlags flags
)
161 tu_CmdResetQueryPool(VkCommandBuffer commandBuffer
,
162 VkQueryPool queryPool
,
169 emit_begin_occlusion_query(struct tu_cmd_buffer
*cmdbuf
,
170 struct tu_query_pool
*pool
,
173 /* From the Vulkan 1.1.130 spec:
175 * A query must begin and end inside the same subpass of a render pass
176 * instance, or must both begin and end outside of a render pass
179 * Unlike on an immediate-mode renderer, Turnip renders all tiles on
180 * vkCmdEndRenderPass, not individually on each vkCmdDraw*. As such, if a
181 * query begins/ends inside the same subpass of a render pass, we need to
182 * record the packets on the secondary draw command stream. cmdbuf->draw_cs
183 * is then run on every tile during render, so we just need to accumulate
184 * sample counts in slot->result to compute the query result.
186 struct tu_cs
*cs
= cmdbuf
->state
.pass
? &cmdbuf
->draw_cs
: &cmdbuf
->cs
;
188 uint64_t begin_iova
= occlusion_query_iova(pool
, query
, begin
);
190 tu_cs_reserve_space(cmdbuf
->device
, cs
, 7);
192 A6XX_RB_SAMPLE_COUNT_CONTROL(.copy
= true));
195 A6XX_RB_SAMPLE_COUNT_ADDR_LO(begin_iova
));
197 tu_cs_emit_pkt7(cs
, CP_EVENT_WRITE
, 1);
198 tu_cs_emit(cs
, ZPASS_DONE
);
202 tu_CmdBeginQuery(VkCommandBuffer commandBuffer
,
203 VkQueryPool queryPool
,
205 VkQueryControlFlags flags
)
207 TU_FROM_HANDLE(tu_cmd_buffer
, cmdbuf
, commandBuffer
);
208 TU_FROM_HANDLE(tu_query_pool
, pool
, queryPool
);
209 assert(query
< pool
->size
);
211 switch (pool
->type
) {
212 case VK_QUERY_TYPE_OCCLUSION
:
213 /* In freedreno, there is no implementation difference between
214 * GL_SAMPLES_PASSED and GL_ANY_SAMPLES_PASSED, so we can similarly
215 * ignore the VK_QUERY_CONTROL_PRECISE_BIT flag here.
217 emit_begin_occlusion_query(cmdbuf
, pool
, query
);
219 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
220 case VK_QUERY_TYPE_TIMESTAMP
:
221 unreachable("Unimplemented query type");
223 assert(!"Invalid query type");
226 tu_bo_list_add(&cmdbuf
->bo_list
, &pool
->bo
, MSM_SUBMIT_BO_WRITE
);
230 emit_end_occlusion_query(struct tu_cmd_buffer
*cmdbuf
,
231 struct tu_query_pool
*pool
,
234 /* Ending an occlusion query happens in a few steps:
235 * 1) Set the slot->end to UINT64_MAX.
236 * 2) Set up the SAMPLE_COUNT registers and trigger a CP_EVENT_WRITE to
237 * write the current sample count value into slot->end.
238 * 3) Since (2) is asynchronous, wait until slot->end is not equal to
239 * UINT64_MAX before continuing via CP_WAIT_REG_MEM.
240 * 4) Accumulate the results of the query (slot->end - slot->begin) into
242 * 5) If vkCmdEndQuery is *not* called from within the scope of a render
243 * pass, set the slot's available bit since the query is now done.
244 * 6) If vkCmdEndQuery *is* called from within the scope of a render
245 * pass, we cannot mark as available yet since the commands in
246 * draw_cs are not run until vkCmdEndRenderPass.
248 struct tu_cs
*cs
= cmdbuf
->state
.pass
? &cmdbuf
->draw_cs
: &cmdbuf
->cs
;
250 uint64_t begin_iova
= occlusion_query_iova(pool
, query
, begin
);
251 uint64_t end_iova
= occlusion_query_iova(pool
, query
, end
);
252 uint64_t result_iova
= occlusion_query_iova(pool
, query
, result
);
253 tu_cs_reserve_space(cmdbuf
->device
, cs
, 31);
254 tu_cs_emit_pkt7(cs
, CP_MEM_WRITE
, 4);
255 tu_cs_emit_qw(cs
, end_iova
);
256 tu_cs_emit_qw(cs
, 0xffffffffffffffffull
);
258 tu_cs_emit_pkt7(cs
, CP_WAIT_MEM_WRITES
, 0);
261 A6XX_RB_SAMPLE_COUNT_CONTROL(.copy
= true));
264 A6XX_RB_SAMPLE_COUNT_ADDR_LO(end_iova
));
266 tu_cs_emit_pkt7(cs
, CP_EVENT_WRITE
, 1);
267 tu_cs_emit(cs
, ZPASS_DONE
);
269 tu_cs_emit_pkt7(cs
, CP_WAIT_REG_MEM
, 6);
270 tu_cs_emit(cs
, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_NE
) |
271 CP_WAIT_REG_MEM_0_POLL_MEMORY
);
272 tu_cs_emit_qw(cs
, end_iova
);
273 tu_cs_emit(cs
, CP_WAIT_REG_MEM_3_REF(0xffffffff));
274 tu_cs_emit(cs
, CP_WAIT_REG_MEM_4_MASK(~0));
275 tu_cs_emit(cs
, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
277 /* result (dst) = result (srcA) + end (srcB) - begin (srcC) */
278 tu_cs_emit_pkt7(cs
, CP_MEM_TO_MEM
, 9);
279 tu_cs_emit(cs
, CP_MEM_TO_MEM_0_DOUBLE
| CP_MEM_TO_MEM_0_NEG_C
);
280 tu_cs_emit_qw(cs
, result_iova
);
281 tu_cs_emit_qw(cs
, result_iova
);
282 tu_cs_emit_qw(cs
, end_iova
);
283 tu_cs_emit_qw(cs
, begin_iova
);
285 tu_cs_emit_pkt7(cs
, CP_WAIT_MEM_WRITES
, 0);
287 if (!cmdbuf
->state
.pass
) {
288 tu_cs_reserve_space(cmdbuf
->device
, cs
, 5);
289 tu_cs_emit_pkt7(cs
, CP_MEM_WRITE
, 4);
290 tu_cs_emit_qw(cs
, occlusion_query_iova(pool
, query
, available
));
291 tu_cs_emit_qw(cs
, 0x1);
296 tu_CmdEndQuery(VkCommandBuffer commandBuffer
,
297 VkQueryPool queryPool
,
300 TU_FROM_HANDLE(tu_cmd_buffer
, cmdbuf
, commandBuffer
);
301 TU_FROM_HANDLE(tu_query_pool
, pool
, queryPool
);
302 assert(query
< pool
->size
);
304 switch (pool
->type
) {
305 case VK_QUERY_TYPE_OCCLUSION
:
306 emit_end_occlusion_query(cmdbuf
, pool
, query
);
308 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
309 case VK_QUERY_TYPE_TIMESTAMP
:
310 unreachable("Unimplemented query type");
312 assert(!"Invalid query type");
315 tu_bo_list_add(&cmdbuf
->bo_list
, &pool
->bo
, MSM_SUBMIT_BO_WRITE
);
319 tu_CmdWriteTimestamp(VkCommandBuffer commandBuffer
,
320 VkPipelineStageFlagBits pipelineStage
,
321 VkQueryPool queryPool
,