2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 VkResult
genX(CreateQueryPool
)(
37 const VkQueryPoolCreateInfo
* pCreateInfo
,
38 const VkAllocationCallbacks
* pAllocator
,
39 VkQueryPool
* pQueryPool
)
41 ANV_FROM_HANDLE(anv_device
, device
, _device
);
42 struct anv_query_pool
*pool
;
45 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO
);
47 /* Query pool slots are made up of some number of 64-bit values packed
48 * tightly together. The first 64-bit value is always the "available" bit
49 * which is 0 when the query is unavailable and 1 when it is available.
50 * The 64-bit values that follow are determined by the type of query.
52 uint32_t uint64s_per_slot
= 1;
54 switch (pCreateInfo
->queryType
) {
55 case VK_QUERY_TYPE_OCCLUSION
:
56 /* Occlusion queries have two values: begin and end. */
57 uint64s_per_slot
+= 2;
59 case VK_QUERY_TYPE_TIMESTAMP
:
60 /* Timestamps just have the one timestamp value */
61 uint64s_per_slot
+= 1;
63 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
64 return VK_ERROR_INCOMPATIBLE_DRIVER
;
66 assert(!"Invalid query type");
69 pool
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*pool
), 8,
70 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
72 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
74 pool
->type
= pCreateInfo
->queryType
;
75 pool
->stride
= uint64s_per_slot
* sizeof(uint64_t);
76 pool
->slots
= pCreateInfo
->queryCount
;
78 uint64_t size
= pool
->slots
* pool
->stride
;
79 result
= anv_bo_init_new(&pool
->bo
, device
, size
);
80 if (result
!= VK_SUCCESS
)
83 pool
->bo
.map
= anv_gem_mmap(device
, pool
->bo
.gem_handle
, 0, size
, 0);
85 *pQueryPool
= anv_query_pool_to_handle(pool
);
90 vk_free2(&device
->alloc
, pAllocator
, pool
);
95 void genX(DestroyQueryPool
)(
98 const VkAllocationCallbacks
* pAllocator
)
100 ANV_FROM_HANDLE(anv_device
, device
, _device
);
101 ANV_FROM_HANDLE(anv_query_pool
, pool
, _pool
);
106 anv_gem_munmap(pool
->bo
.map
, pool
->bo
.size
);
107 anv_gem_close(device
, pool
->bo
.gem_handle
);
108 vk_free2(&device
->alloc
, pAllocator
, pool
);
112 cpu_write_query_result(void *dst_slot
, VkQueryResultFlags flags
,
113 uint32_t value_index
, uint64_t result
)
115 if (flags
& VK_QUERY_RESULT_64_BIT
) {
116 uint64_t *dst64
= dst_slot
;
117 dst64
[value_index
] = result
;
119 uint32_t *dst32
= dst_slot
;
120 dst32
[value_index
] = result
;
124 VkResult
genX(GetQueryPoolResults
)(
126 VkQueryPool queryPool
,
132 VkQueryResultFlags flags
)
134 ANV_FROM_HANDLE(anv_device
, device
, _device
);
135 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
136 int64_t timeout
= INT64_MAX
;
139 assert(pool
->type
== VK_QUERY_TYPE_OCCLUSION
||
140 pool
->type
== VK_QUERY_TYPE_TIMESTAMP
);
145 if (flags
& VK_QUERY_RESULT_WAIT_BIT
) {
146 ret
= anv_gem_wait(device
, pool
->bo
.gem_handle
, &timeout
);
148 /* We don't know the real error. */
149 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY
,
150 "gem_wait failed %m");
154 void *data_end
= pData
+ dataSize
;
156 if (!device
->info
.has_llc
) {
157 uint64_t offset
= firstQuery
* pool
->stride
;
158 uint64_t size
= queryCount
* pool
->stride
;
159 anv_invalidate_range(pool
->bo
.map
+ offset
,
160 MIN2(size
, pool
->bo
.size
- offset
));
163 VkResult status
= VK_SUCCESS
;
164 for (uint32_t i
= 0; i
< queryCount
; i
++) {
165 uint64_t *slot
= pool
->bo
.map
+ (firstQuery
+ i
) * pool
->stride
;
167 /* Availability is always at the start of the slot */
168 bool available
= slot
[0];
170 /* From the Vulkan 1.0.42 spec:
172 * "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
173 * both not set then no result values are written to pData for
174 * queries that are in the unavailable state at the time of the call,
175 * and vkGetQueryPoolResults returns VK_NOT_READY. However,
176 * availability state is still written to pData for those queries if
177 * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
179 bool write_results
= available
|| (flags
& VK_QUERY_RESULT_PARTIAL_BIT
);
182 switch (pool
->type
) {
183 case VK_QUERY_TYPE_OCCLUSION
: {
184 cpu_write_query_result(pData
, flags
, 0, slot
[2] - slot
[1]);
187 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
188 unreachable("pipeline stats not supported");
189 case VK_QUERY_TYPE_TIMESTAMP
: {
190 cpu_write_query_result(pData
, flags
, 0, slot
[1]);
194 unreachable("invalid pool type");
197 status
= VK_NOT_READY
;
200 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
)
201 cpu_write_query_result(pData
, flags
, 1, available
);
204 if (pData
>= data_end
)
212 emit_ps_depth_count(struct anv_cmd_buffer
*cmd_buffer
,
213 struct anv_bo
*bo
, uint32_t offset
)
215 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
216 pc
.DestinationAddressType
= DAT_PPGTT
;
217 pc
.PostSyncOperation
= WritePSDepthCount
;
218 pc
.DepthStallEnable
= true;
219 pc
.Address
= (struct anv_address
) { bo
, offset
};
221 if (GEN_GEN
== 9 && cmd_buffer
->device
->info
.gt
== 4)
222 pc
.CommandStreamerStallEnable
= true;
227 emit_query_availability(struct anv_cmd_buffer
*cmd_buffer
,
228 struct anv_bo
*bo
, uint32_t offset
)
230 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
231 pc
.DestinationAddressType
= DAT_PPGTT
;
232 pc
.PostSyncOperation
= WriteImmediateData
;
233 pc
.Address
= (struct anv_address
) { bo
, offset
};
234 pc
.ImmediateData
= 1;
238 void genX(CmdResetQueryPool
)(
239 VkCommandBuffer commandBuffer
,
240 VkQueryPool queryPool
,
244 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
245 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
247 for (uint32_t i
= 0; i
< queryCount
; i
++) {
248 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_DATA_IMM
), sdm
) {
249 sdm
.Address
= (struct anv_address
) {
251 .offset
= (firstQuery
+ i
) * pool
->stride
,
259 void genX(CmdBeginQuery
)(
260 VkCommandBuffer commandBuffer
,
261 VkQueryPool queryPool
,
263 VkQueryControlFlags flags
)
265 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
266 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
268 /* Workaround: When meta uses the pipeline with the VS disabled, it seems
269 * that the pipelining of the depth write breaks. What we see is that
270 * samples from the render pass clear leaks into the first query
271 * immediately after the clear. Doing a pipecontrol with a post-sync
272 * operation and DepthStallEnable seems to work around the issue.
274 if (cmd_buffer
->state
.need_query_wa
) {
275 cmd_buffer
->state
.need_query_wa
= false;
276 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
277 pc
.DepthCacheFlushEnable
= true;
278 pc
.DepthStallEnable
= true;
282 switch (pool
->type
) {
283 case VK_QUERY_TYPE_OCCLUSION
:
284 emit_ps_depth_count(cmd_buffer
, &pool
->bo
, query
* pool
->stride
+ 8);
287 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
293 void genX(CmdEndQuery
)(
294 VkCommandBuffer commandBuffer
,
295 VkQueryPool queryPool
,
298 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
299 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
301 switch (pool
->type
) {
302 case VK_QUERY_TYPE_OCCLUSION
:
303 emit_ps_depth_count(cmd_buffer
, &pool
->bo
, query
* pool
->stride
+ 16);
304 emit_query_availability(cmd_buffer
, &pool
->bo
, query
* pool
->stride
);
307 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
313 #define TIMESTAMP 0x2358
315 void genX(CmdWriteTimestamp
)(
316 VkCommandBuffer commandBuffer
,
317 VkPipelineStageFlagBits pipelineStage
,
318 VkQueryPool queryPool
,
321 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
322 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
323 uint32_t offset
= query
* pool
->stride
;
325 assert(pool
->type
== VK_QUERY_TYPE_TIMESTAMP
);
327 switch (pipelineStage
) {
328 case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
:
329 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
330 srm
.RegisterAddress
= TIMESTAMP
;
331 srm
.MemoryAddress
= (struct anv_address
) { &pool
->bo
, offset
+ 8 };
333 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
334 srm
.RegisterAddress
= TIMESTAMP
+ 4;
335 srm
.MemoryAddress
= (struct anv_address
) { &pool
->bo
, offset
+ 12 };
340 /* Everything else is bottom-of-pipe */
341 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
342 pc
.DestinationAddressType
= DAT_PPGTT
;
343 pc
.PostSyncOperation
= WriteTimestamp
;
344 pc
.Address
= (struct anv_address
) { &pool
->bo
, offset
+ 8 };
346 if (GEN_GEN
== 9 && cmd_buffer
->device
->info
.gt
== 4)
347 pc
.CommandStreamerStallEnable
= true;
352 emit_query_availability(cmd_buffer
, &pool
->bo
, offset
);
355 #if GEN_GEN > 7 || GEN_IS_HASWELL
357 #define alu_opcode(v) __gen_uint((v), 20, 31)
358 #define alu_operand1(v) __gen_uint((v), 10, 19)
359 #define alu_operand2(v) __gen_uint((v), 0, 9)
360 #define alu(opcode, operand1, operand2) \
361 alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
363 #define OPCODE_NOOP 0x000
364 #define OPCODE_LOAD 0x080
365 #define OPCODE_LOADINV 0x480
366 #define OPCODE_LOAD0 0x081
367 #define OPCODE_LOAD1 0x481
368 #define OPCODE_ADD 0x100
369 #define OPCODE_SUB 0x101
370 #define OPCODE_AND 0x102
371 #define OPCODE_OR 0x103
372 #define OPCODE_XOR 0x104
373 #define OPCODE_STORE 0x180
374 #define OPCODE_STOREINV 0x580
376 #define OPERAND_R0 0x00
377 #define OPERAND_R1 0x01
378 #define OPERAND_R2 0x02
379 #define OPERAND_R3 0x03
380 #define OPERAND_R4 0x04
381 #define OPERAND_SRCA 0x20
382 #define OPERAND_SRCB 0x21
383 #define OPERAND_ACCU 0x31
384 #define OPERAND_ZF 0x32
385 #define OPERAND_CF 0x33
387 #define CS_GPR(n) (0x2600 + (n) * 8)
390 emit_load_alu_reg_u64(struct anv_batch
*batch
, uint32_t reg
,
391 struct anv_bo
*bo
, uint32_t offset
)
393 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
), lrm
) {
394 lrm
.RegisterAddress
= reg
,
395 lrm
.MemoryAddress
= (struct anv_address
) { bo
, offset
};
397 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
), lrm
) {
398 lrm
.RegisterAddress
= reg
+ 4;
399 lrm
.MemoryAddress
= (struct anv_address
) { bo
, offset
+ 4 };
404 store_query_result(struct anv_batch
*batch
, uint32_t reg
,
405 struct anv_bo
*bo
, uint32_t offset
, VkQueryResultFlags flags
)
407 anv_batch_emit(batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
408 srm
.RegisterAddress
= reg
;
409 srm
.MemoryAddress
= (struct anv_address
) { bo
, offset
};
412 if (flags
& VK_QUERY_RESULT_64_BIT
) {
413 anv_batch_emit(batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
414 srm
.RegisterAddress
= reg
+ 4;
415 srm
.MemoryAddress
= (struct anv_address
) { bo
, offset
+ 4 };
420 void genX(CmdCopyQueryPoolResults
)(
421 VkCommandBuffer commandBuffer
,
422 VkQueryPool queryPool
,
426 VkDeviceSize destOffset
,
427 VkDeviceSize destStride
,
428 VkQueryResultFlags flags
)
430 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
431 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
432 ANV_FROM_HANDLE(anv_buffer
, buffer
, destBuffer
);
433 uint32_t slot_offset
, dst_offset
;
435 if (flags
& VK_QUERY_RESULT_WAIT_BIT
) {
436 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
437 pc
.CommandStreamerStallEnable
= true;
438 pc
.StallAtPixelScoreboard
= true;
442 dst_offset
= buffer
->offset
+ destOffset
;
443 for (uint32_t i
= 0; i
< queryCount
; i
++) {
445 slot_offset
= (firstQuery
+ i
) * pool
->stride
;
446 switch (pool
->type
) {
447 case VK_QUERY_TYPE_OCCLUSION
:
448 emit_load_alu_reg_u64(&cmd_buffer
->batch
,
449 CS_GPR(0), &pool
->bo
, slot_offset
+ 8);
450 emit_load_alu_reg_u64(&cmd_buffer
->batch
,
451 CS_GPR(1), &pool
->bo
, slot_offset
+ 16);
453 /* FIXME: We need to clamp the result for 32 bit. */
455 uint32_t *dw
= anv_batch_emitn(&cmd_buffer
->batch
, 5, GENX(MI_MATH
));
456 dw
[1] = alu(OPCODE_LOAD
, OPERAND_SRCA
, OPERAND_R1
);
457 dw
[2] = alu(OPCODE_LOAD
, OPERAND_SRCB
, OPERAND_R0
);
458 dw
[3] = alu(OPCODE_SUB
, 0, 0);
459 dw
[4] = alu(OPCODE_STORE
, OPERAND_R2
, OPERAND_ACCU
);
462 case VK_QUERY_TYPE_TIMESTAMP
:
463 emit_load_alu_reg_u64(&cmd_buffer
->batch
,
464 CS_GPR(2), &pool
->bo
, slot_offset
+ 8);
468 unreachable("unhandled query type");
471 store_query_result(&cmd_buffer
->batch
,
472 CS_GPR(2), buffer
->bo
, dst_offset
, flags
);
474 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
) {
475 emit_load_alu_reg_u64(&cmd_buffer
->batch
, CS_GPR(0),
476 &pool
->bo
, slot_offset
);
477 if (flags
& VK_QUERY_RESULT_64_BIT
)
478 store_query_result(&cmd_buffer
->batch
,
479 CS_GPR(0), buffer
->bo
, dst_offset
+ 8, flags
);
481 store_query_result(&cmd_buffer
->batch
,
482 CS_GPR(0), buffer
->bo
, dst_offset
+ 4, flags
);
485 dst_offset
+= destStride
;
490 void genX(CmdCopyQueryPoolResults
)(
491 VkCommandBuffer commandBuffer
,
492 VkQueryPool queryPool
,
496 VkDeviceSize destOffset
,
497 VkDeviceSize destStride
,
498 VkQueryResultFlags flags
)
500 anv_finishme("Queries not yet supported on Ivy Bridge");