/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
35 VkResult
genX(CreateQueryPool
)(
37 const VkQueryPoolCreateInfo
* pCreateInfo
,
38 const VkAllocationCallbacks
* pAllocator
,
39 VkQueryPool
* pQueryPool
)
41 ANV_FROM_HANDLE(anv_device
, device
, _device
);
42 struct anv_query_pool
*pool
;
47 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO
);
49 switch (pCreateInfo
->queryType
) {
50 case VK_QUERY_TYPE_OCCLUSION
:
51 case VK_QUERY_TYPE_TIMESTAMP
:
53 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
54 return VK_ERROR_INCOMPATIBLE_DRIVER
;
56 assert(!"Invalid query type");
59 slot_size
= sizeof(struct anv_query_pool_slot
);
60 pool
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*pool
), 8,
61 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
63 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
65 pool
->type
= pCreateInfo
->queryType
;
66 pool
->slots
= pCreateInfo
->queryCount
;
68 size
= pCreateInfo
->queryCount
* slot_size
;
69 result
= anv_bo_init_new(&pool
->bo
, device
, size
);
70 if (result
!= VK_SUCCESS
)
73 pool
->bo
.map
= anv_gem_mmap(device
, pool
->bo
.gem_handle
, 0, size
, 0);
75 *pQueryPool
= anv_query_pool_to_handle(pool
);
80 vk_free2(&device
->alloc
, pAllocator
, pool
);
85 void genX(DestroyQueryPool
)(
88 const VkAllocationCallbacks
* pAllocator
)
90 ANV_FROM_HANDLE(anv_device
, device
, _device
);
91 ANV_FROM_HANDLE(anv_query_pool
, pool
, _pool
);
96 anv_gem_munmap(pool
->bo
.map
, pool
->bo
.size
);
97 anv_gem_close(device
, pool
->bo
.gem_handle
);
98 vk_free2(&device
->alloc
, pAllocator
, pool
);
101 VkResult
genX(GetQueryPoolResults
)(
103 VkQueryPool queryPool
,
109 VkQueryResultFlags flags
)
111 ANV_FROM_HANDLE(anv_device
, device
, _device
);
112 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
113 int64_t timeout
= INT64_MAX
;
117 assert(pool
->type
== VK_QUERY_TYPE_OCCLUSION
||
118 pool
->type
== VK_QUERY_TYPE_TIMESTAMP
);
123 if (flags
& VK_QUERY_RESULT_WAIT_BIT
) {
124 ret
= anv_gem_wait(device
, pool
->bo
.gem_handle
, &timeout
);
126 /* We don't know the real error. */
127 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY
,
128 "gem_wait failed %m");
132 void *data_end
= pData
+ dataSize
;
133 struct anv_query_pool_slot
*slot
= pool
->bo
.map
;
135 if (!device
->info
.has_llc
) {
136 uint64_t offset
= firstQuery
* sizeof(*slot
);
137 uint64_t size
= queryCount
* sizeof(*slot
);
138 anv_invalidate_range(pool
->bo
.map
+ offset
,
139 MIN2(size
, pool
->bo
.size
- offset
));
142 VkResult status
= VK_SUCCESS
;
143 for (uint32_t i
= 0; i
< queryCount
; i
++) {
144 bool available
= slot
[firstQuery
+ i
].available
;
146 /* From the Vulkan 1.0.42 spec:
148 * "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
149 * both not set then no result values are written to pData for
150 * queries that are in the unavailable state at the time of the call,
151 * and vkGetQueryPoolResults returns VK_NOT_READY. However,
152 * availability state is still written to pData for those queries if
153 * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
155 bool write_results
= available
|| (flags
& VK_QUERY_RESULT_PARTIAL_BIT
);
158 switch (pool
->type
) {
159 case VK_QUERY_TYPE_OCCLUSION
: {
160 result
= slot
[firstQuery
+ i
].end
- slot
[firstQuery
+ i
].begin
;
163 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
164 unreachable("pipeline stats not supported");
165 case VK_QUERY_TYPE_TIMESTAMP
: {
166 result
= slot
[firstQuery
+ i
].begin
;
170 unreachable("invalid pool type");
173 status
= VK_NOT_READY
;
176 if (flags
& VK_QUERY_RESULT_64_BIT
) {
177 uint64_t *dst
= pData
;
180 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
)
181 dst
[1] = slot
[firstQuery
+ i
].available
;
183 uint32_t *dst
= pData
;
186 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
)
187 dst
[1] = slot
[firstQuery
+ i
].available
;
191 if (pData
>= data_end
)
199 emit_ps_depth_count(struct anv_cmd_buffer
*cmd_buffer
,
200 struct anv_bo
*bo
, uint32_t offset
)
202 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
203 pc
.DestinationAddressType
= DAT_PPGTT
;
204 pc
.PostSyncOperation
= WritePSDepthCount
;
205 pc
.DepthStallEnable
= true;
206 pc
.Address
= (struct anv_address
) { bo
, offset
};
208 if (GEN_GEN
== 9 && cmd_buffer
->device
->info
.gt
== 4)
209 pc
.CommandStreamerStallEnable
= true;
214 emit_query_availability(struct anv_cmd_buffer
*cmd_buffer
,
215 struct anv_bo
*bo
, uint32_t offset
)
217 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
218 pc
.DestinationAddressType
= DAT_PPGTT
;
219 pc
.PostSyncOperation
= WriteImmediateData
;
220 pc
.Address
= (struct anv_address
) { bo
, offset
};
221 pc
.ImmediateData
= 1;
225 void genX(CmdResetQueryPool
)(
226 VkCommandBuffer commandBuffer
,
227 VkQueryPool queryPool
,
231 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
232 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
234 for (uint32_t i
= 0; i
< queryCount
; i
++) {
235 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_DATA_IMM
), sdm
) {
236 sdm
.Address
= (struct anv_address
) {
238 .offset
= (firstQuery
+ i
) * sizeof(struct anv_query_pool_slot
),
246 void genX(CmdBeginQuery
)(
247 VkCommandBuffer commandBuffer
,
248 VkQueryPool queryPool
,
250 VkQueryControlFlags flags
)
252 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
253 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
255 /* Workaround: When meta uses the pipeline with the VS disabled, it seems
256 * that the pipelining of the depth write breaks. What we see is that
257 * samples from the render pass clear leaks into the first query
258 * immediately after the clear. Doing a pipecontrol with a post-sync
259 * operation and DepthStallEnable seems to work around the issue.
261 if (cmd_buffer
->state
.need_query_wa
) {
262 cmd_buffer
->state
.need_query_wa
= false;
263 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
264 pc
.DepthCacheFlushEnable
= true;
265 pc
.DepthStallEnable
= true;
269 switch (pool
->type
) {
270 case VK_QUERY_TYPE_OCCLUSION
:
271 emit_ps_depth_count(cmd_buffer
, &pool
->bo
,
272 query
* sizeof(struct anv_query_pool_slot
) + 8);
275 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
281 void genX(CmdEndQuery
)(
282 VkCommandBuffer commandBuffer
,
283 VkQueryPool queryPool
,
286 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
287 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
289 switch (pool
->type
) {
290 case VK_QUERY_TYPE_OCCLUSION
:
291 emit_ps_depth_count(cmd_buffer
, &pool
->bo
,
292 query
* sizeof(struct anv_query_pool_slot
) + 16);
294 emit_query_availability(cmd_buffer
, &pool
->bo
,
295 query
* sizeof(struct anv_query_pool_slot
));
298 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
304 #define TIMESTAMP 0x2358
306 void genX(CmdWriteTimestamp
)(
307 VkCommandBuffer commandBuffer
,
308 VkPipelineStageFlagBits pipelineStage
,
309 VkQueryPool queryPool
,
312 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
313 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
314 uint32_t offset
= query
* sizeof(struct anv_query_pool_slot
);
316 assert(pool
->type
== VK_QUERY_TYPE_TIMESTAMP
);
318 switch (pipelineStage
) {
319 case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
:
320 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
321 srm
.RegisterAddress
= TIMESTAMP
;
322 srm
.MemoryAddress
= (struct anv_address
) { &pool
->bo
, offset
+ 8 };
324 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
325 srm
.RegisterAddress
= TIMESTAMP
+ 4;
326 srm
.MemoryAddress
= (struct anv_address
) { &pool
->bo
, offset
+ 12 };
331 /* Everything else is bottom-of-pipe */
332 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
333 pc
.DestinationAddressType
= DAT_PPGTT
;
334 pc
.PostSyncOperation
= WriteTimestamp
;
335 pc
.Address
= (struct anv_address
) { &pool
->bo
, offset
+ 8 };
337 if (GEN_GEN
== 9 && cmd_buffer
->device
->info
.gt
== 4)
338 pc
.CommandStreamerStallEnable
= true;
343 emit_query_availability(cmd_buffer
, &pool
->bo
, offset
);
346 #if GEN_GEN > 7 || GEN_IS_HASWELL
348 #define alu_opcode(v) __gen_uint((v), 20, 31)
349 #define alu_operand1(v) __gen_uint((v), 10, 19)
350 #define alu_operand2(v) __gen_uint((v), 0, 9)
351 #define alu(opcode, operand1, operand2) \
352 alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
354 #define OPCODE_NOOP 0x000
355 #define OPCODE_LOAD 0x080
356 #define OPCODE_LOADINV 0x480
357 #define OPCODE_LOAD0 0x081
358 #define OPCODE_LOAD1 0x481
359 #define OPCODE_ADD 0x100
360 #define OPCODE_SUB 0x101
361 #define OPCODE_AND 0x102
362 #define OPCODE_OR 0x103
363 #define OPCODE_XOR 0x104
364 #define OPCODE_STORE 0x180
365 #define OPCODE_STOREINV 0x580
367 #define OPERAND_R0 0x00
368 #define OPERAND_R1 0x01
369 #define OPERAND_R2 0x02
370 #define OPERAND_R3 0x03
371 #define OPERAND_R4 0x04
372 #define OPERAND_SRCA 0x20
373 #define OPERAND_SRCB 0x21
374 #define OPERAND_ACCU 0x31
375 #define OPERAND_ZF 0x32
376 #define OPERAND_CF 0x33
378 #define CS_GPR(n) (0x2600 + (n) * 8)
381 emit_load_alu_reg_u64(struct anv_batch
*batch
, uint32_t reg
,
382 struct anv_bo
*bo
, uint32_t offset
)
384 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
), lrm
) {
385 lrm
.RegisterAddress
= reg
,
386 lrm
.MemoryAddress
= (struct anv_address
) { bo
, offset
};
388 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
), lrm
) {
389 lrm
.RegisterAddress
= reg
+ 4;
390 lrm
.MemoryAddress
= (struct anv_address
) { bo
, offset
+ 4 };
395 store_query_result(struct anv_batch
*batch
, uint32_t reg
,
396 struct anv_bo
*bo
, uint32_t offset
, VkQueryResultFlags flags
)
398 anv_batch_emit(batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
399 srm
.RegisterAddress
= reg
;
400 srm
.MemoryAddress
= (struct anv_address
) { bo
, offset
};
403 if (flags
& VK_QUERY_RESULT_64_BIT
) {
404 anv_batch_emit(batch
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
405 srm
.RegisterAddress
= reg
+ 4;
406 srm
.MemoryAddress
= (struct anv_address
) { bo
, offset
+ 4 };
411 void genX(CmdCopyQueryPoolResults
)(
412 VkCommandBuffer commandBuffer
,
413 VkQueryPool queryPool
,
417 VkDeviceSize destOffset
,
418 VkDeviceSize destStride
,
419 VkQueryResultFlags flags
)
421 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
422 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
423 ANV_FROM_HANDLE(anv_buffer
, buffer
, destBuffer
);
424 uint32_t slot_offset
, dst_offset
;
426 if (flags
& VK_QUERY_RESULT_WAIT_BIT
) {
427 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
428 pc
.CommandStreamerStallEnable
= true;
429 pc
.StallAtPixelScoreboard
= true;
433 dst_offset
= buffer
->offset
+ destOffset
;
434 for (uint32_t i
= 0; i
< queryCount
; i
++) {
436 slot_offset
= (firstQuery
+ i
) * sizeof(struct anv_query_pool_slot
);
437 switch (pool
->type
) {
438 case VK_QUERY_TYPE_OCCLUSION
:
439 emit_load_alu_reg_u64(&cmd_buffer
->batch
,
440 CS_GPR(0), &pool
->bo
, slot_offset
+ 8);
441 emit_load_alu_reg_u64(&cmd_buffer
->batch
,
442 CS_GPR(1), &pool
->bo
, slot_offset
+ 16);
444 /* FIXME: We need to clamp the result for 32 bit. */
446 uint32_t *dw
= anv_batch_emitn(&cmd_buffer
->batch
, 5, GENX(MI_MATH
));
447 dw
[1] = alu(OPCODE_LOAD
, OPERAND_SRCA
, OPERAND_R1
);
448 dw
[2] = alu(OPCODE_LOAD
, OPERAND_SRCB
, OPERAND_R0
);
449 dw
[3] = alu(OPCODE_SUB
, 0, 0);
450 dw
[4] = alu(OPCODE_STORE
, OPERAND_R2
, OPERAND_ACCU
);
453 case VK_QUERY_TYPE_TIMESTAMP
:
454 emit_load_alu_reg_u64(&cmd_buffer
->batch
,
455 CS_GPR(2), &pool
->bo
, slot_offset
+ 8);
459 unreachable("unhandled query type");
462 store_query_result(&cmd_buffer
->batch
,
463 CS_GPR(2), buffer
->bo
, dst_offset
, flags
);
465 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
) {
466 emit_load_alu_reg_u64(&cmd_buffer
->batch
, CS_GPR(0),
467 &pool
->bo
, slot_offset
);
468 if (flags
& VK_QUERY_RESULT_64_BIT
)
469 store_query_result(&cmd_buffer
->batch
,
470 CS_GPR(0), buffer
->bo
, dst_offset
+ 8, flags
);
472 store_query_result(&cmd_buffer
->batch
,
473 CS_GPR(0), buffer
->bo
, dst_offset
+ 4, flags
);
476 dst_offset
+= destStride
;
481 void genX(CmdCopyQueryPoolResults
)(
482 VkCommandBuffer commandBuffer
,
483 VkQueryPool queryPool
,
487 VkDeviceSize destOffset
,
488 VkDeviceSize destStride
,
489 VkQueryResultFlags flags
)
491 anv_finishme("Queries not yet supported on Ivy Bridge");