/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 /* We reserve GPR 14 and 15 for conditional rendering */
36 #define GEN_MI_BUILDER_NUM_ALLOC_GPRS 14
37 #define __gen_get_batch_dwords anv_batch_emit_dwords
38 #define __gen_address_offset anv_address_add
39 #include "common/gen_mi_builder.h"
41 VkResult
genX(CreateQueryPool
)(
43 const VkQueryPoolCreateInfo
* pCreateInfo
,
44 const VkAllocationCallbacks
* pAllocator
,
45 VkQueryPool
* pQueryPool
)
47 ANV_FROM_HANDLE(anv_device
, device
, _device
);
48 const struct anv_physical_device
*pdevice
= &device
->instance
->physicalDevice
;
49 struct anv_query_pool
*pool
;
52 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO
);
54 /* Query pool slots are made up of some number of 64-bit values packed
55 * tightly together. The first 64-bit value is always the "available" bit
56 * which is 0 when the query is unavailable and 1 when it is available.
57 * The 64-bit values that follow are determined by the type of query.
59 uint32_t uint64s_per_slot
= 1;
61 VkQueryPipelineStatisticFlags pipeline_statistics
= 0;
62 switch (pCreateInfo
->queryType
) {
63 case VK_QUERY_TYPE_OCCLUSION
:
64 /* Occlusion queries have two values: begin and end. */
65 uint64s_per_slot
+= 2;
67 case VK_QUERY_TYPE_TIMESTAMP
:
68 /* Timestamps just have the one timestamp value */
69 uint64s_per_slot
+= 1;
71 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
72 pipeline_statistics
= pCreateInfo
->pipelineStatistics
;
73 /* We're going to trust this field implicitly so we need to ensure that
74 * no unhandled extension bits leak in.
76 pipeline_statistics
&= ANV_PIPELINE_STATISTICS_MASK
;
78 /* Statistics queries have a min and max for every statistic */
79 uint64s_per_slot
+= 2 * util_bitcount(pipeline_statistics
);
81 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
:
82 /* Transform feedback queries are 4 values, begin/end for
85 uint64s_per_slot
+= 4;
88 assert(!"Invalid query type");
91 pool
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*pool
), 8,
92 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
94 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
96 pool
->type
= pCreateInfo
->queryType
;
97 pool
->pipeline_statistics
= pipeline_statistics
;
98 pool
->stride
= uint64s_per_slot
* sizeof(uint64_t);
99 pool
->slots
= pCreateInfo
->queryCount
;
101 uint64_t size
= pool
->slots
* pool
->stride
;
102 result
= anv_bo_init_new(&pool
->bo
, device
, size
);
103 if (result
!= VK_SUCCESS
)
106 if (pdevice
->supports_48bit_addresses
)
107 pool
->bo
.flags
|= EXEC_OBJECT_SUPPORTS_48B_ADDRESS
;
109 if (pdevice
->use_softpin
)
110 pool
->bo
.flags
|= EXEC_OBJECT_PINNED
;
112 if (pdevice
->has_exec_async
)
113 pool
->bo
.flags
|= EXEC_OBJECT_ASYNC
;
115 anv_vma_alloc(device
, &pool
->bo
);
117 /* For query pools, we set the caching mode to I915_CACHING_CACHED. On LLC
118 * platforms, this does nothing. On non-LLC platforms, this means snooping
119 * which comes at a slight cost. However, the buffers aren't big, won't be
120 * written frequently, and trying to handle the flushing manually without
121 * doing too much flushing is extremely painful.
123 anv_gem_set_caching(device
, pool
->bo
.gem_handle
, I915_CACHING_CACHED
);
125 pool
->bo
.map
= anv_gem_mmap(device
, pool
->bo
.gem_handle
, 0, size
, 0);
127 *pQueryPool
= anv_query_pool_to_handle(pool
);
132 vk_free2(&device
->alloc
, pAllocator
, pool
);
137 void genX(DestroyQueryPool
)(
140 const VkAllocationCallbacks
* pAllocator
)
142 ANV_FROM_HANDLE(anv_device
, device
, _device
);
143 ANV_FROM_HANDLE(anv_query_pool
, pool
, _pool
);
148 anv_gem_munmap(pool
->bo
.map
, pool
->bo
.size
);
149 anv_vma_free(device
, &pool
->bo
);
150 anv_gem_close(device
, pool
->bo
.gem_handle
);
151 vk_free2(&device
->alloc
, pAllocator
, pool
);
154 static struct anv_address
155 anv_query_address(struct anv_query_pool
*pool
, uint32_t query
)
157 return (struct anv_address
) {
159 .offset
= query
* pool
->stride
,
164 cpu_write_query_result(void *dst_slot
, VkQueryResultFlags flags
,
165 uint32_t value_index
, uint64_t result
)
167 if (flags
& VK_QUERY_RESULT_64_BIT
) {
168 uint64_t *dst64
= dst_slot
;
169 dst64
[value_index
] = result
;
171 uint32_t *dst32
= dst_slot
;
172 dst32
[value_index
] = result
;
/* Reads the availability qword at the start of a query slot. The volatile
 * access forces a fresh read since the GPU writes this memory asynchronously.
 */
static bool
query_is_available(uint64_t *slot)
{
   return *(volatile uint64_t *)slot;
}
183 wait_for_available(struct anv_device
*device
,
184 struct anv_query_pool
*pool
, uint64_t *slot
)
187 if (query_is_available(slot
))
190 int ret
= anv_gem_busy(device
, pool
->bo
.gem_handle
);
192 /* The BO is still busy, keep waiting. */
194 } else if (ret
== -1) {
195 /* We don't know the real error. */
196 return anv_device_set_lost(device
, "gem wait failed: %m");
199 /* The BO is no longer busy. */
200 if (query_is_available(slot
)) {
203 VkResult status
= anv_device_query_status(device
);
204 if (status
!= VK_SUCCESS
)
207 /* If we haven't seen availability yet, then we never will. This
208 * can only happen if we have a client error where they call
209 * GetQueryPoolResults on a query that they haven't submitted to
210 * the GPU yet. The spec allows us to do anything in this case,
211 * but returning VK_SUCCESS doesn't seem right and we shouldn't
212 * just keep spinning.
220 VkResult
genX(GetQueryPoolResults
)(
222 VkQueryPool queryPool
,
228 VkQueryResultFlags flags
)
230 ANV_FROM_HANDLE(anv_device
, device
, _device
);
231 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
233 assert(pool
->type
== VK_QUERY_TYPE_OCCLUSION
||
234 pool
->type
== VK_QUERY_TYPE_PIPELINE_STATISTICS
||
235 pool
->type
== VK_QUERY_TYPE_TIMESTAMP
||
236 pool
->type
== VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
);
238 if (anv_device_is_lost(device
))
239 return VK_ERROR_DEVICE_LOST
;
244 void *data_end
= pData
+ dataSize
;
246 VkResult status
= VK_SUCCESS
;
247 for (uint32_t i
= 0; i
< queryCount
; i
++) {
248 uint64_t *slot
= pool
->bo
.map
+ (firstQuery
+ i
) * pool
->stride
;
250 /* Availability is always at the start of the slot */
251 bool available
= slot
[0];
253 if (!available
&& (flags
& VK_QUERY_RESULT_WAIT_BIT
)) {
254 status
= wait_for_available(device
, pool
, slot
);
255 if (status
!= VK_SUCCESS
)
261 /* From the Vulkan 1.0.42 spec:
263 * "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
264 * both not set then no result values are written to pData for
265 * queries that are in the unavailable state at the time of the call,
266 * and vkGetQueryPoolResults returns VK_NOT_READY. However,
267 * availability state is still written to pData for those queries if
268 * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
270 bool write_results
= available
|| (flags
& VK_QUERY_RESULT_PARTIAL_BIT
);
273 switch (pool
->type
) {
274 case VK_QUERY_TYPE_OCCLUSION
:
276 cpu_write_query_result(pData
, flags
, idx
, slot
[2] - slot
[1]);
280 case VK_QUERY_TYPE_PIPELINE_STATISTICS
: {
281 uint32_t statistics
= pool
->pipeline_statistics
;
283 uint32_t stat
= u_bit_scan(&statistics
);
285 uint64_t result
= slot
[idx
* 2 + 2] - slot
[idx
* 2 + 1];
287 /* WaDividePSInvocationCountBy4:HSW,BDW */
288 if ((device
->info
.gen
== 8 || device
->info
.is_haswell
) &&
289 (1 << stat
) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT
)
292 cpu_write_query_result(pData
, flags
, idx
, result
);
296 assert(idx
== util_bitcount(pool
->pipeline_statistics
));
300 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
:
302 cpu_write_query_result(pData
, flags
, idx
, slot
[2] - slot
[1]);
305 cpu_write_query_result(pData
, flags
, idx
, slot
[4] - slot
[3]);
309 case VK_QUERY_TYPE_TIMESTAMP
:
311 cpu_write_query_result(pData
, flags
, idx
, slot
[1]);
316 unreachable("invalid pool type");
320 status
= VK_NOT_READY
;
322 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
)
323 cpu_write_query_result(pData
, flags
, idx
, available
);
326 if (pData
>= data_end
)
334 emit_ps_depth_count(struct anv_cmd_buffer
*cmd_buffer
,
335 struct anv_address addr
)
337 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
338 pc
.DestinationAddressType
= DAT_PPGTT
;
339 pc
.PostSyncOperation
= WritePSDepthCount
;
340 pc
.DepthStallEnable
= true;
343 if (GEN_GEN
== 9 && cmd_buffer
->device
->info
.gt
== 4)
344 pc
.CommandStreamerStallEnable
= true;
349 emit_query_mi_availability(struct gen_mi_builder
*b
,
350 struct anv_address addr
,
353 gen_mi_store(b
, gen_mi_mem64(addr
), gen_mi_imm(available
));
357 emit_query_pc_availability(struct anv_cmd_buffer
*cmd_buffer
,
358 struct anv_address addr
,
361 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
362 pc
.DestinationAddressType
= DAT_PPGTT
;
363 pc
.PostSyncOperation
= WriteImmediateData
;
365 pc
.ImmediateData
= available
;
370 * Goes through a series of consecutive query indices in the given pool
371 * setting all element values to 0 and emitting them as available.
374 emit_zero_queries(struct anv_cmd_buffer
*cmd_buffer
,
375 struct gen_mi_builder
*b
, struct anv_query_pool
*pool
,
376 uint32_t first_index
, uint32_t num_queries
)
378 switch (pool
->type
) {
379 case VK_QUERY_TYPE_OCCLUSION
:
380 case VK_QUERY_TYPE_TIMESTAMP
:
381 /* These queries are written with a PIPE_CONTROL so clear them using the
382 * PIPE_CONTROL as well so we don't have to synchronize between 2 types
385 assert((pool
->stride
% 8) == 0);
386 for (uint32_t i
= 0; i
< num_queries
; i
++) {
387 struct anv_address slot_addr
=
388 anv_query_address(pool
, first_index
+ i
);
390 for (uint32_t qword
= 1; qword
< (pool
->stride
/ 8); qword
++) {
391 emit_query_pc_availability(cmd_buffer
,
392 anv_address_add(slot_addr
, qword
* 8),
395 emit_query_pc_availability(cmd_buffer
, slot_addr
, true);
399 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
400 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
:
401 for (uint32_t i
= 0; i
< num_queries
; i
++) {
402 struct anv_address slot_addr
=
403 anv_query_address(pool
, first_index
+ i
);
404 gen_mi_memset(b
, anv_address_add(slot_addr
, 8), 0, pool
->stride
- 8);
405 emit_query_mi_availability(b
, slot_addr
, true);
410 unreachable("Unsupported query type");
414 void genX(CmdResetQueryPool
)(
415 VkCommandBuffer commandBuffer
,
416 VkQueryPool queryPool
,
420 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
421 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
423 switch (pool
->type
) {
424 case VK_QUERY_TYPE_OCCLUSION
:
425 case VK_QUERY_TYPE_TIMESTAMP
:
426 for (uint32_t i
= 0; i
< queryCount
; i
++) {
427 emit_query_pc_availability(cmd_buffer
,
428 anv_query_address(pool
, firstQuery
+ i
),
433 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
434 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
: {
435 struct gen_mi_builder b
;
436 gen_mi_builder_init(&b
, &cmd_buffer
->batch
);
438 for (uint32_t i
= 0; i
< queryCount
; i
++)
439 emit_query_mi_availability(&b
, anv_query_address(pool
, firstQuery
+ i
), false);
444 unreachable("Unsupported query type");
448 void genX(ResetQueryPoolEXT
)(
450 VkQueryPool queryPool
,
454 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
456 for (uint32_t i
= 0; i
< queryCount
; i
++) {
457 uint64_t *slot
= pool
->bo
.map
+ (firstQuery
+ i
) * pool
->stride
;
462 static const uint32_t vk_pipeline_stat_to_reg
[] = {
463 GENX(IA_VERTICES_COUNT_num
),
464 GENX(IA_PRIMITIVES_COUNT_num
),
465 GENX(VS_INVOCATION_COUNT_num
),
466 GENX(GS_INVOCATION_COUNT_num
),
467 GENX(GS_PRIMITIVES_COUNT_num
),
468 GENX(CL_INVOCATION_COUNT_num
),
469 GENX(CL_PRIMITIVES_COUNT_num
),
470 GENX(PS_INVOCATION_COUNT_num
),
471 GENX(HS_INVOCATION_COUNT_num
),
472 GENX(DS_INVOCATION_COUNT_num
),
473 GENX(CS_INVOCATION_COUNT_num
),
477 emit_pipeline_stat(struct gen_mi_builder
*b
, uint32_t stat
,
478 struct anv_address addr
)
480 STATIC_ASSERT(ANV_PIPELINE_STATISTICS_MASK
==
481 (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg
)) - 1);
483 assert(stat
< ARRAY_SIZE(vk_pipeline_stat_to_reg
));
484 gen_mi_store(b
, gen_mi_mem64(addr
),
485 gen_mi_reg64(vk_pipeline_stat_to_reg
[stat
]));
489 emit_xfb_query(struct gen_mi_builder
*b
, uint32_t stream
,
490 struct anv_address addr
)
492 assert(stream
< MAX_XFB_STREAMS
);
494 gen_mi_store(b
, gen_mi_mem64(anv_address_add(addr
, 0)),
495 gen_mi_reg64(GENX(SO_NUM_PRIMS_WRITTEN0_num
) + stream
* 8));
496 gen_mi_store(b
, gen_mi_mem64(anv_address_add(addr
, 16)),
497 gen_mi_reg64(GENX(SO_PRIM_STORAGE_NEEDED0_num
) + stream
* 8));
500 void genX(CmdBeginQuery
)(
501 VkCommandBuffer commandBuffer
,
502 VkQueryPool queryPool
,
504 VkQueryControlFlags flags
)
506 genX(CmdBeginQueryIndexedEXT
)(commandBuffer
, queryPool
, query
, flags
, 0);
509 void genX(CmdBeginQueryIndexedEXT
)(
510 VkCommandBuffer commandBuffer
,
511 VkQueryPool queryPool
,
513 VkQueryControlFlags flags
,
516 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
517 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
518 struct anv_address query_addr
= anv_query_address(pool
, query
);
520 struct gen_mi_builder b
;
521 gen_mi_builder_init(&b
, &cmd_buffer
->batch
);
523 switch (pool
->type
) {
524 case VK_QUERY_TYPE_OCCLUSION
:
525 emit_ps_depth_count(cmd_buffer
, anv_address_add(query_addr
, 8));
528 case VK_QUERY_TYPE_PIPELINE_STATISTICS
: {
529 /* TODO: This might only be necessary for certain stats */
530 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
531 pc
.CommandStreamerStallEnable
= true;
532 pc
.StallAtPixelScoreboard
= true;
535 uint32_t statistics
= pool
->pipeline_statistics
;
538 uint32_t stat
= u_bit_scan(&statistics
);
539 emit_pipeline_stat(&b
, stat
, anv_address_add(query_addr
, offset
));
545 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
:
546 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
547 pc
.CommandStreamerStallEnable
= true;
548 pc
.StallAtPixelScoreboard
= true;
550 emit_xfb_query(&b
, index
, anv_address_add(query_addr
, 8));
558 void genX(CmdEndQuery
)(
559 VkCommandBuffer commandBuffer
,
560 VkQueryPool queryPool
,
563 genX(CmdEndQueryIndexedEXT
)(commandBuffer
, queryPool
, query
, 0);
566 void genX(CmdEndQueryIndexedEXT
)(
567 VkCommandBuffer commandBuffer
,
568 VkQueryPool queryPool
,
572 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
573 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
574 struct anv_address query_addr
= anv_query_address(pool
, query
);
576 struct gen_mi_builder b
;
577 gen_mi_builder_init(&b
, &cmd_buffer
->batch
);
579 switch (pool
->type
) {
580 case VK_QUERY_TYPE_OCCLUSION
:
581 emit_ps_depth_count(cmd_buffer
, anv_address_add(query_addr
, 16));
582 emit_query_pc_availability(cmd_buffer
, query_addr
, true);
585 case VK_QUERY_TYPE_PIPELINE_STATISTICS
: {
586 /* TODO: This might only be necessary for certain stats */
587 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
588 pc
.CommandStreamerStallEnable
= true;
589 pc
.StallAtPixelScoreboard
= true;
592 uint32_t statistics
= pool
->pipeline_statistics
;
593 uint32_t offset
= 16;
595 uint32_t stat
= u_bit_scan(&statistics
);
596 emit_pipeline_stat(&b
, stat
, anv_address_add(query_addr
, offset
));
600 emit_query_mi_availability(&b
, query_addr
, true);
604 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
:
605 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
606 pc
.CommandStreamerStallEnable
= true;
607 pc
.StallAtPixelScoreboard
= true;
610 emit_xfb_query(&b
, index
, anv_address_add(query_addr
, 16));
611 emit_query_mi_availability(&b
, query_addr
, true);
618 /* When multiview is active the spec requires that N consecutive query
619 * indices are used, where N is the number of active views in the subpass.
620 * The spec allows that we only write the results to one of the queries
621 * but we still need to manage result availability for all the query indices.
622 * Since we only emit a single query for all active views in the
623 * first index, mark the other query indices as being already available
626 if (cmd_buffer
->state
.subpass
&& cmd_buffer
->state
.subpass
->view_mask
) {
627 const uint32_t num_queries
=
628 util_bitcount(cmd_buffer
->state
.subpass
->view_mask
);
630 emit_zero_queries(cmd_buffer
, &b
, pool
, query
+ 1, num_queries
- 1);
634 #define TIMESTAMP 0x2358
636 void genX(CmdWriteTimestamp
)(
637 VkCommandBuffer commandBuffer
,
638 VkPipelineStageFlagBits pipelineStage
,
639 VkQueryPool queryPool
,
642 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
643 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
644 struct anv_address query_addr
= anv_query_address(pool
, query
);
646 assert(pool
->type
== VK_QUERY_TYPE_TIMESTAMP
);
648 struct gen_mi_builder b
;
649 gen_mi_builder_init(&b
, &cmd_buffer
->batch
);
651 switch (pipelineStage
) {
652 case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
:
653 gen_mi_store(&b
, gen_mi_mem64(anv_address_add(query_addr
, 8)),
654 gen_mi_reg64(TIMESTAMP
));
658 /* Everything else is bottom-of-pipe */
659 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
), pc
) {
660 pc
.DestinationAddressType
= DAT_PPGTT
;
661 pc
.PostSyncOperation
= WriteTimestamp
;
662 pc
.Address
= anv_address_add(query_addr
, 8);
664 if (GEN_GEN
== 9 && cmd_buffer
->device
->info
.gt
== 4)
665 pc
.CommandStreamerStallEnable
= true;
670 emit_query_pc_availability(cmd_buffer
, query_addr
, true);
672 /* When multiview is active the spec requires that N consecutive query
673 * indices are used, where N is the number of active views in the subpass.
674 * The spec allows that we only write the results to one of the queries
675 * but we still need to manage result availability for all the query indices.
676 * Since we only emit a single query for all active views in the
677 * first index, mark the other query indices as being already available
680 if (cmd_buffer
->state
.subpass
&& cmd_buffer
->state
.subpass
->view_mask
) {
681 const uint32_t num_queries
=
682 util_bitcount(cmd_buffer
->state
.subpass
->view_mask
);
684 emit_zero_queries(cmd_buffer
, &b
, pool
, query
+ 1, num_queries
- 1);
688 #if GEN_GEN > 7 || GEN_IS_HASWELL
691 gpu_write_query_result(struct gen_mi_builder
*b
,
692 struct anv_address dst_addr
,
693 VkQueryResultFlags flags
,
694 uint32_t value_index
,
695 struct gen_mi_value query_result
)
697 if (flags
& VK_QUERY_RESULT_64_BIT
) {
698 struct anv_address res_addr
= anv_address_add(dst_addr
, value_index
* 8);
699 gen_mi_store(b
, gen_mi_mem64(res_addr
), query_result
);
701 struct anv_address res_addr
= anv_address_add(dst_addr
, value_index
* 4);
702 gen_mi_store(b
, gen_mi_mem32(res_addr
), query_result
);
706 static struct gen_mi_value
707 compute_query_result(struct gen_mi_builder
*b
, struct anv_address addr
)
709 return gen_mi_isub(b
, gen_mi_mem64(anv_address_add(addr
, 8)),
710 gen_mi_mem64(anv_address_add(addr
, 0)));
713 void genX(CmdCopyQueryPoolResults
)(
714 VkCommandBuffer commandBuffer
,
715 VkQueryPool queryPool
,
719 VkDeviceSize destOffset
,
720 VkDeviceSize destStride
,
721 VkQueryResultFlags flags
)
723 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
724 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
725 ANV_FROM_HANDLE(anv_buffer
, buffer
, destBuffer
);
727 struct gen_mi_builder b
;
728 gen_mi_builder_init(&b
, &cmd_buffer
->batch
);
729 struct gen_mi_value result
;
731 /* If render target writes are ongoing, request a render target cache flush
732 * to ensure proper ordering of the commands from the 3d pipe and the
735 if (cmd_buffer
->state
.pending_pipe_bits
& ANV_PIPE_RENDER_TARGET_BUFFER_WRITES
) {
736 cmd_buffer
->state
.pending_pipe_bits
|=
737 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT
;
740 if ((flags
& VK_QUERY_RESULT_WAIT_BIT
) ||
741 (cmd_buffer
->state
.pending_pipe_bits
& ANV_PIPE_FLUSH_BITS
) ||
742 /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
743 * because we're about to copy values from MI commands, we need to
744 * stall the command streamer to make sure the PIPE_CONTROL values have
745 * landed, otherwise we could see inconsistent values & availability.
747 * From the vulkan spec:
749 * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
750 * previous uses of vkCmdResetQueryPool in the same queue, without
751 * any additional synchronization."
753 pool
->type
== VK_QUERY_TYPE_OCCLUSION
||
754 pool
->type
== VK_QUERY_TYPE_TIMESTAMP
) {
755 cmd_buffer
->state
.pending_pipe_bits
|= ANV_PIPE_CS_STALL_BIT
;
756 genX(cmd_buffer_apply_pipe_flushes
)(cmd_buffer
);
759 struct anv_address dest_addr
= anv_address_add(buffer
->address
, destOffset
);
760 for (uint32_t i
= 0; i
< queryCount
; i
++) {
761 struct anv_address query_addr
= anv_query_address(pool
, firstQuery
+ i
);
763 switch (pool
->type
) {
764 case VK_QUERY_TYPE_OCCLUSION
:
765 result
= compute_query_result(&b
, anv_address_add(query_addr
, 8));
766 gpu_write_query_result(&b
, dest_addr
, flags
, idx
++, result
);
769 case VK_QUERY_TYPE_PIPELINE_STATISTICS
: {
770 uint32_t statistics
= pool
->pipeline_statistics
;
772 uint32_t stat
= u_bit_scan(&statistics
);
774 result
= compute_query_result(&b
, anv_address_add(query_addr
,
777 /* WaDividePSInvocationCountBy4:HSW,BDW */
778 if ((cmd_buffer
->device
->info
.gen
== 8 ||
779 cmd_buffer
->device
->info
.is_haswell
) &&
780 (1 << stat
) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT
) {
781 result
= gen_mi_ushr32_imm(&b
, result
, 2);
784 gpu_write_query_result(&b
, dest_addr
, flags
, idx
++, result
);
786 assert(idx
== util_bitcount(pool
->pipeline_statistics
));
790 case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
:
791 result
= compute_query_result(&b
, anv_address_add(query_addr
, 8));
792 gpu_write_query_result(&b
, dest_addr
, flags
, idx
++, result
);
793 result
= compute_query_result(&b
, anv_address_add(query_addr
, 24));
794 gpu_write_query_result(&b
, dest_addr
, flags
, idx
++, result
);
797 case VK_QUERY_TYPE_TIMESTAMP
:
798 result
= gen_mi_mem64(anv_address_add(query_addr
, 8));
799 gpu_write_query_result(&b
, dest_addr
, flags
, 0, result
);
803 unreachable("unhandled query type");
806 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
) {
807 gpu_write_query_result(&b
, dest_addr
, flags
, idx
,
808 gen_mi_mem64(query_addr
));
811 dest_addr
= anv_address_add(dest_addr
, destStride
);
816 void genX(CmdCopyQueryPoolResults
)(
817 VkCommandBuffer commandBuffer
,
818 VkQueryPool queryPool
,
822 VkDeviceSize destOffset
,
823 VkDeviceSize destStride
,
824 VkQueryResultFlags flags
)
826 anv_finishme("Queries not yet supported on Ivy Bridge");