anv/query: Move the available bits to the front
[mesa.git] / src / intel / vulkan / genX_query.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

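/* With this change, each anv_query_pool_slot keeps its 64-bit availability
 * word at offset 0, followed by the begin and end values. The +8 / +16
 * offsets used when emitting query writes below rely on that layout.
 */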
VkResult genX(CreateQueryPool)(
    VkDevice                                    _device,
    const VkQueryPoolCreateInfo*                pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkQueryPool*                                pQueryPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_query_pool *pool;
   VkResult result;
   uint32_t slot_size;
   uint64_t size;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);

   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_TIMESTAMP:
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      return VK_ERROR_INCOMPATIBLE_DRIVER;
   default:
      assert(!"Invalid query type");
   }

   slot_size = sizeof(struct anv_query_pool_slot);
   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->type = pCreateInfo->queryType;
   pool->slots = pCreateInfo->queryCount;

   size = pCreateInfo->queryCount * slot_size;
   result = anv_bo_init_new(&pool->bo, device, size);
   if (result != VK_SUCCESS)
      goto fail;

   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);

   *pQueryPool = anv_query_pool_to_handle(pool);

   return VK_SUCCESS;

 fail:
   vk_free2(&device->alloc, pAllocator, pool);

   return result;
}

void genX(DestroyQueryPool)(
    VkDevice                                    _device,
    VkQueryPool                                 _pool,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);

   if (!pool)
      return;

   anv_gem_munmap(pool->bo.map, pool->bo.size);
   anv_gem_close(device, pool->bo.gem_handle);
   vk_free2(&device->alloc, pAllocator, pool);
}

VkResult genX(GetQueryPoolResults)(
    VkDevice                                    _device,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    size_t                                      dataSize,
    void*                                       pData,
    VkDeviceSize                                stride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   int64_t timeout = INT64_MAX;
   uint64_t result;
   int ret;

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
          pool->type == VK_QUERY_TYPE_TIMESTAMP);

   if (pData == NULL)
      return VK_SUCCESS;

   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
      if (ret == -1) {
         /* We don't know the real error. */
         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "gem_wait failed %m");
      }
   }

   void *data_end = pData + dataSize;
   struct anv_query_pool_slot *slot = pool->bo.map;

   if (!device->info.has_llc) {
      uint64_t offset = firstQuery * sizeof(*slot);
      uint64_t size = queryCount * sizeof(*slot);
      anv_invalidate_range(pool->bo.map + offset,
                           MIN2(size, pool->bo.size - offset));
   }

   VkResult status = VK_SUCCESS;
   for (uint32_t i = 0; i < queryCount; i++) {
      bool available = slot[firstQuery + i].available;

      /* From the Vulkan 1.0.42 spec:
       *
       *    "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
       *    both not set then no result values are written to pData for
       *    queries that are in the unavailable state at the time of the call,
       *    and vkGetQueryPoolResults returns VK_NOT_READY. However,
       *    availability state is still written to pData for those queries if
       *    VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
       */
      bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);

      if (write_results) {
         switch (pool->type) {
         case VK_QUERY_TYPE_OCCLUSION: {
            result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
            break;
         }
         case VK_QUERY_TYPE_PIPELINE_STATISTICS:
            unreachable("pipeline stats not supported");
         case VK_QUERY_TYPE_TIMESTAMP: {
            result = slot[firstQuery + i].begin;
            break;
         }
         default:
            unreachable("invalid pool type");
         }
      } else {
         status = VK_NOT_READY;
      }

      if (flags & VK_QUERY_RESULT_64_BIT) {
         uint64_t *dst = pData;
         if (write_results)
            dst[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[1] = slot[firstQuery + i].available;
      } else {
         uint32_t *dst = pData;
         if (write_results)
            dst[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[1] = slot[firstQuery + i].available;
      }

      pData += stride;
      if (pData >= data_end)
         break;
   }

   return status;
}

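/* Emits a PIPE_CONTROL with the WritePSDepthCount post-sync operation, which
 * snapshots the pixel pipe's depth (samples-passed) counter to the given BO
 * offset once the depth stall completes. Occlusion results are computed from
 * begin/end pairs of these snapshots.
 */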
static void
emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType = DAT_PPGTT;
      pc.PostSyncOperation = WritePSDepthCount;
      pc.DepthStallEnable = true;
      pc.Address = (struct anv_address) { bo, offset };

      if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
         pc.CommandStreamerStallEnable = true;
   }
}

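/* Marks a query slot as available by writing an immediate 1 with a
 * PIPE_CONTROL post-sync write, so the availability word appears to land
 * only after the work emitted ahead of it in the pipeline.
 */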
static void
emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType = DAT_PPGTT;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = (struct anv_address) { bo, offset };
      pc.ImmediateData = 1;
   }
}

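/* Resetting a query only needs to clear the availability qword at the start
 * of each slot; any stale begin/end values are ignored until the query is
 * marked available again.
 */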
void genX(CmdResetQueryPool)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   for (uint32_t i = 0; i < queryCount; i++) {
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
         sdm.Address = (struct anv_address) {
            .bo = &pool->bo,
            .offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot),
         };
         sdm.DataDWord0 = 0;
         sdm.DataDWord1 = 0;
      }
   }
}

void genX(CmdBeginQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query,
    VkQueryControlFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
    * that the pipelining of the depth write breaks. What we see is that
    * samples from the render pass clear leak into the first query
    * immediately after the clear. Doing a pipecontrol with a post-sync
    * operation and DepthStallEnable seems to work around the issue.
    */
   if (cmd_buffer->state.need_query_wa) {
      cmd_buffer->state.need_query_wa = false;
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.DepthCacheFlushEnable = true;
         pc.DepthStallEnable = true;
      }
   }

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(cmd_buffer, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot) + 8);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}

void genX(CmdEndQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(cmd_buffer, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot) + 16);

      emit_query_availability(cmd_buffer, &pool->bo,
                              query * sizeof(struct anv_query_pool_slot));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}

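/* MMIO offset of the render command streamer's TIMESTAMP register. */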
#define TIMESTAMP 0x2358

void genX(CmdWriteTimestamp)(
    VkCommandBuffer                             commandBuffer,
    VkPipelineStageFlagBits                     pipelineStage,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   uint32_t offset = query * sizeof(struct anv_query_pool_slot);

   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);

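   /* Top-of-pipe timestamps read the command streamer's TIMESTAMP register
    * directly with two 32-bit MI_STORE_REGISTER_MEMs; every other stage is
    * treated as bottom-of-pipe and uses a PIPE_CONTROL WriteTimestamp
    * post-sync write instead.
    */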
   switch (pipelineStage) {
   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = TIMESTAMP;
         srm.MemoryAddress = (struct anv_address) { &pool->bo, offset + 8 };
      }
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = TIMESTAMP + 4;
         srm.MemoryAddress = (struct anv_address) { &pool->bo, offset + 12 };
      }
      break;

   default:
      /* Everything else is bottom-of-pipe */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.DestinationAddressType = DAT_PPGTT;
         pc.PostSyncOperation = WriteTimestamp;
         pc.Address = (struct anv_address) { &pool->bo, offset + 8 };

         if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
            pc.CommandStreamerStallEnable = true;
      }
      break;
   }

   emit_query_availability(cmd_buffer, &pool->bo, offset);
}

#if GEN_GEN > 7 || GEN_IS_HASWELL

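/* MI_MATH ALU instructions: bits 31:20 hold the opcode and bits 19:10 and
 * 9:0 the two operands. OPERAND_Rn name the command streamer's 64-bit
 * general-purpose registers; SRCA/SRCB are the ALU inputs and ACCU its
 * accumulator.
 */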
#define alu_opcode(v)   __gen_uint((v), 20, 31)
#define alu_operand1(v) __gen_uint((v), 10, 19)
#define alu_operand2(v) __gen_uint((v), 0, 9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

#define OPCODE_NOOP      0x000
#define OPCODE_LOAD      0x080
#define OPCODE_LOADINV   0x480
#define OPCODE_LOAD0     0x081
#define OPCODE_LOAD1     0x481
#define OPCODE_ADD       0x100
#define OPCODE_SUB       0x101
#define OPCODE_AND       0x102
#define OPCODE_OR        0x103
#define OPCODE_XOR       0x104
#define OPCODE_STORE     0x180
#define OPCODE_STOREINV  0x580

#define OPERAND_R0   0x00
#define OPERAND_R1   0x01
#define OPERAND_R2   0x02
#define OPERAND_R3   0x03
#define OPERAND_R4   0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF   0x32
#define OPERAND_CF   0x33

#define CS_GPR(n) (0x2600 + (n) * 8)

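/* Loads a 64-bit value from the BO into command streamer GPR 'reg' using two
 * 32-bit MI_LOAD_REGISTER_MEM commands (low dword first, then high dword).
 */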
static void
emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
                      struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
      lrm.RegisterAddress = reg;
      lrm.MemoryAddress = (struct anv_address) { bo, offset };
   }
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
      lrm.RegisterAddress = reg + 4;
      lrm.MemoryAddress = (struct anv_address) { bo, offset + 4 };
   }
}

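/* Stores the result held in GPR 'reg' into the destination buffer: the low
 * dword is always written, the high dword only when the caller asked for
 * 64-bit results.
 */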
static void
store_query_result(struct anv_batch *batch, uint32_t reg,
                   struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
{
   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
      srm.RegisterAddress = reg;
      srm.MemoryAddress = (struct anv_address) { bo, offset };
   }

   if (flags & VK_QUERY_RESULT_64_BIT) {
      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = reg + 4;
         srm.MemoryAddress = (struct anv_address) { bo, offset + 4 };
      }
   }
}

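/* Copies query results entirely on the GPU: slot values are loaded into CS
 * GPRs, occlusion results are computed with MI_MATH, and the result (plus
 * optional availability) is stored to the destination buffer with
 * MI_STORE_REGISTER_MEM.
 */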
void genX(CmdCopyQueryPoolResults)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   uint32_t slot_offset, dst_offset;

   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
         pc.StallAtPixelScoreboard = true;
      }
   }

   dst_offset = buffer->offset + destOffset;
   for (uint32_t i = 0; i < queryCount; i++) {

      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION:
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(0), &pool->bo, slot_offset + 8);
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(1), &pool->bo, slot_offset + 16);

         /* FIXME: We need to clamp the result for 32 bit. */

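         /* MI_MATH program: load the end count (R1) into SRCA and the begin
          * count (R0) into SRCB, subtract, and store the accumulator (the
          * sample difference) into R2 for store_query_result() below.
          */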
         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
         dw[3] = alu(OPCODE_SUB, 0, 0);
         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
         break;

      case VK_QUERY_TYPE_TIMESTAMP:
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(2), &pool->bo, slot_offset + 8);
         break;

      default:
         unreachable("unhandled query type");
      }

      store_query_result(&cmd_buffer->batch,
                         CS_GPR(2), buffer->bo, dst_offset, flags);

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
                               &pool->bo, slot_offset);
         if (flags & VK_QUERY_RESULT_64_BIT)
            store_query_result(&cmd_buffer->batch,
                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
         else
            store_query_result(&cmd_buffer->batch,
                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
      }

      dst_offset += destStride;
   }
}

#else
void genX(CmdCopyQueryPoolResults)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   anv_finishme("Queries not yet supported on Ivy Bridge");
}
#endif