From: Kristian Høgsberg Date: Tue, 19 May 2015 00:03:58 +0000 (-0700) Subject: vk: Make occlusion query work, both copy and get functions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=82ddab4b180ae5a0e2f033bf323f649f82109426;p=mesa.git vk: Make occlusion query work, both copy and get functions --- diff --git a/src/vulkan/device.c b/src/vulkan/device.c index b3fa69015c1..5cefc0ade6c 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -956,6 +956,19 @@ anv_fence_destructor(struct anv_device * device, return VK_SUCCESS; } +static VkResult +anv_query_pool_destructor(struct anv_device * device, + VkObject object) +{ + struct anv_query_pool *pool = (struct anv_query_pool *) object; + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); + + return VK_SUCCESS; +} + static VkResult (*anv_object_destructors[])(struct anv_device *device, VkObject object) = { [VK_OBJECT_TYPE_INSTANCE] = anv_instance_destructor, @@ -968,7 +981,8 @@ static VkResult (*anv_object_destructors[])(struct anv_device *device, [VK_OBJECT_TYPE_BUFFER] = anv_free_destructor, [VK_OBJECT_TYPE_IMAGE] = anv_free_destructor, [VK_OBJECT_TYPE_RENDER_PASS] = anv_free_destructor, - [VK_OBJECT_TYPE_FENCE] = anv_fence_destructor + [VK_OBJECT_TYPE_FENCE] = anv_fence_destructor, + [VK_OBJECT_TYPE_QUERY_POOL] = anv_query_pool_destructor }; VkResult anv_DestroyObject( @@ -1299,12 +1313,6 @@ VkResult anv_ResetEvent( // Query functions -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -1313,19 +1321,32 @@ VkResult anv_CreateQueryPool( struct anv_device *device = (struct anv_device *) _device; struct anv_query_pool *pool; VkResult result; + size_t size; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_UNSUPPORTED; + default: + unreachable(""); + } + pool = anv_device_alloc(device, sizeof(*pool), 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); if (pool == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); pool->type = pCreateInfo->queryType; - result = anv_bo_init_new(&pool->bo, device, pCreateInfo->slots * 16); + size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); + result = anv_bo_init_new(&pool->bo, device, size); if (result != VK_SUCCESS) goto fail; + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); + *pQueryPool = (VkQueryPool) pool; return VK_SUCCESS; @@ -1337,7 +1358,7 @@ VkResult anv_CreateQueryPool( } VkResult anv_GetQueryPoolResults( - VkDevice device, + VkDevice _device, VkQueryPool queryPool, uint32_t startQuery, uint32_t queryCount, @@ -1345,7 +1366,49 @@ VkResult anv_GetQueryPoolResults( void* pData, VkQueryResultFlags flags) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + struct anv_query_pool_slot *slot = pool->bo.map; + int64_t timeout = INT64_MAX; + uint32_t *dst32 = pData; + uint64_t *dst64 = pData; + uint64_t result; + int ret; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return VK_UNSUPPORTED; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + if (flags & VK_QUERY_RESULT_64_BIT) + *pDataSize = queryCount * sizeof(uint64_t); + else + *pDataSize = queryCount * sizeof(uint32_t); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + for (uint32_t i = 0; i < queryCount; i++) { + result = slot[startQuery + i].end - slot[startQuery + i].begin; + if (flags & VK_QUERY_RESULT_64_BIT) { + *dst64++ = result; + } else { + if (result > UINT32_MAX) + result = UINT32_MAX; + *dst32++ = result; + } + } + + return VK_SUCCESS; } // Buffer functions @@ -2698,14 +2761,13 @@ void anv_CmdBeginQuery( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16); + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - break; - default: - break; + unreachable(""); } } @@ -2719,14 +2781,13 @@ void anv_CmdEndQuery( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16 + 8); + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - break; - default: - break; + unreachable(""); } } @@ -2771,6 +2832,50 @@ void anv_CmdWriteTimestamp( } } +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + void anv_CmdCopyQueryPoolResults( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, @@ -2781,7 +2886,53 @@ void anv_CmdCopyQueryPoolResults( VkDeviceSize destStride, VkQueryResultFlags flags) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_query_pool *pool = (struct anv_query_pool *) queryPool; + struct anv_buffer *buffer = (struct anv_buffer *) destBuffer; + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? */ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } } void anv_CmdInitAtomicCounters( diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 554ec66d748..b79dea3ddc4 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -452,6 +452,18 @@ struct anv_dynamic_cb_state { uint32_t blend_offset; }; +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + struct anv_descriptor_set_layout { struct { uint32_t surface_count;