From: Kristian Høgsberg Kristensen Date: Fri, 14 Aug 2015 17:03:27 +0000 (-0700) Subject: vk: Move gen8 specific parts of queries to anv_gen8.c X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=130db307719901ff29677c3cfba53bd03dff9cb3;p=mesa.git vk: Move gen8 specific parts of queries to anv_gen8.c Signed-off-by: Kristian Høgsberg Kristensen --- diff --git a/src/vulkan/anv_gen8.c b/src/vulkan/anv_gen8.c index e01bf52de95..fd2b583ee47 100644 --- a/src/vulkan/anv_gen8.c +++ b/src/vulkan/anv_gen8.c @@ -951,3 +951,193 @@ VkResult gen8_CreateDynamicDepthStencilState( return VK_SUCCESS; } + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void gen8_CmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void gen8_CmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void gen8_CmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + struct anv_bo *bo = buffer->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void gen8_CmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availabilty info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? */ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 69003c52a9a..a399f86e689 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1086,6 +1086,18 @@ struct anv_render_pass { struct anv_subpass subpasses[0]; }; +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + void anv_device_init_meta(struct anv_device *device); void anv_device_finish_meta(struct anv_device *device); diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c index d7903ee2cb8..3785560dc3b 100644 --- a/src/vulkan/anv_query.c +++ b/src/vulkan/anv_query.c @@ -29,18 +29,6 @@ #include "anv_private.h" -struct anv_query_pool_slot { - uint64_t begin; - uint64_t end; - uint64_t available; -}; - -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - VkResult anv_CreateQueryPool( VkDevice _device, const VkQueryPoolCreateInfo* pCreateInfo, @@ -152,57 +140,6 @@ VkResult anv_GetQueryPoolResults( return VK_SUCCESS; } -static void -anv_batch_emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ -} - -void anv_CmdBeginQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void anv_CmdEndQuery( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t slot) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - slot * sizeof(struct anv_query_pool_slot) + 8); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - void anv_CmdResetQueryPool( VkCmdBuffer cmdBuffer, VkQueryPool queryPool, @@ -211,142 +148,3 @@ void anv_CmdResetQueryPool( { stub(); } - -#define TIMESTAMP 0x2358 - -void anv_CmdWriteTimestamp( - VkCmdBuffer cmdBuffer, - VkTimestampType timestampType, - VkBuffer destBuffer, - VkDeviceSize destOffset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - struct anv_bo *bo = buffer->bo; - - switch (timestampType) { - case VK_TIMESTAMP_TYPE_TOP: - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { bo, buffer->offset + destOffset }); - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); - break; - - case VK_TIMESTAMP_TYPE_BOTTOM: - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = /* FIXME: This is only lower 32 bits */ - { bo, buffer->offset + destOffset }); - break; - - default: - break; - } -} - -#define alu_opcode(v) __gen_field((v), 20, 31) -#define alu_operand1(v) __gen_field((v), 10, 19) -#define alu_operand2(v) __gen_field((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void anv_CmdCopyQueryPoolResults( - VkCmdBuffer cmdBuffer, - VkQueryPool queryPool, - uint32_t startQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - /* Where is the availabilty info supposed to go? */ - anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); - return; - } - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION); - - /* FIXME: If we're not waiting, should we just do this on the CPU? */ - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); - - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2), - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, - .RegisterAddress = CS_GPR(2) + 4, - /* FIXME: This is only lower 32 bits */ - .MemoryAddress = { buffer->bo, dst_offset + 4 }); - - dst_offset += destStride; - } -}