#include <string.h>
#include <unistd.h>
-#include "registers/adreno_pm4.xml.h"
-#include "registers/adreno_common.xml.h"
-#include "registers/a6xx.xml.h"
+#include "adreno_pm4.xml.h"
+#include "adreno_common.xml.h"
+#include "a6xx.xml.h"
#include "nir/nir_builder.h"
#include "util/os_time.h"
}
struct tu_query_pool *pool =
- vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
+ vk_object_alloc(&device->vk, pAllocator, sizeof(*pool),
+ VK_OBJECT_TYPE_QUERY_POOL);
if (!pool)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
VkResult result = tu_bo_init_new(device, &pool->bo,
pCreateInfo->queryCount * slot_size);
if (result != VK_SUCCESS) {
- vk_free2(&device->alloc, pAllocator, pool);
+ vk_object_free(&device->vk, pAllocator, pool);
return result;
}
result = tu_bo_map(device, &pool->bo);
if (result != VK_SUCCESS) {
tu_bo_finish(device, &pool->bo);
- vk_free2(&device->alloc, pAllocator, pool);
+ vk_object_free(&device->vk, pAllocator, pool);
return result;
}
return;
tu_bo_finish(device, &pool->bo);
- vk_free2(&device->alloc, pAllocator, pool);
+ vk_object_free(&device->vk, pAllocator, pool);
}
static uint32_t
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
TU_FROM_HANDLE(tu_query_pool, pool, queryPool);
- struct tu_cs *cs = cmd->state.pass ? &cmd->draw_epilogue_cs : &cmd->cs;
- /* WFI to get more accurate timestamp */
- tu_cs_emit_wfi(cs);
+ tu_bo_list_add(&cmd->bo_list, &pool->bo, MSM_SUBMIT_BO_WRITE);
+
+ /* Inside a render pass, just write the timestamp multiple times so that
+ * the user gets the last one if we use GMEM. There isn't really much
+ * better we can do, and this seems to be what the blob does too.
+ */
+ struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
+
+ /* Stages that will already have been executed by the time the CP executes
+ * the REG_TO_MEM. DrawIndirect parameters are read by the CP, so the draw
+ * indirect stage counts as top-of-pipe too.
+ */
+ VkPipelineStageFlags top_of_pipe_flags =
+ VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
+ VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
+
+ if (pipelineStage & ~top_of_pipe_flags) {
+ /* Execute a WFI so that all commands complete. Note that CP_REG_TO_MEM
+ * does CP_WAIT_FOR_ME internally, which will wait for the WFI to
+ * complete.
+ *
+ * Stalling the CP like this is really unfortunate, but I don't think
+ * there's a better solution that allows all 48 bits of precision
+ * because CP_EVENT_WRITE doesn't support 64-bit timestamps.
+ */
+ tu_cs_emit_wfi(cs);
+ }
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_CP_ALWAYS_ON_COUNTER_LO) |
CP_REG_TO_MEM_0_64B);
tu_cs_emit_qw(cs, query_result_iova(pool, query, 0));
+ /* Only flag availability once the entire renderpass is done, similar to
+ * the begin/end path.
+ */
+ cs = cmd->state.pass ? &cmd->draw_epilogue_cs : &cmd->cs;
+
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
tu_cs_emit_qw(cs, query_available_iova(pool, query));
tu_cs_emit_qw(cs, 0x1);
-
- if (cmd->state.pass) {
- /* TODO: to have useful in-renderpass timestamps:
- * for sysmem path, we can just emit the timestamp in draw_cs,
- * for gmem renderpass, we do something with accumulate,
- * but I'm not sure that would follow the spec
- */
- tu_finishme("CmdWriteTimestam in renderpass not accurate");
- }
}