X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2FgenX_cmd_buffer.c;h=7af2b316795e02e85b5213e73cd19c56e40bde86;hb=f31ed6d0cd3c6b66304bb060a0c204ecf8621e6a;hp=14338b22ecea3364aa21dfe10ec2b1196d6bfe43;hpb=924a8cbb408e7dd110d172093908f99926b7970d;p=mesa.git diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 14338b22ece..7af2b316795 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -579,8 +579,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer, } } - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state->render_pass_states); + anv_state_flush(cmd_buffer->device, state->render_pass_states); } } @@ -637,6 +636,11 @@ genX(EndCommandBuffer)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + /* We want every command buffer to start with the PMA fix in a known state, + * so we disable it at the end of the command buffer. + */ + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false); + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); anv_cmd_buffer_end_batch_buffer(cmd_buffer); @@ -654,6 +658,11 @@ genX(CmdExecuteCommands)( assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + /* The secondary command buffers will assume that the PMA fix is disabled + * when they begin executing. Make sure this is true. + */ + genX(cmd_buffer_enable_pma_fix)(primary, false); + for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); @@ -1265,8 +1274,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, assert(image == map->image_count); out: - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*bt_state); + anv_state_flush(cmd_buffer->device, *bt_state); return VK_SUCCESS; } @@ -1323,8 +1331,7 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer, sampler->state, sizeof(sampler->state)); } - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*state); + anv_state_flush(cmd_buffer->device, *state); return VK_SUCCESS; } @@ -1642,8 +1649,7 @@ emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, ((uint32_t *)id_state.map)[0] = base_vertex; ((uint32_t *)id_state.map)[1] = base_instance; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(id_state); + anv_state_flush(cmd_buffer->device, id_state); emit_base_vertex_instance_bo(cmd_buffer, &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); @@ -1657,8 +1663,7 @@ emit_draw_index(struct anv_cmd_buffer *cmd_buffer, uint32_t draw_index) ((uint32_t *)state.map)[0] = draw_index; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); + anv_state_flush(cmd_buffer->device, state); emit_vertex_bo(cmd_buffer, &cmd_buffer->device->dynamic_state_block_pool.bo, @@ -1936,8 +1941,7 @@ void genX(CmdDispatch)( sizes[0] = x; sizes[1] = y; sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); + anv_state_flush(cmd_buffer->device, state); cmd_buffer->state.num_workgroups_offset = state.offset; cmd_buffer->state.num_workgroups_bo = &cmd_buffer->device->dynamic_state_block_pool.bo; @@ -2227,7 +2231,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) const bool has_stencil = image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT); - /* FIXME: Implement the PMA stall W/A */ + cmd_buffer->state.hiz_enabled = has_hiz; + /* FIXME: Width and Height are wrong */ genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); @@ -2465,283 +2470,9 @@ void genX(CmdEndRenderPass)( anv_cmd_buffer_resolve_subpass(cmd_buffer); + cmd_buffer->state.hiz_enabled = false; + #ifndef NDEBUG anv_dump_add_framebuffer(cmd_buffer, cmd_buffer->state.framebuffer); #endif } - -static void -emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.DestinationAddressType = DAT_PPGTT; - pc.PostSyncOperation = WritePSDepthCount; - pc.DepthStallEnable = true; - pc.Address = (struct anv_address) { bo, offset }; - - if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4) - pc.CommandStreamerStallEnable = true; - } -} - -static void -emit_query_availability(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.DestinationAddressType = DAT_PPGTT; - pc.PostSyncOperation = WriteImmediateData; - pc.Address = (struct anv_address) { bo, offset }; - pc.ImmediateData = 1; - } -} - -void genX(CmdBeginQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - /* Workaround: When meta uses the pipeline with the VS disabled, it seems - * that the pipelining of the depth write breaks. What we see is that - * samples from the render pass clear leaks into the first query - * immediately after the clear. Doing a pipecontrol with a post-sync - * operation and DepthStallEnable seems to work around the issue. - */ - if (cmd_buffer->state.need_query_wa) { - cmd_buffer->state.need_query_wa = false; - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.DepthCacheFlushEnable = true; - pc.DepthStallEnable = true; - } - } - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(cmd_buffer, &pool->bo, - query * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void genX(CmdEndQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(cmd_buffer, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 8); - - emit_query_availability(cmd_buffer, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 16); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -#define TIMESTAMP 0x2358 - -void genX(CmdWriteTimestamp)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - uint32_t offset = query * sizeof(struct anv_query_pool_slot); - - assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); - - switch (pipelineStage) { - case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = TIMESTAMP; - srm.MemoryAddress = (struct anv_address) { &pool->bo, offset }; - } - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = TIMESTAMP + 4; - srm.MemoryAddress = (struct anv_address) { &pool->bo, offset + 4 }; - } - break; - - default: - /* Everything else is bottom-of-pipe */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.DestinationAddressType = DAT_PPGTT; - pc.PostSyncOperation = WriteTimestamp; - pc.Address = (struct anv_address) { &pool->bo, offset }; - - if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4) - pc.CommandStreamerStallEnable = true; - } - break; - } - - emit_query_availability(cmd_buffer, &pool->bo, query + 16); -} - -#if GEN_GEN > 7 || GEN_IS_HASWELL - -#define alu_opcode(v) __gen_uint((v), 20, 31) -#define alu_operand1(v) __gen_uint((v), 10, 19) -#define alu_operand2(v) __gen_uint((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { - lrm.RegisterAddress = reg, - lrm.MemoryAddress = (struct anv_address) { bo, offset }; - } - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { - lrm.RegisterAddress = reg + 4; - lrm.MemoryAddress = (struct anv_address) { bo, offset + 4 }; - } -} - -static void -store_query_result(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) -{ - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = reg; - srm.MemoryAddress = (struct anv_address) { bo, offset }; - } - - if (flags & VK_QUERY_RESULT_64_BIT) { - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) { - srm.RegisterAddress = reg + 4; - srm.MemoryAddress = (struct anv_address) { bo, offset + 4 }; - } - } -} - -void genX(CmdCopyQueryPoolResults)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = true; - pc.StallAtPixelScoreboard = true; - } - } - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - break; - - case VK_QUERY_TYPE_TIMESTAMP: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(2), &pool->bo, slot_offset); - break; - - default: - unreachable("unhandled query type"); - } - - store_query_result(&cmd_buffer->batch, - CS_GPR(2), buffer->bo, dst_offset, flags); - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), - &pool->bo, slot_offset + 16); - if (flags & VK_QUERY_RESULT_64_BIT) - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 8, flags); - else - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 4, flags); - } - - dst_offset += destStride; - } -} - -#else -void genX(CmdCopyQueryPoolResults)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - anv_finishme("Queries not yet supported on Ivy Bridge"); -} -#endif