From: Bas Nieuwenhuizen Date: Sun, 14 Jan 2018 19:00:40 +0000 (+0100) Subject: radv: Merge depth stencil state with PM4 generation. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=df2e7ab0dbc41df00df9714aac713ac9ba6309c5;p=mesa.git radv: Merge depth stencil state with PM4 generation. Reviewed-by: Dave Airlie Reviewed-by: Samuel Pitoiset --- diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 52585b02659..75137c86299 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -692,61 +692,6 @@ static uint32_t si_translate_stencil_op(enum VkStencilOp op) return 0; } } -static void -radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct radv_graphics_pipeline_create_info *extra) -{ - const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState; - struct radv_depth_stencil_state *ds = &pipeline->graphics.ds; - - if (!vkds) - return; - - RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); - struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; - if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED) - return; - - struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment; - bool has_depth_attachment = vk_format_is_depth(attachment->format); - bool has_stencil_attachment = vk_format_is_stencil(attachment->format); - - if (has_depth_attachment) { - ds->db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) | - S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) | - S_028800_ZFUNC(vkds->depthCompareOp) | - S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0); - - /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */ - ds->db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2); - } - - if (has_stencil_attachment && vkds->stencilTestEnable) { - ds->db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1); - ds->db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp); - ds->db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(vkds->front.failOp)); - ds->db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(vkds->front.passOp)); - ds->db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(vkds->front.depthFailOp)); - - ds->db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp); - ds->db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(vkds->back.failOp)); - ds->db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(vkds->back.passOp)); - ds->db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(vkds->back.depthFailOp)); - } - - if (extra) { - - ds->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear); - ds->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear); - - ds->db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->db_resummarize); - ds->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->db_flush_depth_inplace); - ds->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->db_flush_stencil_inplace); - ds->db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear); - ds->db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear); - } -} static uint32_t si_translate_fill(VkPolygonMode func) { @@ -2344,14 +2289,62 @@ radv_pipeline_generate_binning_state(struct radeon_winsys_cs *cs, static void radv_pipeline_generate_depth_stencil_state(struct radeon_winsys_cs *cs, - struct radv_pipeline *pipeline) + struct radv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct radv_graphics_pipeline_create_info *extra) { - struct radv_depth_stencil_state *ds = &pipeline->graphics.ds; - radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, ds->db_depth_control); - radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, ds->db_stencil_control); + const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState; + RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); + struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; + struct radv_render_pass_attachment *attachment = NULL; + uint32_t db_depth_control = 0, db_stencil_control = 0; + uint32_t db_render_control = 0, db_render_override2 = 0; + + if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) + attachment = pass->attachments + subpass->depth_stencil_attachment.attachment; + + bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format); + bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format); + + if (vkds && has_depth_attachment) { + db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) | + S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) | + S_028800_ZFUNC(vkds->depthCompareOp) | + S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0); + + /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */ + db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2); + } + + if (has_stencil_attachment && vkds && vkds->stencilTestEnable) { + db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1); + db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp); + db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(vkds->front.failOp)); + db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(vkds->front.passOp)); + db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(vkds->front.depthFailOp)); + + db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp); + db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(vkds->back.failOp)); + db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(vkds->back.passOp)); + db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(vkds->back.depthFailOp)); + } + + if (attachment && extra) { + db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear); + db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear); + + db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->db_resummarize); + db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->db_flush_depth_inplace); + db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->db_flush_stencil_inplace); + db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear); + db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear); + } + + radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); + radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); - radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, ds->db_render_control); - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); + radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control); + radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); } static void @@ -2869,12 +2862,13 @@ radv_pipeline_generate_vgt_vertex_reuse(struct radeon_winsys_cs *cs, static void radv_pipeline_generate_pm4(struct radv_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo) + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct radv_graphics_pipeline_create_info *extra) { pipeline->cs.buf = malloc(4 * 256); pipeline->cs.max_dw = 256; - radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline); + radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline, pCreateInfo, extra); radv_pipeline_generate_blend_state(&pipeline->cs, pipeline); radv_pipeline_generate_raster_state(&pipeline->cs, pipeline); radv_pipeline_generate_multisample_state(&pipeline->cs, pipeline); @@ -2939,7 +2933,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline, pStages); pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); - radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra); radv_pipeline_init_raster_state(pipeline, pCreateInfo); radv_pipeline_init_multisample_state(pipeline, pCreateInfo); pipeline->graphics.prim = si_translate_prim(pCreateInfo->pInputAssemblyState->topology); @@ -3186,7 +3179,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, } result = radv_pipeline_scratch_init(device, pipeline); - radv_pipeline_generate_pm4(pipeline, pCreateInfo); + radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra); return result; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index a124866ab95..b7f1f86dbfd 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1127,13 +1127,6 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) stage = __builtin_ffs(__tmp) - 1, __tmp; \ __tmp &= ~(1 << (stage))) -struct radv_depth_stencil_state { - uint32_t db_depth_control; - uint32_t db_stencil_control; - uint32_t db_render_control; - uint32_t db_render_override2; -}; - struct radv_blend_state { uint32_t cb_color_control; uint32_t cb_target_mask; @@ -1220,7 +1213,6 @@ struct radv_pipeline { union { struct { struct radv_blend_state blend; - struct radv_depth_stencil_state ds; struct radv_raster_state raster; struct radv_multisample_state ms; struct radv_tessellation_state tess;