From 292031a1a520d6ddc34ef6b96c262d1f9e335410 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= Date: Mon, 25 Jan 2016 11:24:19 -0800 Subject: [PATCH] anv: Disable fs dispatch for depth/stencil only pipelines Fixes most renderpass bugs. --- src/vulkan/anv_meta_clear.c | 14 +- src/vulkan/anv_pipeline.c | 1 + src/vulkan/gen8_pipeline.c | 258 +++++++++++++++++++----------------- 3 files changed, 138 insertions(+), 135 deletions(-) diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c index 6ba27b97fe2..470b13480d8 100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@ -138,7 +138,7 @@ create_pipeline(struct anv_device *device, VK_NULL_HANDLE, &(VkGraphicsPipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, + .stageCount = fs_nir ? 2 : 1, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -430,17 +430,13 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer, static void -build_depthstencil_shaders(struct nir_shader **out_vs, - struct nir_shader **out_fs) +build_depthstencil_shader(struct nir_shader **out_vs) { nir_builder vs_b; - nir_builder fs_b; nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); - nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); - fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs"); const struct glsl_type *position_type = glsl_vec4_type(); @@ -457,7 +453,6 @@ build_depthstencil_shaders(struct nir_shader **out_vs, nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); *out_vs = vs_b.shader; - *out_fs = fs_b.shader; } static VkResult @@ -466,9 +461,8 @@ create_depthstencil_pipeline(struct anv_device *device, struct anv_pipeline **pipeline) { struct nir_shader *vs_nir; - struct nir_shader *fs_nir; - build_depthstencil_shaders(&vs_nir, &fs_nir); + build_depthstencil_shader(&vs_nir); const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -522,7 +516,7 @@ create_depthstencil_pipeline(struct anv_device *device, .pAttachments = NULL, }; - return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state, + return create_pipeline(device, vs_nir, NULL, &vi_state, &ds_state, &cb_state, &device->meta_state.alloc, /*use_repclear*/ true, pipeline); } diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c index d66987f1a8c..f52b78628cc 100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@ -1091,6 +1091,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; pipeline->active_stages = 0; pipeline->total_scratch = 0; diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c index 2be71a05af8..b23bb4b8895 100644 --- a/src/vulkan/gen8_pipeline.c +++ b/src/vulkan/gen8_pipeline.c @@ -475,142 +475,150 @@ genX(graphics_pipeline_create)( const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - /* TODO: We should clean this up. Among other things, this is mostly - * shared with other gens. - */ - const struct brw_vue_map *fs_input_map; - if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &vue_prog_data->vue_map; - else - fs_input_map = &gs_prog_data->base.vue_map; - - struct GENX(3DSTATE_SBE_SWIZ) swiz = { - GENX(3DSTATE_SBE_SWIZ_header), - }; + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = false); + } else { + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. + */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; - int max_source_attr = 0; - for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = wm_prog_data->urb_setup[attr]; - - if (input_index < 0) - continue; - - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); - - if (input_index >= 16) - continue; - - if (source_attr == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. It could be that it's a - * regular varying read by the fragment shader but not written by the - * vertex shader or it's gl_PrimitiveID. In the first case the value - * is undefined, in the second it needs to be gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - } else { - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. - */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. + */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } } - } - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .AttributeSwizzleEnable = true, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2), - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), + .AttributeSwizzleEnable = true, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = + DIV_ROUND_UP(max_source_attr + 1, 2), + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, #if ANV_GEN >= 9 - .Attribute0ActiveComponentFormat = ACF_XYZW, - .Attribute1ActiveComponentFormat = ACF_XYZW, - .Attribute2ActiveComponentFormat = ACF_XYZW, - .Attribute3ActiveComponentFormat = ACF_XYZW, - .Attribute4ActiveComponentFormat = ACF_XYZW, - .Attribute5ActiveComponentFormat = ACF_XYZW, - .Attribute6ActiveComponentFormat = ACF_XYZW, - .Attribute7ActiveComponentFormat = ACF_XYZW, - .Attribute8ActiveComponentFormat = ACF_XYZW, - .Attribute9ActiveComponentFormat = ACF_XYZW, - .Attribute10ActiveComponentFormat = ACF_XYZW, - .Attribute11ActiveComponentFormat = ACF_XYZW, - .Attribute12ActiveComponentFormat = ACF_XYZW, - .Attribute13ActiveComponentFormat = ACF_XYZW, - .Attribute14ActiveComponentFormat = ACF_XYZW, - .Attribute15ActiveComponentFormat = ACF_XYZW, - /* wow, much field, very attribute */ - .Attribute16ActiveComponentFormat = ACF_XYZW, - .Attribute17ActiveComponentFormat = ACF_XYZW, - .Attribute18ActiveComponentFormat = ACF_XYZW, - .Attribute19ActiveComponentFormat = ACF_XYZW, - .Attribute20ActiveComponentFormat = ACF_XYZW, - .Attribute21ActiveComponentFormat = ACF_XYZW, - .Attribute22ActiveComponentFormat = ACF_XYZW, - .Attribute23ActiveComponentFormat = ACF_XYZW, - .Attribute24ActiveComponentFormat = ACF_XYZW, - .Attribute25ActiveComponentFormat = ACF_XYZW, - .Attribute26ActiveComponentFormat = ACF_XYZW, - .Attribute27ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute30ActiveComponentFormat = ACF_XYZW, + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, #endif - ); + ); - uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GENX(3DSTATE_SBE_SWIZ_length)); - GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); - const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), - - .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - bool per_sample_ps = false; - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps, + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps, #if ANV_GEN >= 9 - .PixelShaderPullsBary = wm_prog_data->pulls_bary, - .InputCoverageMaskState = ICMS_NONE + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = ICMS_NONE #endif - ); + ); + } *pPipeline = anv_pipeline_to_handle(pipeline); -- 2.30.2