From 9fe9db8031f4979fde0849855c6aca1a83432f4e Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 16 Jan 2017 11:51:50 +0100 Subject: [PATCH] anv: set UAV coherence required bit when needed The same we do in the OpenGL driver (comment copied from there). This is required to ensure that we execute the fragment shader stage when side-effects (such as image or ssbo stores) are present but there are no color writes. I found this while writing a test to check rendering to a framebuffer without attachments where the fragment shader does not produce any color outputs but writes to an image via imageStore(). Without this patch the fragment shader does not execute and the image is not written, which is not correct. Reviewed-by: Jason Ekstrand --- src/intel/vulkan/genX_pipeline.c | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index a537a40905c..9d28466f796 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1327,6 +1327,26 @@ emit_3dstate_ps(struct anv_pipeline *pipeline, } } +static inline bool +has_color_buffer_write_enabled(const struct anv_pipeline *pipeline) +{ + const struct anv_shader_bin *shader_bin = + pipeline->shaders[MESA_SHADER_FRAGMENT]; + if (!shader_bin) + return false; + + const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map; + for (int i = 0; i < bind_map->surface_count; i++) { + if (bind_map->surface_to_descriptor[i].set != + ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) + continue; + if (bind_map->surface_to_descriptor[i].index != UINT8_MAX) + return true; + } + + return false; +} + #if GEN_GEN >= 8 static void emit_3dstate_ps_extra(struct anv_pipeline *pipeline, @@ -1357,6 +1377,37 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline, ps.PixelShaderKillsPixel = subpass->has_ds_self_dep || wm_prog_data->uses_kill; + /* The stricter cross-primitive coherency guarantees that the hardware + * gives us with the "Accesses UAV" bit set for at least one shader stage + * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are + * redundant within the current image, atomic counter and SSBO GL APIs, + * which all have very loose ordering and coherency requirements and + * generally rely on the application to insert explicit barriers when a + * shader invocation is expected to see the memory writes performed by the + * invocations of some previous primitive. Regardless of the value of + * "UAV coherency required", the "Accesses UAV" bits will implicitly cause + * an in most cases useless DC flush when the lowermost stage with the bit + * set finishes execution. + * + * It would be nice to disable it, but in some cases we can't because on + * Gen8+ it also has an influence on rasterization via the PS UAV-only + * signal (which could be set independently from the coherency mechanism + * in the 3DSTATE_WM command on Gen7), and because in some cases it will + * determine whether the hardware skips execution of the fragment shader + * or not via the ThreadDispatchEnable signal. However if we know that + * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and + * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any + * difference so we may just disable it here. + * + * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't + * take into account KillPixels when no depth or stencil writes are + * enabled. In order for occlusion queries to work correctly with no + * attachments, we need to force-enable here. + */ + if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && + !has_color_buffer_write_enabled(pipeline)) + ps.PixelShaderHasUAV = true; + #if GEN_GEN >= 9 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary; ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ? -- 2.30.2