From 8c053e5faded7b57fdd117ed86d572e0104c06bf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 30 Jan 2020 18:56:22 -0500 Subject: [PATCH] mesa: allow out-of-order drawing to optimize immediate mode if it's safe This increases performance by 11-13% in Viewperf11/Catia - first scene. Set allow_draw_out_of_order=true to enable this. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- .../auxiliary/pipe-loader/driinfo_gallium.h | 1 + src/gallium/include/state_tracker/st_api.h | 1 + src/gallium/state_trackers/dri/dri_screen.c | 2 + src/mesa/main/blend.c | 4 + src/mesa/main/context.c | 1 + src/mesa/main/context.h | 10 +- src/mesa/main/depth.c | 3 + src/mesa/main/enable.c | 5 + src/mesa/main/fbobject.c | 1 + src/mesa/main/framebuffer.c | 1 + src/mesa/main/mtypes.h | 4 + src/mesa/main/pipelineobj.c | 1 + src/mesa/main/shaderapi.c | 1 + src/mesa/main/state.c | 91 +++++++++++++++++++ src/mesa/main/state.h | 3 + src/mesa/state_tracker/st_extensions.c | 2 + src/util/xmlpool/t_options.h | 5 + 17 files changed, 134 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h index bac60215d15..72e979f1ec7 100644 --- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h @@ -31,6 +31,7 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_FORCE_GLSL_ABS_SQRT("false") DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false") DRI_CONF_ALLOW_GLSL_LAYOUT_QUALIFIER_ON_FUNCTION_PARAMETERS("false") + DRI_CONF_ALLOW_DRAW_OUT_OF_ORDER("false") DRI_CONF_FORCE_COMPAT_PROFILE("false") DRI_CONF_FORCE_GL_VENDOR() DRI_CONF_SECTION_END diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 0b953d6fb38..2d351e0b961 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -234,6 +234,7 @@ struct st_config_options bool force_glsl_abs_sqrt; bool 
allow_glsl_cross_stage_interpolation_mismatch; bool allow_glsl_layout_qualifier_on_function_parameters; + bool allow_draw_out_of_order; char *force_gl_vendor; unsigned char config_options_sha1[20]; }; diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 5a9acd818d0..943d6fe0ba8 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -92,6 +92,8 @@ dri_fill_st_options(struct dri_screen *screen) driQueryOptionb(optionCache, "allow_glsl_cross_stage_interpolation_mismatch"); options->allow_glsl_layout_qualifier_on_function_parameters = driQueryOptionb(optionCache, "allow_glsl_layout_qualifier_on_function_parameters"); + options->allow_draw_out_of_order = + driQueryOptionb(optionCache, "allow_draw_out_of_order"); char *vendor_str = driQueryOptionstr(optionCache, "force_gl_vendor"); /* not an empty string */ diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index 34e8d11569d..0dc2fa171c3 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -35,6 +35,7 @@ #include "enums.h" #include "macros.h" #include "mtypes.h" +#include "state.h" @@ -905,6 +906,7 @@ logic_op(struct gl_context *ctx, GLenum opcode, bool no_error) ctx->NewDriverState |= ctx->DriverFlags.NewLogicOp; ctx->Color.LogicOp = opcode; ctx->Color._LogicOp = color_logicop_mapping[opcode & 0x0f]; + _mesa_update_allow_draw_out_of_order(ctx); if (ctx->Driver.LogicOpcode) ctx->Driver.LogicOpcode(ctx, ctx->Color._LogicOp); @@ -991,6 +993,7 @@ _mesa_ColorMask( GLboolean red, GLboolean green, FLUSH_VERTICES(ctx, ctx->DriverFlags.NewColorMask ? 
0 : _NEW_COLOR); ctx->NewDriverState |= ctx->DriverFlags.NewColorMask; ctx->Color.ColorMask = mask; + _mesa_update_allow_draw_out_of_order(ctx); if (ctx->Driver.ColorMask) ctx->Driver.ColorMask( ctx, red, green, blue, alpha ); @@ -1027,6 +1030,7 @@ _mesa_ColorMaski(GLuint buf, GLboolean red, GLboolean green, ctx->NewDriverState |= ctx->DriverFlags.NewColorMask; ctx->Color.ColorMask &= ~(0xf << (4 * buf)); ctx->Color.ColorMask |= mask << (4 * buf); + _mesa_update_allow_draw_out_of_order(ctx); } diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 9023394824e..c183378d1d3 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1765,6 +1765,7 @@ _mesa_make_current( struct gl_context *newCtx, * changed since the last time this FBO was bound). */ _mesa_update_draw_buffers(newCtx); + _mesa_update_allow_draw_out_of_order(newCtx); } if (!newCtx->ReadBuffer || _mesa_is_winsys_fbo(newCtx->ReadBuffer)) { _mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer); diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h index 58123155002..bf39ef2d08e 100644 --- a/src/mesa/main/context.h +++ b/src/mesa/main/context.h @@ -244,8 +244,14 @@ do { \ do { \ if (MESA_VERBOSE & VERBOSE_STATE) \ _mesa_debug(ctx, "FLUSH_FOR_DRAW in %s\n", __func__); \ - if (ctx->Driver.NeedFlush) \ - vbo_exec_FlushVertices(ctx, ctx->Driver.NeedFlush); \ + if (ctx->Driver.NeedFlush) { \ + if (ctx->_AllowDrawOutOfOrder) { \ + if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) \ + vbo_exec_FlushVertices(ctx, FLUSH_UPDATE_CURRENT); \ + } else { \ + vbo_exec_FlushVertices(ctx, ctx->Driver.NeedFlush); \ + } \ + } \ } while (0) /** diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c index 930f5e816f9..27bd4958253 100644 --- a/src/mesa/main/depth.c +++ b/src/mesa/main/depth.c @@ -30,6 +30,7 @@ #include "enums.h" #include "macros.h" #include "mtypes.h" +#include "state.h" /**********************************************************************/ @@ -83,6 +84,7 @@ 
depth_func(struct gl_context *ctx, GLenum func, bool no_error) FLUSH_VERTICES(ctx, ctx->DriverFlags.NewDepth ? 0 : _NEW_DEPTH); ctx->NewDriverState |= ctx->DriverFlags.NewDepth; ctx->Depth.Func = func; + _mesa_update_allow_draw_out_of_order(ctx); if (ctx->Driver.DepthFunc) ctx->Driver.DepthFunc(ctx, func); @@ -128,6 +130,7 @@ _mesa_DepthMask( GLboolean flag ) FLUSH_VERTICES(ctx, ctx->DriverFlags.NewDepth ? 0 : _NEW_DEPTH); ctx->NewDriverState |= ctx->DriverFlags.NewDepth; ctx->Depth.Mask = flag; + _mesa_update_allow_draw_out_of_order(ctx); if (ctx->Driver.DepthMask) ctx->Driver.DepthMask( ctx, flag ); diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 5fc8bdac63c..573643b419b 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -425,6 +425,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) _mesa_flush_vertices_for_blend_adv(ctx, newEnabled, ctx->Color._AdvancedBlendMode); ctx->Color.BlendEnabled = newEnabled; + _mesa_update_allow_draw_out_of_order(ctx); } } break; @@ -499,6 +500,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) FLUSH_VERTICES(ctx, ctx->DriverFlags.NewDepth ? 0 : _NEW_DEPTH); ctx->NewDriverState |= ctx->DriverFlags.NewDepth; ctx->Depth.Test = state; + _mesa_update_allow_draw_out_of_order(ctx); break; case GL_DEBUG_OUTPUT: case GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB: @@ -604,6 +606,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) FLUSH_VERTICES(ctx, ctx->DriverFlags.NewLogicOp ? 0 : _NEW_COLOR); ctx->NewDriverState |= ctx->DriverFlags.NewLogicOp; ctx->Color.ColorLogicOpEnabled = state; + _mesa_update_allow_draw_out_of_order(ctx); break; case GL_MAP1_COLOR_4: if (ctx->API != API_OPENGL_COMPAT) @@ -840,6 +843,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) FLUSH_VERTICES(ctx, ctx->DriverFlags.NewStencil ? 
0 : _NEW_STENCIL); ctx->NewDriverState |= ctx->DriverFlags.NewStencil; ctx->Stencil.Enabled = state; + _mesa_update_allow_draw_out_of_order(ctx); break; case GL_TEXTURE_1D: if (ctx->API != API_OPENGL_COMPAT) @@ -1349,6 +1353,7 @@ _mesa_set_enablei(struct gl_context *ctx, GLenum cap, _mesa_flush_vertices_for_blend_adv(ctx, enabled, ctx->Color._AdvancedBlendMode); ctx->Color.BlendEnabled = enabled; + _mesa_update_allow_draw_out_of_order(ctx); } break; case GL_SCISSOR_TEST: diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index a34d5099688..3d35b9bdc33 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -3042,6 +3042,7 @@ _mesa_bind_framebuffers(struct gl_context *ctx, check_begin_texture_render(ctx, newDrawFb); _mesa_reference_framebuffer(&ctx->DrawBuffer, newDrawFb); + _mesa_update_allow_draw_out_of_order(ctx); } if ((bindDrawBuf || bindReadBuf) && ctx->Driver.BindFramebuffer) { diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 8e7778019f8..1c72f446b0d 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -502,6 +502,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, } compute_depth_max(fb); + _mesa_update_allow_draw_out_of_order(ctx); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 80253cf8296..7b52a2528fd 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4182,6 +4182,9 @@ struct gl_constants /** Whether the driver can handle MultiDrawElements with non-VBO indices. */ bool MultiDrawWithUserIndices; + /** Whether out-of-order draw (Begin/End) optimizations are allowed. */ + bool AllowDrawOutOfOrder; + /** GL_ARB_gl_spirv */ struct spirv_supported_capabilities SpirVCapabilities; @@ -5144,6 +5147,7 @@ struct gl_context struct gl_driver_flags DriverFlags; GLboolean ViewportInitialized; /**< has viewport size been initialized? 
*/ + GLboolean _AllowDrawOutOfOrder; GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */ diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 1fe1205fb52..c0b6871c7b3 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -534,6 +534,7 @@ _mesa_bind_pipeline(struct gl_context *ctx, } _mesa_update_vertex_processing_mode(ctx); + _mesa_update_allow_draw_out_of_order(ctx); } } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index ab81775f57d..dfed487cb48 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2547,6 +2547,7 @@ _mesa_use_program(struct gl_context *ctx, gl_shader_stage stage, &shTarget->ReferencedPrograms[stage], shProg); _mesa_reference_program(ctx, target, prog); + _mesa_update_allow_draw_out_of_order(ctx); if (stage == MESA_SHADER_VERTEX) _mesa_update_vertex_processing_mode(ctx); return; diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 3e2eb28dcc5..f38e7b138c4 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -56,6 +56,97 @@ #include "blend.h" +void +_mesa_update_allow_draw_out_of_order(struct gl_context *ctx) +{ + /* Out-of-order drawing is useful when vertex array draws and immediate + * mode are interleaved. + * + * Example with 3 draws: + * glBegin(); + * glVertex(); + * glEnd(); + * glDrawElements(); + * glBegin(); + * glVertex(); + * glEnd(); + * + * Out-of-order drawing changes the execution order like this: + * glDrawElements(); + * glBegin(); + * glVertex(); + * glVertex(); + * glEnd(); + * + * If out-of-order draws are enabled, immediate mode vertices are not + * flushed before glDrawElements, resulting in fewer draws and lower CPU + * overhead. This helps workstation applications. + * + * This is a simplified version of out-of-order determination to catch + * common cases. + * + * RadeonSI has a complete and more complicated out-of-order determination + * for driver-internal reasons. 
+ */ + /* Only the compatibility profile with immediate mode needs this. */ + if (ctx->API != API_OPENGL_COMPAT || !ctx->Const.AllowDrawOutOfOrder) + return; + + /* If all of these are NULL, GLSL is disabled. */ + struct gl_program *vs = + ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + struct gl_program *tcs = + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + struct gl_program *tes = + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + struct gl_program *gs = + ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + struct gl_program *fs = + ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + GLenum16 depth_func = ctx->Depth.Func; + + /* Z fighting and any primitives with equal Z shouldn't be reordered + * with LESS/LEQUAL/GREATER/GEQUAL functions. + * + * When drawing 2 primitives with equal Z: + * - with LEQUAL/GEQUAL, the last primitive wins the Z test. + * - with LESS/GREATER, the first primitive wins the Z test. + * + * Here we ignore that on the basis that such cases don't occur in real + * apps, and when they do occur, they occur with blending where out-of-order + * drawing is always disabled. + */ + bool previous_state = ctx->_AllowDrawOutOfOrder; + ctx->_AllowDrawOutOfOrder = + ctx->DrawBuffer && + ctx->DrawBuffer->Visual.depthBits && + ctx->Depth.Test && + ctx->Depth.Mask && + (depth_func == GL_NEVER || + depth_func == GL_LESS || + depth_func == GL_LEQUAL || + depth_func == GL_GREATER || + depth_func == GL_GEQUAL) && + (!ctx->DrawBuffer->Visual.stencilBits || + !ctx->Stencil.Enabled) && + (!ctx->Color.ColorMask || + (!ctx->Color.BlendEnabled && + (!ctx->Color.ColorLogicOpEnabled || + ctx->Color._LogicOp == COLOR_LOGICOP_COPY))) && + (!vs || !vs->info.writes_memory) && + (!tes || !tes->info.writes_memory) && + (!tcs || !tcs->info.writes_memory) && + (!gs || !gs->info.writes_memory) && + (!fs || !fs->info.writes_memory || !fs->info.fs.early_fragment_tests); + + /* If we are disabling out-of-order drawing, we need to flush queued + * vertices. 
+ */ + if (previous_state && !ctx->_AllowDrawOutOfOrder) + FLUSH_VERTICES(ctx, 0); +} + + /** * Update the ctx->*Program._Current pointers to point to the * current/active programs. diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h index b3ea28d0968..fd2877814dd 100644 --- a/src/mesa/main/state.h +++ b/src/mesa/main/state.h @@ -28,6 +28,9 @@ #include "mtypes.h" +extern void +_mesa_update_allow_draw_out_of_order(struct gl_context *ctx); + extern void _mesa_update_state(struct gl_context *ctx); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 0c03c2fc221..b52606398b3 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -1718,4 +1718,6 @@ void st_init_extensions(struct pipe_screen *screen, consts->SpirVExtensions = CALLOC_STRUCT(spirv_supported_extensions); _mesa_fill_supported_spirv_extensions(consts->SpirVExtensions, spirv_caps); } + + consts->AllowDrawOutOfOrder = options->allow_draw_out_of_order; } diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h index a00c937539d..bfa3a80b6ac 100644 --- a/src/util/xmlpool/t_options.h +++ b/src/util/xmlpool/t_options.h @@ -145,6 +145,11 @@ DRI_CONF_OPT_BEGIN_B(allow_glsl_layout_qualifier_on_function_parameters, def) \ DRI_CONF_DESC(en,gettext("Allow layout qualifiers on function parameters.")) \ DRI_CONF_OPT_END +#define DRI_CONF_ALLOW_DRAW_OUT_OF_ORDER(def) \ +DRI_CONF_OPT_BEGIN_B(allow_draw_out_of_order, def) \ + DRI_CONF_DESC(en,gettext("Allow out-of-order draw optimizations. Set when Z fighting doesn't have to be accurate.")) \ +DRI_CONF_OPT_END + #define DRI_CONF_FORCE_GL_VENDOR(def) \ DRI_CONF_OPT_BEGIN(force_gl_vendor, string, def) \ DRI_CONF_DESC(en,gettext("Allow GPU vendor to be overridden.")) \ -- 2.30.2