radeonsi: allow out-of-order rasterization in commutative blending cases
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 18 Sep 2017 09:24:10 +0000 (11:24 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 18 Sep 2017 09:25:20 +0000 (11:25 +0200)
We do not enable this by default for additive blending, since it slightly
breaks OpenGL invariance guarantees due to non-determinism.

Still, there may be some applications that can benefit from white-listing
via the radeonsi_commutative_blend_add drirc setting without any real
visible artifacts.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
src/gallium/drivers/radeonsi/driinfo_radeonsi.h
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/util/xmlpool/t_options.h

index 8be85289a0c3f9366186eb0b5fe9432ddcb8f95e..989e5175cc0683272b1c86a43c03fc0533b31c80 100644 (file)
@@ -2,4 +2,5 @@
 DRI_CONF_SECTION_PERFORMANCE
     DRI_CONF_RADEONSI_ENABLE_SISCHED("false")
     DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false")
+    DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false")
 DRI_CONF_SECTION_END
index d6de152571752e31ce1f9e3f2862765255453ba9..372bc56ce76f9b1365ab9434da9eda2f2ab4aa3a 100644 (file)
@@ -1050,6 +1050,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
                                         !(sscreen->b.debug_flags & DBG_NO_OUT_OF_ORDER);
        sscreen->assume_no_z_fights =
                driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
+       sscreen->commutative_blend_add =
+               driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
        sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
                                            sscreen->b.family <= CHIP_POLARIS12) ||
                                           sscreen->b.family == CHIP_VEGA10 ||
index 3d33e4f0ffa71aada78c810ba2fdce5a6b6f2852..ce6aa3be96b27898642a41cd06a3d7a4369ed9e3 100644 (file)
@@ -96,6 +96,7 @@ struct si_screen {
        bool                            has_draw_indirect_multi;
        bool                            has_out_of_order_rast;
        bool                            assume_no_z_fights;
+       bool                            commutative_blend_add;
        bool                            has_msaa_sample_loc_bug;
        bool                            dpbb_allowed;
        bool                            dfsm_allowed;
index 66228af1d238e2193159d8a49f35b94865956098..96f9e44497783b8e6ceaec998639d5cf989a0452 100644 (file)
@@ -377,6 +377,48 @@ static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
        }
 }
 
+static void si_blend_check_commutativity(struct si_screen *sscreen,
+                                        struct si_state_blend *blend,
+                                        enum pipe_blend_func func,
+                                        enum pipe_blendfactor src,
+                                        enum pipe_blendfactor dst,
+                                        unsigned chanmask)
+{
+       /* Mark chanmask in blend->commutative_4bit if dst is ONE, src is in
+        * src_allowed (factors that do not depend on Dst), and func is MIN,
+        * MAX, or ADD with the commutative_blend_add setting enabled. */
+       static const uint32_t src_allowed =
+               (1u << PIPE_BLENDFACTOR_ONE) |
+               (1u << PIPE_BLENDFACTOR_SRC_COLOR) |
+               (1u << PIPE_BLENDFACTOR_SRC_ALPHA) |
+               (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
+               (1u << PIPE_BLENDFACTOR_CONST_COLOR) |
+               (1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
+               (1u << PIPE_BLENDFACTOR_SRC1_COLOR) |
+               (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
+               (1u << PIPE_BLENDFACTOR_ZERO) |
+               (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
+               (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) |
+               (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
+               (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) |
+               (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
+               (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
+
+       if (dst == PIPE_BLENDFACTOR_ONE &&
+           (src_allowed & (1u << src))) {
+               /* MIN and MAX always qualify. Addition is commutative, but
+                * floating point addition isn't associative: subtle changes
+                * can be introduced via different rounding. Out-of-order is
+                * also non-deterministic, which means that this breaks OpenGL
+                * invariance requirements. So only enable out-of-order
+                * additive blending if explicitly allowed by a setting.
+                */
+               if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||
+                   (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add))
+                       blend->commutative_4bit |= chanmask;
+       }
+}
+
 /**
  * Get rid of DST in the blend factors by commuting the operands:
  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
@@ -493,6 +535,11 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                        continue;
                }
 
+               si_blend_check_commutativity(sctx->screen, blend,
+                                            eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));
+               si_blend_check_commutativity(sctx->screen, blend,
+                                            eqA, srcA, dstA, 0x8 << (4 * i));
+
                /* Blending optimizations for RB+.
                 * These transformations don't change the behavior.
                 *
@@ -636,6 +683,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
            (!old_blend ||
             (old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
              old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
+             old_blend->commutative_4bit != blend->commutative_4bit ||
              old_blend->logicop_enable != blend->logicop_enable)))
                si_mark_atom_dirty(sctx, &sctx->msaa_config);
 }
@@ -3208,12 +3256,23 @@ static bool si_out_of_order_rasterization(struct si_context *sctx)
        if (!colormask)
                return true;
 
-       bool blend_enabled = (colormask & blend->blend_enable_4bit) != 0;
+       unsigned blendmask = colormask & blend->blend_enable_4bit;
 
-       if (blend_enabled)
-               return false; /* TODO */
+       if (blendmask) {
+               /* Only commutative blending. */
+               if (blendmask & ~blend->commutative_4bit)
+                       return false;
+
+               if (!dsa_order_invariant.pass_set)
+                       return false;
+       }
+
+       if (colormask & ~blendmask) {
+               if (!dsa_order_invariant.pass_last)
+                       return false;
+       }
 
-       return dsa_order_invariant.pass_last;
+       return true;
 }
 
 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
index 4f14f89166d24f19e673886afa52804463e2a01a..4388ea99daf2bdc8730210562e6d37142a85cd83 100644 (file)
@@ -55,6 +55,7 @@ struct si_state_blend {
        unsigned                cb_target_enabled_4bit;
        unsigned                blend_enable_4bit;
        unsigned                need_src_alpha_4bit;
+       unsigned                commutative_4bit;
        bool                    alpha_to_coverage:1;
        bool                    alpha_to_one:1;
        bool                    dual_src_blend:1;
index c92215183a595d6682a2d2aad2af311a72459aeb..214c7c359ee9407bf4b025676b6d6829a7931d4b 100644 (file)
@@ -443,3 +443,8 @@ DRI_CONF_OPT_END
 DRI_CONF_OPT_BEGIN_B(radeonsi_assume_no_z_fights, def) \
         DRI_CONF_DESC(en,gettext("Assume no Z fights (enables aggressive out-of-order rasterization to improve performance; may cause rendering errors)")) \
 DRI_CONF_OPT_END
+/* Boolean drirc option "radeonsi_commutative_blend_add"; def is its default. */
+#define DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD(def) \
+DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \
+        DRI_CONF_DESC(en,gettext("Commutative additive blending optimizations (may cause rendering errors)")) \
+DRI_CONF_OPT_END