X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_nir_lower_blend.c;h=8cad4b717e3e7b47d81a56a660207ab91846f619;hb=7c5eda6f4e0801b97aa8b6c87a82c33e1d1202c5;hp=17b524653bbfd00d0d91fe6474e39fbf23bb49ec;hpb=6bd9e0351205dc475f45b58979702b5cf414aa07;p=mesa.git

diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 17b524653bb..8cad4b717e3 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -29,6 +29,10 @@
  * from the tile buffer after having waited for the scoreboard (which is
  * handled by vc4_qpu_emit.c), then do math using your output color and that
  * destination value, and update the output color appropriately.
+ *
+ * Once this pass is done, the color write will either have one component (for
+ * single sample) with packed argb8888, or 4 components with the per-sample
+ * argb8888 result.
  */
 
 /**
@@ -37,19 +41,28 @@
  */
 #include "util/u_format.h"
 #include "vc4_qir.h"
-#include "glsl/nir/nir_builder.h"
+#include "compiler/nir/nir_builder.h"
 #include "vc4_context.h"
 
+static bool
+blend_depends_on_dst_color(struct vc4_compile *c)
+{
+        return (c->fs_key->blend.blend_enable ||
+                c->fs_key->blend.colormask != 0xf ||
+                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
+}
+
 /** Emits a load of the previous fragment color from the tile buffer. */
 static nir_ssa_def *
-vc4_nir_get_dst_color(nir_builder *b)
+vc4_nir_get_dst_color(nir_builder *b, int sample)
 {
         nir_intrinsic_instr *load =
                 nir_intrinsic_instr_create(b->shader,
                                            nir_intrinsic_load_input);
         load->num_components = 1;
-        load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
-        nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+        load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
+        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
         nir_builder_instr_insert(b, &load->instr);
         return &load->dest.ssa;
 }
@@ -86,11 +99,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
 }
 
 static nir_ssa_def *
-vc4_blend_channel(nir_builder *b,
-                  nir_ssa_def **src,
-                  nir_ssa_def **dst,
-                  unsigned factor,
-                  int channel)
+vc4_blend_channel_f(nir_builder *b,
+                    nir_ssa_def **src,
+                    nir_ssa_def **dst,
+                    unsigned factor,
+                    int channel)
 {
         switch(factor) {
         case PIPE_BLENDFACTOR_ONE:
@@ -146,8 +159,75 @@ vc4_blend_channel(nir_builder *b,
 }
 
 static nir_ssa_def *
-vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
-               unsigned func)
+vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
+                        int chan)
+{
+        unsigned chan_mask = 0xff << (chan * 8);
+        return nir_ior(b,
+                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
+                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
+}
+
+static nir_ssa_def *
+vc4_blend_channel_i(nir_builder *b,
+                    nir_ssa_def *src,
+                    nir_ssa_def *dst,
+                    nir_ssa_def *src_a,
+                    nir_ssa_def *dst_a,
+                    unsigned factor,
+                    int a_chan)
+{
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ONE:
+                return nir_imm_int(b, ~0);
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return src;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return src_a;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return dst_a;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return dst;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                return vc4_nir_set_packed_chan(b,
+                                               nir_umin_4x8(b,
+                                                            src_a,
+                                                            nir_inot(b, dst_a)),
+                                               nir_imm_int(b, ~0),
+                                               a_chan);
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
+        case PIPE_BLENDFACTOR_ZERO:
+                return nir_imm_int(b, 0);
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return nir_inot(b, src);
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return nir_inot(b, src_a);
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return nir_inot(b, dst_a);
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return nir_inot(b, dst);
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
+
+        default:
+        case PIPE_BLENDFACTOR_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_SRC1_ALPHA:
+        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend factor %d\n", factor);
+                return nir_imm_int(b, ~0);
+        }
+}
+
+static nir_ssa_def *
+vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+                 unsigned func)
 {
         switch (func) {
         case PIPE_BLEND_ADD:
@@ -169,9 +249,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
         }
 }
 
+static nir_ssa_def *
+vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+                 unsigned func)
+{
+        switch (func) {
+        case PIPE_BLEND_ADD:
+                return nir_usadd_4x8(b, src, dst);
+        case PIPE_BLEND_SUBTRACT:
+                return nir_ussub_4x8(b, src, dst);
+        case PIPE_BLEND_REVERSE_SUBTRACT:
+                return nir_ussub_4x8(b, dst, src);
+        case PIPE_BLEND_MIN:
+                return nir_umin_4x8(b, src, dst);
+        case PIPE_BLEND_MAX:
+                return nir_umax_4x8(b, src, dst);
+
+        default:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend func %d\n", func);
+                return src;
+
+        }
+}
+
 static void
-vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
-                nir_ssa_def **src_color, nir_ssa_def **dst_color)
+vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
 {
         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
 
@@ -192,22 +296,108 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                   blend->alpha_dst_factor);
                 src_blend[i] = nir_fmul(b, src_color[i],
-                                        vc4_blend_channel(b,
-                                                          src_color, dst_color,
-                                                          src_factor, i));
+                                        vc4_blend_channel_f(b,
+                                                            src_color, dst_color,
+                                                            src_factor, i));
                 dst_blend[i] = nir_fmul(b, dst_color[i],
-                                        vc4_blend_channel(b,
-                                                          src_color, dst_color,
-                                                          dst_factor, i));
+                                        vc4_blend_channel_f(b,
+                                                            src_color, dst_color,
+                                                            dst_factor, i));
         }
 
         for (int i = 0; i < 4; i++) {
-                result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
-                                           ((i != 3) ? blend->rgb_func :
-                                            blend->alpha_func));
+                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
+                                             ((i != 3) ? blend->rgb_func :
+                                              blend->alpha_func));
         }
 }
 
+static nir_ssa_def *
+vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
+{
+        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
+        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
+}
+
+static nir_ssa_def *
+vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
+                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
+                  nir_ssa_def *src_float_a)
+{
+        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+        if (!blend->blend_enable)
+                return src_color;
+
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
+        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
+        nir_ssa_def *dst_a;
+        int alpha_chan;
+        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
+                if (format_swiz[alpha_chan] == 3)
+                        break;
+        }
+        if (alpha_chan != 4) {
+                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
+                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
+                                                              shift), imm_0xff));
+        } else {
+                dst_a = nir_imm_int(b, ~0);
+        }
+
+        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
+                                                      src_color, dst_color,
+                                                      src_a, dst_a,
+                                                      blend->rgb_src_factor,
+                                                      alpha_chan);
+        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
+                                                      src_color, dst_color,
+                                                      src_a, dst_a,
+                                                      blend->rgb_dst_factor,
+                                                      alpha_chan);
+
+        if (alpha_chan != 4 &&
+            blend->alpha_src_factor != blend->rgb_src_factor) {
+                nir_ssa_def *src_alpha_factor =
+                        vc4_blend_channel_i(b,
+                                            src_color, dst_color,
+                                            src_a, dst_a,
+                                            blend->alpha_src_factor,
+                                            alpha_chan);
+                src_factor = vc4_nir_set_packed_chan(b, src_factor,
+                                                     src_alpha_factor,
+                                                     alpha_chan);
+        }
+        if (alpha_chan != 4 &&
+            blend->alpha_dst_factor != blend->rgb_dst_factor) {
+                nir_ssa_def *dst_alpha_factor =
+                        vc4_blend_channel_i(b,
+                                            src_color, dst_color,
+                                            src_a, dst_a,
+                                            blend->alpha_dst_factor,
+                                            alpha_chan);
+                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
+                                                     dst_alpha_factor,
+                                                     alpha_chan);
+        }
+        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
+        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
+
+        nir_ssa_def *result =
+                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
+        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
+                nir_ssa_def *result_a = vc4_blend_func_i(b,
+                                                         src_blend,
+                                                         dst_blend,
+                                                         blend->alpha_func);
+                result = vc4_nir_set_packed_chan(b, result, result_a,
+                                                 alpha_chan);
+        }
+        return result;
+}
+
 static nir_ssa_def *
 vc4_logicop(nir_builder *b, int logicop_func,
             nir_ssa_def *src, nir_ssa_def *dst)
@@ -299,61 +489,79 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
         nir_builder_instr_insert(b, &discard->instr);
 }
 
-static void
-vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
-                          nir_intrinsic_instr *intr)
+static nir_ssa_def *
+vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
+                         nir_ssa_def **colors)
+{
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+        nir_ssa_def *swizzled[4];
+        for (int i = 0; i < 4; i++) {
+                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
+                                                           format_swiz[i]);
+        }
+
+        return nir_pack_unorm_4x8(b,
+                                  nir_vec4(b,
+                                           swizzled[0], swizzled[1],
+                                           swizzled[2], swizzled[3]));
+
+}
+
+static nir_ssa_def *
+vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
+                       int sample)
 {
         enum pipe_format color_format = c->fs_key->color_format;
         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+        bool srgb = util_format_is_srgb(color_format);
 
         /* Pull out the float src/dst color components. */
-        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
         for (unsigned i = 0; i < 4; i++) {
-                src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
-                unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+                src_color[i] = nir_channel(b, src, i);
+                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
         }
 
-        /* Unswizzle the destination color. */
-        nir_ssa_def *dst_color[4];
-        for (unsigned i = 0; i < 4; i++) {
-                dst_color[i] = vc4_nir_get_swizzled_channel(b,
-                                                            unpacked_dst_color,
-                                                            format_swiz[i]);
-        }
+        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
+                src_color[3] = nir_imm_float(b, 1.0);
 
         vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
 
-        /* Turn dst color to linear. */
-        if (util_format_is_srgb(color_format)) {
+        nir_ssa_def *packed_color;
+        if (srgb) {
+                /* Unswizzle the destination color. */
+                nir_ssa_def *dst_color[4];
+                for (unsigned i = 0; i < 4; i++) {
+                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
+                                                                    unpacked_dst_color,
+                                                                    format_swiz[i]);
+                }
+
+                /* Turn dst color to linear. */
                 for (int i = 0; i < 3; i++)
                         dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
-        }
 
-        nir_ssa_def *blend_color[4];
-        vc4_do_blending(c, b, blend_color, src_color, dst_color);
+                nir_ssa_def *blend_color[4];
+                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
 
-        /* sRGB encode the output color */
-        if (util_format_is_srgb(color_format)) {
+                /* sRGB encode the output color */
                 for (int i = 0; i < 3; i++)
                         blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
-        }
 
-        nir_ssa_def *swizzled_outputs[4];
-        for (int i = 0; i < 4; i++) {
-                swizzled_outputs[i] =
-                        vc4_nir_get_swizzled_channel(b, blend_color,
-                                                     format_swiz[i]);
-        }
+                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
+        } else {
+                nir_ssa_def *packed_src_color =
+                        vc4_nir_swizzle_and_pack(c, b, src_color);
 
-        nir_ssa_def *packed_color =
-                nir_pack_unorm_4x8(b,
-                                   nir_vec4(b,
-                                            swizzled_outputs[0],
-                                            swizzled_outputs[1],
-                                            swizzled_outputs[2],
-                                            swizzled_outputs[3]));
+                packed_color =
+                        vc4_do_blending_i(c, b,
+                                          packed_src_color, packed_dst_color,
+                                          src_color[3]);
+        }
 
         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                    packed_color, packed_dst_color);
@@ -368,24 +576,107 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                         colormask &= ~(0xff << (i * 8));
                 }
         }
-        packed_color = nir_ior(b,
-                               nir_iand(b, packed_color,
-                                        nir_imm_int(b, colormask)),
-                               nir_iand(b, packed_dst_color,
-                                        nir_imm_int(b, ~colormask)));
 
-        /* Turn the old vec4 output into a store of the packed color. */
-        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
-                              nir_src_for_ssa(packed_color));
+        return nir_ior(b,
+                       nir_iand(b, packed_color,
+                                nir_imm_int(b, colormask)),
+                       nir_iand(b, packed_dst_color,
+                                nir_imm_int(b, ~colormask)));
+}
+
+static int
+vc4_nir_next_output_driver_location(nir_shader *s)
+{
+        int maxloc = -1;
+
+        nir_foreach_variable(var, &s->outputs)
+                maxloc = MAX2(maxloc, (int)var->data.driver_location);
+
+        return maxloc + 1;
+}
+
+static void
+vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
+                          nir_ssa_def *val)
+{
+        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
+                                                        glsl_uint_type(),
+                                                        "sample_mask");
+        sample_mask->data.driver_location =
+                vc4_nir_next_output_driver_location(c->s);
+        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
+
+        nir_intrinsic_instr *intr =
+                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
         intr->num_components = 1;
+        intr->const_index[0] = sample_mask->data.driver_location;
+
+        intr->src[0] = nir_src_for_ssa(val);
+        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+        nir_builder_instr_insert(b, &intr->instr);
 }
 
-static bool
-vc4_nir_lower_blend_block(nir_block *block, void *state)
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+                          nir_intrinsic_instr *intr)
 {
-        struct vc4_compile *c = state;
+        nir_ssa_def *frag_color = intr->src[0].ssa;
+
+        if (c->fs_key->sample_coverage) {
+                nir_intrinsic_instr *load =
+                        nir_intrinsic_instr_create(b->shader,
+                                                   nir_intrinsic_load_sample_mask_in);
+                load->num_components = 1;
+                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
+                nir_builder_instr_insert(b, &load->instr);
+
+                nir_ssa_def *bitmask = &load->dest.ssa;
+
+                vc4_nir_store_sample_mask(c, b, bitmask);
+        } else if (c->fs_key->sample_alpha_to_coverage) {
+                nir_ssa_def *a = nir_channel(b, frag_color, 3);
+
+                /* XXX: We should do a nice dither based on the fragment
+                 * coordinate, instead.
+                 */
+                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
+                nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
+                nir_ssa_def *bitmask = nir_isub(b,
+                                                nir_ishl(b,
+                                                         nir_imm_int(b, 1),
+                                                         num_bits),
+                                                nir_imm_int(b, 1));
+                vc4_nir_store_sample_mask(c, b, bitmask);
+        }
+
+        /* The TLB color read returns each sample in turn, so if our blending
+         * depends on the destination color, we're going to have to run the
+         * blending function separately for each destination sample value, and
+         * then output the per-sample color using TLB_COLOR_MS.
+         */
+        nir_ssa_def *blend_output;
+        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
+                c->msaa_per_sample_output = true;
+
+                nir_ssa_def *samples[4];
+                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
+                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
+                blend_output = nir_vec4(b,
+                                        samples[0], samples[1],
+                                        samples[2], samples[3]);
+        } else {
+                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
+        }
+
+        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+                              nir_src_for_ssa(blend_output));
+        intr->num_components = blend_output->num_components;
+}
 
-        nir_foreach_instr(block, instr) {
+static bool
+vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
+{
+        nir_foreach_instr_safe(instr, block) {
                 if (instr->type != nir_instr_type_intrinsic)
                         continue;
                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
@@ -417,14 +708,15 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
 }
 
 void
-vc4_nir_lower_blend(struct vc4_compile *c)
+vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
 {
-        nir_foreach_overload(c->s, overload) {
-                if (overload->impl) {
-                        nir_foreach_block(overload->impl,
-                                          vc4_nir_lower_blend_block, c);
+        nir_foreach_function(function, s) {
+                if (function->impl) {
+                        nir_foreach_block(block, function->impl) {
+                                vc4_nir_lower_blend_block(block, c);
+                        }
 
-                        nir_metadata_preserve(overload->impl,
+                        nir_metadata_preserve(function->impl,
                                               nir_metadata_block_index |
                                               nir_metadata_dominance);
                 }