tree-wide: replace MAYBE_UNUSED with ASSERTED

[mesa.git] / src / gallium / drivers / vc4 / vc4_nir_lower_blend.c
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c

index 0672a92226f24081421b70207e0367989d077872..ff6268f478a3929e14657bab11a5686b2a19d2cd 100644 (file)
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -29,6 +29,10 @@
   * from the tile buffer after having waited for the scoreboard (which is
   * handled by vc4_qpu_emit.c), then do math using your output color and that
   * destination value, and update the output color appropriately.
+ *
+ * Once this pass is done, the color write will have either one component of
+ * packed argb8888, or (when MSAA blending depends on the destination color)
+ * 4 components holding the per-sample packed argb8888 results.
   */
  
  /**
@@ -37,54 +41,33 @@
   */
  #include "util/u_format.h"
  #include "vc4_qir.h"
-#include "glsl/nir/nir_builder.h"
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
  #include "vc4_context.h"
  
+static bool
+blend_depends_on_dst_color(struct vc4_compile *c)
+{
+        return (c->fs_key->blend.blend_enable ||
+                c->fs_key->blend.colormask != 0xf ||
+                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
+}
+
  /** Emits a load of the previous fragment color from the tile buffer. */
  static nir_ssa_def *
-vc4_nir_get_dst_color(nir_builder *b)
+vc4_nir_get_dst_color(nir_builder *b, int sample)
  {
          nir_intrinsic_instr *load =
                  nir_intrinsic_instr_create(b->shader,
                                             nir_intrinsic_load_input);
          load->num_components = 1;
-        load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
-        nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+        nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
+        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
          nir_builder_instr_insert(b, &load->instr);
          return &load->dest.ssa;
  }
  
-static  nir_ssa_def *
-vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
-{
-        nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
-        nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
-        nir_ssa_def *high = nir_fpow(b,
-                                     nir_fmul(b,
-                                              nir_fadd(b, srgb,
-                                                       nir_imm_float(b, 0.055)),
-                                              nir_imm_float(b, 1.0 / 1.055)),
-                                     nir_imm_float(b, 2.4));
-
-        return nir_bcsel(b, is_low, low, high);
-}
-
-static  nir_ssa_def *
-vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
-{
-        nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
-        nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
-        nir_ssa_def *high = nir_fsub(b,
-                                     nir_fmul(b,
-                                              nir_imm_float(b, 1.055),
-                                              nir_fpow(b,
-                                                       linear,
-                                                       nir_imm_float(b, 0.41666))),
-                                     nir_imm_float(b, 0.055));
-
-        return nir_bcsel(b, is_low, low, high);
-}
-
  static nir_ssa_def *
  vc4_blend_channel_f(nir_builder *b,
                      nir_ssa_def **src,
@@ -114,9 +97,12 @@ vc4_blend_channel_f(nir_builder *b,
                          return nir_imm_float(b, 1.0);
                  }
          case PIPE_BLENDFACTOR_CONST_COLOR:
-                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
+                return nir_load_system_value(b,
+                                             nir_intrinsic_load_blend_const_color_r_float +
+                                             channel,
+                                             0, 32);
          case PIPE_BLENDFACTOR_CONST_ALPHA:
-                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
+                return nir_load_blend_const_color_a_float(b);
          case PIPE_BLENDFACTOR_ZERO:
                  return nir_imm_float(b, 0.0);
          case PIPE_BLENDFACTOR_INV_SRC_COLOR:
@@ -129,10 +115,13 @@ vc4_blend_channel_f(nir_builder *b,
                  return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
          case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                  return nir_fsub(b, nir_imm_float(b, 1.0),
-                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
+                                nir_load_system_value(b,
+                                                      nir_intrinsic_load_blend_const_color_r_float +
+                                                      channel,
+                                                      0, 32));
          case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                  return nir_fsub(b, nir_imm_float(b, 1.0),
-                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
+                                nir_load_blend_const_color_a_float(b));
  
          default:
          case PIPE_BLENDFACTOR_SRC1_COLOR:
@@ -183,9 +172,9 @@ vc4_blend_channel_i(nir_builder *b,
                                                 nir_imm_int(b, ~0),
                                                 a_chan);
          case PIPE_BLENDFACTOR_CONST_COLOR:
-                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
+                return nir_load_blend_const_color_rgba8888_unorm(b);
          case PIPE_BLENDFACTOR_CONST_ALPHA:
-                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
+                return nir_load_blend_const_color_aaaa8888_unorm(b);
          case PIPE_BLENDFACTOR_ZERO:
                  return nir_imm_int(b, 0);
          case PIPE_BLENDFACTOR_INV_SRC_COLOR:
@@ -197,9 +186,11 @@ vc4_blend_channel_i(nir_builder *b,
          case PIPE_BLENDFACTOR_INV_DST_COLOR:
                  return nir_inot(b, dst);
          case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
+                return nir_inot(b,
+                                nir_load_blend_const_color_rgba8888_unorm(b));
          case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
+                return nir_inot(b,
+                                nir_load_blend_const_color_aaaa8888_unorm(b));
  
          default:
          case PIPE_BLENDFACTOR_SRC1_COLOR:
@@ -428,54 +419,6 @@ vc4_logicop(nir_builder *b, int logicop_func,
          }
  }
  
-static nir_ssa_def *
-vc4_nir_pipe_compare_func(nir_builder *b, int func,
-                          nir_ssa_def *src0, nir_ssa_def *src1)
-{
-        switch (func) {
-        default:
-                fprintf(stderr, "Unknown compare func %d\n", func);
-                /* FALLTHROUGH */
-        case PIPE_FUNC_NEVER:
-                return nir_imm_int(b, 0);
-        case PIPE_FUNC_ALWAYS:
-                return nir_imm_int(b, ~0);
-        case PIPE_FUNC_EQUAL:
-                return nir_feq(b, src0, src1);
-        case PIPE_FUNC_NOTEQUAL:
-                return nir_fne(b, src0, src1);
-        case PIPE_FUNC_GREATER:
-                return nir_flt(b, src1, src0);
-        case PIPE_FUNC_GEQUAL:
-                return nir_fge(b, src0, src1);
-        case PIPE_FUNC_LESS:
-                return nir_flt(b, src0, src1);
-        case PIPE_FUNC_LEQUAL:
-                return nir_fge(b, src1, src0);
-        }
-}
-
-static void
-vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
-                                nir_ssa_def *alpha)
-{
-        if (!c->fs_key->alpha_test)
-                return;
-
-        nir_ssa_def *alpha_ref =
-                vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
-        nir_ssa_def *condition =
-                vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
-                                          alpha, alpha_ref);
-
-        nir_intrinsic_instr *discard =
-                nir_intrinsic_instr_create(b->shader,
-                                           nir_intrinsic_discard_if);
-        discard->num_components = 1;
-        discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
-        nir_builder_instr_insert(b, &discard->instr);
-}
-
  static nir_ssa_def *
  vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
                           nir_ssa_def **colors)
@@ -496,24 +439,25 @@ vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
  
  }
  
-static void
-vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
-                          nir_intrinsic_instr *intr)
+static nir_ssa_def *
+vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
+                       int sample)
  {
          enum pipe_format color_format = c->fs_key->color_format;
          const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
          bool srgb = util_format_is_srgb(color_format);
  
          /* Pull out the float src/dst color components. */
-        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
          nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
          nir_ssa_def *src_color[4], *unpacked_dst_color[4];
          for (unsigned i = 0; i < 4; i++) {
-                src_color[i] = nir_channel(b, intr->src[0].ssa, i);
+                src_color[i] = nir_channel(b, src, i);
                  unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
          }
  
-        vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
+        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
+                src_color[3] = nir_imm_float(b, 1.0);
  
          nir_ssa_def *packed_color;
          if (srgb) {
@@ -527,14 +471,14 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
  
                  /* Turn dst color to linear. */
                  for (int i = 0; i < 3; i++)
-                        dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
+                        dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
  
                  nir_ssa_def *blend_color[4];
                  vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
  
                  /* sRGB encode the output color */
                  for (int i = 0; i < 3; i++)
-                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
+                        blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
  
                  packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
          } else {
@@ -560,24 +504,96 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                          colormask &= ~(0xff << (i * 8));
                  }
          }
-        packed_color = nir_ior(b,
-                               nir_iand(b, packed_color,
-                                        nir_imm_int(b, colormask)),
-                               nir_iand(b, packed_dst_color,
-                                        nir_imm_int(b, ~colormask)));
  
-        /* Turn the old vec4 output into a store of the packed color. */
-        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
-                              nir_src_for_ssa(packed_color));
+        return nir_ior(b,
+                       nir_iand(b, packed_color,
+                                nir_imm_int(b, colormask)),
+                       nir_iand(b, packed_dst_color,
+                                nir_imm_int(b, ~colormask)));
+}
+
+static int
+vc4_nir_next_output_driver_location(nir_shader *s)
+{
+        int maxloc = -1;
+
+        nir_foreach_variable(var, &s->outputs)
+                maxloc = MAX2(maxloc, (int)var->data.driver_location);
+
+        return maxloc + 1;
+}
+
+static void
+vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
+                          nir_ssa_def *val)
+{
+        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
+                                                        glsl_uint_type(),
+                                                        "sample_mask");
+        sample_mask->data.driver_location =
+                vc4_nir_next_output_driver_location(c->s);
+        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
+
+        nir_intrinsic_instr *intr =
+                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
          intr->num_components = 1;
+        nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
+
+        intr->src[0] = nir_src_for_ssa(val);
+        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+        nir_builder_instr_insert(b, &intr->instr);
  }
  
-static bool
-vc4_nir_lower_blend_block(nir_block *block, void *state)
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+                          nir_intrinsic_instr *intr)
  {
-        struct vc4_compile *c = state;
+        nir_ssa_def *frag_color = intr->src[0].ssa;
+
+        if (c->fs_key->sample_alpha_to_coverage) {
+                nir_ssa_def *a = nir_channel(b, frag_color, 3);
+
+                /* XXX: This just sets the low (int)(a * VC4_MAX_SAMPLES) bits;
+                 * we should do a nice dither on the fragment coordinate instead.
+                 */
+                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
+                nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
+                nir_ssa_def *bitmask = nir_isub(b,
+                                                nir_ishl(b,
+                                                         nir_imm_int(b, 1),
+                                                         num_bits),
+                                                nir_imm_int(b, 1));
+                vc4_nir_store_sample_mask(c, b, bitmask);
+        }
  
-        nir_foreach_instr(block, instr) {
+        /* The TLB color read returns each sample in turn, so if our blending
+         * depends on the destination color, we're going to have to run the
+         * blending function separately for each destination sample value, and
+         * then output the per-sample color using TLB_COLOR_MS.
+         */
+        nir_ssa_def *blend_output;
+        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
+                c->msaa_per_sample_output = true;
+
+                nir_ssa_def *samples[4];
+                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
+                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
+                blend_output = nir_vec4(b,
+                                        samples[0], samples[1],
+                                        samples[2], samples[3]);
+        } else {
+                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
+        }
+
+        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+                              nir_src_for_ssa(blend_output));
+        intr->num_components = blend_output->num_components;
+}
+
+static bool
+vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
+{
+        nir_foreach_instr_safe(instr, block) {
                  if (instr->type != nir_instr_type_intrinsic)
                          continue;
                  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
@@ -586,7 +602,8 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
  
                  nir_variable *output_var = NULL;
                  nir_foreach_variable(var, &c->s->outputs) {
-                        if (var->data.driver_location == intr->const_index[0]) {
+                        if (var->data.driver_location ==
+                            nir_intrinsic_base(intr)) {
                                  output_var = var;
                                  break;
                          }
@@ -609,16 +626,29 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
  }
  
  void
-vc4_nir_lower_blend(struct vc4_compile *c)
+vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
  {
-        nir_foreach_overload(c->s, overload) {
-                if (overload->impl) {
-                        nir_foreach_block(overload->impl,
-                                          vc4_nir_lower_blend_block, c);
+        nir_foreach_function(function, s) {
+                if (function->impl) {
+                        nir_foreach_block(block, function->impl) {
+                                vc4_nir_lower_blend_block(block, c);
+                        }
  
-                        nir_metadata_preserve(overload->impl,
+                        nir_metadata_preserve(function->impl,
                                                nir_metadata_block_index |
                                                nir_metadata_dominance);
                  }
          }
+
+        /* If we didn't do alpha-to-coverage on the output color, we still
+         * need to pass glSampleMask() through.
+         */
+        if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
+                nir_function_impl *impl = nir_shader_get_entrypoint(s);
+                nir_builder b;
+                nir_builder_init(&b, impl);
+                b.cursor = nir_after_block(nir_impl_last_block(impl));
+
+                vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
+        }
  }