util: Move gallium's PIPE_FORMAT utils to /util/format/
[mesa.git] / src / gallium / drivers / vc4 / vc4_nir_lower_blend.c
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask.  Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 *
 * Once this pass is done, the color write will either have one component (for
 * single sample) with packed argb8888, or 4 components with the per-sample
 * argb8888 result.
 */

/**
 * Lowers fixed-function blending to a load of the destination color and a
 * series of ALU operations before the store of the output.
 */
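
/*
 * For reference, the per-channel equation this pass open-codes for the
 * common PIPE_BLEND_ADD case is roughly:
 *
 *     result = src * src_factor + dst * dst_factor
 *
 * e.g. classic alpha blending (SRC_ALPHA, INV_SRC_ALPHA) becomes:
 *
 *     result = src * src.a + dst * (1 - src.a)
 */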
#include "util/format/u_format.h"
#include "vc4_qir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "vc4_context.h"

static bool
blend_depends_on_dst_color(struct vc4_compile *c)
{
        return (c->fs_key->blend.blend_enable ||
                c->fs_key->blend.colormask != 0xf ||
                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
}

/** Emits a load of the previous fragment color from the tile buffer. */
static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder *b, int sample)
{
        nir_intrinsic_instr *load =
                nir_intrinsic_instr_create(b->shader,
                                           nir_intrinsic_load_input);
        load->num_components = 1;
        nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
        nir_builder_instr_insert(b, &load->instr);
        return &load->dest.ssa;
}

static nir_ssa_def *
vc4_blend_channel_f(nir_builder *b,
                    nir_ssa_def **src,
                    nir_ssa_def **dst,
                    unsigned factor,
                    int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_float(b, 1.0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src[3];
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst[3];
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                if (channel != 3) {
                        return nir_fmin(b,
                                        src[3],
                                        nir_fsub(b,
                                                 nir_imm_float(b, 1.0),
                                                 dst[3]));
                } else {
                        return nir_imm_float(b, 1.0);
                }
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return nir_load_system_value(b,
                                             nir_intrinsic_load_blend_const_color_r_float +
                                             channel,
                                             0, 32);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return nir_load_blend_const_color_a_float(b);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_float(b, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                nir_load_system_value(b,
                                                      nir_intrinsic_load_blend_const_color_r_float +
                                                      channel,
                                                      0, 32));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                nir_load_blend_const_color_a_float(b));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_float(b, 1.0);
        }
}

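/**
 * Replaces byte @chan of packed color src0 with the corresponding byte of
 * src1: e.g. for chan = 1, chan_mask is 0x0000ff00, so byte 1 is taken from
 * src1 and the other three bytes come from src0.
 */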
static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
                        int chan)
{
        unsigned chan_mask = 0xff << (chan * 8);
        return nir_ior(b,
                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
}

static nir_ssa_def *
vc4_blend_channel_i(nir_builder *b,
                    nir_ssa_def *src,
                    nir_ssa_def *dst,
                    nir_ssa_def *src_a,
                    nir_ssa_def *dst_a,
                    unsigned factor,
                    int a_chan)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_int(b, ~0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src;
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src_a;
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst_a;
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst;
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return vc4_nir_set_packed_chan(b,
                                               nir_umin_4x8(b,
                                                            src_a,
                                                            nir_inot(b, dst_a)),
                                               nir_imm_int(b, ~0),
                                               a_chan);
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return nir_load_blend_const_color_rgba8888_unorm(b);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return nir_load_blend_const_color_aaaa8888_unorm(b);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_int(b, 0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_inot(b, src);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_inot(b, src_a);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_inot(b, dst_a);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_inot(b, dst);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_inot(b,
                                nir_load_blend_const_color_rgba8888_unorm(b));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_inot(b,
                                nir_load_blend_const_color_aaaa8888_unorm(b));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_int(b, ~0);
        }
}

static nir_ssa_def *
vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_fadd(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_fsub(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_fsub(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_fmin(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_fmax(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

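/*
 * The _4x8 ops below work per byte on the packed 8888 colors, with unsigned
 * saturation: e.g. in nir_usadd_4x8, a byte sum of 0xc0 + 0x80 clamps to
 * 0xff instead of wrapping.
 */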
static nir_ssa_def *
vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_usadd_4x8(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_ussub_4x8(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_ussub_4x8(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_umin_4x8(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_umax_4x8(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

static void
vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        /* Clamp the src color to [0, 1].  Dest is already clamped. */
        for (int i = 0; i < 4; i++)
                src_color[i] = nir_fsat(b, src_color[i]);

        nir_ssa_def *src_blend[4], *dst_blend[4];
        for (int i = 0; i < 4; i++) {
                int src_factor = ((i != 3) ? blend->rgb_src_factor :
                                  blend->alpha_src_factor);
                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                  blend->alpha_dst_factor);
                src_blend[i] = nir_fmul(b, src_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            src_factor, i));
                dst_blend[i] = nir_fmul(b, dst_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            dst_factor, i));
        }

        for (int i = 0; i < 4; i++) {
                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
                                             ((i != 3) ? blend->rgb_func :
                                              blend->alpha_func));
        }
}

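/**
 * Replicates the low byte of src across all four bytes, assuming the upper
 * bytes are zero: e.g. 0x000000ab becomes 0xabababab.
 */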
static nir_ssa_def *
vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
{
        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
}

static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
                  nir_ssa_def *src_float_a)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable)
                return src_color;

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
        nir_ssa_def *dst_a;
        int alpha_chan;
        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
                if (format_swiz[alpha_chan] == 3)
                        break;
        }
        if (alpha_chan != 4) {
                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
                                                              shift), imm_0xff));
        } else {
                dst_a = nir_imm_int(b, ~0);
        }

        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_src_factor,
                                                      alpha_chan);
        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_dst_factor,
                                                      alpha_chan);

        if (alpha_chan != 4 &&
            blend->alpha_src_factor != blend->rgb_src_factor) {
                nir_ssa_def *src_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_src_factor,
                                            alpha_chan);
                src_factor = vc4_nir_set_packed_chan(b, src_factor,
                                                     src_alpha_factor,
                                                     alpha_chan);
        }
        if (alpha_chan != 4 &&
            blend->alpha_dst_factor != blend->rgb_dst_factor) {
                nir_ssa_def *dst_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_dst_factor,
                                            alpha_chan);
                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
                                                     dst_alpha_factor,
                                                     alpha_chan);
        }
        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);

        nir_ssa_def *result =
                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
                nir_ssa_def *result_a = vc4_blend_func_i(b,
                                                         src_blend,
                                                         dst_blend,
                                                         blend->alpha_func);
                result = vc4_nir_set_packed_chan(b, result, result_a,
                                                 alpha_chan);
        }
        return result;
}

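/*
 * Logic ops are applied to the packed 8888 value, so each gallium logicop
 * maps onto a single bitwise NIR op (e.g. PIPE_LOGICOP_XOR is just a
 * nir_ixor of the packed src and dst words).
 */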
static nir_ssa_def *
vc4_logicop(nir_builder *b, int logicop_func,
            nir_ssa_def *src, nir_ssa_def *dst)
{
        switch (logicop_func) {
        case PIPE_LOGICOP_CLEAR:
                return nir_imm_int(b, 0);
        case PIPE_LOGICOP_NOR:
                return nir_inot(b, nir_ior(b, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return nir_iand(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return nir_inot(b, src);
        case PIPE_LOGICOP_AND_REVERSE:
                return nir_iand(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_INVERT:
                return nir_inot(b, dst);
        case PIPE_LOGICOP_XOR:
                return nir_ixor(b, src, dst);
        case PIPE_LOGICOP_NAND:
                return nir_inot(b, nir_iand(b, src, dst));
        case PIPE_LOGICOP_AND:
                return nir_iand(b, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return nir_inot(b, nir_ixor(b, src, dst));
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_OR_INVERTED:
                return nir_ior(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return nir_ior(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_OR:
                return nir_ior(b, src, dst);
        case PIPE_LOGICOP_SET:
                return nir_imm_int(b, ~0);
        default:
                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
                /* FALLTHROUGH */
        case PIPE_LOGICOP_COPY:
                return src;
        }
}

static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
                         nir_ssa_def **colors)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);

        nir_ssa_def *swizzled[4];
        for (int i = 0; i < 4; i++) {
                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
                                                           format_swiz[i]);
        }

        return nir_pack_unorm_4x8(b,
                                  nir_vec4(b,
                                           swizzled[0], swizzled[1],
                                           swizzled[2], swizzled[3]));
}

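/**
 * Runs the blend pipeline for one sample: unpack the TLB dst color, blend
 * (in linear float space for sRGB formats, on the packed 8888 value
 * otherwise), apply the logic op, then apply the colormask.
 */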
static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
                       int sample)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        bool srgb = util_format_is_srgb(color_format);

        /* Pull out the float src/dst color components. */
        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
        for (unsigned i = 0; i < 4; i++) {
                src_color[i] = nir_channel(b, src, i);
                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
        }

        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
                src_color[3] = nir_imm_float(b, 1.0);

        nir_ssa_def *packed_color;
        if (srgb) {
                /* Unswizzle the destination color. */
                nir_ssa_def *dst_color[4];
                for (unsigned i = 0; i < 4; i++) {
                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
                                                                    unpacked_dst_color,
                                                                    format_swiz[i]);
                }

                /* Turn dst color to linear. */
                for (int i = 0; i < 3; i++)
                        dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);

                nir_ssa_def *blend_color[4];
                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);

                /* sRGB encode the output color. */
                for (int i = 0; i < 3; i++)
                        blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);

                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
        } else {
                nir_ssa_def *packed_src_color =
                        vc4_nir_swizzle_and_pack(c, b, src_color);

                packed_color =
                        vc4_do_blending_i(c, b,
                                          packed_src_color, packed_dst_color,
                                          src_color[3]);
        }

        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                   packed_color, packed_dst_color);

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        colormask &= ~(0xff << (i * 8));
                }
        }

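        /* e.g. with only PIPE_MASK_R write-enabled on an RGBA8888 format,
         * colormask keeps the red byte of packed_color and the other three
         * bytes come from packed_dst_color below.
         */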
        return nir_ior(b,
                       nir_iand(b, packed_color,
                                nir_imm_int(b, colormask)),
                       nir_iand(b, packed_dst_color,
                                nir_imm_int(b, ~colormask)));
}

static int
vc4_nir_next_output_driver_location(nir_shader *s)
{
        int maxloc = -1;

        nir_foreach_variable(var, &s->outputs)
                maxloc = MAX2(maxloc, (int)var->data.driver_location);

        return maxloc + 1;
}

static void
vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
                          nir_ssa_def *val)
{
        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
                                                        glsl_uint_type(),
                                                        "sample_mask");
        sample_mask->data.driver_location =
                vc4_nir_next_output_driver_location(c->s);
        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;

        nir_intrinsic_instr *intr =
                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
        intr->num_components = 1;
        nir_intrinsic_set_base(intr, sample_mask->data.driver_location);

        intr->src[0] = nir_src_for_ssa(val);
        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_builder_instr_insert(b, &intr->instr);
}

static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                          nir_intrinsic_instr *intr)
{
        nir_ssa_def *frag_color = intr->src[0].ssa;

        if (c->fs_key->sample_alpha_to_coverage) {
                nir_ssa_def *a = nir_channel(b, frag_color, 3);

                /* XXX: We should do a nice dither based on the fragment
                 * coordinate, instead.
                 */
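                /* e.g. a = 0.5 with VC4_MAX_SAMPLES == 4 gives num_bits = 2,
                 * so bitmask = 0b0011 (the low two sample bits enabled).
                 */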
                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
                nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
                nir_ssa_def *bitmask = nir_isub(b,
                                                nir_ishl(b,
                                                         nir_imm_int(b, 1),
                                                         num_bits),
                                                nir_imm_int(b, 1));
                vc4_nir_store_sample_mask(c, b, bitmask);
        }

        /* The TLB color read returns each sample in turn, so if our blending
         * depends on the destination color, we're going to have to run the
         * blending function separately for each destination sample value, and
         * then output the per-sample color using TLB_COLOR_MS.
         */
        nir_ssa_def *blend_output;
        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
                c->msaa_per_sample_output = true;

                nir_ssa_def *samples[4];
                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
                blend_output = nir_vec4(b,
                                        samples[0], samples[1],
                                        samples[2], samples[3]);
        } else {
                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
        }

        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
                              nir_src_for_ssa(blend_output));
        intr->num_components = blend_output->num_components;
}

static bool
vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
{
        nir_foreach_instr_safe(instr, block) {
                if (instr->type != nir_instr_type_intrinsic)
                        continue;
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
                if (intr->intrinsic != nir_intrinsic_store_output)
                        continue;

                nir_variable *output_var = NULL;
                nir_foreach_variable(var, &c->s->outputs) {
                        if (var->data.driver_location ==
                            nir_intrinsic_base(intr)) {
                                output_var = var;
                                break;
                        }
                }
                assert(output_var);

                if (output_var->data.location != FRAG_RESULT_COLOR &&
                    output_var->data.location != FRAG_RESULT_DATA0) {
                        continue;
                }

                nir_function_impl *impl =
                        nir_cf_node_get_function(&block->cf_node);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_before_instr(&intr->instr);
                vc4_nir_lower_blend_instr(c, &b, intr);
        }
        return true;
}

void
vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
{
        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_foreach_block(block, function->impl) {
                                vc4_nir_lower_blend_block(block, c);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        /* If we didn't do alpha-to-coverage on the output color, we still
         * need to pass glSampleMask() through.
         */
        if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
                nir_function_impl *impl = nir_shader_get_entrypoint(s);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_after_block(nir_impl_last_block(impl));

                vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
        }
}
654 }