src/gallium/drivers/vc4/vc4_nir_lower_blend.c

   1 /*
   2  * Copyright © 2015 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * Implements most of the fixed function fragment pipeline in shader code.
  26  *
  27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
  28  * or color mask.  Instead, you read the current contents of the destination
  29  * from the tile buffer after having waited for the scoreboard (which is
  30  * handled by vc4_qpu_emit.c), then do math using your output color and that
  31  * destination value, and update the output color appropriately.
  32  *
  33  * Once this pass is done, the color write will either have one component (for
  34  * single sample) with packed argb8888, or 4 components with the per-sample
  35  * argb8888 result.
  36  */
  37
  38 /**
  39  * Lowers fixed-function blending to a load of the destination color and a
  40  * series of ALU operations before the store of the output.
  41  */
  42 #include "util/u_format.h"
  43 #include "vc4_qir.h"
  44 #include "compiler/nir/nir_builder.h"
  45 #include "vc4_context.h"
  46
  47 static bool
  48 blend_depends_on_dst_color(struct vc4_compile *c)
  49 {
  50         return (c->fs_key->blend.blend_enable ||
  51                 c->fs_key->blend.colormask != 0xf ||
  52                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
  53 }
  54
  55 /** Emits a load of the previous fragment color from the tile buffer. */
  56 static nir_ssa_def *
  57 vc4_nir_get_dst_color(nir_builder *b, int sample)
  58 {
  59         nir_intrinsic_instr *load =
  60                 nir_intrinsic_instr_create(b->shader,
  61                                            nir_intrinsic_load_input);
  62         load->num_components = 1;
  63         nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
  64         load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
  65         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
  66         nir_builder_instr_insert(b, &load->instr);
  67         return &load->dest.ssa;
  68 }
  69
  70 static  nir_ssa_def *
  71 vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
  72 {
  73         nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
  74         nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
  75         nir_ssa_def *high = nir_fpow(b,
  76                                      nir_fmul(b,
  77                                               nir_fadd(b, srgb,
  78                                                        nir_imm_float(b, 0.055)),
  79                                               nir_imm_float(b, 1.0 / 1.055)),
  80                                      nir_imm_float(b, 2.4));
  81
  82         return nir_bcsel(b, is_low, low, high);
  83 }
  84
  85 static  nir_ssa_def *
  86 vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
  87 {
  88         nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
  89         nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
  90         nir_ssa_def *high = nir_fsub(b,
  91                                      nir_fmul(b,
  92                                               nir_imm_float(b, 1.055),
  93                                               nir_fpow(b,
  94                                                        linear,
  95                                                        nir_imm_float(b, 0.41666))),
  96                                      nir_imm_float(b, 0.055));
  97
  98         return nir_bcsel(b, is_low, low, high);
  99 }
 100
 101 static nir_ssa_def *
 102 vc4_blend_channel_f(nir_builder *b,
 103                     nir_ssa_def **src,
 104                     nir_ssa_def **dst,
 105                     unsigned factor,
 106                     int channel)
 107 {
 108         switch(factor) {
 109         case PIPE_BLENDFACTOR_ONE:
 110                 return nir_imm_float(b, 1.0);
 111         case PIPE_BLENDFACTOR_SRC_COLOR:
 112                 return src[channel];
 113         case PIPE_BLENDFACTOR_SRC_ALPHA:
 114                 return src[3];
 115         case PIPE_BLENDFACTOR_DST_ALPHA:
 116                 return dst[3];
 117         case PIPE_BLENDFACTOR_DST_COLOR:
 118                 return dst[channel];
 119         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
 120                 if (channel != 3) {
 121                         return nir_fmin(b,
 122                                         src[3],
 123                                         nir_fsub(b,
 124                                                  nir_imm_float(b, 1.0),
 125                                                  dst[3]));
 126                 } else {
 127                         return nir_imm_float(b, 1.0);
 128                 }
 129         case PIPE_BLENDFACTOR_CONST_COLOR:
 130                 return nir_load_system_value(b,
 131                                              nir_intrinsic_load_blend_const_color_r_float +
 132                                              channel,
 133                                              0);
 134         case PIPE_BLENDFACTOR_CONST_ALPHA:
 135                 return nir_load_blend_const_color_a_float(b);
 136         case PIPE_BLENDFACTOR_ZERO:
 137                 return nir_imm_float(b, 0.0);
 138         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
 139                 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
 140         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
 141                 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
 142         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
 143                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
 144         case PIPE_BLENDFACTOR_INV_DST_COLOR:
 145                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
 146         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
 147                 return nir_fsub(b, nir_imm_float(b, 1.0),
 148                                 nir_load_system_value(b,
 149                                                       nir_intrinsic_load_blend_const_color_r_float +
 150                                                       channel,
 151                                                       0));
 152         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 153                 return nir_fsub(b, nir_imm_float(b, 1.0),
 154                                 nir_load_blend_const_color_a_float(b));
 155
 156         default:
 157         case PIPE_BLENDFACTOR_SRC1_COLOR:
 158         case PIPE_BLENDFACTOR_SRC1_ALPHA:
 159         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
 160         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
 161                 /* Unsupported. */
 162                 fprintf(stderr, "Unknown blend factor %d\n", factor);
 163                 return nir_imm_float(b, 1.0);
 164         }
 165 }
 166
 167 static nir_ssa_def *
 168 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
 169                         int chan)
 170 {
 171         unsigned chan_mask = 0xff << (chan * 8);
 172         return nir_ior(b,
 173                        nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
 174                        nir_iand(b, src1, nir_imm_int(b, chan_mask)));
 175 }
 176
 177 static nir_ssa_def *
 178 vc4_blend_channel_i(nir_builder *b,
 179                     nir_ssa_def *src,
 180                     nir_ssa_def *dst,
 181                     nir_ssa_def *src_a,
 182                     nir_ssa_def *dst_a,
 183                     unsigned factor,
 184                     int a_chan)
 185 {
 186         switch (factor) {
 187         case PIPE_BLENDFACTOR_ONE:
 188                 return nir_imm_int(b, ~0);
 189         case PIPE_BLENDFACTOR_SRC_COLOR:
 190                 return src;
 191         case PIPE_BLENDFACTOR_SRC_ALPHA:
 192                 return src_a;
 193         case PIPE_BLENDFACTOR_DST_ALPHA:
 194                 return dst_a;
 195         case PIPE_BLENDFACTOR_DST_COLOR:
 196                 return dst;
 197         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
 198                 return vc4_nir_set_packed_chan(b,
 199                                                nir_umin_4x8(b,
 200                                                             src_a,
 201                                                             nir_inot(b, dst_a)),
 202                                                nir_imm_int(b, ~0),
 203                                                a_chan);
 204         case PIPE_BLENDFACTOR_CONST_COLOR:
 205                 return nir_load_blend_const_color_rgba8888_unorm(b);
 206         case PIPE_BLENDFACTOR_CONST_ALPHA:
 207                 return nir_load_blend_const_color_aaaa8888_unorm(b);
 208         case PIPE_BLENDFACTOR_ZERO:
 209                 return nir_imm_int(b, 0);
 210         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
 211                 return nir_inot(b, src);
 212         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
 213                 return nir_inot(b, src_a);
 214         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
 215                 return nir_inot(b, dst_a);
 216         case PIPE_BLENDFACTOR_INV_DST_COLOR:
 217                 return nir_inot(b, dst);
 218         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
 219                 return nir_inot(b,
 220                                 nir_load_blend_const_color_rgba8888_unorm(b));
 221         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 222                 return nir_inot(b,
 223                                 nir_load_blend_const_color_aaaa8888_unorm(b));
 224
 225         default:
 226         case PIPE_BLENDFACTOR_SRC1_COLOR:
 227         case PIPE_BLENDFACTOR_SRC1_ALPHA:
 228         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
 229         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
 230                 /* Unsupported. */
 231                 fprintf(stderr, "Unknown blend factor %d\n", factor);
 232                 return nir_imm_int(b, ~0);
 233         }
 234 }
 235
 236 static nir_ssa_def *
 237 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
 238                  unsigned func)
 239 {
 240         switch (func) {
 241         case PIPE_BLEND_ADD:
 242                 return nir_fadd(b, src, dst);
 243         case PIPE_BLEND_SUBTRACT:
 244                 return nir_fsub(b, src, dst);
 245         case PIPE_BLEND_REVERSE_SUBTRACT:
 246                 return nir_fsub(b, dst, src);
 247         case PIPE_BLEND_MIN:
 248                 return nir_fmin(b, src, dst);
 249         case PIPE_BLEND_MAX:
 250                 return nir_fmax(b, src, dst);
 251
 252         default:
 253                 /* Unsupported. */
 254                 fprintf(stderr, "Unknown blend func %d\n", func);
 255                 return src;
 256
 257         }
 258 }
 259
 260 static nir_ssa_def *
 261 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
 262                  unsigned func)
 263 {
 264         switch (func) {
 265         case PIPE_BLEND_ADD:
 266                 return nir_usadd_4x8(b, src, dst);
 267         case PIPE_BLEND_SUBTRACT:
 268                 return nir_ussub_4x8(b, src, dst);
 269         case PIPE_BLEND_REVERSE_SUBTRACT:
 270                 return nir_ussub_4x8(b, dst, src);
 271         case PIPE_BLEND_MIN:
 272                 return nir_umin_4x8(b, src, dst);
 273         case PIPE_BLEND_MAX:
 274                 return nir_umax_4x8(b, src, dst);
 275
 276         default:
 277                 /* Unsupported. */
 278                 fprintf(stderr, "Unknown blend func %d\n", func);
 279                 return src;
 280
 281         }
 282 }
 283
 284 static void
 285 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
 286                   nir_ssa_def **src_color, nir_ssa_def **dst_color)
 287 {
 288         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
 289
 290         if (!blend->blend_enable) {
 291                 for (int i = 0; i < 4; i++)
 292                         result[i] = src_color[i];
 293                 return;
 294         }
 295
 296         /* Clamp the src color to [0, 1].  Dest is already clamped. */
 297         for (int i = 0; i < 4; i++)
 298                 src_color[i] = nir_fsat(b, src_color[i]);
 299
 300         nir_ssa_def *src_blend[4], *dst_blend[4];
 301         for (int i = 0; i < 4; i++) {
 302                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
 303                                   blend->alpha_src_factor);
 304                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
 305                                   blend->alpha_dst_factor);
 306                 src_blend[i] = nir_fmul(b, src_color[i],
 307                                         vc4_blend_channel_f(b,
 308                                                             src_color, dst_color,
 309                                                             src_factor, i));
 310                 dst_blend[i] = nir_fmul(b, dst_color[i],
 311                                         vc4_blend_channel_f(b,
 312                                                             src_color, dst_color,
 313                                                             dst_factor, i));
 314         }
 315
 316         for (int i = 0; i < 4; i++) {
 317                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
 318                                              ((i != 3) ? blend->rgb_func :
 319                                               blend->alpha_func));
 320         }
 321 }
 322
 323 static nir_ssa_def *
 324 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
 325 {
 326         nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
 327         return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
 328 }
 329
 330 static nir_ssa_def *
 331 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
 332                   nir_ssa_def *src_color, nir_ssa_def *dst_color,
 333                   nir_ssa_def *src_float_a)
 334 {
 335         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
 336
 337         if (!blend->blend_enable)
 338                 return src_color;
 339
 340         enum pipe_format color_format = c->fs_key->color_format;
 341         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
 342         nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
 343         nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
 344         nir_ssa_def *dst_a;
 345         int alpha_chan;
 346         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
 347                 if (format_swiz[alpha_chan] == 3)
 348                         break;
 349         }
 350         if (alpha_chan != 4) {
 351                 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
 352                 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
 353                                                               shift), imm_0xff));
 354         } else {
 355                 dst_a = nir_imm_int(b, ~0);
 356         }
 357
 358         nir_ssa_def *src_factor = vc4_blend_channel_i(b,
 359                                                       src_color, dst_color,
 360                                                       src_a, dst_a,
 361                                                       blend->rgb_src_factor,
 362                                                       alpha_chan);
 363         nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
 364                                                       src_color, dst_color,
 365                                                       src_a, dst_a,
 366                                                       blend->rgb_dst_factor,
 367                                                       alpha_chan);
 368
 369         if (alpha_chan != 4 &&
 370             blend->alpha_src_factor != blend->rgb_src_factor) {
 371                 nir_ssa_def *src_alpha_factor =
 372                         vc4_blend_channel_i(b,
 373                                             src_color, dst_color,
 374                                             src_a, dst_a,
 375                                             blend->alpha_src_factor,
 376                                             alpha_chan);
 377                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
 378                                                      src_alpha_factor,
 379                                                      alpha_chan);
 380         }
 381         if (alpha_chan != 4 &&
 382             blend->alpha_dst_factor != blend->rgb_dst_factor) {
 383                 nir_ssa_def *dst_alpha_factor =
 384                         vc4_blend_channel_i(b,
 385                                             src_color, dst_color,
 386                                             src_a, dst_a,
 387                                             blend->alpha_dst_factor,
 388                                             alpha_chan);
 389                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
 390                                                      dst_alpha_factor,
 391                                                      alpha_chan);
 392         }
 393         nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
 394         nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
 395
 396         nir_ssa_def *result =
 397                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
 398         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
 399                 nir_ssa_def *result_a = vc4_blend_func_i(b,
 400                                                          src_blend,
 401                                                          dst_blend,
 402                                                          blend->alpha_func);
 403                 result = vc4_nir_set_packed_chan(b, result, result_a,
 404                                                  alpha_chan);
 405         }
 406         return result;
 407 }
 408
 409 static nir_ssa_def *
 410 vc4_logicop(nir_builder *b, int logicop_func,
 411             nir_ssa_def *src, nir_ssa_def *dst)
 412 {
 413         switch (logicop_func) {
 414         case PIPE_LOGICOP_CLEAR:
 415                 return nir_imm_int(b, 0);
 416         case PIPE_LOGICOP_NOR:
 417                 return nir_inot(b, nir_ior(b, src, dst));
 418         case PIPE_LOGICOP_AND_INVERTED:
 419                 return nir_iand(b, nir_inot(b, src), dst);
 420         case PIPE_LOGICOP_COPY_INVERTED:
 421                 return nir_inot(b, src);
 422         case PIPE_LOGICOP_AND_REVERSE:
 423                 return nir_iand(b, src, nir_inot(b, dst));
 424         case PIPE_LOGICOP_INVERT:
 425                 return nir_inot(b, dst);
 426         case PIPE_LOGICOP_XOR:
 427                 return nir_ixor(b, src, dst);
 428         case PIPE_LOGICOP_NAND:
 429                 return nir_inot(b, nir_iand(b, src, dst));
 430         case PIPE_LOGICOP_AND:
 431                 return nir_iand(b, src, dst);
 432         case PIPE_LOGICOP_EQUIV:
 433                 return nir_inot(b, nir_ixor(b, src, dst));
 434         case PIPE_LOGICOP_NOOP:
 435                 return dst;
 436         case PIPE_LOGICOP_OR_INVERTED:
 437                 return nir_ior(b, nir_inot(b, src), dst);
 438         case PIPE_LOGICOP_OR_REVERSE:
 439                 return nir_ior(b, src, nir_inot(b, dst));
 440         case PIPE_LOGICOP_OR:
 441                 return nir_ior(b, src, dst);
 442         case PIPE_LOGICOP_SET:
 443                 return nir_imm_int(b, ~0);
 444         default:
 445                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
 446                 /* FALLTHROUGH */
 447         case PIPE_LOGICOP_COPY:
 448                 return src;
 449         }
 450 }
 451
 452 static nir_ssa_def *
 453 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
 454                          nir_ssa_def **colors)
 455 {
 456         enum pipe_format color_format = c->fs_key->color_format;
 457         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
 458
 459         nir_ssa_def *swizzled[4];
 460         for (int i = 0; i < 4; i++) {
 461                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
 462                                                            format_swiz[i]);
 463         }
 464
 465         return nir_pack_unorm_4x8(b,
 466                                   nir_vec4(b,
 467                                            swizzled[0], swizzled[1],
 468                                            swizzled[2], swizzled[3]));
 469
 470 }
 471
 472 static nir_ssa_def *
 473 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
 474                        int sample)
 475 {
 476         enum pipe_format color_format = c->fs_key->color_format;
 477         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
 478         bool srgb = util_format_is_srgb(color_format);
 479
 480         /* Pull out the float src/dst color components. */
 481         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
 482         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
 483         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
 484         for (unsigned i = 0; i < 4; i++) {
 485                 src_color[i] = nir_channel(b, src, i);
 486                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
 487         }
 488
 489         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
 490                 src_color[3] = nir_imm_float(b, 1.0);
 491
 492         nir_ssa_def *packed_color;
 493         if (srgb) {
 494                 /* Unswizzle the destination color. */
 495                 nir_ssa_def *dst_color[4];
 496                 for (unsigned i = 0; i < 4; i++) {
 497                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
 498                                                                     unpacked_dst_color,
 499                                                                     format_swiz[i]);
 500                 }
 501
 502                 /* Turn dst color to linear. */
 503                 for (int i = 0; i < 3; i++)
 504                         dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
 505
 506                 nir_ssa_def *blend_color[4];
 507                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
 508
 509                 /* sRGB encode the output color */
 510                 for (int i = 0; i < 3; i++)
 511                         blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
 512
 513                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
 514         } else {
 515                 nir_ssa_def *packed_src_color =
 516                         vc4_nir_swizzle_and_pack(c, b, src_color);
 517
 518                 packed_color =
 519                         vc4_do_blending_i(c, b,
 520                                           packed_src_color, packed_dst_color,
 521                                           src_color[3]);
 522         }
 523
 524         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
 525                                    packed_color, packed_dst_color);
 526
 527         /* If the bit isn't set in the color mask, then just return the
 528          * original dst color, instead.
 529          */
 530         uint32_t colormask = 0xffffffff;
 531         for (int i = 0; i < 4; i++) {
 532                 if (format_swiz[i] < 4 &&
 533                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
 534                         colormask &= ~(0xff << (i * 8));
 535                 }
 536         }
 537
 538         return nir_ior(b,
 539                        nir_iand(b, packed_color,
 540                                 nir_imm_int(b, colormask)),
 541                        nir_iand(b, packed_dst_color,
 542                                 nir_imm_int(b, ~colormask)));
 543 }
 544
 545 static int
 546 vc4_nir_next_output_driver_location(nir_shader *s)
 547 {
 548         int maxloc = -1;
 549
 550         nir_foreach_variable(var, &s->outputs)
 551                 maxloc = MAX2(maxloc, (int)var->data.driver_location);
 552
 553         return maxloc + 1;
 554 }
 555
 556 static void
 557 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
 558                           nir_ssa_def *val)
 559 {
 560         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
 561                                                         glsl_uint_type(),
 562                                                         "sample_mask");
 563         sample_mask->data.driver_location =
 564                 vc4_nir_next_output_driver_location(c->s);
 565         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
 566
 567         nir_intrinsic_instr *intr =
 568                 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
 569         intr->num_components = 1;
 570         nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
 571
 572         intr->src[0] = nir_src_for_ssa(val);
 573         intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
 574         nir_builder_instr_insert(b, &intr->instr);
 575 }
 576
 577 static void
 578 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
 579                           nir_intrinsic_instr *intr)
 580 {
 581         nir_ssa_def *frag_color = intr->src[0].ssa;
 582
 583         if (c->fs_key->sample_alpha_to_coverage) {
 584                 nir_ssa_def *a = nir_channel(b, frag_color, 3);
 585
 586                 /* XXX: We should do a nice dither based on the fragment
 587                  * coordinate, instead.
 588                  */
 589                 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
 590                 nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
 591                 nir_ssa_def *bitmask = nir_isub(b,
 592                                                 nir_ishl(b,
 593                                                          nir_imm_int(b, 1),
 594                                                          num_bits),
 595                                                 nir_imm_int(b, 1));
 596                 vc4_nir_store_sample_mask(c, b, bitmask);
 597         }
 598
 599         /* The TLB color read returns each sample in turn, so if our blending
 600          * depends on the destination color, we're going to have to run the
 601          * blending function separately for each destination sample value, and
 602          * then output the per-sample color using TLB_COLOR_MS.
 603          */
 604         nir_ssa_def *blend_output;
 605         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
 606                 c->msaa_per_sample_output = true;
 607
 608                 nir_ssa_def *samples[4];
 609                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
 610                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
 611                 blend_output = nir_vec4(b,
 612                                         samples[0], samples[1],
 613                                         samples[2], samples[3]);
 614         } else {
 615                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
 616         }
 617
 618         nir_instr_rewrite_src(&intr->instr, &intr->src[0],
 619                               nir_src_for_ssa(blend_output));
 620         intr->num_components = blend_output->num_components;
 621 }
 622
 623 static bool
 624 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
 625 {
 626         nir_foreach_instr_safe(instr, block) {
 627                 if (instr->type != nir_instr_type_intrinsic)
 628                         continue;
 629                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 630                 if (intr->intrinsic != nir_intrinsic_store_output)
 631                         continue;
 632
 633                 nir_variable *output_var = NULL;
 634                 nir_foreach_variable(var, &c->s->outputs) {
 635                         if (var->data.driver_location ==
 636                             nir_intrinsic_base(intr)) {
 637                                 output_var = var;
 638                                 break;
 639                         }
 640                 }
 641                 assert(output_var);
 642
 643                 if (output_var->data.location != FRAG_RESULT_COLOR &&
 644                     output_var->data.location != FRAG_RESULT_DATA0) {
 645                         continue;
 646                 }
 647
 648                 nir_function_impl *impl =
 649                         nir_cf_node_get_function(&block->cf_node);
 650                 nir_builder b;
 651                 nir_builder_init(&b, impl);
 652                 b.cursor = nir_before_instr(&intr->instr);
 653                 vc4_nir_lower_blend_instr(c, &b, intr);
 654         }
 655         return true;
 656 }
 657
 658 void
 659 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
 660 {
 661         nir_foreach_function(function, s) {
 662                 if (function->impl) {
 663                         nir_foreach_block(block, function->impl) {
 664                                 vc4_nir_lower_blend_block(block, c);
 665                         }
 666
 667                         nir_metadata_preserve(function->impl,
 668                                               nir_metadata_block_index |
 669                                               nir_metadata_dominance);
 670                 }
 671         }
 672
 673         /* If we didn't do alpha-to-coverage on the output color, we still
 674          * need to pass glSampleMask() through.
 675          */
 676         if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
 677                 nir_function_impl *impl = nir_shader_get_entrypoint(s);
 678                 nir_builder b;
 679                 nir_builder_init(&b, impl);
 680                 b.cursor = nir_after_block(nir_impl_last_block(impl));
 681
 682                 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
 683         }
 684 }