56f6c3bd0e9533a56bc9ffc570b2fe5050886133
[mesa.git] / src / gallium / drivers / vc4 / vc4_nir_lower_blend.c
1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * Implements most of the fixed function fragment pipeline in shader code.
26 *
27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28 * or color mask. Instead, you read the current contents of the destination
29 * from the tile buffer after having waited for the scoreboard (which is
30 * handled by vc4_qpu_emit.c), then do math using your output color and that
31 * destination value, and update the output color appropriately.
32 *
33 * Once this pass is done, the color write will either have one component (for
34 * single sample) with packed argb8888, or 4 components with the per-sample
35 * argb8888 result.
36 */
37
38 /**
39 * Lowers fixed-function blending to a load of the destination color and a
40 * series of ALU operations before the store of the output.
41 */
42 #include "util/u_format.h"
43 #include "vc4_qir.h"
44 #include "compiler/nir/nir_builder.h"
45 #include "vc4_context.h"
46
47 static bool
48 blend_depends_on_dst_color(struct vc4_compile *c)
49 {
50 return (c->fs_key->blend.blend_enable ||
51 c->fs_key->blend.colormask != 0xf ||
52 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
53 }
54
55 /** Emits a load of the previous fragment color from the tile buffer. */
56 static nir_ssa_def *
57 vc4_nir_get_dst_color(nir_builder *b, int sample)
58 {
59 nir_intrinsic_instr *load =
60 nir_intrinsic_instr_create(b->shader,
61 nir_intrinsic_load_input);
62 load->num_components = 1;
63 nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
64 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
65 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
66 nir_builder_instr_insert(b, &load->instr);
67 return &load->dest.ssa;
68 }
69
70 static nir_ssa_def *
71 vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
72 {
73 nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
74 nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
75 nir_ssa_def *high = nir_fpow(b,
76 nir_fmul(b,
77 nir_fadd(b, srgb,
78 nir_imm_float(b, 0.055)),
79 nir_imm_float(b, 1.0 / 1.055)),
80 nir_imm_float(b, 2.4));
81
82 return nir_bcsel(b, is_low, low, high);
83 }
84
85 static nir_ssa_def *
86 vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
87 {
88 nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
89 nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
90 nir_ssa_def *high = nir_fsub(b,
91 nir_fmul(b,
92 nir_imm_float(b, 1.055),
93 nir_fpow(b,
94 linear,
95 nir_imm_float(b, 0.41666))),
96 nir_imm_float(b, 0.055));
97
98 return nir_bcsel(b, is_low, low, high);
99 }
100
101 static nir_ssa_def *
102 vc4_blend_channel_f(nir_builder *b,
103 nir_ssa_def **src,
104 nir_ssa_def **dst,
105 unsigned factor,
106 int channel)
107 {
108 switch(factor) {
109 case PIPE_BLENDFACTOR_ONE:
110 return nir_imm_float(b, 1.0);
111 case PIPE_BLENDFACTOR_SRC_COLOR:
112 return src[channel];
113 case PIPE_BLENDFACTOR_SRC_ALPHA:
114 return src[3];
115 case PIPE_BLENDFACTOR_DST_ALPHA:
116 return dst[3];
117 case PIPE_BLENDFACTOR_DST_COLOR:
118 return dst[channel];
119 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
120 if (channel != 3) {
121 return nir_fmin(b,
122 src[3],
123 nir_fsub(b,
124 nir_imm_float(b, 1.0),
125 dst[3]));
126 } else {
127 return nir_imm_float(b, 1.0);
128 }
129 case PIPE_BLENDFACTOR_CONST_COLOR:
130 return nir_load_system_value(b,
131 nir_intrinsic_load_blend_const_color_r_float +
132 channel,
133 0);
134 case PIPE_BLENDFACTOR_CONST_ALPHA:
135 return nir_load_blend_const_color_a_float(b);
136 case PIPE_BLENDFACTOR_ZERO:
137 return nir_imm_float(b, 0.0);
138 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
139 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
140 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
141 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
142 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
143 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
144 case PIPE_BLENDFACTOR_INV_DST_COLOR:
145 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
146 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
147 return nir_fsub(b, nir_imm_float(b, 1.0),
148 nir_load_system_value(b,
149 nir_intrinsic_load_blend_const_color_r_float +
150 channel,
151 0));
152 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
153 return nir_fsub(b, nir_imm_float(b, 1.0),
154 nir_load_blend_const_color_a_float(b));
155
156 default:
157 case PIPE_BLENDFACTOR_SRC1_COLOR:
158 case PIPE_BLENDFACTOR_SRC1_ALPHA:
159 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
160 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
161 /* Unsupported. */
162 fprintf(stderr, "Unknown blend factor %d\n", factor);
163 return nir_imm_float(b, 1.0);
164 }
165 }
166
167 static nir_ssa_def *
168 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
169 int chan)
170 {
171 unsigned chan_mask = 0xff << (chan * 8);
172 return nir_ior(b,
173 nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
174 nir_iand(b, src1, nir_imm_int(b, chan_mask)));
175 }
176
177 static nir_ssa_def *
178 vc4_blend_channel_i(nir_builder *b,
179 nir_ssa_def *src,
180 nir_ssa_def *dst,
181 nir_ssa_def *src_a,
182 nir_ssa_def *dst_a,
183 unsigned factor,
184 int a_chan)
185 {
186 switch (factor) {
187 case PIPE_BLENDFACTOR_ONE:
188 return nir_imm_int(b, ~0);
189 case PIPE_BLENDFACTOR_SRC_COLOR:
190 return src;
191 case PIPE_BLENDFACTOR_SRC_ALPHA:
192 return src_a;
193 case PIPE_BLENDFACTOR_DST_ALPHA:
194 return dst_a;
195 case PIPE_BLENDFACTOR_DST_COLOR:
196 return dst;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
198 return vc4_nir_set_packed_chan(b,
199 nir_umin_4x8(b,
200 src_a,
201 nir_inot(b, dst_a)),
202 nir_imm_int(b, ~0),
203 a_chan);
204 case PIPE_BLENDFACTOR_CONST_COLOR:
205 return nir_load_blend_const_color_rgba8888_unorm(b);
206 case PIPE_BLENDFACTOR_CONST_ALPHA:
207 return nir_load_blend_const_color_aaaa8888_unorm(b);
208 case PIPE_BLENDFACTOR_ZERO:
209 return nir_imm_int(b, 0);
210 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
211 return nir_inot(b, src);
212 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
213 return nir_inot(b, src_a);
214 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
215 return nir_inot(b, dst_a);
216 case PIPE_BLENDFACTOR_INV_DST_COLOR:
217 return nir_inot(b, dst);
218 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
219 return nir_inot(b,
220 nir_load_blend_const_color_rgba8888_unorm(b));
221 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
222 return nir_inot(b,
223 nir_load_blend_const_color_aaaa8888_unorm(b));
224
225 default:
226 case PIPE_BLENDFACTOR_SRC1_COLOR:
227 case PIPE_BLENDFACTOR_SRC1_ALPHA:
228 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
229 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
230 /* Unsupported. */
231 fprintf(stderr, "Unknown blend factor %d\n", factor);
232 return nir_imm_int(b, ~0);
233 }
234 }
235
236 static nir_ssa_def *
237 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
238 unsigned func)
239 {
240 switch (func) {
241 case PIPE_BLEND_ADD:
242 return nir_fadd(b, src, dst);
243 case PIPE_BLEND_SUBTRACT:
244 return nir_fsub(b, src, dst);
245 case PIPE_BLEND_REVERSE_SUBTRACT:
246 return nir_fsub(b, dst, src);
247 case PIPE_BLEND_MIN:
248 return nir_fmin(b, src, dst);
249 case PIPE_BLEND_MAX:
250 return nir_fmax(b, src, dst);
251
252 default:
253 /* Unsupported. */
254 fprintf(stderr, "Unknown blend func %d\n", func);
255 return src;
256
257 }
258 }
259
260 static nir_ssa_def *
261 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
262 unsigned func)
263 {
264 switch (func) {
265 case PIPE_BLEND_ADD:
266 return nir_usadd_4x8(b, src, dst);
267 case PIPE_BLEND_SUBTRACT:
268 return nir_ussub_4x8(b, src, dst);
269 case PIPE_BLEND_REVERSE_SUBTRACT:
270 return nir_ussub_4x8(b, dst, src);
271 case PIPE_BLEND_MIN:
272 return nir_umin_4x8(b, src, dst);
273 case PIPE_BLEND_MAX:
274 return nir_umax_4x8(b, src, dst);
275
276 default:
277 /* Unsupported. */
278 fprintf(stderr, "Unknown blend func %d\n", func);
279 return src;
280
281 }
282 }
283
284 static void
285 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
286 nir_ssa_def **src_color, nir_ssa_def **dst_color)
287 {
288 struct pipe_rt_blend_state *blend = &c->fs_key->blend;
289
290 if (!blend->blend_enable) {
291 for (int i = 0; i < 4; i++)
292 result[i] = src_color[i];
293 return;
294 }
295
296 /* Clamp the src color to [0, 1]. Dest is already clamped. */
297 for (int i = 0; i < 4; i++)
298 src_color[i] = nir_fsat(b, src_color[i]);
299
300 nir_ssa_def *src_blend[4], *dst_blend[4];
301 for (int i = 0; i < 4; i++) {
302 int src_factor = ((i != 3) ? blend->rgb_src_factor :
303 blend->alpha_src_factor);
304 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
305 blend->alpha_dst_factor);
306 src_blend[i] = nir_fmul(b, src_color[i],
307 vc4_blend_channel_f(b,
308 src_color, dst_color,
309 src_factor, i));
310 dst_blend[i] = nir_fmul(b, dst_color[i],
311 vc4_blend_channel_f(b,
312 src_color, dst_color,
313 dst_factor, i));
314 }
315
316 for (int i = 0; i < 4; i++) {
317 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
318 ((i != 3) ? blend->rgb_func :
319 blend->alpha_func));
320 }
321 }
322
323 static nir_ssa_def *
324 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
325 {
326 nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
327 return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
328 }
329
330 static nir_ssa_def *
331 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
332 nir_ssa_def *src_color, nir_ssa_def *dst_color,
333 nir_ssa_def *src_float_a)
334 {
335 struct pipe_rt_blend_state *blend = &c->fs_key->blend;
336
337 if (!blend->blend_enable)
338 return src_color;
339
340 enum pipe_format color_format = c->fs_key->color_format;
341 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
342 nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
343 nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
344 nir_ssa_def *dst_a;
345 int alpha_chan;
346 for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
347 if (format_swiz[alpha_chan] == 3)
348 break;
349 }
350 if (alpha_chan != 4) {
351 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
352 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
353 shift), imm_0xff));
354 } else {
355 dst_a = nir_imm_int(b, ~0);
356 }
357
358 nir_ssa_def *src_factor = vc4_blend_channel_i(b,
359 src_color, dst_color,
360 src_a, dst_a,
361 blend->rgb_src_factor,
362 alpha_chan);
363 nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
364 src_color, dst_color,
365 src_a, dst_a,
366 blend->rgb_dst_factor,
367 alpha_chan);
368
369 if (alpha_chan != 4 &&
370 blend->alpha_src_factor != blend->rgb_src_factor) {
371 nir_ssa_def *src_alpha_factor =
372 vc4_blend_channel_i(b,
373 src_color, dst_color,
374 src_a, dst_a,
375 blend->alpha_src_factor,
376 alpha_chan);
377 src_factor = vc4_nir_set_packed_chan(b, src_factor,
378 src_alpha_factor,
379 alpha_chan);
380 }
381 if (alpha_chan != 4 &&
382 blend->alpha_dst_factor != blend->rgb_dst_factor) {
383 nir_ssa_def *dst_alpha_factor =
384 vc4_blend_channel_i(b,
385 src_color, dst_color,
386 src_a, dst_a,
387 blend->alpha_dst_factor,
388 alpha_chan);
389 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
390 dst_alpha_factor,
391 alpha_chan);
392 }
393 nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
394 nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
395
396 nir_ssa_def *result =
397 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
398 if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
399 nir_ssa_def *result_a = vc4_blend_func_i(b,
400 src_blend,
401 dst_blend,
402 blend->alpha_func);
403 result = vc4_nir_set_packed_chan(b, result, result_a,
404 alpha_chan);
405 }
406 return result;
407 }
408
409 static nir_ssa_def *
410 vc4_logicop(nir_builder *b, int logicop_func,
411 nir_ssa_def *src, nir_ssa_def *dst)
412 {
413 switch (logicop_func) {
414 case PIPE_LOGICOP_CLEAR:
415 return nir_imm_int(b, 0);
416 case PIPE_LOGICOP_NOR:
417 return nir_inot(b, nir_ior(b, src, dst));
418 case PIPE_LOGICOP_AND_INVERTED:
419 return nir_iand(b, nir_inot(b, src), dst);
420 case PIPE_LOGICOP_COPY_INVERTED:
421 return nir_inot(b, src);
422 case PIPE_LOGICOP_AND_REVERSE:
423 return nir_iand(b, src, nir_inot(b, dst));
424 case PIPE_LOGICOP_INVERT:
425 return nir_inot(b, dst);
426 case PIPE_LOGICOP_XOR:
427 return nir_ixor(b, src, dst);
428 case PIPE_LOGICOP_NAND:
429 return nir_inot(b, nir_iand(b, src, dst));
430 case PIPE_LOGICOP_AND:
431 return nir_iand(b, src, dst);
432 case PIPE_LOGICOP_EQUIV:
433 return nir_inot(b, nir_ixor(b, src, dst));
434 case PIPE_LOGICOP_NOOP:
435 return dst;
436 case PIPE_LOGICOP_OR_INVERTED:
437 return nir_ior(b, nir_inot(b, src), dst);
438 case PIPE_LOGICOP_OR_REVERSE:
439 return nir_ior(b, src, nir_inot(b, dst));
440 case PIPE_LOGICOP_OR:
441 return nir_ior(b, src, dst);
442 case PIPE_LOGICOP_SET:
443 return nir_imm_int(b, ~0);
444 default:
445 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
446 /* FALLTHROUGH */
447 case PIPE_LOGICOP_COPY:
448 return src;
449 }
450 }
451
452 static nir_ssa_def *
453 vc4_nir_pipe_compare_func(nir_builder *b, int func,
454 nir_ssa_def *src0, nir_ssa_def *src1)
455 {
456 switch (func) {
457 default:
458 fprintf(stderr, "Unknown compare func %d\n", func);
459 /* FALLTHROUGH */
460 case PIPE_FUNC_NEVER:
461 return nir_imm_int(b, 0);
462 case PIPE_FUNC_ALWAYS:
463 return nir_imm_int(b, ~0);
464 case PIPE_FUNC_EQUAL:
465 return nir_feq(b, src0, src1);
466 case PIPE_FUNC_NOTEQUAL:
467 return nir_fne(b, src0, src1);
468 case PIPE_FUNC_GREATER:
469 return nir_flt(b, src1, src0);
470 case PIPE_FUNC_GEQUAL:
471 return nir_fge(b, src0, src1);
472 case PIPE_FUNC_LESS:
473 return nir_flt(b, src0, src1);
474 case PIPE_FUNC_LEQUAL:
475 return nir_fge(b, src1, src0);
476 }
477 }
478
479 static void
480 vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
481 nir_ssa_def *alpha)
482 {
483 if (!c->fs_key->alpha_test)
484 return;
485
486 nir_ssa_def *condition =
487 vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
488 alpha,
489 nir_load_alpha_ref_float(b));
490
491 nir_intrinsic_instr *discard =
492 nir_intrinsic_instr_create(b->shader,
493 nir_intrinsic_discard_if);
494 discard->num_components = 1;
495 discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
496 nir_builder_instr_insert(b, &discard->instr);
497 c->s->info->fs.uses_discard = true;
498 }
499
500 static nir_ssa_def *
501 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
502 nir_ssa_def **colors)
503 {
504 enum pipe_format color_format = c->fs_key->color_format;
505 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
506
507 nir_ssa_def *swizzled[4];
508 for (int i = 0; i < 4; i++) {
509 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
510 format_swiz[i]);
511 }
512
513 return nir_pack_unorm_4x8(b,
514 nir_vec4(b,
515 swizzled[0], swizzled[1],
516 swizzled[2], swizzled[3]));
517
518 }
519
520 static nir_ssa_def *
521 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
522 int sample)
523 {
524 enum pipe_format color_format = c->fs_key->color_format;
525 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
526 bool srgb = util_format_is_srgb(color_format);
527
528 /* Pull out the float src/dst color components. */
529 nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
530 nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
531 nir_ssa_def *src_color[4], *unpacked_dst_color[4];
532 for (unsigned i = 0; i < 4; i++) {
533 src_color[i] = nir_channel(b, src, i);
534 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
535 }
536
537 if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
538 src_color[3] = nir_imm_float(b, 1.0);
539
540 vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
541
542 nir_ssa_def *packed_color;
543 if (srgb) {
544 /* Unswizzle the destination color. */
545 nir_ssa_def *dst_color[4];
546 for (unsigned i = 0; i < 4; i++) {
547 dst_color[i] = vc4_nir_get_swizzled_channel(b,
548 unpacked_dst_color,
549 format_swiz[i]);
550 }
551
552 /* Turn dst color to linear. */
553 for (int i = 0; i < 3; i++)
554 dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
555
556 nir_ssa_def *blend_color[4];
557 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
558
559 /* sRGB encode the output color */
560 for (int i = 0; i < 3; i++)
561 blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
562
563 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
564 } else {
565 nir_ssa_def *packed_src_color =
566 vc4_nir_swizzle_and_pack(c, b, src_color);
567
568 packed_color =
569 vc4_do_blending_i(c, b,
570 packed_src_color, packed_dst_color,
571 src_color[3]);
572 }
573
574 packed_color = vc4_logicop(b, c->fs_key->logicop_func,
575 packed_color, packed_dst_color);
576
577 /* If the bit isn't set in the color mask, then just return the
578 * original dst color, instead.
579 */
580 uint32_t colormask = 0xffffffff;
581 for (int i = 0; i < 4; i++) {
582 if (format_swiz[i] < 4 &&
583 !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
584 colormask &= ~(0xff << (i * 8));
585 }
586 }
587
588 return nir_ior(b,
589 nir_iand(b, packed_color,
590 nir_imm_int(b, colormask)),
591 nir_iand(b, packed_dst_color,
592 nir_imm_int(b, ~colormask)));
593 }
594
595 static int
596 vc4_nir_next_output_driver_location(nir_shader *s)
597 {
598 int maxloc = -1;
599
600 nir_foreach_variable(var, &s->outputs)
601 maxloc = MAX2(maxloc, (int)var->data.driver_location);
602
603 return maxloc + 1;
604 }
605
606 static void
607 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
608 nir_ssa_def *val)
609 {
610 nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
611 glsl_uint_type(),
612 "sample_mask");
613 sample_mask->data.driver_location =
614 vc4_nir_next_output_driver_location(c->s);
615 sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
616
617 nir_intrinsic_instr *intr =
618 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
619 intr->num_components = 1;
620 nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
621
622 intr->src[0] = nir_src_for_ssa(val);
623 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
624 nir_builder_instr_insert(b, &intr->instr);
625 }
626
627 static void
628 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
629 nir_intrinsic_instr *intr)
630 {
631 nir_ssa_def *frag_color = intr->src[0].ssa;
632
633 if (c->fs_key->sample_alpha_to_coverage) {
634 nir_ssa_def *a = nir_channel(b, frag_color, 3);
635
636 /* XXX: We should do a nice dither based on the fragment
637 * coordinate, instead.
638 */
639 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
640 nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
641 nir_ssa_def *bitmask = nir_isub(b,
642 nir_ishl(b,
643 nir_imm_int(b, 1),
644 num_bits),
645 nir_imm_int(b, 1));
646 vc4_nir_store_sample_mask(c, b, bitmask);
647 }
648
649 /* The TLB color read returns each sample in turn, so if our blending
650 * depends on the destination color, we're going to have to run the
651 * blending function separately for each destination sample value, and
652 * then output the per-sample color using TLB_COLOR_MS.
653 */
654 nir_ssa_def *blend_output;
655 if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
656 c->msaa_per_sample_output = true;
657
658 nir_ssa_def *samples[4];
659 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
660 samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
661 blend_output = nir_vec4(b,
662 samples[0], samples[1],
663 samples[2], samples[3]);
664 } else {
665 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
666 }
667
668 nir_instr_rewrite_src(&intr->instr, &intr->src[0],
669 nir_src_for_ssa(blend_output));
670 intr->num_components = blend_output->num_components;
671 }
672
673 static bool
674 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
675 {
676 nir_foreach_instr_safe(instr, block) {
677 if (instr->type != nir_instr_type_intrinsic)
678 continue;
679 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
680 if (intr->intrinsic != nir_intrinsic_store_output)
681 continue;
682
683 nir_variable *output_var = NULL;
684 nir_foreach_variable(var, &c->s->outputs) {
685 if (var->data.driver_location ==
686 nir_intrinsic_base(intr)) {
687 output_var = var;
688 break;
689 }
690 }
691 assert(output_var);
692
693 if (output_var->data.location != FRAG_RESULT_COLOR &&
694 output_var->data.location != FRAG_RESULT_DATA0) {
695 continue;
696 }
697
698 nir_function_impl *impl =
699 nir_cf_node_get_function(&block->cf_node);
700 nir_builder b;
701 nir_builder_init(&b, impl);
702 b.cursor = nir_before_instr(&intr->instr);
703 vc4_nir_lower_blend_instr(c, &b, intr);
704 }
705 return true;
706 }
707
708 void
709 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
710 {
711 nir_foreach_function(function, s) {
712 if (function->impl) {
713 nir_foreach_block(block, function->impl) {
714 vc4_nir_lower_blend_block(block, c);
715 }
716
717 nir_metadata_preserve(function->impl,
718 nir_metadata_block_index |
719 nir_metadata_dominance);
720 }
721 }
722
723 /* If we didn't do alpha-to-coverage on the output color, we still
724 * need to pass glSampleMask() through.
725 */
726 if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
727 nir_function_impl *impl = nir_shader_get_entrypoint(s);
728 nir_builder b;
729 nir_builder_init(&b, impl);
730 b.cursor = nir_after_block(nir_impl_last_block(impl));
731
732 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
733 }
734 }