util: Move gallium's PIPE_FORMAT utils to /util/format/
[mesa.git] / src / gallium / drivers / vc4 / vc4_nir_lower_blend.c
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask.  Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 *
 * Once this pass is done, the color write will either have one component (for
 * single sample) with packed argb8888, or 4 components with the per-sample
 * argb8888 result.
 */

/**
 * Lowers fixed-function blending to a load of the destination color and a
 * series of ALU operations before the store of the output.
 */
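
/*
 * For reference, the per-channel equation this pass open-codes for the
 * common PIPE_BLEND_ADD case is roughly:
 *
 *     result = src * src_factor + dst * dst_factor
 *
 * e.g. classic alpha blending (SRC_ALPHA, INV_SRC_ALPHA) becomes:
 *
 *     result = src * src.a + dst * (1 - src.a)
 */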
#include "util/format/u_format.h"
#include "vc4_qir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "vc4_context.h"

static bool
blend_depends_on_dst_color(struct vc4_compile *c)
{
        return (c->fs_key->blend.blend_enable ||
                c->fs_key->blend.colormask != 0xf ||
                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
}

/** Emits a load of the previous fragment color from the tile buffer. */
static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder *b, int sample)
{
        nir_intrinsic_instr *load =
                nir_intrinsic_instr_create(b->shader,
                                           nir_intrinsic_load_input);
        load->num_components = 1;
        nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
        nir_builder_instr_insert(b, &load->instr);
        return &load->dest.ssa;
}

static nir_ssa_def *
vc4_blend_channel_f(nir_builder *b,
                    nir_ssa_def **src,
                    nir_ssa_def **dst,
                    unsigned factor,
                    int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_float(b, 1.0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src[3];
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst[3];
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                if (channel != 3) {
                        return nir_fmin(b,
                                        src[3],
                                        nir_fsub(b,
                                                 nir_imm_float(b, 1.0),
                                                 dst[3]));
                } else {
                        return nir_imm_float(b, 1.0);
                }
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return nir_load_system_value(b,
                                             nir_intrinsic_load_blend_const_color_r_float +
                                             channel,
                                             0, 32);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return nir_load_blend_const_color_a_float(b);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_float(b, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                nir_load_system_value(b,
                                                      nir_intrinsic_load_blend_const_color_r_float +
                                                      channel,
                                                      0, 32));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                nir_load_blend_const_color_a_float(b));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_float(b, 1.0);
        }
}

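/**
 * Replaces byte @chan of packed color src0 with the corresponding byte of
 * src1: e.g. for chan = 1, chan_mask is 0x0000ff00, so byte 1 is taken from
 * src1 and the other three bytes come from src0.
 */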
static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
                        int chan)
{
        unsigned chan_mask = 0xff << (chan * 8);
        return nir_ior(b,
                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
}

static nir_ssa_def *
vc4_blend_channel_i(nir_builder *b,
                    nir_ssa_def *src,
                    nir_ssa_def *dst,
                    nir_ssa_def *src_a,
                    nir_ssa_def *dst_a,
                    unsigned factor,
                    int a_chan)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_int(b, ~0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src;
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src_a;
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst_a;
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst;
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return vc4_nir_set_packed_chan(b,
                                               nir_umin_4x8(b,
                                                            src_a,
                                                            nir_inot(b, dst_a)),
                                               nir_imm_int(b, ~0),
                                               a_chan);
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return nir_load_blend_const_color_rgba8888_unorm(b);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return nir_load_blend_const_color_aaaa8888_unorm(b);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_int(b, 0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_inot(b, src);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_inot(b, src_a);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_inot(b, dst_a);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_inot(b, dst);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_inot(b,
                                nir_load_blend_const_color_rgba8888_unorm(b));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_inot(b,
                                nir_load_blend_const_color_aaaa8888_unorm(b));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_int(b, ~0);
        }
}

static nir_ssa_def *
vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_fadd(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_fsub(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_fsub(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_fmin(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_fmax(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

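/*
 * The _4x8 ops below work per byte on the packed 8888 colors, with unsigned
 * saturation: e.g. in nir_usadd_4x8, a byte sum of 0xc0 + 0x80 clamps to
 * 0xff instead of wrapping.
 */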
static nir_ssa_def *
vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_usadd_4x8(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_ussub_4x8(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_ussub_4x8(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_umin_4x8(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_umax_4x8(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

static void
vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        /* Clamp the src color to [0, 1].  Dest is already clamped. */
        for (int i = 0; i < 4; i++)
                src_color[i] = nir_fsat(b, src_color[i]);

        nir_ssa_def *src_blend[4], *dst_blend[4];
        for (int i = 0; i < 4; i++) {
                int src_factor = ((i != 3) ? blend->rgb_src_factor :
                                  blend->alpha_src_factor);
                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                  blend->alpha_dst_factor);
                src_blend[i] = nir_fmul(b, src_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            src_factor, i));
                dst_blend[i] = nir_fmul(b, dst_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            dst_factor, i));
        }

        for (int i = 0; i < 4; i++) {
                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
                                             ((i != 3) ? blend->rgb_func :
                                              blend->alpha_func));
        }
}

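/**
 * Replicates the low byte of src across all four bytes, assuming the upper
 * bytes are zero: e.g. 0x000000ab becomes 0xabababab.
 */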
static nir_ssa_def *
vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
{
        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
}

static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
                  nir_ssa_def *src_float_a)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable)
                return src_color;

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
        nir_ssa_def *dst_a;
        int alpha_chan;
        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
                if (format_swiz[alpha_chan] == 3)
                        break;
        }
        if (alpha_chan != 4) {
                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
                                                              shift), imm_0xff));
        } else {
                dst_a = nir_imm_int(b, ~0);
        }

        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_src_factor,
                                                      alpha_chan);
        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_dst_factor,
                                                      alpha_chan);

        if (alpha_chan != 4 &&
            blend->alpha_src_factor != blend->rgb_src_factor) {
                nir_ssa_def *src_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_src_factor,
                                            alpha_chan);
                src_factor = vc4_nir_set_packed_chan(b, src_factor,
                                                     src_alpha_factor,
                                                     alpha_chan);
        }
        if (alpha_chan != 4 &&
            blend->alpha_dst_factor != blend->rgb_dst_factor) {
                nir_ssa_def *dst_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_dst_factor,
                                            alpha_chan);
                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
                                                     dst_alpha_factor,
                                                     alpha_chan);
        }
        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);

        nir_ssa_def *result =
                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
                nir_ssa_def *result_a = vc4_blend_func_i(b,
                                                         src_blend,
                                                         dst_blend,
                                                         blend->alpha_func);
                result = vc4_nir_set_packed_chan(b, result, result_a,
                                                 alpha_chan);
        }
        return result;
}

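/*
 * Logic ops are applied to the packed 8888 value, so each gallium logicop
 * maps onto a single bitwise NIR op (e.g. PIPE_LOGICOP_XOR is just a
 * nir_ixor of the packed src and dst words).
 */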
static nir_ssa_def *
vc4_logicop(nir_builder *b, int logicop_func,
            nir_ssa_def *src, nir_ssa_def *dst)
{
        switch (logicop_func) {
        case PIPE_LOGICOP_CLEAR:
                return nir_imm_int(b, 0);
        case PIPE_LOGICOP_NOR:
                return nir_inot(b, nir_ior(b, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return nir_iand(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return nir_inot(b, src);
        case PIPE_LOGICOP_AND_REVERSE:
                return nir_iand(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_INVERT:
                return nir_inot(b, dst);
        case PIPE_LOGICOP_XOR:
                return nir_ixor(b, src, dst);
        case PIPE_LOGICOP_NAND:
                return nir_inot(b, nir_iand(b, src, dst));
        case PIPE_LOGICOP_AND:
                return nir_iand(b, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return nir_inot(b, nir_ixor(b, src, dst));
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_OR_INVERTED:
                return nir_ior(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return nir_ior(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_OR:
                return nir_ior(b, src, dst);
        case PIPE_LOGICOP_SET:
                return nir_imm_int(b, ~0);
        default:
                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
                /* FALLTHROUGH */
        case PIPE_LOGICOP_COPY:
                return src;
        }
}

static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
                         nir_ssa_def **colors)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);

        nir_ssa_def *swizzled[4];
        for (int i = 0; i < 4; i++) {
                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
                                                           format_swiz[i]);
        }

        return nir_pack_unorm_4x8(b,
                                  nir_vec4(b,
                                           swizzled[0], swizzled[1],
                                           swizzled[2], swizzled[3]));
}

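/**
 * Runs the blend pipeline for one sample: unpack the TLB dst color, blend
 * (in linear float space for sRGB formats, on the packed 8888 value
 * otherwise), apply the logic op, then apply the colormask.
 */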
static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
                       int sample)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        bool srgb = util_format_is_srgb(color_format);

        /* Pull out the float src/dst color components. */
        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
        for (unsigned i = 0; i < 4; i++) {
                src_color[i] = nir_channel(b, src, i);
                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
        }

        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
                src_color[3] = nir_imm_float(b, 1.0);

        nir_ssa_def *packed_color;
        if (srgb) {
                /* Unswizzle the destination color. */
                nir_ssa_def *dst_color[4];
                for (unsigned i = 0; i < 4; i++) {
                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
                                                                    unpacked_dst_color,
                                                                    format_swiz[i]);
                }

                /* Turn dst color to linear. */
                for (int i = 0; i < 3; i++)
                        dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);

                nir_ssa_def *blend_color[4];
                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);

                /* sRGB encode the output color. */
                for (int i = 0; i < 3; i++)
                        blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);

                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
        } else {
                nir_ssa_def *packed_src_color =
                        vc4_nir_swizzle_and_pack(c, b, src_color);

                packed_color =
                        vc4_do_blending_i(c, b,
                                          packed_src_color, packed_dst_color,
                                          src_color[3]);
        }

        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                   packed_color, packed_dst_color);

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        colormask &= ~(0xff << (i * 8));
                }
        }

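        /* e.g. with only PIPE_MASK_R write-enabled on an RGBA8888 format,
         * colormask keeps the red byte of packed_color and the other three
         * bytes come from packed_dst_color below.
         */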
        return nir_ior(b,
                       nir_iand(b, packed_color,
                                nir_imm_int(b, colormask)),
                       nir_iand(b, packed_dst_color,
                                nir_imm_int(b, ~colormask)));
}

static int
vc4_nir_next_output_driver_location(nir_shader *s)
{
        int maxloc = -1;

        nir_foreach_variable(var, &s->outputs)
                maxloc = MAX2(maxloc, (int)var->data.driver_location);

        return maxloc + 1;
}

static void
vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
                          nir_ssa_def *val)
{
        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
                                                        glsl_uint_type(),
                                                        "sample_mask");
        sample_mask->data.driver_location =
                vc4_nir_next_output_driver_location(c->s);
        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;

        nir_intrinsic_instr *intr =
                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
        intr->num_components = 1;
        nir_intrinsic_set_base(intr, sample_mask->data.driver_location);

        intr->src[0] = nir_src_for_ssa(val);
        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_builder_instr_insert(b, &intr->instr);
}

static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                          nir_intrinsic_instr *intr)
{
        nir_ssa_def *frag_color = intr->src[0].ssa;

        if (c->fs_key->sample_alpha_to_coverage) {
                nir_ssa_def *a = nir_channel(b, frag_color, 3);

                /* XXX: We should do a nice dither based on the fragment
                 * coordinate, instead.
                 */
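                /* e.g. a = 0.5 with VC4_MAX_SAMPLES == 4 gives num_bits = 2,
                 * so bitmask = 0b0011 (the low two sample bits enabled).
                 */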
                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
                nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
                nir_ssa_def *bitmask = nir_isub(b,
                                                nir_ishl(b,
                                                         nir_imm_int(b, 1),
                                                         num_bits),
                                                nir_imm_int(b, 1));
                vc4_nir_store_sample_mask(c, b, bitmask);
        }

        /* The TLB color read returns each sample in turn, so if our blending
         * depends on the destination color, we're going to have to run the
         * blending function separately for each destination sample value, and
         * then output the per-sample color using TLB_COLOR_MS.
         */
        nir_ssa_def *blend_output;
        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
                c->msaa_per_sample_output = true;

                nir_ssa_def *samples[4];
                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
                blend_output = nir_vec4(b,
                                        samples[0], samples[1],
                                        samples[2], samples[3]);
        } else {
                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
        }

        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
                              nir_src_for_ssa(blend_output));
        intr->num_components = blend_output->num_components;
}

static bool
vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
{
        nir_foreach_instr_safe(instr, block) {
                if (instr->type != nir_instr_type_intrinsic)
                        continue;
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
                if (intr->intrinsic != nir_intrinsic_store_output)
                        continue;

                nir_variable *output_var = NULL;
                nir_foreach_variable(var, &c->s->outputs) {
                        if (var->data.driver_location ==
                            nir_intrinsic_base(intr)) {
                                output_var = var;
                                break;
                        }
                }
                assert(output_var);

                if (output_var->data.location != FRAG_RESULT_COLOR &&
                    output_var->data.location != FRAG_RESULT_DATA0) {
                        continue;
                }

                nir_function_impl *impl =
                        nir_cf_node_get_function(&block->cf_node);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_before_instr(&intr->instr);
                vc4_nir_lower_blend_instr(c, &b, intr);
        }
        return true;
}

void
vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
{
        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_foreach_block(block, function->impl) {
                                vc4_nir_lower_blend_block(block, c);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        /* If we didn't do alpha-to-coverage on the output color, we still
         * need to pass glSampleMask() through.
         */
        if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
                nir_function_impl *impl = nir_shader_get_entrypoint(s);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_after_block(nir_impl_last_block(impl));

                vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
        }
}
654 }