nir: Get rid of *_indirect variants of input/output load/store intrinsics
[mesa.git] src/gallium/drivers/vc4/vc4_nir_lower_blend.c
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask. Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 *
 * This pass lowers fixed-function blending to a load of the destination
 * color and a series of ALU operations before the store of the output.
 *
 * Once this pass is done, the color write will either have one component (for
 * single sample) with packed argb8888, or 4 components with the per-sample
 * argb8888 result.
 */

#include "util/u_format.h"
#include "vc4_qir.h"
#include "glsl/nir/nir_builder.h"
#include "vc4_context.h"

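/*
 * Returns true if the blend pipeline needs the current destination color
 * from the tile buffer: when blending is enabled, when some color channels
 * are masked off, or when a logic op other than COPY is in effect.
 */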
static bool
blend_depends_on_dst_color(struct vc4_compile *c)
{
        return (c->fs_key->blend.blend_enable ||
                c->fs_key->blend.colormask != 0xf ||
                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
}

/** Emits a load of the previous fragment color from the tile buffer. */
static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder *b, int sample)
{
        nir_intrinsic_instr *load =
                nir_intrinsic_instr_create(b->shader,
                                           nir_intrinsic_load_input);
        load->num_components = 1;
        load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
        nir_builder_instr_insert(b, &load->instr);
        return &load->dest.ssa;
}

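/*
 * Decodes one sRGB channel to linear using the standard piecewise curve:
 * srgb / 12.92 below the 0.04045 cutoff, ((srgb + 0.055) / 1.055)^2.4
 * above it.
 */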
static nir_ssa_def *
vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
{
        nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
        nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
        nir_ssa_def *high = nir_fpow(b,
                                     nir_fmul(b,
                                              nir_fadd(b, srgb,
                                                       nir_imm_float(b, 0.055)),
                                              nir_imm_float(b, 1.0 / 1.055)),
                                     nir_imm_float(b, 2.4));

        return nir_bcsel(b, is_low, low, high);
}

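/*
 * Encodes one linear channel to sRGB: linear * 12.92 below the 0.0031308
 * cutoff, 1.055 * linear^(1/2.4) - 0.055 above it (0.41666 approximates
 * 1/2.4).
 */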
static nir_ssa_def *
vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
{
        nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
        nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
        nir_ssa_def *high = nir_fsub(b,
                                     nir_fmul(b,
                                              nir_imm_float(b, 1.055),
                                              nir_fpow(b,
                                                       linear,
                                                       nir_imm_float(b, 0.41666))),
                                     nir_imm_float(b, 0.055));

        return nir_bcsel(b, is_low, low, high);
}

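/*
 * Returns the float blend factor for one channel, given the unpacked
 * src/dst colors. Dual-source (SRC1) factors are not supported.
 */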
static nir_ssa_def *
vc4_blend_channel_f(nir_builder *b,
                    nir_ssa_def **src,
                    nir_ssa_def **dst,
                    unsigned factor,
                    int channel)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_float(b, 1.0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src[3];
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst[3];
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst[channel];
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                if (channel != 3) {
                        return nir_fmin(b,
                                        src[3],
                                        nir_fsub(b,
                                                 nir_imm_float(b, 1.0),
                                                 dst[3]));
                } else {
                        return nir_imm_float(b, 1.0);
                }
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_float(b, 0.0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_fsub(b, nir_imm_float(b, 1.0),
                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_float(b, 1.0);
        }
}

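/*
 * Replaces byte number "chan" of the packed 8888 value src0 with the
 * corresponding byte of src1.
 */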
static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
                        int chan)
{
        unsigned chan_mask = 0xff << (chan * 8);
        return nir_ior(b,
                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
}

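/*
 * Returns the blend factor as a packed 8888 value for the integer blend
 * path. src_a/dst_a hold the alpha channel splatted across all four bytes,
 * and a_chan is the byte index holding alpha in the packed format.
 */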
static nir_ssa_def *
vc4_blend_channel_i(nir_builder *b,
                    nir_ssa_def *src,
                    nir_ssa_def *dst,
                    nir_ssa_def *src_a,
                    nir_ssa_def *dst_a,
                    unsigned factor,
                    int a_chan)
{
        switch (factor) {
        case PIPE_BLENDFACTOR_ONE:
                return nir_imm_int(b, ~0);
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return src;
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return src_a;
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return dst_a;
        case PIPE_BLENDFACTOR_DST_COLOR:
                return dst;
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return vc4_nir_set_packed_chan(b,
                                               nir_umin_4x8(b,
                                                            src_a,
                                                            nir_inot(b, dst_a)),
                                               nir_imm_int(b, ~0),
                                               a_chan);
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
        case PIPE_BLENDFACTOR_ZERO:
                return nir_imm_int(b, 0);
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return nir_inot(b, src);
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return nir_inot(b, src_a);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return nir_inot(b, dst_a);
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return nir_inot(b, dst);
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return nir_imm_int(b, ~0);
        }
}

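/*
 * Applies the blend equation to one float channel of the
 * already-factor-multiplied src and dst values.
 */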
static nir_ssa_def *
vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_fadd(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_fsub(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_fsub(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_fmin(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_fmax(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

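/*
 * Packed-integer version of vc4_blend_func_f, using the saturating
 * per-byte 4x8 ops on whole 8888 values.
 */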
static nir_ssa_def *
vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
                 unsigned func)
{
        switch (func) {
        case PIPE_BLEND_ADD:
                return nir_usadd_4x8(b, src, dst);
        case PIPE_BLEND_SUBTRACT:
                return nir_ussub_4x8(b, src, dst);
        case PIPE_BLEND_REVERSE_SUBTRACT:
                return nir_ussub_4x8(b, dst, src);
        case PIPE_BLEND_MIN:
                return nir_umin_4x8(b, src, dst);
        case PIPE_BLEND_MAX:
                return nir_umax_4x8(b, src, dst);

        default:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend func %d\n", func);
                return src;
        }
}

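/*
 * Blends the unpacked float src and dst colors channel by channel:
 * computes the src/dst factors, multiplies, and applies the blend
 * equation, with separate RGB and alpha factors and funcs.
 */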
static void
vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        /* Clamp the src color to [0, 1]. Dest is already clamped. */
        for (int i = 0; i < 4; i++)
                src_color[i] = nir_fsat(b, src_color[i]);

        nir_ssa_def *src_blend[4], *dst_blend[4];
        for (int i = 0; i < 4; i++) {
                int src_factor = ((i != 3) ? blend->rgb_src_factor :
                                  blend->alpha_src_factor);
                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                  blend->alpha_dst_factor);
                src_blend[i] = nir_fmul(b, src_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            src_factor, i));
                dst_blend[i] = nir_fmul(b, dst_color[i],
                                        vc4_blend_channel_f(b,
                                                            src_color, dst_color,
                                                            dst_factor, i));
        }

        for (int i = 0; i < 4; i++) {
                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
                                             ((i != 3) ? blend->rgb_func :
                                              blend->alpha_func));
        }
}

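/*
 * Replicates the low byte of src across all four bytes of a 32-bit value,
 * for building packed blend factors.
 */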
static nir_ssa_def *
vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
{
        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
}

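/*
 * Blends whole packed 8888 colors at once using the per-byte 4x8 ops.
 * If the format has an alpha channel and its factors or func differ from
 * the RGB ones, the alpha byte of the result is patched separately.
 */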
static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
                  nir_ssa_def *src_float_a)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        if (!blend->blend_enable)
                return src_color;

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
        nir_ssa_def *dst_a;
        int alpha_chan;
        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
                if (format_swiz[alpha_chan] == 3)
                        break;
        }
        if (alpha_chan != 4) {
                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
                                                              shift), imm_0xff));
        } else {
                dst_a = nir_imm_int(b, ~0);
        }

        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_src_factor,
                                                      alpha_chan);
        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
                                                      src_color, dst_color,
                                                      src_a, dst_a,
                                                      blend->rgb_dst_factor,
                                                      alpha_chan);

        if (alpha_chan != 4 &&
            blend->alpha_src_factor != blend->rgb_src_factor) {
                nir_ssa_def *src_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_src_factor,
                                            alpha_chan);
                src_factor = vc4_nir_set_packed_chan(b, src_factor,
                                                     src_alpha_factor,
                                                     alpha_chan);
        }
        if (alpha_chan != 4 &&
            blend->alpha_dst_factor != blend->rgb_dst_factor) {
                nir_ssa_def *dst_alpha_factor =
                        vc4_blend_channel_i(b,
                                            src_color, dst_color,
                                            src_a, dst_a,
                                            blend->alpha_dst_factor,
                                            alpha_chan);
                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
                                                     dst_alpha_factor,
                                                     alpha_chan);
        }
        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);

        nir_ssa_def *result =
                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
                nir_ssa_def *result_a = vc4_blend_func_i(b,
                                                         src_blend,
                                                         dst_blend,
                                                         blend->alpha_func);
                result = vc4_nir_set_packed_chan(b, result, result_a,
                                                 alpha_chan);
        }
        return result;
}

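/* Applies the framebuffer logic op to the packed src and dst colors. */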
static nir_ssa_def *
vc4_logicop(nir_builder *b, int logicop_func,
            nir_ssa_def *src, nir_ssa_def *dst)
{
        switch (logicop_func) {
        case PIPE_LOGICOP_CLEAR:
                return nir_imm_int(b, 0);
        case PIPE_LOGICOP_NOR:
                return nir_inot(b, nir_ior(b, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return nir_iand(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return nir_inot(b, src);
        case PIPE_LOGICOP_AND_REVERSE:
                return nir_iand(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_INVERT:
                return nir_inot(b, dst);
        case PIPE_LOGICOP_XOR:
                return nir_ixor(b, src, dst);
        case PIPE_LOGICOP_NAND:
                return nir_inot(b, nir_iand(b, src, dst));
        case PIPE_LOGICOP_AND:
                return nir_iand(b, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return nir_inot(b, nir_ixor(b, src, dst));
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_OR_INVERTED:
                return nir_ior(b, nir_inot(b, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return nir_ior(b, src, nir_inot(b, dst));
        case PIPE_LOGICOP_OR:
                return nir_ior(b, src, dst);
        case PIPE_LOGICOP_SET:
                return nir_imm_int(b, ~0);
        default:
                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
                /* FALLTHROUGH */
        case PIPE_LOGICOP_COPY:
                return src;
        }
}

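/*
 * Emits the float comparison matching a PIPE_FUNC_* enum; used for the
 * alpha test.
 */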
static nir_ssa_def *
vc4_nir_pipe_compare_func(nir_builder *b, int func,
                          nir_ssa_def *src0, nir_ssa_def *src1)
{
        switch (func) {
        default:
                fprintf(stderr, "Unknown compare func %d\n", func);
                /* FALLTHROUGH */
        case PIPE_FUNC_NEVER:
                return nir_imm_int(b, 0);
        case PIPE_FUNC_ALWAYS:
                return nir_imm_int(b, ~0);
        case PIPE_FUNC_EQUAL:
                return nir_feq(b, src0, src1);
        case PIPE_FUNC_NOTEQUAL:
                return nir_fne(b, src0, src1);
        case PIPE_FUNC_GREATER:
                return nir_flt(b, src1, src0);
        case PIPE_FUNC_GEQUAL:
                return nir_fge(b, src0, src1);
        case PIPE_FUNC_LESS:
                return nir_flt(b, src0, src1);
        case PIPE_FUNC_LEQUAL:
                return nir_fge(b, src1, src0);
        }
}

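/*
 * Emits a discard_if for fragments failing the alpha test, comparing the
 * src alpha against the alpha-ref uniform.
 */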
static void
vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
                                nir_ssa_def *alpha)
{
        if (!c->fs_key->alpha_test)
                return;

        nir_ssa_def *alpha_ref =
                vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
        nir_ssa_def *condition =
                vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
                                          alpha, alpha_ref);

        nir_intrinsic_instr *discard =
                nir_intrinsic_instr_create(b->shader,
                                           nir_intrinsic_discard_if);
        discard->num_components = 1;
        discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
        nir_builder_instr_insert(b, &discard->instr);
}

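/*
 * Swizzles float colors into the render target's channel order and packs
 * them into a unorm 8888 value.
 */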
static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
                         nir_ssa_def **colors)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);

        nir_ssa_def *swizzled[4];
        for (int i = 0; i < 4; i++) {
                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
                                                           format_swiz[i]);
        }

        return nir_pack_unorm_4x8(b,
                                  nir_vec4(b,
                                           swizzled[0], swizzled[1],
                                           swizzled[2], swizzled[3]));
}

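/*
 * Emits the full blend pipeline for one sample: loads and unpacks the dst
 * color from the tile buffer, runs the alpha test, blends (float path for
 * sRGB targets, packed-integer path otherwise), applies the logic op and
 * color mask, and returns the packed 8888 color to be stored.
 */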
static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
                       int sample)
{
        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        bool srgb = util_format_is_srgb(color_format);

        /* Pull out the float src/dst color components. */
        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
        for (unsigned i = 0; i < 4; i++) {
                src_color[i] = nir_channel(b, src, i);
                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
        }

        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
                src_color[3] = nir_imm_float(b, 1.0);

        vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);

        nir_ssa_def *packed_color;
        if (srgb) {
                /* Unswizzle the destination color. */
                nir_ssa_def *dst_color[4];
                for (unsigned i = 0; i < 4; i++) {
                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
                                                                    unpacked_dst_color,
                                                                    format_swiz[i]);
                }

                /* Turn dst color to linear. */
                for (int i = 0; i < 3; i++)
                        dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);

                nir_ssa_def *blend_color[4];
                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);

                /* sRGB encode the output color */
                for (int i = 0; i < 3; i++)
                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);

                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
        } else {
                nir_ssa_def *packed_src_color =
                        vc4_nir_swizzle_and_pack(c, b, src_color);

                packed_color =
                        vc4_do_blending_i(c, b,
                                          packed_src_color, packed_dst_color,
                                          src_color[3]);
        }

        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                   packed_color, packed_dst_color);

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        colormask &= ~(0xff << (i * 8));
                }
        }

        return nir_ior(b,
                       nir_iand(b, packed_color,
                                nir_imm_int(b, colormask)),
                       nir_iand(b, packed_dst_color,
                                nir_imm_int(b, ~colormask)));
}

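/* Returns the next driver_location not used by any output variable. */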
static int
vc4_nir_next_output_driver_location(nir_shader *s)
{
        int maxloc = -1;

        nir_foreach_variable(var, &s->outputs)
                maxloc = MAX2(maxloc, (int)var->data.driver_location);

        return maxloc + 1;
}

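/*
 * Creates a fragment shader sample_mask output variable and emits a store
 * of val to it.
 */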
static void
vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
                          nir_ssa_def *val)
{
        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
                                                        glsl_uint_type(),
                                                        "sample_mask");
        sample_mask->data.driver_location =
                vc4_nir_next_output_driver_location(c->s);
        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;

        nir_intrinsic_instr *intr =
                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
        intr->num_components = 1;
        intr->const_index[0] = sample_mask->data.driver_location;

        intr->src[0] = nir_src_for_ssa(val);
        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_builder_instr_insert(b, &intr->instr);
}

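/*
 * Lowers one store of the output color: emits the sample mask output if
 * needed, then rewrites the stored value to the blended result (either one
 * packed color, or four per-sample colors for MSAA).
 */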
static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                          nir_intrinsic_instr *intr)
{
        nir_ssa_def *frag_color = intr->src[0].ssa;

        if (c->fs_key->sample_coverage) {
                nir_intrinsic_instr *load =
                        nir_intrinsic_instr_create(b->shader,
                                                   nir_intrinsic_load_sample_mask_in);
                load->num_components = 1;
                nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
                nir_builder_instr_insert(b, &load->instr);

                nir_ssa_def *bitmask = &load->dest.ssa;

                vc4_nir_store_sample_mask(c, b, bitmask);
        } else if (c->fs_key->sample_alpha_to_coverage) {
                nir_ssa_def *a = nir_channel(b, frag_color, 3);

                /* XXX: We should do a nice dither based on the fragment
                 * coordinate, instead.
                 */
                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
                nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
                nir_ssa_def *bitmask = nir_isub(b,
                                                nir_ishl(b,
                                                         nir_imm_int(b, 1),
                                                         num_bits),
                                                nir_imm_int(b, 1));
                vc4_nir_store_sample_mask(c, b, bitmask);
        }

        /* The TLB color read returns each sample in turn, so if our blending
         * depends on the destination color, we're going to have to run the
         * blending function separately for each destination sample value, and
         * then output the per-sample color using TLB_COLOR_MS.
         */
        nir_ssa_def *blend_output;
        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
                c->msaa_per_sample_output = true;

                nir_ssa_def *samples[4];
                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
                blend_output = nir_vec4(b,
                                        samples[0], samples[1],
                                        samples[2], samples[3]);
        } else {
                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
        }

        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
                              nir_src_for_ssa(blend_output));
        intr->num_components = blend_output->num_components;
}

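/*
 * Per-block callback: finds store_output intrinsics that write the fragment
 * color and runs the blend lowering on them.
 */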
static bool
vc4_nir_lower_blend_block(nir_block *block, void *state)
{
        struct vc4_compile *c = state;

        nir_foreach_instr_safe(block, instr) {
                if (instr->type != nir_instr_type_intrinsic)
                        continue;
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
                if (intr->intrinsic != nir_intrinsic_store_output)
                        continue;

                nir_variable *output_var = NULL;
                nir_foreach_variable(var, &c->s->outputs) {
                        if (var->data.driver_location == intr->const_index[0]) {
                                output_var = var;
                                break;
                        }
                }
                assert(output_var);

                if (output_var->data.location != FRAG_RESULT_COLOR &&
                    output_var->data.location != FRAG_RESULT_DATA0) {
                        continue;
                }

                nir_function_impl *impl =
                        nir_cf_node_get_function(&block->cf_node);
                nir_builder b;
                nir_builder_init(&b, impl);
                b.cursor = nir_before_instr(&intr->instr);
                vc4_nir_lower_blend_instr(c, &b, intr);
        }
        return true;
}

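/*
 * Pass entry point: runs the lowering over every block of each function
 * implementation and preserves block-index/dominance metadata.
 */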
void
vc4_nir_lower_blend(struct vc4_compile *c)
{
        nir_foreach_overload(c->s, overload) {
                if (overload->impl) {
                        nir_foreach_block(overload->impl,
                                          vc4_nir_lower_blend_block, c);

                        nir_metadata_preserve(overload->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }
}