/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask.  Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 *
 * Once this pass is done, the color write will either have one component (for
 * single sample) with packed argb8888, or 4 components with the per-sample
 * packed color.
 *
 * Lowers fixed-function blending to a load of the destination color and a
 * series of ALU operations before the store of the output.
 */
#include "util/u_format.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "vc4_context.h"
49 blend_depends_on_dst_color(struct vc4_compile
*c
)
51 return (c
->fs_key
->blend
.blend_enable
||
52 c
->fs_key
->blend
.colormask
!= 0xf ||
53 c
->fs_key
->logicop_func
!= PIPE_LOGICOP_COPY
);
56 /** Emits a load of the previous fragment color from the tile buffer. */
58 vc4_nir_get_dst_color(nir_builder
*b
, int sample
)
60 nir_intrinsic_instr
*load
=
61 nir_intrinsic_instr_create(b
->shader
,
62 nir_intrinsic_load_input
);
63 load
->num_components
= 1;
64 nir_intrinsic_set_base(load
, VC4_NIR_TLB_COLOR_READ_INPUT
+ sample
);
65 load
->src
[0] = nir_src_for_ssa(nir_imm_int(b
, 0));
66 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, 32, NULL
);
67 nir_builder_instr_insert(b
, &load
->instr
);
68 return &load
->dest
.ssa
;
72 vc4_blend_channel_f(nir_builder
*b
,
79 case PIPE_BLENDFACTOR_ONE
:
80 return nir_imm_float(b
, 1.0);
81 case PIPE_BLENDFACTOR_SRC_COLOR
:
83 case PIPE_BLENDFACTOR_SRC_ALPHA
:
85 case PIPE_BLENDFACTOR_DST_ALPHA
:
87 case PIPE_BLENDFACTOR_DST_COLOR
:
89 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
94 nir_imm_float(b
, 1.0),
97 return nir_imm_float(b
, 1.0);
99 case PIPE_BLENDFACTOR_CONST_COLOR
:
100 return nir_load_system_value(b
,
101 nir_intrinsic_load_blend_const_color_r_float
+
104 case PIPE_BLENDFACTOR_CONST_ALPHA
:
105 return nir_load_blend_const_color_a_float(b
);
106 case PIPE_BLENDFACTOR_ZERO
:
107 return nir_imm_float(b
, 0.0);
108 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
109 return nir_fsub(b
, nir_imm_float(b
, 1.0), src
[channel
]);
110 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
111 return nir_fsub(b
, nir_imm_float(b
, 1.0), src
[3]);
112 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
113 return nir_fsub(b
, nir_imm_float(b
, 1.0), dst
[3]);
114 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
115 return nir_fsub(b
, nir_imm_float(b
, 1.0), dst
[channel
]);
116 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
117 return nir_fsub(b
, nir_imm_float(b
, 1.0),
118 nir_load_system_value(b
,
119 nir_intrinsic_load_blend_const_color_r_float
+
122 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
123 return nir_fsub(b
, nir_imm_float(b
, 1.0),
124 nir_load_blend_const_color_a_float(b
));
127 case PIPE_BLENDFACTOR_SRC1_COLOR
:
128 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
129 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
130 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
132 fprintf(stderr
, "Unknown blend factor %d\n", factor
);
133 return nir_imm_float(b
, 1.0);
138 vc4_nir_set_packed_chan(nir_builder
*b
, nir_ssa_def
*src0
, nir_ssa_def
*src1
,
141 unsigned chan_mask
= 0xff << (chan
* 8);
143 nir_iand(b
, src0
, nir_imm_int(b
, ~chan_mask
)),
144 nir_iand(b
, src1
, nir_imm_int(b
, chan_mask
)));
148 vc4_blend_channel_i(nir_builder
*b
,
157 case PIPE_BLENDFACTOR_ONE
:
158 return nir_imm_int(b
, ~0);
159 case PIPE_BLENDFACTOR_SRC_COLOR
:
161 case PIPE_BLENDFACTOR_SRC_ALPHA
:
163 case PIPE_BLENDFACTOR_DST_ALPHA
:
165 case PIPE_BLENDFACTOR_DST_COLOR
:
167 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
168 return vc4_nir_set_packed_chan(b
,
174 case PIPE_BLENDFACTOR_CONST_COLOR
:
175 return nir_load_blend_const_color_rgba8888_unorm(b
);
176 case PIPE_BLENDFACTOR_CONST_ALPHA
:
177 return nir_load_blend_const_color_aaaa8888_unorm(b
);
178 case PIPE_BLENDFACTOR_ZERO
:
179 return nir_imm_int(b
, 0);
180 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
181 return nir_inot(b
, src
);
182 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
183 return nir_inot(b
, src_a
);
184 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
185 return nir_inot(b
, dst_a
);
186 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
187 return nir_inot(b
, dst
);
188 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
190 nir_load_blend_const_color_rgba8888_unorm(b
));
191 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
193 nir_load_blend_const_color_aaaa8888_unorm(b
));
196 case PIPE_BLENDFACTOR_SRC1_COLOR
:
197 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
198 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
199 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
201 fprintf(stderr
, "Unknown blend factor %d\n", factor
);
202 return nir_imm_int(b
, ~0);
207 vc4_blend_func_f(nir_builder
*b
, nir_ssa_def
*src
, nir_ssa_def
*dst
,
212 return nir_fadd(b
, src
, dst
);
213 case PIPE_BLEND_SUBTRACT
:
214 return nir_fsub(b
, src
, dst
);
215 case PIPE_BLEND_REVERSE_SUBTRACT
:
216 return nir_fsub(b
, dst
, src
);
218 return nir_fmin(b
, src
, dst
);
220 return nir_fmax(b
, src
, dst
);
224 fprintf(stderr
, "Unknown blend func %d\n", func
);
231 vc4_blend_func_i(nir_builder
*b
, nir_ssa_def
*src
, nir_ssa_def
*dst
,
236 return nir_usadd_4x8(b
, src
, dst
);
237 case PIPE_BLEND_SUBTRACT
:
238 return nir_ussub_4x8(b
, src
, dst
);
239 case PIPE_BLEND_REVERSE_SUBTRACT
:
240 return nir_ussub_4x8(b
, dst
, src
);
242 return nir_umin_4x8(b
, src
, dst
);
244 return nir_umax_4x8(b
, src
, dst
);
248 fprintf(stderr
, "Unknown blend func %d\n", func
);
255 vc4_do_blending_f(struct vc4_compile
*c
, nir_builder
*b
, nir_ssa_def
**result
,
256 nir_ssa_def
**src_color
, nir_ssa_def
**dst_color
)
258 struct pipe_rt_blend_state
*blend
= &c
->fs_key
->blend
;
260 if (!blend
->blend_enable
) {
261 for (int i
= 0; i
< 4; i
++)
262 result
[i
] = src_color
[i
];
266 /* Clamp the src color to [0, 1]. Dest is already clamped. */
267 for (int i
= 0; i
< 4; i
++)
268 src_color
[i
] = nir_fsat(b
, src_color
[i
]);
270 nir_ssa_def
*src_blend
[4], *dst_blend
[4];
271 for (int i
= 0; i
< 4; i
++) {
272 int src_factor
= ((i
!= 3) ? blend
->rgb_src_factor
:
273 blend
->alpha_src_factor
);
274 int dst_factor
= ((i
!= 3) ? blend
->rgb_dst_factor
:
275 blend
->alpha_dst_factor
);
276 src_blend
[i
] = nir_fmul(b
, src_color
[i
],
277 vc4_blend_channel_f(b
,
278 src_color
, dst_color
,
280 dst_blend
[i
] = nir_fmul(b
, dst_color
[i
],
281 vc4_blend_channel_f(b
,
282 src_color
, dst_color
,
286 for (int i
= 0; i
< 4; i
++) {
287 result
[i
] = vc4_blend_func_f(b
, src_blend
[i
], dst_blend
[i
],
288 ((i
!= 3) ? blend
->rgb_func
:
294 vc4_nir_splat(nir_builder
*b
, nir_ssa_def
*src
)
296 nir_ssa_def
*or1
= nir_ior(b
, src
, nir_ishl(b
, src
, nir_imm_int(b
, 8)));
297 return nir_ior(b
, or1
, nir_ishl(b
, or1
, nir_imm_int(b
, 16)));
301 vc4_do_blending_i(struct vc4_compile
*c
, nir_builder
*b
,
302 nir_ssa_def
*src_color
, nir_ssa_def
*dst_color
,
303 nir_ssa_def
*src_float_a
)
305 struct pipe_rt_blend_state
*blend
= &c
->fs_key
->blend
;
307 if (!blend
->blend_enable
)
310 enum pipe_format color_format
= c
->fs_key
->color_format
;
311 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
312 nir_ssa_def
*imm_0xff
= nir_imm_int(b
, 0xff);
313 nir_ssa_def
*src_a
= nir_pack_unorm_4x8(b
, src_float_a
);
316 for (alpha_chan
= 0; alpha_chan
< 4; alpha_chan
++) {
317 if (format_swiz
[alpha_chan
] == 3)
320 if (alpha_chan
!= 4) {
321 nir_ssa_def
*shift
= nir_imm_int(b
, alpha_chan
* 8);
322 dst_a
= vc4_nir_splat(b
, nir_iand(b
, nir_ushr(b
, dst_color
,
325 dst_a
= nir_imm_int(b
, ~0);
328 nir_ssa_def
*src_factor
= vc4_blend_channel_i(b
,
329 src_color
, dst_color
,
331 blend
->rgb_src_factor
,
333 nir_ssa_def
*dst_factor
= vc4_blend_channel_i(b
,
334 src_color
, dst_color
,
336 blend
->rgb_dst_factor
,
339 if (alpha_chan
!= 4 &&
340 blend
->alpha_src_factor
!= blend
->rgb_src_factor
) {
341 nir_ssa_def
*src_alpha_factor
=
342 vc4_blend_channel_i(b
,
343 src_color
, dst_color
,
345 blend
->alpha_src_factor
,
347 src_factor
= vc4_nir_set_packed_chan(b
, src_factor
,
351 if (alpha_chan
!= 4 &&
352 blend
->alpha_dst_factor
!= blend
->rgb_dst_factor
) {
353 nir_ssa_def
*dst_alpha_factor
=
354 vc4_blend_channel_i(b
,
355 src_color
, dst_color
,
357 blend
->alpha_dst_factor
,
359 dst_factor
= vc4_nir_set_packed_chan(b
, dst_factor
,
363 nir_ssa_def
*src_blend
= nir_umul_unorm_4x8(b
, src_color
, src_factor
);
364 nir_ssa_def
*dst_blend
= nir_umul_unorm_4x8(b
, dst_color
, dst_factor
);
366 nir_ssa_def
*result
=
367 vc4_blend_func_i(b
, src_blend
, dst_blend
, blend
->rgb_func
);
368 if (alpha_chan
!= 4 && blend
->alpha_func
!= blend
->rgb_func
) {
369 nir_ssa_def
*result_a
= vc4_blend_func_i(b
,
373 result
= vc4_nir_set_packed_chan(b
, result
, result_a
,
380 vc4_logicop(nir_builder
*b
, int logicop_func
,
381 nir_ssa_def
*src
, nir_ssa_def
*dst
)
383 switch (logicop_func
) {
384 case PIPE_LOGICOP_CLEAR
:
385 return nir_imm_int(b
, 0);
386 case PIPE_LOGICOP_NOR
:
387 return nir_inot(b
, nir_ior(b
, src
, dst
));
388 case PIPE_LOGICOP_AND_INVERTED
:
389 return nir_iand(b
, nir_inot(b
, src
), dst
);
390 case PIPE_LOGICOP_COPY_INVERTED
:
391 return nir_inot(b
, src
);
392 case PIPE_LOGICOP_AND_REVERSE
:
393 return nir_iand(b
, src
, nir_inot(b
, dst
));
394 case PIPE_LOGICOP_INVERT
:
395 return nir_inot(b
, dst
);
396 case PIPE_LOGICOP_XOR
:
397 return nir_ixor(b
, src
, dst
);
398 case PIPE_LOGICOP_NAND
:
399 return nir_inot(b
, nir_iand(b
, src
, dst
));
400 case PIPE_LOGICOP_AND
:
401 return nir_iand(b
, src
, dst
);
402 case PIPE_LOGICOP_EQUIV
:
403 return nir_inot(b
, nir_ixor(b
, src
, dst
));
404 case PIPE_LOGICOP_NOOP
:
406 case PIPE_LOGICOP_OR_INVERTED
:
407 return nir_ior(b
, nir_inot(b
, src
), dst
);
408 case PIPE_LOGICOP_OR_REVERSE
:
409 return nir_ior(b
, src
, nir_inot(b
, dst
));
410 case PIPE_LOGICOP_OR
:
411 return nir_ior(b
, src
, dst
);
412 case PIPE_LOGICOP_SET
:
413 return nir_imm_int(b
, ~0);
415 fprintf(stderr
, "Unknown logic op %d\n", logicop_func
);
417 case PIPE_LOGICOP_COPY
:
423 vc4_nir_swizzle_and_pack(struct vc4_compile
*c
, nir_builder
*b
,
424 nir_ssa_def
**colors
)
426 enum pipe_format color_format
= c
->fs_key
->color_format
;
427 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
429 nir_ssa_def
*swizzled
[4];
430 for (int i
= 0; i
< 4; i
++) {
431 swizzled
[i
] = vc4_nir_get_swizzled_channel(b
, colors
,
435 return nir_pack_unorm_4x8(b
,
437 swizzled
[0], swizzled
[1],
438 swizzled
[2], swizzled
[3]));
443 vc4_nir_blend_pipeline(struct vc4_compile
*c
, nir_builder
*b
, nir_ssa_def
*src
,
446 enum pipe_format color_format
= c
->fs_key
->color_format
;
447 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
448 bool srgb
= util_format_is_srgb(color_format
);
450 /* Pull out the float src/dst color components. */
451 nir_ssa_def
*packed_dst_color
= vc4_nir_get_dst_color(b
, sample
);
452 nir_ssa_def
*dst_vec4
= nir_unpack_unorm_4x8(b
, packed_dst_color
);
453 nir_ssa_def
*src_color
[4], *unpacked_dst_color
[4];
454 for (unsigned i
= 0; i
< 4; i
++) {
455 src_color
[i
] = nir_channel(b
, src
, i
);
456 unpacked_dst_color
[i
] = nir_channel(b
, dst_vec4
, i
);
459 if (c
->fs_key
->sample_alpha_to_one
&& c
->fs_key
->msaa
)
460 src_color
[3] = nir_imm_float(b
, 1.0);
462 nir_ssa_def
*packed_color
;
464 /* Unswizzle the destination color. */
465 nir_ssa_def
*dst_color
[4];
466 for (unsigned i
= 0; i
< 4; i
++) {
467 dst_color
[i
] = vc4_nir_get_swizzled_channel(b
,
472 /* Turn dst color to linear. */
473 for (int i
= 0; i
< 3; i
++)
474 dst_color
[i
] = nir_format_srgb_to_linear(b
, dst_color
[i
]);
476 nir_ssa_def
*blend_color
[4];
477 vc4_do_blending_f(c
, b
, blend_color
, src_color
, dst_color
);
479 /* sRGB encode the output color */
480 for (int i
= 0; i
< 3; i
++)
481 blend_color
[i
] = nir_format_linear_to_srgb(b
, blend_color
[i
]);
483 packed_color
= vc4_nir_swizzle_and_pack(c
, b
, blend_color
);
485 nir_ssa_def
*packed_src_color
=
486 vc4_nir_swizzle_and_pack(c
, b
, src_color
);
489 vc4_do_blending_i(c
, b
,
490 packed_src_color
, packed_dst_color
,
494 packed_color
= vc4_logicop(b
, c
->fs_key
->logicop_func
,
495 packed_color
, packed_dst_color
);
497 /* If the bit isn't set in the color mask, then just return the
498 * original dst color, instead.
500 uint32_t colormask
= 0xffffffff;
501 for (int i
= 0; i
< 4; i
++) {
502 if (format_swiz
[i
] < 4 &&
503 !(c
->fs_key
->blend
.colormask
& (1 << format_swiz
[i
]))) {
504 colormask
&= ~(0xff << (i
* 8));
509 nir_iand(b
, packed_color
,
510 nir_imm_int(b
, colormask
)),
511 nir_iand(b
, packed_dst_color
,
512 nir_imm_int(b
, ~colormask
)));
516 vc4_nir_next_output_driver_location(nir_shader
*s
)
520 nir_foreach_variable(var
, &s
->outputs
)
521 maxloc
= MAX2(maxloc
, (int)var
->data
.driver_location
);
527 vc4_nir_store_sample_mask(struct vc4_compile
*c
, nir_builder
*b
,
530 nir_variable
*sample_mask
= nir_variable_create(c
->s
, nir_var_shader_out
,
533 sample_mask
->data
.driver_location
=
534 vc4_nir_next_output_driver_location(c
->s
);
535 sample_mask
->data
.location
= FRAG_RESULT_SAMPLE_MASK
;
537 nir_intrinsic_instr
*intr
=
538 nir_intrinsic_instr_create(c
->s
, nir_intrinsic_store_output
);
539 intr
->num_components
= 1;
540 nir_intrinsic_set_base(intr
, sample_mask
->data
.driver_location
);
542 intr
->src
[0] = nir_src_for_ssa(val
);
543 intr
->src
[1] = nir_src_for_ssa(nir_imm_int(b
, 0));
544 nir_builder_instr_insert(b
, &intr
->instr
);
548 vc4_nir_lower_blend_instr(struct vc4_compile
*c
, nir_builder
*b
,
549 nir_intrinsic_instr
*intr
)
551 nir_ssa_def
*frag_color
= intr
->src
[0].ssa
;
553 if (c
->fs_key
->sample_alpha_to_coverage
) {
554 nir_ssa_def
*a
= nir_channel(b
, frag_color
, 3);
556 /* XXX: We should do a nice dither based on the fragment
557 * coordinate, instead.
559 nir_ssa_def
*num_samples
= nir_imm_float(b
, VC4_MAX_SAMPLES
);
560 nir_ssa_def
*num_bits
= nir_f2i32(b
, nir_fmul(b
, a
, num_samples
));
561 nir_ssa_def
*bitmask
= nir_isub(b
,
566 vc4_nir_store_sample_mask(c
, b
, bitmask
);
569 /* The TLB color read returns each sample in turn, so if our blending
570 * depends on the destination color, we're going to have to run the
571 * blending function separately for each destination sample value, and
572 * then output the per-sample color using TLB_COLOR_MS.
574 nir_ssa_def
*blend_output
;
575 if (c
->fs_key
->msaa
&& blend_depends_on_dst_color(c
)) {
576 c
->msaa_per_sample_output
= true;
578 nir_ssa_def
*samples
[4];
579 for (int i
= 0; i
< VC4_MAX_SAMPLES
; i
++)
580 samples
[i
] = vc4_nir_blend_pipeline(c
, b
, frag_color
, i
);
581 blend_output
= nir_vec4(b
,
582 samples
[0], samples
[1],
583 samples
[2], samples
[3]);
585 blend_output
= vc4_nir_blend_pipeline(c
, b
, frag_color
, 0);
588 nir_instr_rewrite_src(&intr
->instr
, &intr
->src
[0],
589 nir_src_for_ssa(blend_output
));
590 intr
->num_components
= blend_output
->num_components
;
594 vc4_nir_lower_blend_block(nir_block
*block
, struct vc4_compile
*c
)
596 nir_foreach_instr_safe(instr
, block
) {
597 if (instr
->type
!= nir_instr_type_intrinsic
)
599 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
600 if (intr
->intrinsic
!= nir_intrinsic_store_output
)
603 nir_variable
*output_var
= NULL
;
604 nir_foreach_variable(var
, &c
->s
->outputs
) {
605 if (var
->data
.driver_location
==
606 nir_intrinsic_base(intr
)) {
613 if (output_var
->data
.location
!= FRAG_RESULT_COLOR
&&
614 output_var
->data
.location
!= FRAG_RESULT_DATA0
) {
618 nir_function_impl
*impl
=
619 nir_cf_node_get_function(&block
->cf_node
);
621 nir_builder_init(&b
, impl
);
622 b
.cursor
= nir_before_instr(&intr
->instr
);
623 vc4_nir_lower_blend_instr(c
, &b
, intr
);
629 vc4_nir_lower_blend(nir_shader
*s
, struct vc4_compile
*c
)
631 nir_foreach_function(function
, s
) {
632 if (function
->impl
) {
633 nir_foreach_block(block
, function
->impl
) {
634 vc4_nir_lower_blend_block(block
, c
);
637 nir_metadata_preserve(function
->impl
,
638 nir_metadata_block_index
|
639 nir_metadata_dominance
);
643 /* If we didn't do alpha-to-coverage on the output color, we still
644 * need to pass glSampleMask() through.
646 if (c
->fs_key
->sample_coverage
&& !c
->fs_key
->sample_alpha_to_coverage
) {
647 nir_function_impl
*impl
= nir_shader_get_entrypoint(s
);
649 nir_builder_init(&b
, impl
);
650 b
.cursor
= nir_after_block(nir_impl_last_block(impl
));
652 vc4_nir_store_sample_mask(c
, &b
, nir_load_sample_mask_in(&b
));