/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file vc4_nir_lower_blend.c
 *
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask.  Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 *
 * Lowers fixed-function blending to a load of the destination color and a
 * series of ALU operations before the store of the output.
 */
38 #include "util/u_format.h"
40 #include "glsl/nir/nir_builder.h"
41 #include "vc4_context.h"
43 /** Emits a load of the previous fragment color from the tile buffer. */
45 vc4_nir_get_dst_color(nir_builder
*b
)
47 nir_intrinsic_instr
*load
=
48 nir_intrinsic_instr_create(b
->shader
,
49 nir_intrinsic_load_input
);
50 load
->num_components
= 1;
51 load
->const_index
[0] = VC4_NIR_TLB_COLOR_READ_INPUT
;
52 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, NULL
);
53 nir_builder_instr_insert(b
, &load
->instr
);
54 return &load
->dest
.ssa
;
58 vc4_nir_srgb_decode(nir_builder
*b
, nir_ssa_def
*srgb
)
60 nir_ssa_def
*is_low
= nir_flt(b
, srgb
, nir_imm_float(b
, 0.04045));
61 nir_ssa_def
*low
= nir_fmul(b
, srgb
, nir_imm_float(b
, 1.0 / 12.92));
62 nir_ssa_def
*high
= nir_fpow(b
,
65 nir_imm_float(b
, 0.055)),
66 nir_imm_float(b
, 1.0 / 1.055)),
67 nir_imm_float(b
, 2.4));
69 return nir_bcsel(b
, is_low
, low
, high
);
73 vc4_nir_srgb_encode(nir_builder
*b
, nir_ssa_def
*linear
)
75 nir_ssa_def
*is_low
= nir_flt(b
, linear
, nir_imm_float(b
, 0.0031308));
76 nir_ssa_def
*low
= nir_fmul(b
, linear
, nir_imm_float(b
, 12.92));
77 nir_ssa_def
*high
= nir_fsub(b
,
79 nir_imm_float(b
, 1.055),
82 nir_imm_float(b
, 0.41666))),
83 nir_imm_float(b
, 0.055));
85 return nir_bcsel(b
, is_low
, low
, high
);
89 vc4_blend_channel_f(nir_builder
*b
,
96 case PIPE_BLENDFACTOR_ONE
:
97 return nir_imm_float(b
, 1.0);
98 case PIPE_BLENDFACTOR_SRC_COLOR
:
100 case PIPE_BLENDFACTOR_SRC_ALPHA
:
102 case PIPE_BLENDFACTOR_DST_ALPHA
:
104 case PIPE_BLENDFACTOR_DST_COLOR
:
106 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
111 nir_imm_float(b
, 1.0),
114 return nir_imm_float(b
, 1.0);
116 case PIPE_BLENDFACTOR_CONST_COLOR
:
117 return vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_X
+ channel
);
118 case PIPE_BLENDFACTOR_CONST_ALPHA
:
119 return vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_W
);
120 case PIPE_BLENDFACTOR_ZERO
:
121 return nir_imm_float(b
, 0.0);
122 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
123 return nir_fsub(b
, nir_imm_float(b
, 1.0), src
[channel
]);
124 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
125 return nir_fsub(b
, nir_imm_float(b
, 1.0), src
[3]);
126 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
127 return nir_fsub(b
, nir_imm_float(b
, 1.0), dst
[3]);
128 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
129 return nir_fsub(b
, nir_imm_float(b
, 1.0), dst
[channel
]);
130 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
131 return nir_fsub(b
, nir_imm_float(b
, 1.0),
132 vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_X
+ channel
));
133 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
134 return nir_fsub(b
, nir_imm_float(b
, 1.0),
135 vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_W
));
138 case PIPE_BLENDFACTOR_SRC1_COLOR
:
139 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
140 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
141 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
143 fprintf(stderr
, "Unknown blend factor %d\n", factor
);
144 return nir_imm_float(b
, 1.0);
149 vc4_nir_set_packed_chan(nir_builder
*b
, nir_ssa_def
*src0
, nir_ssa_def
*src1
,
152 unsigned chan_mask
= 0xff << (chan
* 8);
154 nir_iand(b
, src0
, nir_imm_int(b
, ~chan_mask
)),
155 nir_iand(b
, src1
, nir_imm_int(b
, chan_mask
)));
159 vc4_blend_channel_i(nir_builder
*b
,
168 case PIPE_BLENDFACTOR_ONE
:
169 return nir_imm_int(b
, ~0);
170 case PIPE_BLENDFACTOR_SRC_COLOR
:
172 case PIPE_BLENDFACTOR_SRC_ALPHA
:
174 case PIPE_BLENDFACTOR_DST_ALPHA
:
176 case PIPE_BLENDFACTOR_DST_COLOR
:
178 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
179 return vc4_nir_set_packed_chan(b
,
185 case PIPE_BLENDFACTOR_CONST_COLOR
:
186 return vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_RGBA
);
187 case PIPE_BLENDFACTOR_CONST_ALPHA
:
188 return vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_AAAA
);
189 case PIPE_BLENDFACTOR_ZERO
:
190 return nir_imm_int(b
, 0);
191 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
192 return nir_inot(b
, src
);
193 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
194 return nir_inot(b
, src_a
);
195 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
196 return nir_inot(b
, dst_a
);
197 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
198 return nir_inot(b
, dst
);
199 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
200 return nir_inot(b
, vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_RGBA
));
201 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
202 return nir_inot(b
, vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_AAAA
));
205 case PIPE_BLENDFACTOR_SRC1_COLOR
:
206 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
207 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
208 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
210 fprintf(stderr
, "Unknown blend factor %d\n", factor
);
211 return nir_imm_int(b
, ~0);
216 vc4_blend_func_f(nir_builder
*b
, nir_ssa_def
*src
, nir_ssa_def
*dst
,
221 return nir_fadd(b
, src
, dst
);
222 case PIPE_BLEND_SUBTRACT
:
223 return nir_fsub(b
, src
, dst
);
224 case PIPE_BLEND_REVERSE_SUBTRACT
:
225 return nir_fsub(b
, dst
, src
);
227 return nir_fmin(b
, src
, dst
);
229 return nir_fmax(b
, src
, dst
);
233 fprintf(stderr
, "Unknown blend func %d\n", func
);
240 vc4_blend_func_i(nir_builder
*b
, nir_ssa_def
*src
, nir_ssa_def
*dst
,
245 return nir_usadd_4x8(b
, src
, dst
);
246 case PIPE_BLEND_SUBTRACT
:
247 return nir_ussub_4x8(b
, src
, dst
);
248 case PIPE_BLEND_REVERSE_SUBTRACT
:
249 return nir_ussub_4x8(b
, dst
, src
);
251 return nir_umin_4x8(b
, src
, dst
);
253 return nir_umax_4x8(b
, src
, dst
);
257 fprintf(stderr
, "Unknown blend func %d\n", func
);
264 vc4_do_blending_f(struct vc4_compile
*c
, nir_builder
*b
, nir_ssa_def
**result
,
265 nir_ssa_def
**src_color
, nir_ssa_def
**dst_color
)
267 struct pipe_rt_blend_state
*blend
= &c
->fs_key
->blend
;
269 if (!blend
->blend_enable
) {
270 for (int i
= 0; i
< 4; i
++)
271 result
[i
] = src_color
[i
];
275 /* Clamp the src color to [0, 1]. Dest is already clamped. */
276 for (int i
= 0; i
< 4; i
++)
277 src_color
[i
] = nir_fsat(b
, src_color
[i
]);
279 nir_ssa_def
*src_blend
[4], *dst_blend
[4];
280 for (int i
= 0; i
< 4; i
++) {
281 int src_factor
= ((i
!= 3) ? blend
->rgb_src_factor
:
282 blend
->alpha_src_factor
);
283 int dst_factor
= ((i
!= 3) ? blend
->rgb_dst_factor
:
284 blend
->alpha_dst_factor
);
285 src_blend
[i
] = nir_fmul(b
, src_color
[i
],
286 vc4_blend_channel_f(b
,
287 src_color
, dst_color
,
289 dst_blend
[i
] = nir_fmul(b
, dst_color
[i
],
290 vc4_blend_channel_f(b
,
291 src_color
, dst_color
,
295 for (int i
= 0; i
< 4; i
++) {
296 result
[i
] = vc4_blend_func_f(b
, src_blend
[i
], dst_blend
[i
],
297 ((i
!= 3) ? blend
->rgb_func
:
303 vc4_nir_splat(nir_builder
*b
, nir_ssa_def
*src
)
305 nir_ssa_def
*or1
= nir_ior(b
, src
, nir_ishl(b
, src
, nir_imm_int(b
, 8)));
306 return nir_ior(b
, or1
, nir_ishl(b
, or1
, nir_imm_int(b
, 16)));
310 vc4_do_blending_i(struct vc4_compile
*c
, nir_builder
*b
,
311 nir_ssa_def
*src_color
, nir_ssa_def
*dst_color
,
312 nir_ssa_def
*src_float_a
)
314 struct pipe_rt_blend_state
*blend
= &c
->fs_key
->blend
;
316 if (!blend
->blend_enable
)
319 enum pipe_format color_format
= c
->fs_key
->color_format
;
320 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
321 nir_ssa_def
*imm_0xff
= nir_imm_int(b
, 0xff);
322 nir_ssa_def
*src_a
= nir_pack_unorm_4x8(b
, src_float_a
);
325 for (alpha_chan
= 0; alpha_chan
< 4; alpha_chan
++) {
326 if (format_swiz
[alpha_chan
] == 3)
329 if (alpha_chan
!= 4) {
330 nir_ssa_def
*shift
= nir_imm_int(b
, alpha_chan
* 8);
331 dst_a
= vc4_nir_splat(b
, nir_iand(b
, nir_ushr(b
, dst_color
,
334 dst_a
= nir_imm_int(b
, ~0);
337 nir_ssa_def
*src_factor
= vc4_blend_channel_i(b
,
338 src_color
, dst_color
,
340 blend
->rgb_src_factor
,
342 nir_ssa_def
*dst_factor
= vc4_blend_channel_i(b
,
343 src_color
, dst_color
,
345 blend
->rgb_dst_factor
,
348 if (alpha_chan
!= 4 &&
349 blend
->alpha_src_factor
!= blend
->rgb_src_factor
) {
350 nir_ssa_def
*src_alpha_factor
=
351 vc4_blend_channel_i(b
,
352 src_color
, dst_color
,
354 blend
->alpha_src_factor
,
356 src_factor
= vc4_nir_set_packed_chan(b
, src_factor
,
360 if (alpha_chan
!= 4 &&
361 blend
->alpha_dst_factor
!= blend
->rgb_dst_factor
) {
362 nir_ssa_def
*dst_alpha_factor
=
363 vc4_blend_channel_i(b
,
364 src_color
, dst_color
,
366 blend
->alpha_dst_factor
,
368 dst_factor
= vc4_nir_set_packed_chan(b
, dst_factor
,
372 nir_ssa_def
*src_blend
= nir_umul_unorm_4x8(b
, src_color
, src_factor
);
373 nir_ssa_def
*dst_blend
= nir_umul_unorm_4x8(b
, dst_color
, dst_factor
);
375 nir_ssa_def
*result
=
376 vc4_blend_func_i(b
, src_blend
, dst_blend
, blend
->rgb_func
);
377 if (alpha_chan
!= 4 && blend
->alpha_func
!= blend
->rgb_func
) {
378 nir_ssa_def
*result_a
= vc4_blend_func_i(b
,
382 result
= vc4_nir_set_packed_chan(b
, result
, result_a
,
389 vc4_logicop(nir_builder
*b
, int logicop_func
,
390 nir_ssa_def
*src
, nir_ssa_def
*dst
)
392 switch (logicop_func
) {
393 case PIPE_LOGICOP_CLEAR
:
394 return nir_imm_int(b
, 0);
395 case PIPE_LOGICOP_NOR
:
396 return nir_inot(b
, nir_ior(b
, src
, dst
));
397 case PIPE_LOGICOP_AND_INVERTED
:
398 return nir_iand(b
, nir_inot(b
, src
), dst
);
399 case PIPE_LOGICOP_COPY_INVERTED
:
400 return nir_inot(b
, src
);
401 case PIPE_LOGICOP_AND_REVERSE
:
402 return nir_iand(b
, src
, nir_inot(b
, dst
));
403 case PIPE_LOGICOP_INVERT
:
404 return nir_inot(b
, dst
);
405 case PIPE_LOGICOP_XOR
:
406 return nir_ixor(b
, src
, dst
);
407 case PIPE_LOGICOP_NAND
:
408 return nir_inot(b
, nir_iand(b
, src
, dst
));
409 case PIPE_LOGICOP_AND
:
410 return nir_iand(b
, src
, dst
);
411 case PIPE_LOGICOP_EQUIV
:
412 return nir_inot(b
, nir_ixor(b
, src
, dst
));
413 case PIPE_LOGICOP_NOOP
:
415 case PIPE_LOGICOP_OR_INVERTED
:
416 return nir_ior(b
, nir_inot(b
, src
), dst
);
417 case PIPE_LOGICOP_OR_REVERSE
:
418 return nir_ior(b
, src
, nir_inot(b
, dst
));
419 case PIPE_LOGICOP_OR
:
420 return nir_ior(b
, src
, dst
);
421 case PIPE_LOGICOP_SET
:
422 return nir_imm_int(b
, ~0);
424 fprintf(stderr
, "Unknown logic op %d\n", logicop_func
);
426 case PIPE_LOGICOP_COPY
:
432 vc4_nir_pipe_compare_func(nir_builder
*b
, int func
,
433 nir_ssa_def
*src0
, nir_ssa_def
*src1
)
437 fprintf(stderr
, "Unknown compare func %d\n", func
);
439 case PIPE_FUNC_NEVER
:
440 return nir_imm_int(b
, 0);
441 case PIPE_FUNC_ALWAYS
:
442 return nir_imm_int(b
, ~0);
443 case PIPE_FUNC_EQUAL
:
444 return nir_feq(b
, src0
, src1
);
445 case PIPE_FUNC_NOTEQUAL
:
446 return nir_fne(b
, src0
, src1
);
447 case PIPE_FUNC_GREATER
:
448 return nir_flt(b
, src1
, src0
);
449 case PIPE_FUNC_GEQUAL
:
450 return nir_fge(b
, src0
, src1
);
452 return nir_flt(b
, src0
, src1
);
453 case PIPE_FUNC_LEQUAL
:
454 return nir_fge(b
, src1
, src0
);
459 vc4_nir_emit_alpha_test_discard(struct vc4_compile
*c
, nir_builder
*b
,
462 if (!c
->fs_key
->alpha_test
)
465 nir_ssa_def
*alpha_ref
=
466 vc4_nir_get_state_uniform(b
, QUNIFORM_ALPHA_REF
);
467 nir_ssa_def
*condition
=
468 vc4_nir_pipe_compare_func(b
, c
->fs_key
->alpha_test_func
,
471 nir_intrinsic_instr
*discard
=
472 nir_intrinsic_instr_create(b
->shader
,
473 nir_intrinsic_discard_if
);
474 discard
->num_components
= 1;
475 discard
->src
[0] = nir_src_for_ssa(nir_inot(b
, condition
));
476 nir_builder_instr_insert(b
, &discard
->instr
);
480 vc4_nir_swizzle_and_pack(struct vc4_compile
*c
, nir_builder
*b
,
481 nir_ssa_def
**colors
)
483 enum pipe_format color_format
= c
->fs_key
->color_format
;
484 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
486 nir_ssa_def
*swizzled
[4];
487 for (int i
= 0; i
< 4; i
++) {
488 swizzled
[i
] = vc4_nir_get_swizzled_channel(b
, colors
,
492 return nir_pack_unorm_4x8(b
,
494 swizzled
[0], swizzled
[1],
495 swizzled
[2], swizzled
[3]));
500 vc4_nir_lower_blend_instr(struct vc4_compile
*c
, nir_builder
*b
,
501 nir_intrinsic_instr
*intr
)
503 enum pipe_format color_format
= c
->fs_key
->color_format
;
504 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
505 bool srgb
= util_format_is_srgb(color_format
);
507 /* Pull out the float src/dst color components. */
508 nir_ssa_def
*packed_dst_color
= vc4_nir_get_dst_color(b
);
509 nir_ssa_def
*dst_vec4
= nir_unpack_unorm_4x8(b
, packed_dst_color
);
510 nir_ssa_def
*src_color
[4], *unpacked_dst_color
[4];
511 for (unsigned i
= 0; i
< 4; i
++) {
512 src_color
[i
] = nir_swizzle(b
, intr
->src
[0].ssa
, &i
, 1, false);
513 unpacked_dst_color
[i
] = nir_swizzle(b
, dst_vec4
, &i
, 1, false);
516 vc4_nir_emit_alpha_test_discard(c
, b
, src_color
[3]);
518 nir_ssa_def
*packed_color
;
520 /* Unswizzle the destination color. */
521 nir_ssa_def
*dst_color
[4];
522 for (unsigned i
= 0; i
< 4; i
++) {
523 dst_color
[i
] = vc4_nir_get_swizzled_channel(b
,
528 /* Turn dst color to linear. */
529 for (int i
= 0; i
< 3; i
++)
530 dst_color
[i
] = vc4_nir_srgb_decode(b
, dst_color
[i
]);
532 nir_ssa_def
*blend_color
[4];
533 vc4_do_blending_f(c
, b
, blend_color
, src_color
, dst_color
);
535 /* sRGB encode the output color */
536 for (int i
= 0; i
< 3; i
++)
537 blend_color
[i
] = vc4_nir_srgb_encode(b
, blend_color
[i
]);
539 packed_color
= vc4_nir_swizzle_and_pack(c
, b
, blend_color
);
541 nir_ssa_def
*packed_src_color
=
542 vc4_nir_swizzle_and_pack(c
, b
, src_color
);
545 vc4_do_blending_i(c
, b
,
546 packed_src_color
, packed_dst_color
,
550 packed_color
= vc4_logicop(b
, c
->fs_key
->logicop_func
,
551 packed_color
, packed_dst_color
);
553 /* If the bit isn't set in the color mask, then just return the
554 * original dst color, instead.
556 uint32_t colormask
= 0xffffffff;
557 for (int i
= 0; i
< 4; i
++) {
558 if (format_swiz
[i
] < 4 &&
559 !(c
->fs_key
->blend
.colormask
& (1 << format_swiz
[i
]))) {
560 colormask
&= ~(0xff << (i
* 8));
563 packed_color
= nir_ior(b
,
564 nir_iand(b
, packed_color
,
565 nir_imm_int(b
, colormask
)),
566 nir_iand(b
, packed_dst_color
,
567 nir_imm_int(b
, ~colormask
)));
569 /* Turn the old vec4 output into a store of the packed color. */
570 nir_instr_rewrite_src(&intr
->instr
, &intr
->src
[0],
571 nir_src_for_ssa(packed_color
));
572 intr
->num_components
= 1;
576 vc4_nir_lower_blend_block(nir_block
*block
, void *state
)
578 struct vc4_compile
*c
= state
;
580 nir_foreach_instr(block
, instr
) {
581 if (instr
->type
!= nir_instr_type_intrinsic
)
583 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
584 if (intr
->intrinsic
!= nir_intrinsic_store_output
)
587 nir_variable
*output_var
= NULL
;
588 nir_foreach_variable(var
, &c
->s
->outputs
) {
589 if (var
->data
.driver_location
== intr
->const_index
[0]) {
596 if (output_var
->data
.location
!= FRAG_RESULT_COLOR
&&
597 output_var
->data
.location
!= FRAG_RESULT_DATA0
) {
601 nir_function_impl
*impl
=
602 nir_cf_node_get_function(&block
->cf_node
);
604 nir_builder_init(&b
, impl
);
605 b
.cursor
= nir_before_instr(&intr
->instr
);
606 vc4_nir_lower_blend_instr(c
, &b
, intr
);
612 vc4_nir_lower_blend(struct vc4_compile
*c
)
614 nir_foreach_overload(c
->s
, overload
) {
615 if (overload
->impl
) {
616 nir_foreach_block(overload
->impl
,
617 vc4_nir_lower_blend_block
, c
);
619 nir_metadata_preserve(overload
->impl
,
620 nir_metadata_block_index
|
621 nir_metadata_dominance
);